-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #36 from Studio-Yandex-Practicum/feature/union_doc…
…x_&_xlxs_parsers Исправил конфликты, добавил poetry run
- Loading branch information
Showing
10 changed files
with
298 additions
and
220 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -158,3 +158,4 @@ cython_debug/ | |
# Static | ||
static/ | ||
|
||
/adaptive_hockey_federation/parser/Именная заявка/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
import os | ||
from pprint import pprint | ||
|
||
import click | ||
import docx # type: ignore | ||
|
||
from adaptive_hockey_federation.parser.docx_parser import ( | ||
docx_parser, | ||
find_numeric_statuses, | ||
) | ||
from adaptive_hockey_federation.parser.xlsx_parser import xlsx_parser | ||
|
||
# File that holds the numeric statuses; parsing_file() refuses to run
# without it because the docx parser needs its contents.
NUMERIC_STATUSES = 'Числовые статусы следж-хоккей 02.10.203.docx'
# File names (without extension) that must never be handed to the parsers.
# The numeric-statuses file is listed here too: it is read separately and
# must not be parsed as a regular application file.
FILES_BLACK_LIST = [
    'На мандатную комиссию',
    'Именная заявка следж-хоккей Энергия Жизни Сочи',
    'ФАХ Сияжар Взрослые',
    'Числовые статусы следж-хоккей 02.10.203',
]
# Extensions (with the leading dot, as returned by os.path.splitext) of the
# files that are collected for parsing.
FILES_EXTENSIONS = [
    '.docx',
    '.xlsx',
]
# Message shown when the numeric-statuses file is missing; '{}' receives the
# expected file name.
NUMERIC_STATUSES_FILE_ERROR = ('Не могу найти {}. Без него не'
                               ' получится загрузить именные заявки.'
                               ' Файл должен находиться в директории с'
                               ' файлами для парсинга')
|
||
|
||
# NOTE: click renders this function's docstring as the command's help text,
# so the docstring is user-facing output and stays in Russian, like the
# option help strings. The help text advertises '-h', which click does not
# register by default; help_option_names enables it next to '--help'.
@click.command(context_settings={'help_option_names': ['-h', '--help']})
@click.option(
    '-p',
    '--path',
    required=True,
    help='Путь до папки с файлами для парсинга',
)
@click.option(
    '-r',
    '--result',
    is_flag=True,
    help='Вывод в консоль извлеченных данных и статистики',
)
def parsing_file(path: str, result: bool) -> None:
    """Функция запускает парсинг файлов в рамках проекта.
    Запуск через командную строку:
    'python parser.py -p(--path) путь_до_папки_с_файлами'
    Вызов справки 'python parser.py -h(--help)'
    """
    # Collect every parsable file under `path` plus the location of the
    # numeric-statuses file, which is required before anything is parsed.
    results_list = []
    files, numeric_statuses_file = get_all_files(path)
    if numeric_statuses_file is None:
        click.echo(NUMERIC_STATUSES_FILE_ERROR.format(NUMERIC_STATUSES))
        return
    numeric_statuses = find_numeric_statuses(
        docx.Document(numeric_statuses_file)
    )
    click.echo(f'Найдено {len(files)} файлов.')
    # get_all_files() only returns .docx and .xlsx files, so anything that
    # is not a docx file goes to the xlsx parser.
    for file in files:
        if file.endswith('docx'):
            results_list.extend(docx_parser(file, numeric_statuses))
        else:
            results_list.extend(xlsx_parser(file))  # type: ignore
    # With --result every extracted record is printed, duplicates included.
    if result:
        for data in results_list:
            pprint(data)
    # Deduplicate for the summary; this requires the parsed records to be
    # hashable.
    results_list = list(set(results_list))
    click.echo(f'Успешно обработано {len(files)} файлов.')
    click.echo(f'Извлечено {len(results_list)} уникальных записей')
|
||
|
||
def get_all_files(path: str) -> tuple[list[str], str | None]:
    """Collect the files to parse from ``path`` and all nested folders.

    A file is collected when its extension is listed in FILES_EXTENSIONS,
    its name does not start with '~' and its name without the extension is
    not listed in FILES_BLACK_LIST.

    Returns:
        A tuple of (paths of the collected files, path to the file named
        NUMERIC_STATUSES or None when no such file was found).
    """
    collected: list[str] = []
    statuses_path: str | None = None
    for root, _subdirs, names in os.walk(path):
        for name in names:
            full_path = os.path.join(root, name)
            # The statuses file is remembered separately; whether it is
            # also collected is decided by the regular filters below.
            if name == NUMERIC_STATUSES:
                statuses_path = full_path
            stem, suffix = os.path.splitext(name)
            if stem.startswith('~'):
                continue
            if suffix not in FILES_EXTENSIONS or stem in FILES_BLACK_LIST:
                continue
            collected.append(full_path)
    return collected, statuses_path
|
||
|
||
# Run the click command when the module is executed as a script; click
# reads the options from the command line.
if __name__ == '__main__':
    parsing_file()
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import openpyxl | ||
|
||
from adaptive_hockey_federation.core.user_card import BaseUserInfo | ||
|
||
|
||
def xlsx_parser(path: str) -> list[BaseUserInfo]:
    """Parse an xlsx file and return its players as BaseUserInfo records.

    The first row of the active sheet is used as the header and every
    following row is mapped header -> cell value. Rows whose 'ФИО игрока'
    cell is empty or contains only whitespace are skipped.

    Args:
        path: Path to the xlsx workbook.

    Returns:
        One BaseUserInfo per row that contains a player name.
    """
    players = []
    workbook = openpyxl.load_workbook(path)
    sheet = workbook.active
    header = [cell.value for cell in sheet[1]]  # type: ignore
    for row in sheet.iter_rows(min_row=2, values_only=True):  # type: ignore
        data = dict(zip(header, row))
        full_name = data.get('ФИО игрока')
        if full_name is None:
            continue
        # Split once; a whitespace-only cell yields no parts and is treated
        # like an empty cell instead of raising IndexError.
        name_parts = full_name.split()  # type: ignore
        if not name_parts:
            continue
        # NOTE(review): the first word is stored as `name` and the second as
        # `surname`, as before; confirm this matches the word order used in
        # the 'ФИО игрока' column.
        player = BaseUserInfo(
            team=data.get('Команда'),
            name=name_parts[0],
            # A one-word name used to raise IndexError and abort the whole
            # import; fall back to an empty surname instead.
            surname=name_parts[1] if len(name_parts) > 1 else '',
            date_of_birth=data.get('Дата рождения'),
            player_number=data.get('Номер игрока'),
            position=data.get('Позиция'),
            classification=data.get('Класс'),
            revision=data.get('Пересмотр (начало сезона)'),
            numeric_status=None
        )
        players.append(player)
    return players
Binary file not shown.
Oops, something went wrong.