From 2c1a055a5b5d841720019479c695bd34ca062877 Mon Sep 17 00:00:00 2001 From: Charlotte Kostelic Date: Tue, 15 Oct 2024 09:46:08 -0400 Subject: [PATCH] Documentation (#5) * added to README.md and docstrings * fixed get_vendor_files test, added to README.md * updated README with additional information * fixed test_cli with validate-file changes * changed mock_creds to mock_vendor_creds * added tests, changed get_control_number * fixed mock_sheet_config_no_creds fixture --- README.md | 66 ++++++++- tests/conftest.py | 278 +++++++++++++++++++++++------------ tests/test_cli.py | 6 +- tests/test_commands.py | 14 +- tests/test_validator.py | 55 +++++++ vendor_file_cli/__init__.py | 87 +++++++---- vendor_file_cli/commands.py | 35 ++++- vendor_file_cli/validator.py | 98 ++++++++---- 8 files changed, 465 insertions(+), 174 deletions(-) diff --git a/README.md b/README.md index d55309e..1ac1d52 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,66 @@ # vendor-file-cli -CLI tool to retrieve files from vendor servers +CLI tool to retrieve files from vendor servers. + +## Setup +### Install with pip +1. Create a folder: `$ mkdir vendor-file-cli` +2. Navigate to folder: `$ cd vendor-file-cli` +3. Create a virtual environment and activate it: + `$ python -m venv .venv & $ source ./.venv/scripts/activate` +4. Install from GitHub: + `$ pip install git+https://github.com/BookOps-CAT/vendor-file-cli` + + +### Install with Poetry +1. Clone repository +2. Navigate to project directory in terminal +3. Activate virtual environment in poetry with `$ poetry shell` +4. Install dependencies with `$ poetry install` + + + +## Usage +``` +$ fetch all-available-records +``` + +This project provides a command line interface to connect to and retrieve files from vendors using FTP/SFTP. Files are copied to the vendor's directory on BookOps' NSDROP SFTP server. + +This CLI can also validate MARC records using the models defined in [record-validator](https://github.com/BookOps-CAT/record-validator). 
Currently this tool is able to validate records for Eastview, Leila, and Amalivre (SASB). + +### Commands +The following information is also available using `validator --help` + +#### Available commands + +##### Retrieve all new files +`$ fetch all-vendor-files` + +Reads credentials for all vendor servers from a `yaml` file. Retrieves all new files for all vendors with credentials in the `yaml` file. + - Logs into each vendor's server, + - Creates a list of files on the server and in the corresponding directory on NSDROP, + - Copies all files from the vendor's server that are not in the NSDROP directory, + - For select vendors the records will be validated before they are copied to NSDROP + - Currently these vendors are Eastview, Leila, and Amalivre (SASB) + - The validation output is written to a [google sheet](https://docs.google.com/spreadsheets/d/1ZYuhMIE1WiduV98Pdzzw7RwZ08O-sJo7HJihWVgSOhQ/edit?usp=sharing). + +##### List all vendors configured to work with CLI +`$ fetch available-vendors` + +Reads the local `yaml` config file and prints the list of vendors who are configured to work with the CLI. + +##### Validate vendor .mrc files +`$ fetch validate-file` + - `-v`/`--vendor` vendor whose files you would like to validate + +Validates files for the vendor specified using the `-v`/`--vendor` option. + +##### Retrieve files for a specified vendor within a specific timeframe + +`$ fetch vendor-files` + - `-v`/`--vendor` vendor whose files you would like to retrieve + - `-d`/`--day` number of days to go back and retrieve files from + - `-h`/`--hour` number of hours to go back and retrieve files from + +Retrieves files for a specified vendor within the specified timeframe. If neither `--day` nor `--hour` is provided, all files will be retrieved. If the file already exists in the corresponding directory on NSDROP, it will be skipped. Command accepts multiple args passed to `-v`/`--vendor`, e.g. 
to fetch files from Eastview and Leila created within the last 10 days: + `$ fetch vendor-files -v eastview -v leila -d 10` diff --git a/tests/conftest.py b/tests/conftest.py index 80b3efc..02e962a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,6 @@ import io import os +from googleapiclient.errors import HttpError # type: ignore from pymarc import Record, Field, Subfield import pytest from click.testing import CliRunner @@ -7,7 +8,7 @@ from file_retriever.file import File, FileInfo -def stub_marc(): +def create_marc(): bib = Record() bib.leader = "00454cam a22001575i 4500" bib.add_field(Field(tag="001", data="on1381158740")) @@ -27,14 +28,6 @@ def stub_marc(): indicators=["0", "0"], subfields=[ Subfield(code="a", value="Title :"), - Subfield( - code="b", - value="subtitle /", - ), - Subfield( - code="c", - value="Author", - ), ], ) ) @@ -124,22 +117,15 @@ def stub_marc(): @pytest.fixture def stub_record(): - return stub_marc() + return create_marc() -@pytest.fixture -def stub_file_info() -> FileInfo: - return mock_file_info(file_name="foo.mrc") - - -def mock_file( - file_name: str | None = None, -) -> File: - if file_name is None: +def mock_file_info(file_name: FileInfo | str | None = None) -> FileInfo: + if isinstance(file_name, FileInfo): + return file_name + elif file_name is None: file_name = "foo.mrc" - marc_data = stub_marc() - record = marc_data.as_marc21() - return File( + return FileInfo( file_name=file_name, file_mtime=1704070800, file_mode=33188, @@ -147,22 +133,25 @@ def mock_file( file_gid=0, file_uid=0, file_size=140401, - file_stream=io.BytesIO(record), ) -def mock_file_info(file_name: str | None = None) -> FileInfo: - if file_name is None: - file_name = "foo.mrc" - return FileInfo( - file_name=file_name, - file_mtime=1704070800, - file_mode=33188, - file_atime=None, - file_gid=0, - file_uid=0, - file_size=140401, - ) +def mock_file(file: File | FileInfo | str) -> File: + if isinstance(file, File): + return file + elif 
isinstance(file, str): + file_info = mock_file_info(file_name=file) + elif isinstance(file, FileInfo): + file_info = file + else: + file_info = mock_file_info(file_name=None) + marc_data = create_marc() + return File.from_fileinfo(file_info, io.BytesIO(marc_data.as_marc21())) + + +@pytest.fixture +def stub_file_info() -> FileInfo: + return mock_file_info(file_name="foo.mrc") class MockClient: @@ -173,23 +162,10 @@ def close(self) -> None: pass def fetch_file(self, file, *args, **kwargs) -> File: - if isinstance(file, str): - return mock_file(file_name=file) - elif isinstance(file, FileInfo): - marc_data = stub_marc() - return File.from_fileinfo(file, io.BytesIO(marc_data.as_marc21())) - elif isinstance(file, File): - return file - else: - return mock_file(file_name=None) + return mock_file(file=file) def get_file_data(self, file_name, *args, **kwargs) -> FileInfo: - if isinstance(file_name, str): - return mock_file_info(file_name=file_name) - elif isinstance(file_name, FileInfo): - return file_name - else: - return mock_file_info(file_name=None) + return mock_file_info(file_name=file_name) def is_active(self) -> bool: return True @@ -198,60 +174,41 @@ def list_file_data(self, *args, **kwargs) -> list[FileInfo]: return [mock_file_info(file_name=None)] def write_file(self, file, *args, **kwargs) -> FileInfo: - if isinstance(file, str): - return mock_file(file_name=file) - elif isinstance(file, FileInfo): - marc_data = stub_marc() - return File.from_fileinfo(file, io.BytesIO(marc_data.as_marc21())) - elif isinstance(file, File): - return file - else: - return mock_file(file_name=None) + return mock_file(file=file) @pytest.fixture -def mock_Client(monkeypatch): +def mock_Client(monkeypatch, mock_vendor_creds): def mock_check_file(*args, **kwargs): return False def mock_session(*args, **kwargs): return MockClient() - def mock_path(*args, **kwargs): - return "foo" + def mock_sheet(*args, **kwargs): + return {"foo": "bar"} - 
monkeypatch.setattr("vendor_file_cli.validator.configure_sheet", mock_path) - monkeypatch.setattr("vendor_file_cli.validator.send_data_to_sheet", mock_path) + monkeypatch.setattr("vendor_file_cli.validator.configure_sheet", MockCreds) + monkeypatch.setattr("vendor_file_cli.validator.send_data_to_sheet", mock_sheet) monkeypatch.setenv("USERPROFILE", "test") - monkeypatch.setattr("os.path.join", mock_path) monkeypatch.setattr(Client, "check_file", mock_check_file) monkeypatch.setattr(Client, "_Client__connect_to_server", mock_session) @pytest.fixture -def mock_creds(): - ( - os.environ["NSDROP_HOST"], - os.environ["NSDROP_USER"], - os.environ["NSDROP_PASSWORD"], - os.environ["NSDROP_PORT"], - os.environ["NSDROP_SRC"], - ) = ("sftp.foo.com", "NSDROP", "nsdrop", "22", "nsdrop_src") - ( - os.environ["EASTVIEW_HOST"], - os.environ["EASTVIEW_USER"], - os.environ["EASTVIEW_PASSWORD"], - os.environ["EASTVIEW_PORT"], - os.environ["EASTVIEW_SRC"], - os.environ["EASTVIEW_DST"], - ) = ( - "sftp.foo.com", - "eastview", - "evp", - "22", - "eastview_src", - "NSDROP/vendor_records/eastview", - ) +def mock_vendor_creds() -> None: + vendors = ["NSDROP", "EASTVIEW"] + for vendor in vendors: + vars = { + f"{vendor}_HOST": f"ftp.{vendor.lower()}.com", + f"{vendor}_USER": f"{vendor.lower()}", + f"{vendor}_PASSWORD": "bar", + f"{vendor}_PORT": "22", + f"{vendor}_SRC": f"{vendor.lower()}_src", + f"{vendor}_DST": f"NSDROP/vendor_records/{vendor.lower()}", + } + for k, v in vars.items(): + os.environ[k] = v @pytest.fixture @@ -274,21 +231,152 @@ def mock_open_yaml_file(mocker): @pytest.fixture -def mock_load_vendor_creds(monkeypatch, mock_open_yaml_file): - def mock_path(*args, **kwargs): - return "testdir" - +def mock_cred_config(monkeypatch, mock_open_yaml_file): def mock_load_vendor_creds(*args, **kwargs): return ["FOO", "BAR", "BAZ", "NSDROP"] monkeypatch.setattr( "vendor_file_cli.config.load_vendor_creds", mock_load_vendor_creds ) - monkeypatch.setenv("USERPROFILE", "test") - 
monkeypatch.setattr("os.path.join", mock_path) @pytest.fixture -def cli_runner(mocker, mock_Client, mock_load_vendor_creds): +def cli_runner(mocker, mock_Client, mock_cred_config): runner = CliRunner() return runner + + +class MockCreds: + def __init__(self): + self.token = "foo" + + @property + def valid(self, *args, **kwargs): + return True + + @property + def expired(self, *args, **kwargs): + return False + + @property + def refresh_token(self, *args, **kwargs): + return "bar" + + def refresh(self, *args, **kwargs): + self.token = "baz" + self.expired = False + self.valid = True + + def to_json(self, *args, **kwargs): + pass + + +@pytest.fixture +def mock_open_file(mocker): + m = mocker.mock_open(read_data="foo") + mocker.patch("builtins.open", m) + return m + + +@pytest.fixture +def mock_sheet_config(monkeypatch): + def get_creds(*args, **kwargs): + return MockCreds() + + def mock_path_exists(*args, **kwargs): + return True + + monkeypatch.setattr( + "google.oauth2.credentials.Credentials.from_authorized_user_file", + get_creds, + ) + monkeypatch.setattr("os.path.exists", mock_path_exists) + monkeypatch.setenv("USERPROFILE", "test") + + +@pytest.fixture +def mock_sheet_config_creds_invalid(monkeypatch, mock_sheet_config, mock_open_file): + monkeypatch.setattr(MockCreds, "valid", False) + monkeypatch.setattr(MockCreds, "expired", True) + + +class MockFlow: + def run_local_server(self, *args, **kwargs): + return MockCreds() + + +@pytest.fixture +def mock_sheet_config_no_creds(monkeypatch, mock_sheet_config, mock_open_file): + def mock_flow(*args, **kwargs): + return MockFlow() + + def auth_user_file(*args, **kwargs): + return None + + monkeypatch.setattr( + "google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file", mock_flow + ) + monkeypatch.setattr( + "google_auth_oauthlib.flow.InstalledAppFlow.from_client_config", mock_flow + ) + monkeypatch.setattr( + "google.oauth2.credentials.Credentials.from_authorized_user_file", + auth_user_file, + ) + + 
+class MockResource: + def append(self, spreadsheetId, range, *args, **kwargs): + self.spreadsheetId = spreadsheetId + self.range = range + return self + + def execute(self, *args, **kwargs): + data = {k: v for k, v in self.__dict__.items() if not k.startswith("__")} + return { + "spreadsheetId": data["spreadsheetId"], + "tableRange": data["range"], + } + + def spreadsheets(self, *args, **kwargs): + return self + + def values(self, *args, **kwargs): + return self + + +class MockError: + def __init__(self): + self.status = 400 + self.reason = "bad_request" + + +@pytest.fixture +def mock_sheet_resource(monkeypatch): + def build_sheet(*args, **kwargs): + return MockResource() + + monkeypatch.setattr("googleapiclient.discovery.build", build_sheet) + monkeypatch.setattr("googleapiclient.discovery.build_from_document", build_sheet) + + +@pytest.fixture +def mock_sheet_http_error(monkeypatch, mock_sheet_resource): + def mock_error(*args, **kwargs): + raise HttpError( + resp=MockError(), + content=b"{'error': {'message': 'Bad Request'}}", + uri="foo", + ) + + monkeypatch.setattr("googleapiclient.discovery.build", mock_error) + monkeypatch.setattr("googleapiclient.discovery.build_from_document", mock_error) + + +@pytest.fixture +def mock_sheet_timeout_error(monkeypatch, mock_sheet_resource): + def mock_error(*args, **kwargs): + raise TimeoutError("Connection timed out") + + monkeypatch.setattr("googleapiclient.discovery.build", mock_error) + monkeypatch.setattr("googleapiclient.discovery.build_from_document", mock_error) diff --git a/tests/test_cli.py b/tests/test_cli.py index 02d1529..b3ecf38 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -84,16 +84,16 @@ def test_vendor_file_cli_get_recent_vendor_files_multiple_vendors(cli_runner, ca def test_vendor_file_cli_validate_vendor_files(cli_runner, caplog): result = cli_runner.invoke( cli=vendor_file_cli, - args=["validate-file", "-v", "foo"], + args=["validate-file", "-v", "foo", "-f", "foo.mrc"], ) assert 
result.exit_code == 0 assert "(NSDROP) Connected to server" in caplog.text assert ( - "(NSDROP) Retrieving list of files in `NSDROP/vendor_records/foo`" + "(NSDROP) Retrieving file info for foo.mrc from NSDROP/vendor_records/foo" in caplog.text ) - assert "(NSDROP) 1 file(s) in `NSDROP/vendor_records/foo`" in caplog.text assert "(NSDROP) Closing client session" in caplog.text assert "(NSDROP) Connection closed" in caplog.text assert "(NSDROP) Connected to server" in caplog.text assert "(NSDROP) Fetching foo.mrc from `NSDROP/vendor_records/foo`" in caplog.text + assert "(NSDROP) Validating foo file: foo.mrc" in caplog.text diff --git a/tests/test_commands.py b/tests/test_commands.py index cc5e78f..6d07bad 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -61,8 +61,8 @@ def test_get_vendor_files(mock_Client, caplog): assert "(NSDROP) Writing foo.mrc to `NSDROP/vendor_records/foo`" in caplog.text -def test_get_vendor_files_no_files(mock_Client, caplog, mock_creds): - get_vendor_files(vendors=["eastview"], days=1, hours=1, minutes=1) +def test_get_vendor_files_no_files(mock_Client, caplog): + get_vendor_files(vendors=["eastview"], days=1, hours=1) assert "(NSDROP) Connected to server" in caplog.text assert "(EASTVIEW) Connected to server" in caplog.text assert "(EASTVIEW) Retrieving list of files in " in caplog.text @@ -70,7 +70,7 @@ def test_get_vendor_files_no_files(mock_Client, caplog, mock_creds): assert "(EASTVIEW) Closing client session" in caplog.text -def test_get_single_file_no_validation(mock_Client, stub_file_info, caplog, mock_creds): +def test_get_single_file_no_validation(mock_Client, stub_file_info, caplog): vendor_client = connect("eastview") nsdrop_client = connect("nsdrop") get_single_file( @@ -89,9 +89,7 @@ def test_get_single_file_no_validation(mock_Client, stub_file_info, caplog, mock assert "(NSDROP) Writing foo.mrc to `NSDROP/vendor_records/eastview`" in caplog.text -def test_get_single_file_with_validation( - mock_Client, 
stub_file_info, caplog, mock_creds -): +def test_get_single_file_with_validation(mock_Client, stub_file_info, caplog): vendor_client = connect("eastview") nsdrop_client = connect("nsdrop") get_single_file( @@ -111,7 +109,7 @@ def test_get_single_file_with_validation( assert "(NSDROP) Writing foo.mrc to `NSDROP/vendor_records/eastview`" in caplog.text -def test_validate_files(mock_Client, caplog, mock_creds): +def test_validate_files(mock_Client, caplog): validate_files(vendor="eastview", files=None) assert ( "(NSDROP) Retrieving list of files in `NSDROP/vendor_records/eastview`" @@ -124,7 +122,7 @@ def test_validate_files(mock_Client, caplog, mock_creds): assert "(NSDROP) Validating eastview file: foo.mrc" in caplog.text -def test_validate_files_with_list(mock_Client, caplog, mock_creds): +def test_validate_files_with_list(mock_Client, caplog): validate_files(vendor="eastview", files=["foo.mrc", "bar.mrc"]) assert ( "(NSDROP) Fetching foo.mrc from `NSDROP/vendor_records/eastview`" in caplog.text diff --git a/tests/test_validator.py b/tests/test_validator.py index 149b59b..6ecc29f 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -1,12 +1,38 @@ from pymarc import Field, Subfield import pytest from vendor_file_cli.validator import ( + configure_sheet, get_control_number, map_vendor_to_code, + send_data_to_sheet, validate_single_record, ) +def test_configure_sheet_success(mock_sheet_config): + creds = configure_sheet() + assert creds.token == "foo" + assert creds.valid is True + assert creds.expired is False + assert creds.refresh_token is not None + + +def test_configure_sheet_invalid(mock_sheet_config_creds_invalid): + creds = configure_sheet() + assert creds.token == "baz" + assert creds.valid is True + assert creds.expired is False + assert creds.refresh_token is not None + + +def test_configure_sheet_generate_new_creds(mock_sheet_config_no_creds): + creds = configure_sheet() + assert creds.token == "foo" + assert creds.valid is True + assert 
creds.expired is False + assert creds.refresh_token is not None + + def test_get_control_number(stub_record): control_no = get_control_number(stub_record) assert control_no == "on1381158740" @@ -59,6 +85,35 @@ def test_map_vendor_to_code(vendor, code): assert map_vendor_to_code(vendor) == code +def test_send_data_to_sheet(mock_sheet_config, mock_sheet_resource): + creds = configure_sheet() + data = send_data_to_sheet(vendor_code="EVP", values=[["foo", "bar"]], creds=creds) + assert sorted(list(data.keys())) == sorted( + [ + "spreadsheetId", + "tableRange", + ] + ) + + +def test_send_data_to_sheet_http_error( + caplog, mock_sheet_config, mock_sheet_http_error +): + creds = configure_sheet() + data = send_data_to_sheet(vendor_code="EVP", values=[["foo", "bar"]], creds=creds) + assert data is None + assert "Error occurred while sending data to google sheet: " in caplog.text + + +def test_send_data_to_sheet_timeout_error( + caplog, mock_sheet_config, mock_sheet_timeout_error +): + creds = configure_sheet() + data = send_data_to_sheet(vendor_code="EVP", values=[["foo", "bar"]], creds=creds) + assert data is None + assert "Error occurred while sending data to google sheet: " in caplog.text + + def test_validate_single_record(stub_record): assert validate_single_record(stub_record) == { "valid": True, diff --git a/vendor_file_cli/__init__.py b/vendor_file_cli/__init__.py index 5b5cb26..d573df2 100644 --- a/vendor_file_cli/__init__.py +++ b/vendor_file_cli/__init__.py @@ -19,7 +19,21 @@ def vendor_file_cli() -> None: short_help="Retrieve and validate files that are not in NSDROP.", ) def get_all_vendor_files() -> None: - """Retrieve files from vendor server not present in vendor's NSDROP directory.""" + """ + Retrieve files from vendor server not present in vendor's NSDROP directory. Creates + list of files on vendor server and list of files in NSDROP directory. Copies files + from vendor server to NSDROP directory if they are not already present. 
Validates + files for Eastview, Leila, and Amalivre (SASB) before copying them to NSDROP and + writes output of validation to google sheet. Files are copied to + NSDROP/vendor_records/{vendor_name}. + + Args: + None + + Returns: + None + + """ vendor_list = load_vendor_creds( os.path.join(os.environ["USERPROFILE"], ".cred/.sftp/connections.yaml") ) @@ -35,6 +49,42 @@ def get_available_vendors() -> None: click.echo(f"Available vendors: {vendor_list}") +@vendor_file_cli.command( + "validate-file", + short_help="Validate vendor file on NSDROP.", +) +@click.option( + "--vendor", + "-v", + "vendor", + help="Which vendor to validate files for.", +) +@click.option( + "--file", + "-f", + "file", + help="The file you would like to validate.", +) +def validate_vendor_files(vendor: str, file: str) -> None: + """ + Validate files for a specific vendor. + + Args: + vendor: + name of vendor to validate files for. files will be validated for + the specified vendor + file: + name of file to validate + Returns: + None + """ + if not os.getenv("GITHUB_ACTIONS"): + load_vendor_creds( + os.path.join(os.environ["USERPROFILE"], ".cred/.sftp/connections.yaml") + ) + validate_files(vendor=vendor, files=[file]) + + @vendor_file_cli.command( "vendor-files", short_help="Retrieve files from remote server based on timedelta.", @@ -63,15 +113,7 @@ def get_available_vendors() -> None: type=int, help="How many hours back to retrieve files.", ) -@click.option( - "--minutes", - "-m", - "minutes", - default=0, - type=int, - help="How many minutes back to retrieve files.", -) -def get_recent_vendor_files(vendor: str, days: int, hours: int, minutes: int) -> None: +def get_recent_vendor_files(vendor: str, days: int, hours: int) -> None: """ Retrieve files from remote server for specified vendor(s). 
@@ -85,8 +127,9 @@ def get_recent_vendor_files(vendor: str, days: int, hours: int, minutes: int) -> number of days to go back and retrieve files from hours: number of hours to go back and retrieve files from - minutes: - number of minutes to go back and retrieve files from + + Returns: + None """ all_available_vendors = load_vendor_creds( @@ -96,25 +139,7 @@ def get_recent_vendor_files(vendor: str, days: int, hours: int, minutes: int) -> vendor_list = all_available_vendors else: vendor_list = [i.upper() for i in vendor] - get_vendor_files(vendors=vendor_list, days=days, hours=hours, minutes=minutes) - - -@vendor_file_cli.command( - "validate-file", - short_help="Validate vendor file on NSDROP.", -) -@click.option( - "--vendor", - "-v", - "vendor", - help="Which vendor to validate files for.", -) -def validate_vendor_files(vendor: str) -> None: - if not os.getenv("GITHUB_ACTIONS"): - load_vendor_creds( - os.path.join(os.environ["USERPROFILE"], ".cred/.sftp/connections.yaml") - ) - validate_files(vendor=vendor, files=None) + get_vendor_files(vendors=vendor_list, days=days, hours=hours) def main(): diff --git a/vendor_file_cli/commands.py b/vendor_file_cli/commands.py index 4d1ef5a..e1b8769 100644 --- a/vendor_file_cli/commands.py +++ b/vendor_file_cli/commands.py @@ -37,12 +37,11 @@ def get_vendor_files( vendors: list[str], days: int = 0, hours: int = 0, - minutes: int = 0, ) -> None: """ Retrieve files from remote server for vendors in `vendor_list`. Forms timedelta - object from `days`, `hours`, and `minutes` and creates list of files created within - that time delta. If days, hours, and minutes args are not provided, all files that + object from `days` and `hours` and creates list of files created within + that time delta. If days and hours args are not provided, all files that are in the vendor's remote directory will be included in the list. 
Compares that list of files to the list of files in the vendor's NSDROP directory only copies the files that are not already present in the NSDROP directory. Will validate files @@ -52,14 +51,13 @@ def get_vendor_files( vendors: list of vendor names days: number of days to retrieve files from (default 0) hours: number of hours to retrieve files from (default 0) - minutes: number of minutes to retrieve files from (default 0) Returns: None """ nsdrop = connect("nsdrop") - timedelta = datetime.timedelta(days=days, hours=hours, minutes=minutes) + timedelta = datetime.timedelta(days=days, hours=hours) for vendor in vendors: with connect(vendor) as client: file_list = client.list_file_info( @@ -82,6 +80,20 @@ def get_vendor_files( def get_single_file( vendor: str, file: FileInfo, vendor_client: Client, nsdrop_client: Client ) -> None: + """ + Get a file from a vendor server and put it in the NSDROP directory. + Validates the file if the vendor is EASTVIEW, LEILA, or AMALIVRE_SASB. + + Args: + vendor: name of vendor + file: `FileInfo` object representing the file to retrieve + vendor_client: `Client` object for the vendor server + nsdrop_client: `Client` object for the NSDROP server + + Returns: + None + + """ fetched_file = vendor_client.get_file( file=file, remote_dir=os.environ[f"{vendor.upper()}_SRC"] ) @@ -99,6 +111,19 @@ def get_single_file( def validate_files(vendor: str, files: list | None) -> None: + """ + Validate files on NSDROP for a specific vendor. + + Args: + vendor: + name of vendor + files: + list of file names to validate (default None). If None, all + files in the vendor's directory on NSDROP will be validated. 
+ Returns: + None + """ file_dir = os.environ[f"{vendor.upper()}_DST"] vendor_file_list = [] with connect("nsdrop") as nsdrop_client: diff --git a/vendor_file_cli/validator.py b/vendor_file_cli/validator.py index 321c1e2..98da70d 100644 --- a/vendor_file_cli/validator.py +++ b/vendor_file_cli/validator.py @@ -20,7 +20,13 @@ def configure_sheet() -> Credentials: """ - A function to append data to a google sheet for a specific vendor + Get or update credentials for google sheets API and save token to file. + + Args: + None + + Returns: + google.oauth2.credentials.Credentials: Credentials object for google sheet API. """ scopes = ["https://www.googleapis.com/auth/spreadsheets"] cred_path = os.path.join( @@ -42,37 +48,32 @@ def configure_sheet() -> Credentials: def get_control_number(record: Record) -> str: - try: - return str(record["001"].data) - except KeyError: - pass - try: - return record["035"]["a"] - except KeyError: - pass - try: - return record["020"]["a"] - except KeyError: - pass - try: - return record["010"]["a"] - except KeyError: - pass - try: - return record["022"]["a"] - except KeyError: - pass - try: - return record["024"]["a"] - except KeyError: - pass - try: - return record["852"]["h"] - except KeyError: - return "None" + """Get control number from MARC record to output to google sheet.""" + field = record.get("001", None) + if field is not None: + control_number = field.data + if control_number is not None: + return control_number + field_subfield_pairs = [ + ("035", "a"), + ("020", "a"), + ("010", "a"), + ("022", "a"), + ("024", "a"), + ("852", "h"), + ] + for f, s in field_subfield_pairs: + # a field that lacks the wanted subfield falls through to the next tag + field = record.get(f, None) + if field is not None: + subfield = field.get(s) + if subfield is not None: + return subfield + return "None" def map_vendor_to_code(vendor: str) -> str: + """Map vendor name to vendor code for output to google sheet.""" vendor_map = { "EASTVIEW": "EVP", "LEILA": "LEILA", @@ -85,7 +86,7 @@ 
def map_vendor_to_code(vendor: str) -> str: def read_marc_file_stream(file_obj: File) -> Generator[Record, None, None]: - """for file on NSDROP as File object""" + """Read the filestream within a File object using pymarc""" fh = file_obj.file_stream.getvalue() reader = MARCReader(fh) for record in reader: @@ -94,7 +95,19 @@ def read_marc_file_stream(file_obj: File) -> Generator[Record, None, None]: def send_data_to_sheet(vendor_code: str, values: list, creds: Credentials): """ - A function to append data to a google sheet for a specific vendor + A function to write data to a google sheet for a specific vendor. The function + uses the google sheets API to write data to the sheet. The function takes the + vendor code, the values to write to the sheet, and the credentials for the google + sheet API. + + Args: + + vendor_code: the vendor code for the vendor to write data for. + values: a list of values to write to the google sheet. + creds: the credentials for the google sheets API as a `Credentials` object. + + Returns: + the response from the google sheets API as a dictionary. """ body = { "majorDimension": "ROWS", @@ -124,6 +137,17 @@ def send_data_to_sheet(vendor_code: str, values: list, creds: Credentials): def validate_single_record(record: Record) -> dict[str, Any]: + """ + Validate a single MARC record using the RecordModel. If the record is invalid, + return a dictionary with the error information. If the record is valid, return + a dictionary with the validation information. + + Args: + record: pymarc.Record object representing the record to validate. + + Returns: + dictionary with validation output. + """ try: RecordModel(leader=str(record.leader), fields=record.fields) out = { @@ -152,6 +176,18 @@ def validate_single_record(record: Record) -> dict[str, Any]: def validate_file(file_obj: File, vendor: str, write: bool) -> None: + """ + Validate a file of MARC records and output to google sheet. 
+ + Args: + file_obj: `File` object representing the file to validate. + vendor: name of vendor to validate file for. + write: whether to write the validation results to the google sheet. + + Returns: + None + + """ if vendor.upper() in ["EASTVIEW", "LEILA", "AMALIVRE_SASB"]: vendor_code = map_vendor_to_code(vendor) record_count = len([record for record in read_marc_file_stream(file_obj)])