From 53de1d8d0e9533cbba7c643d0a105260cf95ad38 Mon Sep 17 00:00:00 2001 From: Charlotte Kostelic Date: Mon, 28 Oct 2024 15:36:37 -0400 Subject: [PATCH] Updated logging (#6) * updated dependencies * removed livetest marker * moved load_vendor_creds and logger_config to utils * added utils.py * major updates to cli * fixed utils.py * fixed f string error * removed extra branch from unit-tests.yaml * fixed cli test * fixed issue in logging dictconfig * more fixes * added fixture to mock USERPROFILE env var --- .github/workflows/unit-tests.yaml | 2 +- dev-requirements.txt | 6 +- poetry.lock | 37 ++- pyproject.toml | 4 +- requirements.txt | 6 +- tests/conftest.py | 393 +++++++++++------------------- tests/test_cli.py | 79 ++---- tests/test_commands.py | 128 +++------- tests/test_config.py | 74 ------ tests/test_utils.py | 153 ++++++++++++ tests/test_validator.py | 206 ++++++++-------- vendor_file_cli/__init__.py | 41 ++-- vendor_file_cli/commands.py | 112 +++------ vendor_file_cli/config.py | 63 ----- vendor_file_cli/utils.py | 240 ++++++++++++++++++ vendor_file_cli/validator.py | 336 ++++++++++++------------- 16 files changed, 930 insertions(+), 950 deletions(-) delete mode 100644 tests/test_config.py create mode 100644 tests/test_utils.py delete mode 100644 vendor_file_cli/config.py create mode 100644 vendor_file_cli/utils.py diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml index cfacc4f..42de978 100644 --- a/.github/workflows/unit-tests.yaml +++ b/.github/workflows/unit-tests.yaml @@ -24,7 +24,7 @@ jobs: python -m pip install -r dev-requirements.txt python -m pip install -e . - name: Run tests - run: python -m pytest -m "not livetest" --cov=vendor_file_cli/ + run: python -m pytest --cov=vendor_file_cli/ - name: Send report to Coveralls uses: AndreMiras/coveralls-python-action@develop with: diff --git a/dev-requirements.txt b/dev-requirements.txt index dfb0f90..c010812 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -9,7 +9,7 @@ colorama==0.4.6 ; python_version >= "3.10" and python_version < "4.0" and (platf coverage[toml]==7.6.1 ; python_version >= "3.10" and python_version < "4.0" cryptography==43.0.0 ; python_version >= "3.10" and python_version < "4.0" exceptiongroup==1.2.2 ; python_version >= "3.10" and python_version < "3.11" -file-retriever @ git+https://github.com/BookOps-CAT/file-retriever.git@8433d49ae3150901c97f8068c03f071b2b66d46a ; python_version >= "3.10" and python_version < "4.0" +file-retriever @ git+https://github.com/BookOps-CAT/file-retriever.git@405c6d20fb1a5017140b9aa3aeb57168d733d98a ; python_version >= "3.10" and python_version < "4.0" google-api-core==2.20.0 ; python_version >= "3.10" and python_version < "4.0" google-api-python-client==2.146.0 ; python_version >= "3.10" and python_version < "4.0" google-auth-httplib2==0.2.0 ; python_version >= "3.10" and python_version < "4.0" @@ -19,6 +19,7 @@ googleapis-common-protos==1.65.0 ; python_version >= "3.10" and python_version < httplib2==0.22.0 ; python_version >= "3.10" and python_version < "4.0" idna==3.10 ; python_version >= "3.10" and python_version < "4.0" iniconfig==2.0.0 ; python_version >= "3.10" and python_version < "4.0" +loggly-python-handler==1.0.1 ; python_version >= "3.10" and python_version < "4.0" numpy==2.1.1 ; python_version >= "3.10" and python_version < "4.0" oauthlib==3.2.2 ; python_version >= "3.10" and python_version < "4.0" packaging==24.1 ; python_version >= "3.10" and python_version < "4.0" @@ -42,7 +43,8 @@ pytest==8.3.2 ; python_version >= "3.10" and python_version < "4.0" python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version < "4.0" pytz==2024.2 ; python_version >= "3.10" and python_version < "4.0" pyyaml==6.0.2 ; python_version >= "3.10" and python_version < "4.0" -record-validator @ git+https://github.com/BookOps-CAT/record-validator.git@2f0c7934915b44db01c7e21294e78ffb3632e9ff ; python_version >= "3.10" and python_version < "4.0" +record-validator @ git+https://github.com/BookOps-CAT/record-validator.git@c81f97bbbebef3779319ec3ec1fe9e7dec48ccf9 ; python_version >= "3.10" and python_version < "4.0" +requests-futures==1.0.1 ; python_version >= "3.10" and python_version < "4.0" requests-oauthlib==2.0.0 ; python_version >= "3.10" and python_version < "4.0" requests==2.32.3 ; python_version >= "3.10" and python_version < "4.0" rsa==4.9 ; python_version >= "3.10" and python_version < "4" diff --git a/poetry.lock b/poetry.lock index b8ad8fd..0345ff3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -445,7 +445,7 @@ types-pyyaml = "^6.0.12.20240311" type = "git" url = "https://github.com/BookOps-CAT/file-retriever.git" reference = "HEAD" -resolved_reference = "8433d49ae3150901c97f8068c03f071b2b66d46a" +resolved_reference = "405c6d20fb1a5017140b9aa3aeb57168d733d98a" [[package]] name = "google-api-core" @@ -600,6 +600,20 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "loggly-python-handler" +version = "1.0.1" +description = "Python logging handler that sends messages to Loggly" +optional = false +python-versions = "*" +files = [ + {file = "loggly-python-handler-1.0.1.tar.gz", hash = "sha256:adce302d21fbc5b9647a01067243fb549e9992bfe5ab09807b8fad56c9411995"}, + {file = "loggly_python_handler-1.0.1-py3-none-any.whl", hash = "sha256:0a4a826adb31ffeb76dac7ca116ccb032ed923254244160e70ddd2c3e88df4fd"}, +] + +[package.dependencies] +requests-futures = ">=1.0.0" + [[package]] name = "numpy" version = "2.1.1" @@ -1223,7 +1237,7 @@ pymarc = "^5.2.2" type = "git" url = "https://github.com/BookOps-CAT/record-validator.git" reference = "HEAD" -resolved_reference = "2f0c7934915b44db01c7e21294e78ffb3632e9ff" +resolved_reference = "c81f97bbbebef3779319ec3ec1fe9e7dec48ccf9" [[package]] name = "requests" @@ -1246,6 +1260,23 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "requests-futures" +version = "1.0.1" +description = "Asynchronous Python HTTP for Humans." +optional = false +python-versions = "*" +files = [ + {file = "requests-futures-1.0.1.tar.gz", hash = "sha256:f55a4ef80070e2858e7d1e73123d2bfaeaf25b93fd34384d8ddf148e2b676373"}, + {file = "requests_futures-1.0.1-py2.py3-none-any.whl", hash = "sha256:4a2f5472e9911a79532137d156aa937cd9cd90fec55677f71b2976d1f7a66d38"}, +] + +[package.dependencies] +requests = ">=1.2.0" + +[package.extras] +dev = ["black (>=22.3.0)", "build (>=0.7.0)", "isort (>=5.11.4)", "pyflakes (>=2.2.0)", "pytest (>=6.2.5)", "pytest-cov (>=3.0.0)", "pytest-network (>=0.0.1)", "readme-renderer[rst] (>=26.0)", "twine (>=3.4.2)"] + [[package]] name = "requests-oauthlib" version = "2.0.0" @@ -1389,4 +1420,4 @@ zstd = ["zstandard (>=0.18.0)"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "ad66b2b25e03e28b5e553f646b2144bc72246c33accc5711d1a6ba1bdbb2b3ba" +content-hash = "182a4a8cb683c50b33afcf8df2543cde7902d4dd8cb030a9fb7d989a7a15eef9" diff --git a/pyproject.toml b/pyproject.toml index 8652843..3615a79 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ google-api-python-client = "^2.146.0" google-auth-oauthlib = "^1.2.1" pandas = "^2.2.3" pandas-stubs = "^2.2.2.240909" +loggly-python-handler = "^1.0.1" [tool.poetry.group.dev.dependencies] @@ -32,9 +33,6 @@ fetch = "vendor_file_cli:main" [tool.pytest.ini_options] testpaths = ["tests"] -markers = [ - "livetest: mark a test as using live credentials", -] [tool.coverage.run] diff --git a/requirements.txt b/requirements.txt index e5e7ed6..6f8fae5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ charset-normalizer==3.3.2 ; python_version >= "3.10" and python_version < "4.0" click==8.1.7 ; python_version >= "3.10" and python_version < "4.0" colorama==0.4.6 ; python_version >= "3.10" and python_version < "4.0" and platform_system == "Windows" cryptography==43.0.0 ; python_version >= "3.10" and python_version < "4.0" -file-retriever @ git+https://github.com/BookOps-CAT/file-retriever.git@8433d49ae3150901c97f8068c03f071b2b66d46a ; python_version >= "3.10" and python_version < "4.0" +file-retriever @ git+https://github.com/BookOps-CAT/file-retriever.git@405c6d20fb1a5017140b9aa3aeb57168d733d98a ; python_version >= "3.10" and python_version < "4.0" google-api-core==2.20.0 ; python_version >= "3.10" and python_version < "4.0" google-api-python-client==2.146.0 ; python_version >= "3.10" and python_version < "4.0" google-auth-httplib2==0.2.0 ; python_version >= "3.10" and python_version < "4.0" @@ -16,6 +16,7 @@ google-auth==2.35.0 ; python_version >= "3.10" and python_version < "4.0" googleapis-common-protos==1.65.0 ; python_version >= "3.10" and python_version < "4.0" httplib2==0.22.0 ; python_version >= "3.10" and python_version < "4.0" idna==3.10 ; python_version >= "3.10" and python_version < "4.0" +loggly-python-handler==1.0.1 ; python_version >= "3.10" and python_version < "4.0" numpy==2.1.1 ; python_version >= "3.10" and python_version < "4.0" oauthlib==3.2.2 ; python_version >= "3.10" and python_version < "4.0" pandas-stubs==2.2.2.240909 ; python_version >= "3.10" and python_version < "4.0" @@ -34,7 +35,8 @@ pyparsing==3.1.4 ; python_version >= "3.10" and python_version < "4.0" python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version < "4.0" pytz==2024.2 ; python_version >= "3.10" and python_version < "4.0" pyyaml==6.0.2 ; python_version >= "3.10" and python_version < "4.0" -record-validator @ git+https://github.com/BookOps-CAT/record-validator.git@2f0c7934915b44db01c7e21294e78ffb3632e9ff ; python_version >= "3.10" and python_version < "4.0" +record-validator @ git+https://github.com/BookOps-CAT/record-validator.git@c81f97bbbebef3779319ec3ec1fe9e7dec48ccf9 ; python_version >= "3.10" and python_version < "4.0" +requests-futures==1.0.1 ; python_version >= "3.10" and python_version < "4.0" requests-oauthlib==2.0.0 ; python_version >= "3.10" and python_version < "4.0" requests==2.32.3 ; python_version >= "3.10" and python_version < "4.0" rsa==4.9 ; python_version >= "3.10" and python_version < "4" diff --git a/tests/conftest.py b/tests/conftest.py index 02e962a..fe275a5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,254 +1,175 @@ +import datetime import io +import logging import os from googleapiclient.errors import HttpError # type: ignore -from pymarc import Record, Field, Subfield +from pydantic_core import ValidationError, InitErrorDetails +from pymarc import Record, Field, Subfield, Indicators import pytest from click.testing import CliRunner -from file_retriever.connect import Client from file_retriever.file import File, FileInfo +from file_retriever.connect import Client +from file_retriever._clients import _ftpClient, _sftpClient + + +class MockFileInfo(FileInfo): + def __init__(self, file_name: str | None = None): + today = datetime.datetime.now(tz=datetime.timezone.utc) + mtime = (today - datetime.timedelta(days=10)).timestamp() + if file_name is None: + file_name = "foo.mrc" + super().__init__(file_name, mtime, 33188, 140401, 0, 0, None) -def create_marc(): +def mock_marc() -> Record: bib = Record() bib.leader = "00454cam a22001575i 4500" bib.add_field(Field(tag="001", data="on1381158740")) - bib.add_field(Field(tag="008", data="190306s2017 ht a j 000 1 hat d")) - bib.add_field( - Field( - tag="050", - indicators=[" ", "4"], - subfields=[ - Subfield(code="a", value="F00"), - ], - ) - ) - bib.add_field( - Field( - tag="245", - indicators=["0", "0"], - subfields=[ - Subfield(code="a", value="Title :"), - ], - ) - ) - bib.add_field( - Field( - tag="300", - indicators=[" ", " "], - subfields=[ - Subfield(code="a", value="100 pages :"), - ], - ) - ) bib.add_field( Field( tag="852", - indicators=["8", " "], - subfields=[ - Subfield(code="h", value="ReCAP 23-100000"), - ], - ) - ) - bib.add_field( - Field( - tag="901", - indicators=[" ", " "], - subfields=[ - Subfield(code="a", value="EVP"), - ], - ) - ) - bib.add_field( - Field( - tag="910", - indicators=[" ", " "], - subfields=[ - Subfield(code="a", value="RL"), - ], - ) - ) - bib.add_field( - Field( - tag="949", - indicators=[" ", "1"], - subfields=[ - Subfield(code="z", value="8528"), - Subfield(code="a", value="ReCAP 23-100000"), - Subfield(code="c", value="1"), - Subfield(code="h", value="43"), - Subfield(code="i", value="33433123456789"), - Subfield(code="l", value="rcmf2"), - Subfield(code="m", value="bar"), - Subfield(code="p", value="1.00"), - Subfield(code="t", value="55"), - Subfield(code="u", value="foo"), - Subfield(code="v", value="EVP"), - ], - ) - ) - bib.add_field( - Field( - tag="960", - indicators=[" ", " "], - subfields=[ - Subfield(code="s", value="100"), - Subfield(code="t", value="MAF"), - Subfield(code="u", value="123456apprv"), - ], - ) - ) - bib.add_field( - Field( - tag="980", - indicators=[" ", " "], - subfields=[ - Subfield(code="a", value="240101"), - Subfield(code="b", value="100"), - Subfield(code="c", value="100"), - Subfield(code="d", value="000"), - Subfield(code="e", value="200"), - Subfield(code="f", value="123456"), - Subfield(code="g", value="1"), - ], + indicators=Indicators("8", " "), + subfields=[Subfield("h", "ReCAP 23-100000")], ) ) return bib @pytest.fixture -def stub_record(): - return create_marc() - - -def mock_file_info(file_name: FileInfo | str | None = None) -> FileInfo: - if isinstance(file_name, FileInfo): - return file_name - elif file_name is None: - file_name = "foo.mrc" - return FileInfo( - file_name=file_name, - file_mtime=1704070800, - file_mode=33188, - file_atime=None, - file_gid=0, - file_uid=0, - file_size=140401, - ) - - -def mock_file(file: File | FileInfo | str) -> File: - if isinstance(file, File): - return file - elif isinstance(file, str): - file_info = mock_file_info(file_name=file) - elif isinstance(file, FileInfo): - file_info = file - else: - file_info = mock_file_info(file_name=None) - marc_data = create_marc() - return File.from_fileinfo(file_info, io.BytesIO(marc_data.as_marc21())) +def stub_record() -> Record: + return mock_marc() @pytest.fixture def stub_file_info() -> FileInfo: - return mock_file_info(file_name="foo.mrc") + return MockFileInfo(file_name="foo.mrc") -class MockClient: - def _check_dir(self, *args, **kwargs) -> None: - pass +@pytest.fixture +def stub_file(stub_file_info, mock_valid_record) -> File: + return File.from_fileinfo(stub_file_info, io.BytesIO(mock_marc().as_marc21())) + - def close(self) -> None: +@pytest.fixture +def mock_valid_record(monkeypatch, stub_record) -> Record: + def mock_validation(*args, **kwargs): pass - def fetch_file(self, file, *args, **kwargs) -> File: - return mock_file(file=file) + monkeypatch.setattr("vendor_file_cli.validator.RecordModel", mock_validation) + return stub_record - def get_file_data(self, file_name, *args, **kwargs) -> FileInfo: - return mock_file_info(file_name=file_name) - def is_active(self) -> bool: - return True +@pytest.fixture +def mock_invalid_record(monkeypatch, stub_record) -> Record: + def mock_validation(*args, **kwargs): + errors = [InitErrorDetails(type="missing", loc=("fields", "960"), msg="foo")] + raise ValidationError.from_exception_data("list", errors) - def list_file_data(self, *args, **kwargs) -> list[FileInfo]: - return [mock_file_info(file_name=None)] + monkeypatch.setattr("vendor_file_cli.validator.RecordModel", mock_validation) + return stub_record - def write_file(self, file, *args, **kwargs) -> FileInfo: - return mock_file(file=file) +class MockSession: + def _check_dir(self, *args, **kwargs): + pass -@pytest.fixture -def mock_Client(monkeypatch, mock_vendor_creds): - def mock_check_file(*args, **kwargs): - return False + def close(self, *args, **kwargs): + pass - def mock_session(*args, **kwargs): - return MockClient() + def get_file_data(self, file_name, *args, **kwargs) -> FileInfo: + return MockFileInfo(file_name=file_name) + + def list_file_data(self, dir, *args, **kwargs) -> list[FileInfo]: + if "NSDROP" in dir: + return [MockFileInfo(file_name="bar.mrc")] + else: + return [MockFileInfo(file_name="foo.mrc")] + + def list_file_names(self, dir, *args, **kwargs) -> list[str]: + if "NSDROP" in dir: + return ["bar.mrc"] + elif "midwest" or "MIDWEST" in dir: + return ["NYP_10012024_ALL_01.mrc"] + else: + return ["foo.mrc"] - def mock_sheet(*args, **kwargs): - return {"foo": "bar"} + def fetch_file(self, file, *args, **kwargs) -> File: + return File.from_fileinfo(file, io.BytesIO(mock_marc().as_marc21())) - monkeypatch.setattr("vendor_file_cli.validator.configure_sheet", MockCreds) - monkeypatch.setattr("vendor_file_cli.validator.send_data_to_sheet", mock_sheet) - monkeypatch.setenv("USERPROFILE", "test") - monkeypatch.setattr(Client, "check_file", mock_check_file) - monkeypatch.setattr(Client, "_Client__connect_to_server", mock_session) + def write_file(self, file, *args, **kwargs) -> FileInfo: + return MockFileInfo(file_name=file.file_name) @pytest.fixture -def mock_vendor_creds() -> None: - vendors = ["NSDROP", "EASTVIEW"] - for vendor in vendors: - vars = { - f"{vendor}_HOST": f"ftp.{vendor.lower()}.com", - f"{vendor}_USER": f"{vendor.lower()}", - f"{vendor}_PASSWORD": "bar", - f"{vendor}_PORT": "22", - f"{vendor}_SRC": f"{vendor.lower()}_src", - f"{vendor}_DST": f"NSDROP/vendor_records/{vendor.lower()}", - } - for k, v in vars.items(): - os.environ[k] = v +def mock_Client(monkeypatch, mock_sheet_config): + original_connect_to_server = Client._Client__connect_to_server + + def mock_connect_to_server(self, username, password): + original_connect_to_server(self, username, password) + return MockSession() + + monkeypatch.setattr(_ftpClient, "_connect_to_server", lambda *args, **kwargs: None) + monkeypatch.setattr(_sftpClient, "_connect_to_server", lambda *args, **kwargs: None) + monkeypatch.setattr(Client, "_Client__connect_to_server", mock_connect_to_server) + monkeypatch.setattr(Client, "check_file", lambda *args, **kwargs: False) + monkeypatch.setattr(Client, "is_file", True) + monkeypatch.setattr( + "vendor_file_cli.validator.write_data_to_sheet", lambda *args, **kwargs: None + ) -@pytest.fixture -def mock_open_yaml_file(mocker): - vendor_list = [] - vendors = ["FOO", "BAR", "BAZ", "NSDROP"] +@pytest.fixture(autouse=True) +def mock_vendor_creds(monkeypatch) -> str: + vendors = ["NSDROP", "EASTVIEW", "LEILA", "MIDWEST_NYPL", "BAKERTAYLOR_BPL"] + env = {"LOGGLY_TOKEN": "foo"} for vendor in vendors: - string = ( - f"{vendor}_HOST: ftp.{vendor.lower()}.com\n" - f"{vendor}_USER: {vendor.lower()}\n" - f"{vendor}_PASSWORD: bar\n" - f"{vendor}_PORT: '21'\n" - f"{vendor}_SRC: {vendor.lower()}_src\n" - f"{vendor}_DST: {vendor.lower()}_dst\n" - ) - vendor_list.append(string) - yaml_string = "\n".join(vendor_list) - m = mocker.mock_open(read_data=yaml_string) - mocker.patch("builtins.open", m) + env[f"{vendor}_HOST"] = f"ftp.{vendor.lower()}.com" + env[f"{vendor}_USER"] = f"{vendor.lower()}" + env[f"{vendor}_PASSWORD"] = "bar" + env[f"{vendor}_PORT"] = "21" + env[f"{vendor}_SRC"] = f"{vendor.lower()}_src" + env[f"{vendor}_DST"] = f"NSDROP/vendor_records/{vendor.lower()}" + env["NSDROP_PORT"] = "22" + env["EASTVIEW_PORT"] = "22" + yaml_string = "" + for k, v in env.items(): + os.environ[k] = v + yaml_string += f"{k}: {v}\n" + monkeypatch.setattr("vendor_file_cli.utils.load_creds", lambda *args: None) + return yaml_string @pytest.fixture -def mock_cred_config(monkeypatch, mock_open_yaml_file): - def mock_load_vendor_creds(*args, **kwargs): - return ["FOO", "BAR", "BAZ", "NSDROP"] - - monkeypatch.setattr( - "vendor_file_cli.config.load_vendor_creds", mock_load_vendor_creds - ) +def mock_open_file(mock_vendor_creds, mocker) -> None: + m = mocker.mock_open(read_data=mock_vendor_creds) + mocker.patch("vendor_file_cli.utils.open", m) @pytest.fixture -def cli_runner(mocker, mock_Client, mock_cred_config): +def cli_runner(monkeypatch, mock_Client) -> CliRunner: runner = CliRunner() + + def mock_logging(*args, **kwargs): + logger_dict = {"version": 1, "disable_existing_loggers": False} + str_format = ( + "vendor_file_cli-%(asctime)s-%(filename)s-%(levelname)s-%(message)s" + ) + handler = {"class": "StreamHandler", "formatter": "basic", "level": "DEBUG"} + logger_dict.update({"formatters": {"basic": {"format": str_format}}}) + logger_dict.update({"handlers": {"stream": handler}}) + logger_dict.update({"loggers": {}}) + logger_dict["loggers"] = {"file_retriever": {"handlers": ["stream"]}} + logger_dict["loggers"] = {"vendor_file_cli": {"handlers": ["stream"]}} + return logger_dict + + monkeypatch.setattr("logging.config.dictConfig", mock_logging) return runner class MockCreds: def __init__(self): self.token = "foo" + self.refresh_token = "bar" @property def valid(self, *args, **kwargs): @@ -258,85 +179,60 @@ def valid(self, *args, **kwargs): def expired(self, *args, **kwargs): return False - @property - def refresh_token(self, *args, **kwargs): - return "bar" - def refresh(self, *args, **kwargs): - self.token = "baz" self.expired = False self.valid = True def to_json(self, *args, **kwargs): pass - -@pytest.fixture -def mock_open_file(mocker): - m = mocker.mock_open(read_data="foo") - mocker.patch("builtins.open", m) - return m + def run_local_server(self, *args, **kwargs): + return self @pytest.fixture -def mock_sheet_config(monkeypatch): - def get_creds(*args, **kwargs): - return MockCreds() - - def mock_path_exists(*args, **kwargs): - return True +def mock_sheet_config(monkeypatch, caplog, mock_open_file): + def build_sheet(*args, **kwargs): + return MockResource() + caplog.set_level(logging.DEBUG) + monkeypatch.setenv("USERPROFILE", "test") + monkeypatch.setattr("googleapiclient.discovery.build", build_sheet) + monkeypatch.setattr("googleapiclient.discovery.build_from_document", build_sheet) monkeypatch.setattr( "google.oauth2.credentials.Credentials.from_authorized_user_file", - get_creds, + lambda *args, **kwargs: MockCreds(), ) - monkeypatch.setattr("os.path.exists", mock_path_exists) - monkeypatch.setenv("USERPROFILE", "test") @pytest.fixture -def mock_sheet_config_creds_invalid(monkeypatch, mock_sheet_config, mock_open_file): +def mock_sheet_config_creds_invalid(monkeypatch, mock_sheet_config): monkeypatch.setattr(MockCreds, "valid", False) monkeypatch.setattr(MockCreds, "expired", True) -class MockFlow: - def run_local_server(self, *args, **kwargs): - return MockCreds() - - @pytest.fixture -def mock_sheet_config_no_creds(monkeypatch, mock_sheet_config, mock_open_file): - def mock_flow(*args, **kwargs): - return MockFlow() - - def auth_user_file(*args, **kwargs): - return None - +def mock_sheet_config_no_creds(monkeypatch, mock_sheet_config): monkeypatch.setattr( - "google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file", mock_flow - ) - monkeypatch.setattr( - "google_auth_oauthlib.flow.InstalledAppFlow.from_client_config", mock_flow + "google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file", + lambda *args, **kwargs: MockCreds(), ) monkeypatch.setattr( "google.oauth2.credentials.Credentials.from_authorized_user_file", - auth_user_file, + lambda *args, **kwargs: None, ) class MockResource: - def append(self, spreadsheetId, range, *args, **kwargs): - self.spreadsheetId = spreadsheetId - self.range = range + def __init__(self): + self.spreadsheetId = "foo" + self.range = "bar" + + def append(self, *args, **kwargs): return self def execute(self, *args, **kwargs): - data = {k: v for k, v in self.__dict__.items() if not k.startswith("__")} - return { - "spreadsheetId": data["spreadsheetId"], - "tableRange": data["range"], - } + return dict(spreadsheetId=self.spreadsheetId, tableRange=self.range) def spreadsheets(self, *args, **kwargs): return self @@ -352,21 +248,10 @@ def __init__(self): @pytest.fixture -def mock_sheet_resource(monkeypatch): - def build_sheet(*args, **kwargs): - return MockResource() - - monkeypatch.setattr("googleapiclient.discovery.build", build_sheet) - monkeypatch.setattr("googleapiclient.discovery.build_from_document", build_sheet) - - -@pytest.fixture -def mock_sheet_http_error(monkeypatch, mock_sheet_resource): +def mock_sheet_http_error(monkeypatch): def mock_error(*args, **kwargs): raise HttpError( - resp=MockError(), - content=b"{'error': {'message': 'Bad Request'}}", - uri="foo", + resp=MockError(), content=b"{'message': 'Bad Request'}", uri="foo" ) monkeypatch.setattr("googleapiclient.discovery.build", mock_error) @@ -374,7 +259,7 @@ def mock_error(*args, **kwargs): @pytest.fixture -def mock_sheet_timeout_error(monkeypatch, mock_sheet_resource): +def mock_sheet_timeout_error(monkeypatch): def mock_error(*args, **kwargs): raise TimeoutError("Connection timed out") diff --git a/tests/test_cli.py b/tests/test_cli.py index b3ecf38..186f0f9 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -17,30 +17,22 @@ def test_vendor_file_cli(): def test_vendor_file_cli_get_all_vendor_files(cli_runner, caplog): - result = cli_runner.invoke( - cli=vendor_file_cli, - args=["all-vendor-files"], - ) + result = cli_runner.invoke(cli=vendor_file_cli, args=["all-vendor-files"]) assert result.exit_code == 0 - assert "(NSDROP) Connected to server" in caplog.text - assert "(FOO) Connected to server" in caplog.text - assert "(FOO) Retrieving list of files in " in caplog.text - assert "(FOO) Closing client session" in caplog.text - assert "(BAR) Connected to server" in caplog.text - assert "(BAR) Retrieving list of files in " in caplog.text - assert "(BAR) Closing client session" in caplog.text - assert "(BAZ) Connected to server" in caplog.text - assert "(BAZ) Retrieving list of files in " in caplog.text - assert "(BAZ) Closing client session" in caplog.text + assert "(NSDROP) Connecting to " in caplog.text + assert "(EASTVIEW) Connecting to " in caplog.text + assert "(EASTVIEW) Client session closed" in caplog.text + assert "(MIDWEST_NYPL) Connecting to " in caplog.text + assert "(MIDWEST_NYPL) Client session closed" in caplog.text -def test_vendor_file_cli_get_available_vendors(cli_runner, caplog): - result = cli_runner.invoke( - cli=vendor_file_cli, - args=["available-vendors"], - ) +def test_vendor_file_cli_get_available_vendors(cli_runner): + result = cli_runner.invoke(cli=vendor_file_cli, args=["available-vendors"]) assert result.exit_code == 0 - assert "Available vendors: ['FOO', 'BAR', 'BAZ']" in result.stdout + assert ( + "Available vendors: ['EASTVIEW', 'LEILA', 'MIDWEST_NYPL', 'BAKERTAYLOR_BPL']" + in result.stdout + ) def test_vendor_file_cli_get_recent_vendor_files(cli_runner, caplog): @@ -48,52 +40,29 @@ def test_vendor_file_cli_get_recent_vendor_files(cli_runner, caplog): cli=vendor_file_cli, args=["vendor-files", "-v", "all"], ) - assert "(NSDROP) Connected to server" in caplog.text - assert "(FOO) Connected to server" in caplog.text - assert "(FOO) Retrieving list of files in " in caplog.text - assert "(FOO) Closing client session" in caplog.text - - -def test_vendor_file_cli_get_recent_vendor_files_none(cli_runner, caplog): - result = cli_runner.invoke( - cli=vendor_file_cli, - args=["vendor-files"], - ) - assert result.runner.get_default_prog_name(vendor_file_cli) == "vendor-file-cli" - assert "(NSDROP) Connected to server" in caplog.text + assert "(NSDROP) Connecting to " in caplog.text + assert "(EASTVIEW) Connecting to " in caplog.text + assert "(EASTVIEW) Client session closed" in caplog.text def test_vendor_file_cli_get_recent_vendor_files_multiple_vendors(cli_runner, caplog): result = cli_runner.invoke( cli=vendor_file_cli, - args=["vendor-files", "-v", "foo", "-v", "bar", "-v", "baz"], + args=["vendor-files", "-v", "eastview", "-v", "leila"], ) assert result.exit_code == 0 - assert "(NSDROP) Connected to server" in caplog.text - assert "(FOO) Connected to server" in caplog.text - assert "(FOO) Retrieving list of files in " in caplog.text - assert "(FOO) Closing client session" in caplog.text - assert "(BAR) Connected to server" in caplog.text - assert "(BAR) Retrieving list of files in " in caplog.text - assert "(BAR) Closing client session" in caplog.text - assert "(BAZ) Connected to server" in caplog.text - assert "(BAZ) Retrieving list of files in " in caplog.text - assert "(BAZ) Closing client session" in caplog.text + assert "(NSDROP) Connecting to " in caplog.text + assert "(EASTVIEW) Connecting to " in caplog.text + assert "(EASTVIEW) Client session closed" in caplog.text + assert "(LEILA) Connecting to " in caplog.text + assert "(LEILA) Client session closed" in caplog.text def test_vendor_file_cli_validate_vendor_files(cli_runner, caplog): result = cli_runner.invoke( cli=vendor_file_cli, - args=["validate-file", "-v", "foo", "-f", "foo.mrc"], + args=["validate-file", "-v", "eastview", "-f", "foo.mrc"], ) assert result.exit_code == 0 - assert "(NSDROP) Connected to server" in caplog.text - assert ( - "(NSDROP) Retrieving file info for foo.mrc from NSDROP/vendor_records/foo" - in caplog.text - ) - assert "(NSDROP) Closing client session" in caplog.text - assert "(NSDROP) Connection closed" in caplog.text - assert "(NSDROP) Connected to server" in caplog.text - assert "(NSDROP) Fetching foo.mrc from `NSDROP/vendor_records/foo`" in caplog.text - assert "(NSDROP) Validating foo file: foo.mrc" in caplog.text + assert "(NSDROP) Connecting to " in caplog.text + assert "(NSDROP) Validating eastview file: foo.mrc" in caplog.text diff --git a/tests/test_commands.py b/tests/test_commands.py index 6d07bad..b1becf4 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -1,92 +1,50 @@ -import os -import pytest -from file_retriever.connect import Client from vendor_file_cli.commands import ( - connect, get_vendor_files, - get_single_file, validate_files, ) -from vendor_file_cli.config import load_vendor_creds - - -def test_connect(mock_Client, mocker): - yaml_string = """ - FOO_HOST: ftp.testvendor.com - FOO_USER: bar - FOO_PASSWORD: baz - FOO_PORT: '21' - FOO_SRC: foo_src - FOO_DST: foo_dst - """ - m = mocker.mock_open(read_data=yaml_string) - mocker.patch("builtins.open", m) - - load_vendor_creds("foo.yaml") - client = connect("foo") - assert client.name == "FOO" - assert client.host == "ftp.testvendor.com" - assert client.port == "21" - assert isinstance(client, Client) - assert client.session is not None +from vendor_file_cli.validator import get_single_file +from vendor_file_cli.utils import connect def test_get_vendor_files(mock_Client, caplog): - ( - os.environ["NSDROP_HOST"], - os.environ["NSDROP_USER"], - os.environ["NSDROP_PASSWORD"], - os.environ["NSDROP_PORT"], - os.environ["NSDROP_SRC"], - ) = ("sftp.foo.com", "foo", "bar", "22", "foo_src") - vendors = ["foo"] - get_vendor_files(vendors=vendors, days=300) - assert "(NSDROP) Connected to server" in caplog.text - assert "(FOO) Connected to server" in caplog.text - assert "(FOO) Retrieving list of files in " in caplog.text - assert "(FOO) 1 recent file(s) in `foo_src`" in caplog.text - assert "(FOO) Closing client session" in caplog.text - assert ( - "(NSDROP) Checking list of 1 files against `NSDROP/vendor_records/foo`" - in caplog.text - ) - assert ( - "(NSDROP) 1 of 1 files missing from `NSDROP/vendor_records/foo`" in caplog.text - ) - assert "(FOO) Fetching foo.mrc from `foo_src`" in caplog.text - assert ( - "(NSDROP) Checking for file in `NSDROP/vendor_records/foo` before writing" - in caplog.text - ) - assert "(NSDROP) Writing foo.mrc to `NSDROP/vendor_records/foo`" in caplog.text + get_vendor_files(vendors=["leila"], days=300) + assert "(NSDROP) Connecting to " in caplog.text + assert "(LEILA) Connecting to " in caplog.text + assert "(LEILA) 1 file(s) on LEILA server to copy to NSDROP" in caplog.text + assert "(NSDROP) Writing foo.mrc to `NSDROP/vendor_records/leila`" in caplog.text + assert "(NSDROP) 1 file(s) copied to `NSDROP/vendor_records/leila`" in caplog.text + assert "(LEILA) Client session closed" in caplog.text + assert "(NSDROP) Client session closed" in caplog.text def test_get_vendor_files_no_files(mock_Client, caplog): get_vendor_files(vendors=["eastview"], days=1, hours=1) - assert "(NSDROP) Connected to server" in caplog.text - assert "(EASTVIEW) Connected to server" in caplog.text - assert "(EASTVIEW) Retrieving list of files in " in caplog.text - assert "(EASTVIEW) 0 recent file(s) in `eastview_src`" in caplog.text - assert "(EASTVIEW) Closing client session" in caplog.text + assert "(NSDROP) Connecting to ftp.nsdrop.com via SFTP client" in caplog.text + assert "(EASTVIEW) Connecting to ftp.eastview.com via SFTP client" in caplog.text + assert "(EASTVIEW) 0 file(s) on EASTVIEW server to copy to NSDROP" in caplog.text + assert "(EASTVIEW) Client session closed" in caplog.text + assert "(NSDROP) Client session closed" in caplog.text def test_get_single_file_no_validation(mock_Client, stub_file_info, caplog): - vendor_client = connect("eastview") + vendor_client = connect("midwest_nypl") nsdrop_client = connect("nsdrop") get_single_file( - vendor="eastview", + vendor="midwest_nypl", file=stub_file_info, vendor_client=vendor_client, nsdrop_client=nsdrop_client, ) - assert "(EASTVIEW) Connected to server" in caplog.text - assert "(NSDROP) Connected to server" in caplog.text - assert "(EASTVIEW) Fetching foo.mrc from `eastview_src`" in caplog.text assert ( - "(NSDROP) Checking for file in `NSDROP/vendor_records/eastview` before writing" + "(MIDWEST_NYPL) Connecting to ftp.midwest_nypl.com via FTP client" + in caplog.text + ) + assert "(NSDROP) Connecting to ftp.nsdrop.com via SFTP client" in caplog.text + assert "(NSDROP) Validating bar file: foo.mrc" not in caplog.text + assert ( + "(NSDROP) Writing foo.mrc to `NSDROP/vendor_records/midwest_nypl`" in caplog.text ) - assert "(NSDROP) Writing foo.mrc to `NSDROP/vendor_records/eastview`" in caplog.text def test_get_single_file_with_validation(mock_Client, stub_file_info, caplog): @@ -98,45 +56,19 @@ def test_get_single_file_with_validation(mock_Client, stub_file_info, caplog): vendor_client=vendor_client, nsdrop_client=nsdrop_client, ) - assert "(EASTVIEW) Connected to server" in caplog.text - assert "(NSDROP) Connected to server" in caplog.text - assert "(EASTVIEW) Fetching foo.mrc from `eastview_src`" in caplog.text + assert "(EASTVIEW) Connecting to ftp.eastview.com via SFTP client" in caplog.text + assert "(NSDROP) Connecting to ftp.nsdrop.com via SFTP client" in caplog.text assert "(NSDROP) Validating eastview file: foo.mrc" in caplog.text - assert ( - "(NSDROP) Checking for file in `NSDROP/vendor_records/eastview` before writing" - in caplog.text - ) assert "(NSDROP) Writing foo.mrc to `NSDROP/vendor_records/eastview`" in caplog.text def test_validate_files(mock_Client, caplog): validate_files(vendor="eastview", files=None) - assert ( - "(NSDROP) Retrieving list of files in `NSDROP/vendor_records/eastview`" - in caplog.text - ) - assert "(NSDROP) 1 file(s) in `NSDROP/vendor_records/eastview`" in caplog.text - assert ( - "(NSDROP) Fetching foo.mrc from `NSDROP/vendor_records/eastview`" in caplog.text - ) - assert "(NSDROP) Validating eastview file: foo.mrc" in caplog.text + assert "(NSDROP) Connecting to " in caplog.text + assert "(NSDROP) Validating eastview file: bar.mrc" in caplog.text def test_validate_files_with_list(mock_Client, caplog): - validate_files(vendor="eastview", files=["foo.mrc", "bar.mrc"]) - assert ( - "(NSDROP) Fetching foo.mrc from `NSDROP/vendor_records/eastview`" in caplog.text - ) - assert ( - "(NSDROP) Fetching bar.mrc from `NSDROP/vendor_records/eastview`" in caplog.text - ) + validate_files(vendor="eastview", files=["foo.mrc"]) + assert "(NSDROP) Connecting to " in caplog.text assert "(NSDROP) Validating eastview file: foo.mrc" in caplog.text - - -@pytest.mark.livetest -def test_client_config_live(): - client_list = load_vendor_creds( - os.path.join(os.environ["USERPROFILE"], ".cred/.sftp/connections.yaml") - ) - assert len(client_list) > 1 - assert "LEILA" in client_list diff --git a/tests/test_config.py b/tests/test_config.py deleted file mode 100644 index bbc936e..0000000 --- a/tests/test_config.py +++ /dev/null @@ -1,74 +0,0 @@ -import logging -import logging.config -import logging.handlers -import os -import pytest -from vendor_file_cli.config import ( - load_vendor_creds, - logger_config, -) - - -def test_logger_config(): - cli_logger = logging.getLogger("vendor_file_cli") - while cli_logger.handlers: - handler = cli_logger.handlers[0] - cli_logger.removeHandler(handler) - handler.close() - logger_config(cli_logger) - config_handlers = cli_logger.handlers - handler_types = [type(i) for i in config_handlers] - assert len(handler_types) == 2 - assert handler_types == [ - logging.StreamHandler, - logging.handlers.RotatingFileHandler, - ] - assert config_handlers[0].level == 10 - assert ( - config_handlers[0].formatter._fmt == "%(asctime)s - %(levelname)s - %(message)s" - ) - - -def test_load_vendor_creds(mocker): - yaml_string = """ - FOO_HOST: foo - FOO_USER: bar - FOO_PASSWORD: baz - FOO_PORT: '21' - FOO_SRC: foo_src - BAR_HOST: foo - BAR_USER: bar - BAR_PASSWORD: baz - BAR_PORT: '22' - BAR_SRC: bar_src - """ - m = mocker.mock_open(read_data=yaml_string) - mocker.patch("builtins.open", m) - - client_list = load_vendor_creds("foo.yaml") - assert len(client_list) == 2 - assert client_list == ["FOO", "BAR"] - assert os.environ["FOO_HOST"] == "foo" - assert os.environ["FOO_USER"] == "bar" - assert os.environ["FOO_PASSWORD"] == "baz" - assert os.environ["FOO_PORT"] == "21" - assert os.environ["FOO_SRC"] == "foo_src" - - -def test_load_vendor_creds_empty_yaml(mocker): - yaml_string = "" - m = mocker.mock_open(read_data=yaml_string) - mocker.patch("builtins.open", m) - - with pytest.raises(ValueError) as exc: - load_vendor_creds("foo.yaml") - assert "No credentials found in config file" in str(exc.value) - - -@pytest.mark.livetest -def test_client_config_live(): - client_list = load_vendor_creds( - os.path.join(os.environ["USERPROFILE"], ".cred/.sftp/connections.yaml") - ) - assert len(client_list) > 1 - assert "LEILA" in client_list diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..549a024 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,153 @@ +import os +from pymarc import Field, Subfield +import pytest +from file_retriever.connect import Client +from vendor_file_cli.utils import ( + configure_sheet, + connect, + create_logger_dict, + get_control_number, + get_vendor_list, + load_creds, + read_marc_file_stream, + write_data_to_sheet, +) + + +def test_configure_sheet_success(mock_sheet_config): + creds = configure_sheet() + assert creds.token == "foo" + assert creds.valid is True + assert creds.expired is False + assert creds.refresh_token is not None + + +def test_configure_sheet_invalid(mock_sheet_config_creds_invalid): + creds = configure_sheet() + assert creds.token == "foo" + assert creds.valid is True + assert creds.expired is False + assert creds.refresh_token is not None + + +def test_configure_sheet_generate_new_creds(mock_sheet_config_no_creds): + creds = configure_sheet() + assert creds.token == "foo" + assert creds.valid is True + assert creds.expired is False + assert creds.refresh_token is not None + + +def test_connect(mock_Client): + client = connect("leila") + assert client.name == "LEILA" + assert client.host == "ftp.leila.com" + assert client.port == "21" + assert isinstance(client, Client) + assert client.session is not None + + +def test_create_logger_dict(cli_runner): + logger_dict = create_logger_dict() + assert sorted(list(logger_dict["formatters"].keys())) == sorted(["basic", "json"]) + assert sorted(list(logger_dict["handlers"].keys())) == sorted( + ["stream", "file", "loggly"] + ) + assert sorted(list(logger_dict["loggers"].keys())) == sorted( + ["file_retriever", "vendor_file_cli"] + ) + + +def test_get_control_number(stub_record): + control_no = get_control_number(stub_record) + assert control_no == "on1381158740" + + +@pytest.mark.parametrize( + "field", + ["020", "035", "022", "024", "010"], +) +def test_get_control_number_other_tag(stub_record, field): + stub_record.remove_fields("001") + stub_record.add_ordered_field( + Field( + tag=field, + indicators=[" ", " "], + subfields=[ + Subfield(code="a", value="foo"), + ], + ) + ) + control_no = get_control_number(stub_record) + assert control_no == "foo" + + +def test_get_control_number_call_no(stub_record): + stub_record.remove_fields("001") + control_no = get_control_number(stub_record) + assert control_no == "ReCAP 23-100000" + + +def test_get_control_number_none(stub_record): + stub_record.remove_fields("001", "852") + control_no = get_control_number(stub_record) + assert control_no == "None" + + +def test_get_vendor_list(): + vendor_list = get_vendor_list() + assert sorted(vendor_list) == sorted( + ["LEILA", "MIDWEST_NYPL", "BAKERTAYLOR_BPL", "EASTVIEW"] + ) + + +def test_load_creds(mock_open_file): + load_creds(mock_open_file) + assert os.environ["NSDROP_HOST"] == "ftp.nsdrop.com" + assert os.environ["NSDROP_PORT"] == "22" + assert os.environ["LEILA_HOST"] == "ftp.leila.com" + assert os.environ["LEILA_PORT"] == "21" + assert os.environ["LEILA_SRC"] == "leila_src" + assert os.environ["LEILA_DST"] == "NSDROP/vendor_records/leila" + + +def test_load_creds_empty_yaml(mocker): + yaml_string = "" + m = mocker.mock_open(read_data=yaml_string) + mocker.patch("builtins.open", m) + with pytest.raises(ValueError) as exc: + load_creds("foo.yaml") + assert "No credentials found in config file" in str(exc.value) + + +def test_read_marc_file_stream(stub_file): + stream = read_marc_file_stream(stub_file) + assert stream is not None + assert stream.__next__().get_fields("001")[0].data == "on1381158740" + records = [i for i in read_marc_file_stream(stub_file)] + assert len(records) == 1 + + +def test_write_data_to_sheet(mock_sheet_config): + data = write_data_to_sheet({"file_name": ["foo.mrc"], "vendor_code": ["FOO"]}) + keys = data.keys() + assert sorted(list(keys)) == sorted(["spreadsheetId", "tableRange"]) + + +def test_write_data_to_sheet_http_error( + mock_sheet_config, mock_sheet_http_error, caplog +): + data = write_data_to_sheet({"file_name": ["foo.mrc"], "vendor_code": ["FOO"]}) + assert data is None + assert "Error occurred while sending data to google sheet: " in caplog.text + + +def test_write_data_to_sheet_timeout_error( + mock_sheet_config, mock_sheet_timeout_error, caplog +): + data = write_data_to_sheet({"file_name": ["foo.mrc"], "vendor_code": ["FOO"]}) + assert data is None + assert ( + "Error occurred while sending data to google sheet: Connection timed out" + in caplog.text + ) diff --git a/tests/test_validator.py b/tests/test_validator.py index 6ecc29f..ccbbb7d 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -1,136 +1,124 @@ -from pymarc import Field, Subfield +import datetime import pytest from vendor_file_cli.validator import ( - configure_sheet, - get_control_number, - map_vendor_to_code, - send_data_to_sheet, + get_single_file, + get_vendor_file_list, + validate_file, validate_single_record, ) - - -def test_configure_sheet_success(mock_sheet_config): - creds = configure_sheet() - assert creds.token == "foo" - assert creds.valid is True - assert creds.expired is False - assert creds.refresh_token is not None - - -def test_configure_sheet_invalid(mock_sheet_config_creds_invalid): - creds = configure_sheet() - assert creds.token == "baz" - assert creds.valid is True - assert creds.expired is False - assert creds.refresh_token is not None - - -def test_configure_sheet_generate_new_creds(mock_sheet_config_no_creds): - creds = configure_sheet() - assert creds.token == "foo" - assert creds.valid is True - assert creds.expired is False - assert creds.refresh_token is not None - - -def test_get_control_number(stub_record): - control_no = get_control_number(stub_record) - assert control_no == "on1381158740" - - -@pytest.mark.parametrize( - "field", - ["020", "035", "022", "024", "010"], -) -def test_get_control_number_other_tag(stub_record, field): - print(stub_record) - stub_record.remove_fields("001") - stub_record.add_ordered_field( - Field( - tag=field, - indicators=[" ", " "], - subfields=[ - Subfield(code="a", value="foo"), - ], - ) +from vendor_file_cli.utils import connect + + +@pytest.mark.parametrize("vendor", ["midwest_nypl", "bakertaylor_bpl"]) +def test_get_single_file_no_validation(mock_Client, stub_file_info, vendor, caplog): + vendor_client = connect(vendor) + nsdrop_client = connect("nsdrop") + get_single_file( + vendor=vendor, + file=stub_file_info, + vendor_client=vendor_client, + nsdrop_client=nsdrop_client, + ) + assert ( + f"({vendor.upper()}) Connecting to ftp.{vendor}.com via FTP client" + in caplog.text + ) + assert "(NSDROP) Connecting to ftp.nsdrop.com via SFTP client" in caplog.text + assert f"(NSDROP) Validating {vendor} file: foo.mrc" not in caplog.text + assert ( + f"(NSDROP) Writing foo.mrc to `NSDROP/vendor_records/{vendor}`" in caplog.text ) - control_no = get_control_number(stub_record) - assert control_no == "foo" -def test_get_control_number_call_no(stub_record): - stub_record.remove_fields("001") - control_no = get_control_number(stub_record) - assert control_no == "ReCAP 23-100000" +def test_get_single_file_with_validation(mock_Client, stub_file_info, caplog): + vendor_client = connect("eastview") + nsdrop_client = connect("nsdrop") + get_single_file( + vendor="eastview", + file=stub_file_info, + vendor_client=vendor_client, + nsdrop_client=nsdrop_client, + ) + assert "(EASTVIEW) Connecting to ftp.eastview.com via SFTP client" in caplog.text + assert "(NSDROP) Connecting to ftp.nsdrop.com via SFTP client" in caplog.text + assert "(NSDROP) Validating eastview file: foo.mrc" in caplog.text + assert "(NSDROP) Writing foo.mrc to `NSDROP/vendor_records/eastview`" in caplog.text + + +def test_get_single_file_bakertaylor_bpl_root(mock_Client, stub_file_info, caplog): + vendor_client = connect("bakertaylor_bpl") + nsdrop_client = connect("nsdrop") + stub_file_info.file_name = "ADDfoo.mrc" + get_single_file( + vendor="bakertaylor_bpl", + file=stub_file_info, + vendor_client=vendor_client, + nsdrop_client=nsdrop_client, + ) + assert ( + "(BAKERTAYLOR_BPL) Connecting to ftp.bakertaylor_bpl.com via FTP client" + in caplog.text + ) + assert "(NSDROP) Connecting to ftp.nsdrop.com via SFTP client" in caplog.text + assert ( + "(NSDROP) Writing ADDfoo.mrc to `NSDROP/vendor_records/bakertaylor_bpl`" + in caplog.text + ) -def test_get_control_number_none(stub_record): - stub_record.remove_fields("001", "852") - control_no = get_control_number(stub_record) - assert control_no == "None" +@pytest.mark.parametrize("vendor", ["midwest_nypl", "bakertaylor_bpl"]) +def test_get_vendor_file_list(mock_Client, vendor, caplog): + file_list = [] + with connect("nsdrop") as nsdrop_client: + with connect(vendor) as vendor_client: + file_list.extend( + get_vendor_file_list( + vendor=vendor, + timedelta=datetime.timedelta(days=1), + nsdrop_client=nsdrop_client, + vendor_client=vendor_client, + ) + ) + assert len(file_list) == 0 + assert "(NSDROP) Connecting to ftp.nsdrop.com via SFTP client" in caplog.text + assert ( + f"({vendor.upper()}) Connecting to ftp.{vendor}.com via FTP client" + in caplog.text + ) + assert f"({vendor.upper()}) Client session closed" in caplog.text + assert "(NSDROP) Client session closed" in caplog.text @pytest.mark.parametrize( - "vendor, code", + "vendor, vendor_code", [ + ("amalivre_sasb", "AUXAM"), ("eastview", "EVP"), ("leila", "LEILA"), - ("amalivre_sasb", "AUXAM"), - ("amalivre_lpa", "AUXAM"), - ("amalivre_schomburg", "AUXAM"), - ("amalivre_rl", "AUXAM"), + ("midwest_nypl", "MIDWEST_NYPL"), ], ) -def test_map_vendor_to_code(vendor, code): - assert map_vendor_to_code(vendor) == code - - -def test_send_data_to_sheet(mock_sheet_config, mock_sheet_resource): - creds = configure_sheet() - data = send_data_to_sheet(vendor_code="EVP", values=[["foo", "bar"]], creds=creds) - assert sorted(list(data.keys())) == sorted( +def test_validate_file(stub_file, vendor, vendor_code): + out_dict = validate_file(stub_file, vendor) + assert sorted([i for i in out_dict.keys()]) == sorted( [ - "spreadsheetId", - "tableRange", + "valid", + "record_number", + "control_number", + "file_name", + "validation_date", + "vendor_code", ] ) + assert out_dict["vendor_code"] == [vendor_code] -def test_send_data_to_sheet_http_error( - caplog, mock_sheet_config, mock_sheet_http_error -): - creds = configure_sheet() - data = send_data_to_sheet(vendor_code="EVP", values=[["foo", "bar"]], creds=creds) - assert data is None - assert "Error occurred while sending data to google sheet: " in caplog.text - - -def test_send_data_to_sheet_timeout_error( - caplog, mock_sheet_config, mock_sheet_timeout_error -): - creds = configure_sheet() - data = send_data_to_sheet(vendor_code="EVP", values=[["foo", "bar"]], creds=creds) - assert data is None - assert "Error occurred while sending data to google sheet: " in caplog.text - - -def test_validate_single_record(stub_record): - assert validate_single_record(stub_record) == { - "valid": True, - "error_count": "", - "missing_field_count": "", - "missing_fields": "", - "extra_field_count": "", - "extra_fields": "", - "invalid_field_count": "", - "invalid_fields": "", - "order_item_mismatches": "", - } +def test_validate_single_record(mock_valid_record): + assert validate_single_record(mock_valid_record) == {"valid": True} -def test_validate_single_record_invalid(stub_record): - stub_record.remove_fields("960") - assert validate_single_record(stub_record) == { +def test_validate_single_record_invalid(mock_invalid_record): + assert validate_single_record(mock_invalid_record) == { "valid": False, "error_count": 1, "missing_field_count": 1, diff --git a/vendor_file_cli/__init__.py b/vendor_file_cli/__init__.py index d573df2..6cca5cd 100644 --- a/vendor_file_cli/__init__.py +++ b/vendor_file_cli/__init__.py @@ -1,16 +1,17 @@ import logging import logging.config -import os import click from vendor_file_cli.commands import get_vendor_files, validate_files -from vendor_file_cli.config import load_vendor_creds, logger_config +from vendor_file_cli.utils import create_logger_dict, get_vendor_list + +logger = logging.getLogger("vendor_file_cli") @click.group def vendor_file_cli() -> None: - """CLI for retrieving files from vendor FTP/SFTP servers.""" - logger = logging.getLogger("vendor_file_cli") - logger_config(logger) + """CLI for retrieving and validating files from vendor FTP/SFTP servers.""" + logger_dict = create_logger_dict() + logging.config.dictConfig(logger_dict) pass @@ -20,12 +21,12 @@ def vendor_file_cli() -> None: ) def get_all_vendor_files() -> None: """ - Retrieve files from vendor server not present in vendor's NSDROP directory. Creates - list of files on vendor server and list of files in NSDROP directory. Copies files - from vendor server to NSDROP directory if they are not already present. Validates - files for Eastview, Leila, and Amalivre (SASB) before copying them to NSDROP and - writes output of validation to google sheet. Files are copied to - NSDROP/vendor_records/{vendor_name}. + Retrieve files from vendor server which were created in last year and are not + present in vendor's NSDROP directory. Creates list of files on vendor server + and list of files in NSDROP directory. Copies files from vendor server to NSDROP + directory if they are not already present. Validates files for Eastview, Leila, + and Amalivre (SASB) before copying them to NSDROP and writes output of validation + to google sheet. Files are copied to NSDROP/vendor_records/{vendor_name}. Args: None @@ -34,18 +35,14 @@ def get_all_vendor_files() -> None: None """ - vendor_list = load_vendor_creds( - os.path.join(os.environ["USERPROFILE"], ".cred/.sftp/connections.yaml") - ) - get_vendor_files(vendors=vendor_list) + vendor_list = get_vendor_list() + get_vendor_files(vendors=vendor_list, days=365) @vendor_file_cli.command("available-vendors", short_help="List all configured vendors.") def get_available_vendors() -> None: """List all configured vendors.""" - vendor_list = load_vendor_creds( - os.path.join(os.environ["USERPROFILE"], ".cred/.sftp/connections.yaml") - ) + vendor_list = get_vendor_list() click.echo(f"Available vendors: {vendor_list}") @@ -78,10 +75,6 @@ def validate_vendor_files(vendor: str, file: str) -> None: Returns: None """ - if not os.getenv("GITHUB_ACTIONS"): - load_vendor_creds( - os.path.join(os.environ["USERPROFILE"], ".cred/.sftp/connections.yaml") - ) validate_files(vendor=vendor, files=[file]) @@ -132,9 +125,7 @@ def get_recent_vendor_files(vendor: str, days: int, hours: int) -> None: None """ - all_available_vendors = load_vendor_creds( - os.path.join(os.environ["USERPROFILE"], ".cred/.sftp/connections.yaml") - ) + all_available_vendors = get_vendor_list() if "all" in vendor: vendor_list = all_available_vendors else: diff --git a/vendor_file_cli/commands.py b/vendor_file_cli/commands.py index e1b8769..4fa77f3 100644 --- a/vendor_file_cli/commands.py +++ b/vendor_file_cli/commands.py @@ -1,36 +1,18 @@ -"""This module contains functions to use to configure the CLI, logger, and env vars.""" +"""This module contains functions in CLI commands.""" import logging import logging.handlers import datetime import os -from file_retriever.connect import Client -from file_retriever.file import FileInfo -from vendor_file_cli.validator import validate_file +from vendor_file_cli.validator import ( + validate_file, + get_single_file, + get_vendor_file_list, +) +from vendor_file_cli.utils import connect -logger = logging.getLogger("vendor_file_cli") - - -def connect(name: str) -> Client: - """ - Create and return a `Client` object for the specified server - using credentials stored in env vars. - - Args: - name: name of server (eg. EASTVIEW, NSDROP) - - Returns: - a `Client` object for the specified server - """ - client_name = name.upper() - return Client( - name=client_name, - username=os.environ[f"{client_name}_USER"], - password=os.environ[f"{client_name}_PASSWORD"], - host=os.environ[f"{client_name}_HOST"], - port=os.environ[f"{client_name}_PORT"], - ) +logger = logging.getLogger(__name__) def get_vendor_files( @@ -56,63 +38,37 @@ def get_vendor_files( None """ - nsdrop = connect("nsdrop") - timedelta = datetime.timedelta(days=days, hours=hours) for vendor in vendors: - with connect(vendor) as client: - file_list = client.list_file_info( - time_delta=timedelta, - remote_dir=os.environ[f"{vendor.upper()}_SRC"], - ) - files = nsdrop.check_file_list( - files=file_list, dir=os.environ[f"{vendor.upper()}_DST"], remote=True - ) - for file in files: - get_single_file( + vendor_dst = os.environ[f"{vendor.upper()}_DST"] + with connect("nsdrop") as nsdrop_client: + with connect(vendor) as vendor_client: + files = get_vendor_file_list( vendor=vendor, - file=file, - vendor_client=client, - nsdrop_client=nsdrop, + timedelta=datetime.timedelta(days=days, hours=hours), + nsdrop_client=nsdrop_client, + vendor_client=vendor_client, ) - nsdrop.close() - - -def get_single_file( - vendor: str, file: FileInfo, vendor_client: Client, nsdrop_client: Client -) -> None: - """ - Get a file from a vendor server and put it in the NSDROP directory. - Validates the file if the vendor is EASTVIEW, LEILA, or AMALIVRE_SASB. - - Args: - vendor: name of vendor - file: `FileInfo` object representing the file to retrieve - vendor_client: `Client` object for the vendor server - nsdrop_client: `Client` object for the NSDROP server - - Returns: - None - - """ - fetched_file = vendor_client.get_file( - file=file, remote_dir=os.environ[f"{vendor.upper()}_SRC"] - ) - if vendor.upper() in ["EASTVIEW", "LEILA", "AMALIVRE_SASB"]: - logger.info( - f"({nsdrop_client.name}) Validating {vendor} file: {fetched_file.file_name}" - ) - validate_file(file_obj=fetched_file, vendor=vendor, write=True) - nsdrop_client.put_file( - file=fetched_file, - dir=os.environ[f"{vendor.upper()}_DST"], - remote=True, - check=True, - ) + logger.info( + f"({vendor_client.name}) {len(files)} file(s) on " + f"{vendor_client.name} server to copy to NSDROP" + ) + for file in files: + get_single_file( + vendor=vendor, + file=file, + vendor_client=vendor_client, + nsdrop_client=nsdrop_client, + ) + if len(files) > 0: + logger.info( + f"({nsdrop_client.name}) {len(files)} file(s) " + f"copied to `{vendor_dst}`" + ) def validate_files(vendor: str, files: list | None) -> None: """ - Validate files on NSDROP for a speific vendor. + Validate files on NSDROP for a specific vendor. Args: vendor: @@ -137,6 +93,6 @@ def validate_files(vendor: str, files: list | None) -> None: for file in vendor_file_list: client = connect("nsdrop") file_obj = client.get_file(file=file, remote_dir=file_dir) - logger.info(f"({client.name}) Validating {vendor} file: {file_obj.file_name}") - validate_file(file_obj=file_obj, vendor=vendor, write=True) + logger.debug(f"({client.name}) Validating {vendor} file: {file_obj.file_name}") + validate_file(file_obj=file_obj, vendor=vendor) client.close() diff --git a/vendor_file_cli/config.py b/vendor_file_cli/config.py deleted file mode 100644 index 4c103d7..0000000 --- a/vendor_file_cli/config.py +++ /dev/null @@ -1,63 +0,0 @@ -import logging -import logging.handlers -import os -import yaml - -logger = logging.getLogger("vendor_file_cli") - - -def load_vendor_creds(config_path: str) -> list[str]: - """ - Read config file with credentials and set creds as environment variables. - Returns a list of vendors whose FTP/SFTP credentials are stored in the - config file and have been added to env vars. NSDROP is excluded from this list. - - Args: - config_path (str): Path to the yaml file with credendtials. - - Returns: - list of names of servers (eg. EASTVIEW, LEILA) whose credentials are - stored in the config file and have been added to env vars - """ - with open(config_path, "r") as file: - config = yaml.safe_load(file) - if config is None: - raise ValueError("No credentials found in config file.") - vendor_list = [ - i.split("_HOST")[0] - for i in config.keys() - if i.endswith("_HOST") and "NSDROP" not in i - ] - for k, v in config.items(): - os.environ[k] = v - for vendor in vendor_list: - os.environ[f"{vendor}_DST"] = f"NSDROP/vendor_records/{vendor.lower()}" - return vendor_list - - -def logger_config(app_logger: logging.Logger) -> None: - """ - Create and return dict for logger configuration. - """ - root_logger = logging.getLogger("file_retriever") - root_logger.setLevel(logging.DEBUG) - app_logger.setLevel(logging.DEBUG) - - stream_handler = logging.StreamHandler() - file_handler = logging.handlers.RotatingFileHandler( - filename="vendor_file_cli.log", - maxBytes=10 * 1024 * 1024, - backupCount=5, - encoding="utf8", - ) - formatter = logging.Formatter(fmt="%(asctime)s - %(levelname)s - %(message)s") - - stream_handler.setLevel(logging.DEBUG) - stream_handler.setFormatter(formatter) - file_handler.setLevel(logging.DEBUG) - file_handler.setFormatter(formatter) - - app_logger.addHandler(stream_handler) - app_logger.addHandler(file_handler) - root_logger.addHandler(stream_handler) - root_logger.addHandler(file_handler) diff --git a/vendor_file_cli/utils.py b/vendor_file_cli/utils.py new file mode 100644 index 0000000..5668d08 --- /dev/null +++ b/vendor_file_cli/utils.py @@ -0,0 +1,240 @@ +import logging +import os +import yaml +from typing import Generator, Union +from googleapiclient.discovery import build # type: ignore +from googleapiclient.errors import HttpError # type: ignore +from google.auth.transport.requests import Request +from google.oauth2.credentials import Credentials +from google_auth_oauthlib.flow import InstalledAppFlow # type: ignore +import pandas as pd +from pymarc import MARCReader, Record +from file_retriever.connect import Client +from file_retriever.file import File + +logger = logging.getLogger(__name__) + + +def configure_sheet() -> Credentials: + """ + Get or update credentials for google sheets API and save token to file. + + Args: + None + + Returns: + google.oauth2.credentials.Credentials: Credentials object for google sheet API. + """ + scopes = ["https://www.googleapis.com/auth/spreadsheets"] + cred_path = os.path.join( + os.environ["USERPROFILE"], ".cred/.google/desktop-app.json" + ) + token_path = os.path.join(os.environ["USERPROFILE"], ".cred/.google/token.json") + + creds = Credentials.from_authorized_user_file(token_path, scopes) + if not creds or not creds.valid: + if creds and creds.expired and creds.refresh_token: + creds.refresh(Request()) + else: + flow = InstalledAppFlow.from_client_secrets_file(cred_path, scopes) + creds = flow.run_local_server() + with open(token_path, "w") as token: + token.write(creds.to_json()) + return creds + + +def connect(name: str) -> Client: + """ + Create and return a `Client` object for the specified server using + credentials stored in env vars. + + Args: + name: name of server (eg. EASTVIEW, NSDROP) + + Returns: + a `Client` object for the specified server + """ + client_name = name.upper() + return Client( + name=client_name, + username=os.environ[f"{client_name}_USER"], + password=os.environ[f"{client_name}_PASSWORD"], + host=os.environ[f"{client_name}_HOST"], + port=os.environ[f"{client_name}_PORT"], + ) + + +def create_logger_dict() -> dict: + """Create a dictionary to configure logger.""" + load_creds(os.path.join(os.environ["USERPROFILE"], ".cred/.sftp/connections.yaml")) + loggly_token = os.environ["LOGGLY_TOKEN"] + return { + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "basic": { + "format": "%(app)s-%(asctime)s-%(filename)s-%(lineno)d-%(levelname)s-%(message)s", # noqa: E501 + "defaults": {"app": "vendor_file_cli"}, + }, + "json": { + "format": '{"app": "vendor_file_cli", "ascitime": "%(asctime)s", "fileName": "%(name)s", "lineno":"%(lineno)d", "levelname": "%(levelname)s", "message": "%(message)s"}', # noqa: E501 + }, + }, + "handlers": { + "stream": { + "class": "logging.StreamHandler", + "formatter": "basic", + "level": "DEBUG", + }, + "file": { + "class": "logging.handlers.RotatingFileHandler", + "formatter": "basic", + "level": "DEBUG", + "filename": "vendor_file_cli.log", + "maxBytes": 10 * 1024 * 1024, + "backupCount": 5, + }, + "loggly": { + "class": "loggly.handlers.HTTPSHandler", + "formatter": "json", + "level": "INFO", + "url": f"https://logs-01.loggly.com/inputs/{loggly_token}/tag/python", + }, + }, + "loggers": { + "file_retriever": { + "handlers": ["stream", "file", "loggly"], + "level": "DEBUG", + "propagate": False, + }, + "vendor_file_cli": { + "handlers": ["stream", "file", "loggly"], + "level": "DEBUG", + "propagate": False, + }, + }, + } + + +def get_control_number(record: Record) -> str: + """Get control number from MARC record to add to validation output.""" + field = record.get("001", None) + if field is not None: + control_number = field.data + if control_number is not None: + return control_number + field_subfield_pairs = [ + ("035", "a"), + ("020", "a"), + ("010", "a"), + ("022", "a"), + ("024", "a"), + ("852", "h"), + ] + for f, s in field_subfield_pairs: + while record.get(f, None) is not None: + field = record.get(f, None) + if field is not None: + subfield = field.get(s) + if subfield is not None: + return subfield + return "None" + + +def load_creds(config_path: str) -> None: + """ + Read yaml file with credentials and set as environment variables. + + Args: + config_path: Path to .yaml file with credentials. + + """ + with open(config_path, "r") as file: + config = yaml.safe_load(file) + if config is None: + raise ValueError("No credentials found in config file.") + for k, v in config.items(): + os.environ[k] = str(v) + vendor_list = get_vendor_list() + for vendor in vendor_list: + os.environ[f"{vendor}_DST"] = f"NSDROP/vendor_records/{vendor.lower()}" + + +def get_vendor_list() -> list[str]: + """ + Read environment variables and return a list of vendors whose + credentials have been loaded. + + Returns: + list of vendors (eg. EASTVIEW, LEILA) whose credentials have been loaded. + """ + hosts = [i for i in os.environ.keys() if i.endswith("_HOST")] + return [i.split("_HOST")[0] for i in hosts if "NSDROP" not in i] + + +def read_marc_file_stream(file_obj: File) -> Generator[Record, None, None]: + """Read the records contained within filestream of File object using pymarc""" + fh = file_obj.file_stream.getvalue() + reader = MARCReader(fh) + for record in reader: + yield record + + +def write_data_to_sheet(values: dict) -> Union[dict, None]: + """ + Write output of validation to google sheet. + + Args: + values: dictionary containing validation output for a file. + + Returns: + dictionary containing response from google sheet API. + """ + vendor_code = values["vendor_code"][0] + creds = configure_sheet() + + df = pd.DataFrame( + values, + columns=[ + "validation_date", + "file_name", + "vendor_code", + "record_number", + "control_number", + "valid", + "error_count", + "missing_field_count", + "missing_fields", + "extra_field_count", + "extra_fields", + "invalid_field_count", + "invalid_fields", + "order_item_mismatches", + ], + ) + df.fillna("", inplace=True) + + body = { + "majorDimension": "ROWS", + "range": f"{vendor_code.upper()}!A1:O10000", + "values": df.values.tolist(), + } + try: + service = build("sheets", "v4", credentials=creds) + result = ( + service.spreadsheets() + .values() + .append( + spreadsheetId="1ZYuhMIE1WiduV98Pdzzw7RwZ08O-sJo7HJihWVgSOhQ", + range=f"{vendor_code.upper()}!A1:O10000", + valueInputOption="USER_ENTERED", + insertDataOption="INSERT_ROWS", + body=body, + includeValuesInResponse=True, + ) + .execute() + ) + return result + except (HttpError, TimeoutError) as e: + logger.error(f"Error occurred while sending data to google sheet: {e}") + return None diff --git a/vendor_file_cli/validator.py b/vendor_file_cli/validator.py index 98da70d..293e38d 100644 --- a/vendor_file_cli/validator.py +++ b/vendor_file_cli/validator.py @@ -2,138 +2,180 @@ import datetime import logging import os -from typing import Any, Generator -from google.auth.transport.requests import Request -from google.oauth2.credentials import Credentials -from google_auth_oauthlib.flow import InstalledAppFlow -from googleapiclient.discovery import build -from googleapiclient.errors import HttpError -import pandas as pd +from typing import Any, List, Union from pydantic import ValidationError -from pymarc import MARCReader, Record -from file_retriever.file import File +from pymarc import Record +from file_retriever.file import File, FileInfo +from file_retriever.connect import Client from record_validator.marc_models import RecordModel from record_validator.marc_errors import MarcValidationError +from vendor_file_cli.utils import ( + read_marc_file_stream, + get_control_number, + write_data_to_sheet, +) -logger = logging.getLogger("vendor_file_cli") +logger = logging.getLogger(__name__) -def configure_sheet() -> Credentials: +def get_single_file( + vendor: str, file: FileInfo, vendor_client: Client, nsdrop_client: Client +) -> File: """ - Get or update credentials for google sheets API and save token to file. + Get a file from a vendor server and copy it to the vendor's NSDROP directory. + Validates the file if the vendor is EASTVIEW, LEILA, or AMALIVRE_SASB. Args: + vendor: name of vendor + file: `FileInfo` object representing the file to retrieve + vendor_client: `Client` object for the vendor server + nsdrop_client: `Client` object for the NSDROP server + + Returns: None + """ + if ( + file.file_name.startswith("ADD") or file.file_name.startswith("NEW") + ) and vendor.lower() == "bakertaylor_bpl": + remote_dir = "" + else: + remote_dir = os.environ[f"{vendor.upper()}_SRC"] + nsdrop_dir = os.environ[f"{vendor.upper()}_DST"] + fetched_file = vendor_client.get_file(file=file, remote_dir=remote_dir) + if vendor.upper() in ["EASTVIEW", "LEILA", "AMALIVRE_SASB"]: + logger.debug( + f"({nsdrop_client.name}) Validating {vendor} file: {fetched_file.file_name}" + ) + output = validate_file(file_obj=fetched_file, vendor=vendor) + write_data_to_sheet(output) + nsdrop_client.put_file(file=fetched_file, dir=nsdrop_dir, remote=True, check=True) + return fetched_file + + +def get_vendor_file_list( + vendor: str, + timedelta: datetime.timedelta, + nsdrop_client: Client, + vendor_client: Client, +) -> list[FileInfo]: + """ + Create list of files to retrieve from vendor server. Compares list of files + on vendor server to list of files in vendor's directory on NSDROP. Only + includes files that are not already present in the NSDROP directory. The + list of files is filtered based on the timedelta provided. + + If the vendor is BAKERTAYLOR_NYPL, the root directory of the is also checked + for files that are not in the NSDROP directory. This is because the + BAKERTAYLOR_NYPL server has multiple directories that contain files that + need to be copied to NSDROP. + + If the vendor is MIDWEST_NYPL, the directories are compared using just + the file names and then a list of FileInfo objects is created from the + list of file names. This is due to the fact that there are nearly 10k files + on the MIDWEST_NYPL server. + + Args: + + vendor: name of vendor + timedelta: timedelta object representing the time period to retrieve files from + nsdrop_client: `Client` object for the NSDROP server + vendor_client: `Client` object for the vendor server + Returns: - google.oauth2.credentials.Credentials: Credentials object for google sheet API. + list of `FileInfo` objects representing files to retrieve from the vendor server """ - scopes = ["https://www.googleapis.com/auth/spreadsheets"] - cred_path = os.path.join( - os.environ["USERPROFILE"], ".cred/.google/desktop-app.json" - ) - token_path = os.path.join(os.environ["USERPROFILE"], ".cred/.google/token.json") - - if os.path.exists(token_path): - creds = Credentials.from_authorized_user_file(token_path, scopes) - if not creds or not creds.valid: - if creds and creds.expired and creds.refresh_token: - creds.refresh(Request()) - else: - flow = InstalledAppFlow.from_client_secrets_file(cred_path, scopes) - creds = flow.run_local_server() - with open(token_path, "w") as token: - token.write(creds.to_json()) - return creds - - -def get_control_number(record: Record) -> str: - """Get control number from MARC record to output to google sheet.""" - field = record.get("001", None) - if field is not None: - control_number = field.data - if control_number is not None: - return control_number - field_subfield_pairs = [ - ("035", "a"), - ("020", "a"), - ("010", "a"), - ("022", "a"), - ("024", "a"), - ("852", "h"), + nsdrop_files: Union[List[FileInfo], List[str]] + vendor_files: Union[List[FileInfo], List[str]] + + today = datetime.datetime.now(tz=datetime.timezone.utc) + src_dir = os.environ[f"{vendor.upper()}_SRC"] + dst_dir = os.environ[f"{vendor.upper()}_DST"] + if vendor.lower() == "midwest_nypl": + nsdrop_files = nsdrop_client.list_files(remote_dir=dst_dir) + vendor_files = vendor_client.list_files(remote_dir=src_dir) + + files_to_check = [ + i + for i in vendor_files + if i.endswith(".mrc") + and "ALL" in i + and int(i.split("_ALL")[0][-4:]) >= 2024 + and int(i.split("_ALL")[0][-8:-6]) >= 7 + and i not in nsdrop_files + ] + file_data = [ + vendor_client.get_file_info(file_name=i, remote_dir=src_dir) + for i in files_to_check + ] + else: + nsdrop_files = nsdrop_client.list_file_info(dst_dir) + vendor_files = vendor_client.list_file_info(src_dir) + file_data = [ + i + for i in vendor_files + if i.file_name not in [j.file_name for j in nsdrop_files] + ] + if vendor.lower() == "bakertaylor_bpl": + other_files = vendor_client.list_file_info("") + file_data.extend( + [ + i + for i in other_files + if i.file_name not in [j.file_name for j in nsdrop_files] + ] + ) + files_to_get = [ + i + for i in file_data + if datetime.datetime.fromtimestamp(i.file_mtime, tz=datetime.timezone.utc) + >= today - timedelta ] - for f, s in field_subfield_pairs: - while record.get(f, None) is not None: - field = record.get(f, None) - if field is not None: - subfield = field.get(s) - if subfield is not None: - return subfield - return "None" - - -def map_vendor_to_code(vendor: str) -> str: - """Map vendor name to vendor code for output to google sheet.""" - vendor_map = { - "EASTVIEW": "EVP", - "LEILA": "LEILA", - "AMALIVRE_SASB": "AUXAM", - "AMALIVRE_LPA": "AUXAM", - "AMALIVRE_SCHOMBURG": "AUXAM", - "AMALIVRE_RL": "AUXAM", - } - return vendor_map[vendor.upper()] - - -def read_marc_file_stream(file_obj: File) -> Generator[Record, None, None]: - """Read the filestream within a File object using pymarc""" - fh = file_obj.file_stream.getvalue() - reader = MARCReader(fh) - for record in reader: - yield record + return files_to_get -def send_data_to_sheet(vendor_code: str, values: list, creds: Credentials): +def validate_file(file_obj: File, vendor: str) -> dict: """ - A function to write data to a google sheet for a specific vendor. The function - uses the google sheets API to write data to the sheet. The function takes the - vendor code, the values to write to the sheet, and the credentials for the google - sheet API. + Validate a file of MARC records and output to google sheet. Args: - - vendor_code: the vendor code for the vendor to write data for. - values: a list of values to write to the google sheet. - creds: the credentials for the google sheets API as a `Credentials` object. + file_obj: `File` object representing the file to validate. + vendor: name of vendor to validate file for. + write: whether to write the validation results to the google sheet. Returns: - the response from the google sheets API as a dictionary. + dictionary containing validation output for the file. + """ - body = { - "majorDimension": "ROWS", - "range": f"{vendor_code.upper()}!A1:O10000", - "values": values, - } - try: - service = build("sheets", "v4", credentials=creds) - - result = ( - service.spreadsheets() - .values() - .append( - spreadsheetId="1ZYuhMIE1WiduV98Pdzzw7RwZ08O-sJo7HJihWVgSOhQ", - range=f"{vendor_code.upper()}!A1:O10000", - valueInputOption="USER_ENTERED", - insertDataOption="INSERT_ROWS", - body=body, - includeValuesInResponse=True, - ) - .execute() + if "AMALIVRE" in vendor.upper(): + vendor_code = "AUXAM" + elif "EASTVIEW" in vendor.upper(): + vendor_code = "EVP" + elif "LEILA" in vendor.upper(): + vendor_code = "LEILA" + else: + vendor_code = vendor.upper() + record_count = len([i for i in read_marc_file_stream(file_obj)]) + reader = read_marc_file_stream(file_obj) + record_n = 1 + out_dict = defaultdict(list) + for record in reader: + validation_data = validate_single_record(record) + validation_data.update( + { + "record_number": f"{record_n} of {record_count}", + "control_number": get_control_number(record), + "file_name": file_obj.file_name, + "vendor_code": vendor_code, + "validation_date": datetime.datetime.today().strftime( + "%Y-%m-%d %I:%M:%S" + ), + } ) - return result - except (HttpError, TimeoutError) as e: - logger.error(f"Error occurred while sending data to google sheet: {e}") - return None + for k, v in validation_data.items(): + out_dict[k].append(str(v)) + record_n += 1 + return out_dict def validate_single_record(record: Record) -> dict[str, Any]: @@ -148,19 +190,10 @@ def validate_single_record(record: Record) -> dict[str, Any]: Returns: dictionary with validation output. """ + out: dict[str, Any] try: RecordModel(leader=str(record.leader), fields=record.fields) - out = { - "valid": True, - "error_count": "", - "missing_field_count": "", - "missing_fields": "", - "extra_field_count": "", - "extra_fields": "", - "invalid_field_count": "", - "invalid_fields": "", - "order_item_mismatches": "", - } + out = {"valid": True} except ValidationError as e: out = {"valid": False} marc_errors = MarcValidationError(e.errors()) @@ -173,66 +206,3 @@ def validate_single_record(record: Record) -> dict[str, Any]: } ) return out - - -def validate_file(file_obj: File, vendor: str, write: bool) -> None: - """ - Validate a file of MARC records and output to google sheet. - - Args: - file_obj: `File` object representing the file to validate. - vendor: name of vendor to validate file for. - write: whether to write the validation results to the google sheet. - - Returns: - None - - """ - if vendor.upper() in ["EASTVIEW", "LEILA", "AMALIVRE_SASB"]: - vendor_code = map_vendor_to_code(vendor) - record_count = len([record for record in read_marc_file_stream(file_obj)]) - reader = read_marc_file_stream(file_obj) - record_n = 1 - out_dict = defaultdict(list) - for record in reader: - validation_data = validate_single_record(record) - validation_data.update( - { - "record_number": f"{record_n} of {record_count}", - "control_number": get_control_number(record), - "file_name": file_obj.file_name, - "vendor_code": vendor_code, - "validation_date": datetime.datetime.today().strftime( - "%Y-%m-%d %I:%M:%S" - ), - } - ) - for k, v in validation_data.items(): - out_dict[k].append(str(v)) - record_n += 1 - df = pd.DataFrame( - out_dict, - columns=[ - "validation_date", - "file_name", - "vendor_code", - "record_number", - "control_number", - "valid", - "error_count", - "missing_field_count", - "missing_fields", - "extra_field_count", - "extra_fields", - "invalid_field_count", - "invalid_fields", - "order_item_mismatches", - ], - ) - df.fillna("", inplace=True) - if write is True: - send_data_to_sheet( - vendor_code, - df.values.tolist(), - configure_sheet(), - )