From f3b402b300952b34a02abf4c4c44eb4f3991246e Mon Sep 17 00:00:00 2001 From: Charlotte Kostelic Date: Fri, 8 Nov 2024 12:01:17 -0500 Subject: [PATCH] Google sheet error handling (#13) * simplified and renamed fixtures for clarity * added test flag for all-vendor-files command * refactored code to handle the google sheet errors * added 3.13 to unit-tests.yaml --- .github/workflows/unit-tests.yaml | 2 +- tests/conftest.py | 39 +++++++++++--------- tests/test_cli.py | 12 +++++++ tests/test_utils.py | 59 ++++++++++++++++++++++++++----- tests/test_validator.py | 3 ++ vendor_file_cli/__init__.py | 18 ++++++++-- vendor_file_cli/commands.py | 2 ++ vendor_file_cli/utils.py | 48 ++++++++++++++++--------- vendor_file_cli/validator.py | 10 ++++-- 9 files changed, 144 insertions(+), 49 deletions(-) diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml index 42de978..b32a69c 100644 --- a/.github/workflows/unit-tests.yaml +++ b/.github/workflows/unit-tests.yaml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version}} diff --git a/tests/conftest.py b/tests/conftest.py index fbcfe7d..6fcb937 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -190,21 +190,7 @@ def unset_env_var(monkeypatch, mock_vendor_creds) -> None: @pytest.fixture def cli_runner(monkeypatch, stub_client) -> CliRunner: runner = CliRunner() - - def mock_logging(*args, **kwargs): - logger_dict = {"version": 1, "disable_existing_loggers": False} - str_format = ( - "vendor_file_cli-%(asctime)s-%(filename)s-%(levelname)s-%(message)s" - ) - handler = {"class": "StreamHandler", "formatter": "basic", "level": "DEBUG"} - logger_dict.update({"formatters": {"basic": {"format": str_format}}}) - logger_dict.update({"handlers": {"stream": handler}}) - logger_dict.update({"loggers": {}}) - logger_dict["loggers"] = {"file_retriever": {"handlers": ["stream"]}} - logger_dict["loggers"] = {"vendor_file_cli": {"handlers": ["stream"]}} - return logger_dict - - monkeypatch.setattr("logging.config.dictConfig", mock_logging) + monkeypatch.setattr("logging.config.dictConfig", lambda *args, **kwargs: None) return runner @@ -251,7 +237,7 @@ def build_sheet(*args, **kwargs): @pytest.fixture -def mock_sheet_config_creds_invalid(monkeypatch, mock_sheet_config): +def mock_sheet_config_expired_creds(monkeypatch, mock_sheet_config): monkeypatch.setattr(MockCreds, "valid", False) monkeypatch.setattr(MockCreds, "expired", True) @@ -268,6 +254,16 @@ def mock_sheet_config_no_creds(monkeypatch, mock_sheet_config): ) +@pytest.fixture +def mock_sheet_config_invalid_creds(monkeypatch, mock_sheet_config): + def mock_error(*args, **kwargs): + raise ValueError + + monkeypatch.setattr( + "google.oauth2.credentials.Credentials.from_authorized_user_info", mock_error + ) + + class MockResource: def __init__(self): self.spreadsheetId = "foo" @@ -289,7 +285,16 @@ def values(self, *args, **kwargs): @pytest.fixture def mock_sheet_timeout_error(monkeypatch): def mock_error(*args, **kwargs): - raise TimeoutError("Connection timed out") + raise TimeoutError + + monkeypatch.setattr("googleapiclient.discovery.build", mock_error) + monkeypatch.setattr("googleapiclient.discovery.build_from_document", mock_error) + + +@pytest.fixture +def mock_sheet_auth_error(monkeypatch): + def mock_error(*args, **kwargs): + raise ValueError monkeypatch.setattr("googleapiclient.discovery.build", mock_error) monkeypatch.setattr("googleapiclient.discovery.build_from_document", mock_error) diff --git a/tests/test_cli.py b/tests/test_cli.py index 40c6d05..a63628e 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,3 +1,4 @@ +import logging import os from click.testing import CliRunner import pytest @@ -42,6 +43,17 @@ def test_vendor_file_cli_get_all_vendor_files_no_creds(mocker, cli_runner, caplo ) in caplog.record_tuples +def test_vendor_file_cli_get_all_vendor_files_test(cli_runner, caplog): + logger = logging.getLogger("vendor_file_cli") + loggly = logging.NullHandler() + loggly.name = "loggly" + logger.addHandler(loggly) + result = cli_runner.invoke(cli=vendor_file_cli, args=["all-vendor-files", "--test"]) + assert result.exit_code == 0 + assert "Running in test mode" in caplog.text + assert logger.handlers == [] + + def test_vendor_file_cli_get_available_vendors(cli_runner): result = cli_runner.invoke(cli=vendor_file_cli, args=["available-vendors"]) assert result.exit_code == 0 diff --git a/tests/test_utils.py b/tests/test_utils.py index 8abe598..5c9ef38 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -22,7 +22,7 @@ def test_configure_sheet_success(mock_sheet_config): assert creds.refresh_token is not None -def test_configure_sheet_invalid(mock_sheet_config_creds_invalid): +def test_configure_sheet_expired(mock_sheet_config_expired_creds): creds = configure_sheet() assert creds.token == "foo" assert creds.valid is True @@ -42,6 +42,23 @@ def test_configure_sheet_generate_new_creds(mock_sheet_config_no_creds, caplog): ) +def test_configure_sheet_no_creds(mock_sheet_config_no_creds, caplog): + creds = configure_sheet() + assert creds.token == "foo" + assert creds.valid is True + assert creds.expired is False + assert creds.refresh_token is not None + assert ( + "Token for Google Sheet API not found. Running credential config flow." + in caplog.text + ) + + +def test_configure_sheet_invalid_creds(mock_sheet_config_invalid_creds, caplog): + with pytest.raises(ValueError): + configure_sheet() + + def test_connect(stub_client): client = connect("leila") assert client.name == "LEILA" @@ -153,17 +170,43 @@ def test_read_marc_file_stream(stub_file): assert len(records) == 1 -def test_write_data_to_sheet(mock_sheet_config, caplog): - data = write_data_to_sheet({"file_name": ["foo.mrc"], "vendor_code": ["FOO"]}) +def test_write_data_to_sheet(mock_sheet_config): + data = write_data_to_sheet( + {"file_name": ["foo.mrc"], "vendor_code": ["FOO"]}, test=False + ) keys = data.keys() assert sorted(list(keys)) == sorted(["spreadsheetId", "tableRange"]) - assert "Google sheet API credentials configured." in caplog.text -def test_write_data_to_sheet_error(mock_sheet_config, mock_sheet_timeout_error, caplog): - data = write_data_to_sheet({"file_name": ["foo.mrc"], "vendor_code": ["FOO"]}) +def test_write_data_to_sheet_test(mock_sheet_config): + data = write_data_to_sheet( + {"file_name": ["foo.mrc"], "vendor_code": ["FOO"]}, test=True + ) + keys = data.keys() + assert sorted(list(keys)) == sorted(["spreadsheetId", "tableRange"]) + + +def test_write_data_to_sheet_timeout_error( + mock_sheet_config, mock_sheet_timeout_error, caplog +): + data = write_data_to_sheet( + {"file_name": ["foo.mrc"], "vendor_code": ["FOO"]}, test=False + ) assert data is None + assert "Unable to send data to google sheet:" in caplog.text assert ( - "Error occurred while sending data to google sheet: Connection timed out" - in caplog.text + "(FOO) Validation data not written to google sheet for foo.mrc." in caplog.text + ) + + +def test_write_data_to_sheet_auth_error( + mock_sheet_config, mock_sheet_auth_error, caplog +): + data = write_data_to_sheet( + {"file_name": ["foo.mrc"], "vendor_code": ["FOO"]}, test=False + ) + assert data is None + assert "Unable to configure google sheet API credentials:" in caplog.text + assert ( + "(FOO) Validation data not written to google sheet for foo.mrc." in caplog.text ) diff --git a/tests/test_validator.py b/tests/test_validator.py index 18920bd..f08e493 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -17,6 +17,7 @@ def test_get_single_file_no_validation(stub_client, stub_file_info, vendor, capl file=stub_file_info, vendor_client=vendor_client, nsdrop_client=nsdrop_client, + test=True, ) assert ( f"({vendor.upper()}) Connecting to ftp.{vendor}.com via FTP client" @@ -37,6 +38,7 @@ def test_get_single_file_with_validation(stub_client, stub_file_info, caplog): file=stub_file_info, vendor_client=vendor_client, nsdrop_client=nsdrop_client, + test=True, ) assert "(EASTVIEW) Connecting to ftp.eastview.com via SFTP client" in caplog.text assert "(NSDROP) Connecting to ftp.nsdrop.com via SFTP client" in caplog.text @@ -53,6 +55,7 @@ def test_get_single_file_bakertaylor_bpl_root(stub_client, stub_file_info, caplo file=stub_file_info, vendor_client=vendor_client, nsdrop_client=nsdrop_client, + test=True, ) assert ( "(BAKERTAYLOR_BPL) Connecting to ftp.bakertaylor_bpl.com via FTP client" diff --git a/vendor_file_cli/__init__.py b/vendor_file_cli/__init__.py index 3a5f2b1..3dd0c70 100644 --- a/vendor_file_cli/__init__.py +++ b/vendor_file_cli/__init__.py @@ -25,7 +25,8 @@ def vendor_file_cli() -> None: "all-vendor-files", short_help="Retrieve and validate files that are not in NSDROP.", ) -def get_all_vendor_files() -> None: +@click.option("--test", is_flag=True, help="Run in test mode.") +def get_all_vendor_files(test) -> None: """ Retrieve files from vendor server which were created in last year and are not present in vendor's NSDROP directory. Creates list of files on vendor server @@ -34,15 +35,26 @@ def get_all_vendor_files() -> None: and Amalivre (SASB) before copying them to NSDROP and writes output of validation to google sheet. Files are copied to NSDROP/vendor_records/{vendor_name}. + If test flag is passed, the loggly handler is removed so log messages are only + written to the console and log file. The output of any validation is written + to a test sheet. + Args: - None + test: flag to run in test mode Returns: None """ + if test: + handlers = logger.handlers + for handler in handlers: + if handler.name == "loggly": + logger.removeHandler(handler) + logger.info("Running in test mode.") + vendor_list = get_vendor_list() - get_vendor_files(vendors=vendor_list, days=365) + get_vendor_files(vendors=vendor_list, days=365, test=test) @vendor_file_cli.command("available-vendors", short_help="List all configured vendors.") diff --git a/vendor_file_cli/commands.py b/vendor_file_cli/commands.py index 3f3f3e8..3a226d0 100644 --- a/vendor_file_cli/commands.py +++ b/vendor_file_cli/commands.py @@ -20,6 +20,7 @@ def get_vendor_files( vendors: list[str], days: int = 0, hours: int = 0, + test: bool = False, ) -> None: """ Retrieve files from remote server for vendors in `vendor_list`. Forms timedelta @@ -60,6 +61,7 @@ def get_vendor_files( file=file, vendor_client=vendor_client, nsdrop_client=nsdrop_client, + test=test, ) if len(files) > 0: logger.info( diff --git a/vendor_file_cli/utils.py b/vendor_file_cli/utils.py index dff0856..f2124fe 100644 --- a/vendor_file_cli/utils.py +++ b/vendor_file_cli/utils.py @@ -4,6 +4,7 @@ from typing import Generator, Optional, Union from googleapiclient.discovery import build # type: ignore from googleapiclient.errors import HttpError # type: ignore +from google.auth.exceptions import RefreshError from google.auth.transport.requests import Request from google.oauth2.credentials import Credentials from google_auth_oauthlib.flow import InstalledAppFlow # type: ignore @@ -32,11 +33,11 @@ def configure_sheet() -> Credentials: token_uri = "https://oauth2.googleapis.com/token" creds_dict = { - "token": os.environ["GOOGLE_SHEET_TOKEN"], - "refresh_token": os.environ["GOOGLE_SHEET_REFRESH_TOKEN"], + "token": os.getenv("GOOGLE_SHEET_TOKEN"), + "refresh_token": os.getenv("GOOGLE_SHEET_REFRESH_TOKEN"), "token_uri": token_uri, - "client_id": os.environ["GOOGLE_SHEET_CLIENT_ID"], - "client_secret": os.environ["GOOGLE_SHEET_CLIENT_SECRET"], + "client_id": os.getenv("GOOGLE_SHEET_CLIENT_ID"), + "client_secret": os.getenv("GOOGLE_SHEET_CLIENT_SECRET"), "scopes": scopes, "universe_domain": "googleapis.com", "account": "", @@ -44,8 +45,8 @@ def configure_sheet() -> Credentials: } flow_dict = { "installed": { - "client_id": os.environ["GOOGLE_SHEET_CLIENT_ID"], - "client_secret": os.environ["GOOGLE_SHEET_CLIENT_SECRET"], + "client_id": os.getenv("GOOGLE_SHEET_CLIENT_ID"), + "client_secret": os.getenv("GOOGLE_SHEET_CLIENT_SECRET"), "project_id": "marc-record-validator", "auth_uri": "https://accounts.google.com/o/oauth2/auth", "token_uri": token_uri, @@ -54,17 +55,19 @@ def configure_sheet() -> Credentials: } } - creds = Credentials.from_authorized_user_info(creds_dict) - if not creds or not creds.valid: + try: + creds = Credentials.from_authorized_user_info(creds_dict) if creds and creds.expired and creds.refresh_token: creds.refresh(Request()) - else: + elif not creds or not creds.valid: logger.debug( "Token for Google Sheet API not found. Running credential config flow." ) flow = InstalledAppFlow.from_client_config(flow_dict, scopes) creds = flow.run_local_server() - return creds + return creds + except (ValueError, RefreshError) as e: + raise e def connect(name: str) -> Client: @@ -220,19 +223,19 @@ def read_marc_file_stream(file_obj: File) -> Generator[Record, None, None]: yield record -def write_data_to_sheet(values: dict) -> Union[dict, None]: +def write_data_to_sheet(values: dict, test: bool) -> Union[dict, None]: """ Write output of validation to google sheet. Args: values: dictionary containing validation output for a file. + test: whether to write to test sheet. Returns: dictionary containing response from google sheet API. """ vendor_code = values["vendor_code"][0] - creds = configure_sheet() - logger.debug("Google sheet API credentials configured.") + file_name = values["file_name"][0] df = pd.DataFrame( values, columns=[ @@ -253,19 +256,25 @@ def write_data_to_sheet(values: dict) -> Union[dict, None]: ], ) df.fillna("", inplace=True) - body = { "majorDimension": "ROWS", "range": f"{vendor_code.upper()}!A1:O10000", "values": df.values.tolist(), } + + if test is True: + spreadsheet_id = "1hGzVYaqxXXBSJa3GY52UFKteZgLoWBo6X0sGsTVTpFU" + else: + spreadsheet_id = "1ZYuhMIE1WiduV98Pdzzw7RwZ08O-sJo7HJihWVgSOhQ" + try: + creds = configure_sheet() service = build("sheets", "v4", credentials=creds) result = ( service.spreadsheets() .values() .append( - spreadsheetId="1ZYuhMIE1WiduV98Pdzzw7RwZ08O-sJo7HJihWVgSOhQ", + spreadsheetId=spreadsheet_id, range=f"{vendor_code.upper()}!A1:O10000", valueInputOption="USER_ENTERED", insertDataOption="INSERT_ROWS", @@ -275,6 +284,11 @@ def write_data_to_sheet(values: dict) -> Union[dict, None]: .execute() ) return result + except (ValueError, RefreshError) as e: + logger.error(f"Unable to configure google sheet API credentials: {e}") except (HttpError, TimeoutError) as e: - logger.error(f"Error occurred while sending data to google sheet: {e}") - return None + logger.error(f"Unable to send data to google sheet: {e}") + logger.error( + f"({vendor_code}) Validation data not written to google sheet for {file_name}." + ) + return None diff --git a/vendor_file_cli/validator.py b/vendor_file_cli/validator.py index 293e38d..6c7ad99 100644 --- a/vendor_file_cli/validator.py +++ b/vendor_file_cli/validator.py @@ -19,7 +19,11 @@ def get_single_file( - vendor: str, file: FileInfo, vendor_client: Client, nsdrop_client: Client + vendor: str, + file: FileInfo, + vendor_client: Client, + nsdrop_client: Client, + test: bool, ) -> File: """ Get a file from a vendor server and copy it to the vendor's NSDROP directory. @@ -43,13 +47,13 @@ def get_single_file( remote_dir = os.environ[f"{vendor.upper()}_SRC"] nsdrop_dir = os.environ[f"{vendor.upper()}_DST"] fetched_file = vendor_client.get_file(file=file, remote_dir=remote_dir) + nsdrop_client.put_file(file=fetched_file, dir=nsdrop_dir, remote=True, check=True) if vendor.upper() in ["EASTVIEW", "LEILA", "AMALIVRE_SASB"]: logger.debug( f"({nsdrop_client.name}) Validating {vendor} file: {fetched_file.file_name}" ) output = validate_file(file_obj=fetched_file, vendor=vendor) - write_data_to_sheet(output) - nsdrop_client.put_file(file=fetched_file, dir=nsdrop_dir, remote=True, check=True) + write_data_to_sheet(output, test=test) return fetched_file