From ea4ca209ff33ea6595805d448f4e9a526e28fb49 Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Thu, 23 May 2024 08:21:25 -0600 Subject: [PATCH 01/19] chore: black/ruff --- pylintrc | 6 ---- pyproject.toml | 11 ++++++++ setup.cfg | 19 ------------- setup.py | 74 +++++++++++++++++++++++++------------------------- 4 files changed, 48 insertions(+), 62 deletions(-) delete mode 100644 pylintrc create mode 100644 pyproject.toml delete mode 100644 setup.cfg diff --git a/pylintrc b/pylintrc deleted file mode 100644 index d2b7bad..0000000 --- a/pylintrc +++ /dev/null @@ -1,6 +0,0 @@ -[MASTER] -load-plugins=pylint_quotes -max-line-length=120 -disable=broad-except -ignore-patterns=test_.*?py -generated-members=arcpy.da.SearchCursor,arcpy.da.UpdateCursor,arcpy.da.InsertCursor,arcpy.da.Describe,arcpy.env.scratchFolder,arcpy.env.scratchGDB diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..55e6d84 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,11 @@ +[tool.ruff] +line-length = 120 +ignore = ["E501"] +[tool.black] +line-length = 120 +[tool.pytest.ini_options] +minversion = "6.0" +testpaths = [ "tests", "src" ] +norecursedirs = [".env", "data", "maps", ".github", ".vscode"] +console_output_style = "count" +addopts = "--ruff --cov-branch --cov=projectname --cov-report term --cov-report xml:cov.xml --instafail" diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 963546f..0000000 --- a/setup.cfg +++ /dev/null @@ -1,19 +0,0 @@ -[yapf] -based_on_style=google -ALLOW_SPLIT_BEFORE_DICT_VALUE=False -COLUMN_LIMIT=120 -COALESCE_BRACKETS=True -DEDENT_CLOSING_BRACKETS=True -EACH_DICT_ENTRY_ON_SEPARATE_LINE=True -INDENT_DICTIONARY_VALUE=False -SPLIT_BEFORE_DOT=True -[tool:isort] -line_length=120 -multi_line_output=5 -[tool:pytest] -testpaths = tests src -norecursedirs = .env data maps -show_capture = True -minversion = 3.5 -console_output_style = count -addopts = --cov-branch --cov=wmrc --cov-report term --cov-report xml:cov.xml --instafail diff --git a/setup.py b/setup.py index 3f625d3..cc7928c 100644 --- a/setup.py +++ b/setup.py @@ -4,62 +4,62 @@ setup.py A module that installs the wmrc skid as a module """ -from glob import glob -from os.path import basename, splitext +from pathlib import Path from setuptools import find_packages, setup #: Load version from source file version = {} -with open('src/wmrc/version.py') as fp: - exec(fp.read(), version) +version_file = Path(__file__).parent / "src" / "skidname" / "version.py" +exec(version_file.read_text(), version) + setup( - name='wmrc-skid', - version=version['__version__'], - license='MIT', - description='Update the wmrc data from Google Sheets via GCF', - author='Jacob Adams', - author_email='jdadms@utah.gov', - url='https://github.com/agrc/wmrc-skid', - packages=find_packages('src'), - package_dir={'': 'src'}, - py_modules=[splitext(basename(path))[0] for path in glob('src/*.py')], + name="wmrc-skid", + version=version["__version__"], + license="MIT", + long_description=(Path(__file__).parent / "README.md").read_text(), + long_description_content_type="text/markdown", + author="Jacob Adams", + author_email="jdadms@utah.gov", + url="https://github.com/agrc/wmrc-skid", + packages=find_packages("src"), + package_dir={"": "src"}, include_package_data=True, zip_safe=True, classifiers=[ # complete classifier list: http://pypi.python.org/pypi?%3Aaction=list_classifiers - 'Development Status :: 5 - Production/Stable', - 'Intended Audience :: Developers', - 'Topic :: Utilities', + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Topic :: Utilities", ], project_urls={ - 'Issue Tracker': 'https://github.com/agrc/wmrc-skid/issues', + "Issue Tracker": "https://github.com/agrc/wmrc-skid/issues", }, - keywords=['gis'], + keywords=["gis"], install_requires=[ - 'ugrc-palletjack>=4.1,<4.4', - 'agrc-supervisor==3.0.*', + "ugrc-palletjack>=4.1,<4.4", + "agrc-supervisor==3.0.*", ], extras_require={ - 'tests': [ - 'pylint-quotes~=0.2', - 'pylint>=2.15,<4.0', - 'pytest-cov>=4,<6', - 'pytest-instafail~=0.4', - 'pytest-isort>=3.1,<5.0', - 'pytest-pylint~=0.19', - 'pytest-watch~=4.2', - 'pytest>=7.2,<9.0', - 'yapf~=0.32', - 'pytest-mock>=3.10,<3.15', - 'functions-framework~=3.3', + "tests": [ + "pytest-cov>=3,<5", + "pytest-instafail==0.5.*", + "pytest-mock==3.*", + "pytest-ruff==0.*", + "pytest-watch==4.*", + "pytest>=6,<8", + "black>=23.3,<23.12", + "ruff==0.0.*", + "functions-framework==3.4.*", ] }, setup_requires=[ - 'pytest-runner', + "pytest-runner", ], - entry_points={'console_scripts': [ - 'wmrc = wmrc.main:process', - ]}, + entry_points={ + "console_scripts": [ + "wmrc = wmrc.main:process", + ] + }, ) From ef47a8350d441fc00cecac2685b70fe140b51bf3 Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Thu, 23 May 2024 08:21:26 -0600 Subject: [PATCH 02/19] chore: more black/ruff/deps --- .github/workflows/push.yml | 61 +++++++++++++++++++++----------------- .vscode/extensions.json | 17 +++++++++++ .vscode/settings.json | 26 ++++++++++++++-- setup.py | 2 +- src/wmrc/requirements.txt | 2 +- 5 files changed, 76 insertions(+), 32 deletions(-) create mode 100644 .vscode/extensions.json diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index aa43f59..285596e 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -10,6 +10,12 @@ on: - main - dev +env: + CLOUD_FUNCTION_MEMORY: 512MB + CLOUD_FUCNTION_RUN_TIMEOUT: 9m + SCHEDULE_NAME: first-of-the-month + SCHEDULE_CRON: 0 9 1 * * + concurrency: group: "${{ github.head_ref || github.ref }}" cancel-in-progress: true @@ -23,14 +29,13 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.11" - # cache: pip - # cache-dependency-path: setup.py + python-version: 3.11 + cache: pip + cache-dependency-path: setup.py - name: Install libkrb5 for Kerberos on Linux run: | sudo apt install -y libkrb5-dev - pip install requests-kerberos - name: Checkout code uses: actions/checkout@v4 @@ -79,38 +84,38 @@ jobs: source_dir: src/wmrc service_account_email: cloud-function-sa@${{ secrets.PROJECT_ID }}.iam.gserviceaccount.com event_trigger_type: providers/cloud.pubsub/eventTypes/topic.publish - event_trigger_resource: projects/${{ secrets.PROJECT_ID }}/topics/monday-morning-topic + event_trigger_resource: projects/${{ secrets.PROJECT_ID }}/topics/$SCHEDULE_NAME-topic deploy_timeout: 600 - memory_mb: 512 - timeout: "9m" + memory_mb: $CLOUD_FUNCTION_MEMORY + timeout: $CLOUD_FUNCTION_RUN_TIMEOUT env_vars: STORAGE_BUCKET=${{secrets.STORAGE_BUCKET}} secret_volumes: | /secrets/app/secrets.json=${{secrets.PROJECT_ID}}/skid-secrets - name: 📥 Create PubSub topic run: | - if [ ! "$(gcloud pubsub topics list | grep monday-morning-topic)" ]; then - gcloud pubsub topics create monday-morning-topic --quiet + if [ ! "$(gcloud pubsub topics list | grep $SCHEDULE_NAME-topic)" ]; then + gcloud pubsub topics create $SCHEDULE_NAME-topic --quiet fi - name: 🕰️ Create Cloud Scheduler run: | - if [ ! "$(gcloud scheduler jobs list --location=us-central1 | grep monday-morning)" ]; then - gcloud scheduler jobs create pubsub monday-morning \ + if [ ! "$(gcloud scheduler jobs list --location=us-central1 | grep $SCHEDULE_NAME)" ]; then + gcloud scheduler jobs create pubsub $SCHEDULE_NAME \ --description="Trigger the wmrc-skid bot once a week on monday morning" \ - --schedule="0 9 * * 1" \ + --schedule="$SCHEDULE_CRON" \ --time-zone=America/Denver \ --location=us-central1 \ - --topic=monday-morning-topic \ + --topic=$SCHEDULE_NAME-topic \ --message-body='{"run": "now"}' \ --quiet else - gcloud scheduler jobs update pubsub monday-morning \ + gcloud scheduler jobs update pubsub $SCHEDULE_NAME \ --description="Trigger the wmrc-skid bot once a week on monday morning" \ - --schedule="0 9 * * 1" \ + --schedule="$SCHEDULE_CRON" \ --time-zone=America/Denver \ --location=us-central1 \ - --topic=monday-morning-topic \ + --topic=$SCHEDULE_NAME-topic \ --message-body='{"run": "now"}' \ --quiet fi @@ -151,38 +156,38 @@ jobs: source_dir: src/wmrc service_account_email: cloud-function-sa@${{ secrets.PROJECT_ID }}.iam.gserviceaccount.com event_trigger_type: providers/cloud.pubsub/eventTypes/topic.publish - event_trigger_resource: projects/${{ secrets.PROJECT_ID }}/topics/first-of-the-month-topic + event_trigger_resource: projects/${{ secrets.PROJECT_ID }}/topics/$SCHEDULE_NAME-topic deploy_timeout: 600 - memory_mb: 512 - timeout: "9m" + memory_mb: $CLOUD_FUNCTION_MEMORY + timeout: $CLOUD_FUNCTION_RUN_TIMEOUT env_vars: STORAGE_BUCKET=${{secrets.STORAGE_BUCKET}} secret_volumes: | /secrets/app/secrets.json=${{secrets.PROJECT_ID}}/skid-secrets - name: 📥 Create PubSub topic run: | - if [ ! "$(gcloud pubsub topics list | grep first-of-the-month-topic)" ]; then - gcloud pubsub topics create first-of-the-month-topic --quiet + if [ ! "$(gcloud pubsub topics list | grep $SCHEDULE_NAME-topic)" ]; then + gcloud pubsub topics create $SCHEDULE_NAME-topic --quiet fi - name: 🕰️ Create Cloud Scheduler run: | - if [ ! "$(gcloud scheduler jobs list --location=us-central1 | grep first-of-the-month)" ]; then - gcloud scheduler jobs create pubsub first-of-the-month \ + if [ ! "$(gcloud scheduler jobs list --location=us-central1 | grep $SCHEDULE_NAME)" ]; then + gcloud scheduler jobs create pubsub $SCHEDULE_NAME \ --description="Trigger the wmrc-skid bot on the first day of the month" \ - --schedule="0 9 1 * *" \ + --schedule="$SCHEDULE_CRON" \ --time-zone=America/Denver \ --location=us-central1 \ - --topic=first-of-the-month-topic \ + --topic=$SCHEDULE_NAME-topic \ --message-body='{"run": "now"}' \ --quiet else - gcloud scheduler jobs update pubsub first-of-the-month \ + gcloud scheduler jobs update pubsub $SCHEDULE_NAME \ --description="Trigger the wmrc-skid bot on the first day of the month" \ - --schedule="0 9 1 * *" \ + --schedule="$SCHEDULE_CRON" \ --time-zone=America/Denver \ --location=us-central1 \ - --topic=first-of-the-month-topic \ + --topic=$SCHEDULE_NAME-topic \ --message-body='{"run": "now"}' \ --quiet fi diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..e984426 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,17 @@ +{ + // See https://go.microsoft.com/fwlink/?LinkId=827846 to learn about workspace recommendations. + // Extension identifier format: ${publisher}.${name}. Example: vscode.csharp + // List of extensions which should be recommended for users of this workspace. + "recommendations": [ + "editorconfig.editorconfig", + "njpwerner.autodocstring", + "ms-python.black-formatter", + "ms-python.vscode-pylance", + "ms-python.python", + "donjayamanne.python-environment-manager", + "charliermarsh.ruff", + "tamasfe.even-better-toml" + ], + // List of extensions recommended by VS Code that should not be recommended for users of this workspace. + "unwantedRecommendations": [] +} diff --git a/.vscode/settings.json b/.vscode/settings.json index 11f28e8..af7aea1 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -12,5 +12,27 @@ "ugrc", "wkid", "wmrc" - ] -} + ], + "editor.formatOnSave": true, + "editor.rulers": [ + 120 + ], + "coverage-gutters.showGutterCoverage": false, + "coverage-gutters.showLineCoverage": true, + "coverage-gutters.showRulerCoverage": false, + "coverage-gutters.highlightdark": "rgb(61, 153, 112, .05)", + "coverage-gutters.noHighlightDark": "rgb(255, 65, 54, .05)", + "coverage-gutters.partialHighlightDark": "rgb(255, 133, 27, .05)", + "python.languageServer": "Pylance", + "python.testing.pytestEnabled": true, + "python.testing.pytestArgs": [ + "--no-cov" + ], + "editor.codeActionsOnSave": { + "source.organizeImports": "explicit" + }, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true + } +} \ No newline at end of file diff --git a/setup.py b/setup.py index cc7928c..7c5c997 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ #: Load version from source file version = {} -version_file = Path(__file__).parent / "src" / "skidname" / "version.py" +version_file = Path(__file__).parent / "src" / "wmrc" / "version.py" exec(version_file.read_text(), version) diff --git a/src/wmrc/requirements.txt b/src/wmrc/requirements.txt index 1014d7b..08900e5 100644 --- a/src/wmrc/requirements.txt +++ b/src/wmrc/requirements.txt @@ -1,3 +1,3 @@ agrc-supervisor==3.0.* -ugrc-palletjack==4.2.* +ugrc-palletjack==4.3.* google-cloud-storage==2.3.* From e9628add2e5427f53915145101673a41835c4b9d Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Thu, 23 May 2024 08:21:26 -0600 Subject: [PATCH 03/19] fix: cov name --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 55e6d84..f5040b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ ignore = ["E501"] line-length = 120 [tool.pytest.ini_options] minversion = "6.0" -testpaths = [ "tests", "src" ] +testpaths = ["tests", "src"] norecursedirs = [".env", "data", "maps", ".github", ".vscode"] console_output_style = "count" -addopts = "--ruff --cov-branch --cov=projectname --cov-report term --cov-report xml:cov.xml --instafail" +addopts = "--ruff --cov-branch --cov=wmrc --cov-report term --cov-report xml:cov.xml --instafail" From 56c9eeda1648da4eebd94aae0d8ced1b8620ff1b Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Thu, 23 May 2024 08:21:26 -0600 Subject: [PATCH 04/19] fix: ordering --- .github/workflows/push.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 285596e..0ed7347 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -26,6 +26,11 @@ jobs: runs-on: ubuntu-latest steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + show-progress: false + - name: Set up Python uses: actions/setup-python@v5 with: @@ -37,11 +42,6 @@ jobs: run: | sudo apt install -y libkrb5-dev - - name: Checkout code - uses: actions/checkout@v4 - with: - show-progress: false - - name: Install module run: pip install .[tests] From 13fc2ee223ad6ef616c17de47f28e76cb490f3f5 Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Thu, 23 May 2024 08:21:26 -0600 Subject: [PATCH 05/19] chore: make ruff happy --- src/wmrc/main.py | 135 +++++++++++++++++++++++------------------------ 1 file changed, 67 insertions(+), 68 deletions(-) diff --git a/src/wmrc/main.py b/src/wmrc/main.py index 431fa6a..5b4592a 100644 --- a/src/wmrc/main.py +++ b/src/wmrc/main.py @@ -4,10 +4,7 @@ Run the wmrc script as a cloud function. """ import json -import locale import logging -import os -import pprint import sys from datetime import date, datetime from pathlib import Path @@ -17,8 +14,8 @@ import arcgis import google.auth import pandas as pd -from arcgis.features import GeoAccessor, GeoSeriesAccessor -from palletjack import extract, transform, load +from arcgis.features import GeoAccessor, GeoSeriesAccessor # noqa: F401 +from palletjack import extract, load, transform from supervisor.message_handlers import SendGridHandler from supervisor.models import MessageDetails, Supervisor @@ -41,21 +38,21 @@ def _get_secrets(): dict: The secrets .json loaded as a dictionary """ - secret_folder = Path('/secrets') + secret_folder = Path("/secrets") #: Try to get the secrets from the Cloud Function mount point if secret_folder.exists(): - secrets_dict = json.loads(Path('/secrets/app/secrets.json').read_text(encoding='utf-8')) + secrets_dict = json.loads(Path("/secrets/app/secrets.json").read_text(encoding="utf-8")) credentials, _ = google.auth.default() - secrets_dict['SERVICE_ACCOUNT_JSON'] = credentials + secrets_dict["SERVICE_ACCOUNT_JSON"] = credentials return secrets_dict #: Otherwise, try to load a local copy for local development - secret_folder = (Path(__file__).parent / 'secrets') + secret_folder = Path(__file__).parent / "secrets" if secret_folder.exists(): - return json.loads((secret_folder / 'secrets.json').read_text(encoding='utf-8')) + return json.loads((secret_folder / "secrets.json").read_text(encoding="utf-8")) - raise FileNotFoundError('Secrets folder not found; secrets not loaded.') + raise FileNotFoundError("Secrets folder not found; secrets not loaded.") def _initialize(log_path, sendgrid_api_key): @@ -71,17 +68,17 @@ def _initialize(log_path, sendgrid_api_key): skid_logger = logging.getLogger(config.SKID_NAME) skid_logger.setLevel(config.LOG_LEVEL) - palletjack_logger = logging.getLogger('palletjack') + palletjack_logger = logging.getLogger("palletjack") palletjack_logger.setLevel(config.LOG_LEVEL) cli_handler = logging.StreamHandler(sys.stdout) cli_handler.setLevel(config.LOG_LEVEL) formatter = logging.Formatter( - fmt='%(levelname)-7s %(asctime)s %(name)15s:%(lineno)5s %(message)s', datefmt='%Y-%m-%d %H:%M:%S' + fmt="%(levelname)-7s %(asctime)s %(name)15s:%(lineno)5s %(message)s", datefmt="%Y-%m-%d %H:%M:%S" ) cli_handler.setFormatter(formatter) - log_handler = logging.FileHandler(log_path, mode='w') + log_handler = logging.FileHandler(log_path, mode="w") log_handler.setLevel(config.LOG_LEVEL) log_handler.setFormatter(formatter) @@ -95,10 +92,10 @@ def _initialize(log_path, sendgrid_api_key): #: (all log messages were duplicated if put at beginning) logging.captureWarnings(True) - skid_logger.debug('Creating Supervisor object') + skid_logger.debug("Creating Supervisor object") skid_supervisor = Supervisor(handle_errors=False) sendgrid_settings = config.SENDGRID_SETTINGS - sendgrid_settings['api_key'] = sendgrid_api_key + sendgrid_settings["api_key"] = sendgrid_api_key skid_supervisor.add_message_handler( SendGridHandler( sendgrid_settings=sendgrid_settings, client_name=config.SKID_NAME, client_version=version.__version__ @@ -122,13 +119,12 @@ def _remove_log_file_handlers(log_name, loggers): if log_name in handler.stream.name: logger.removeHandler(handler) handler.close() - except Exception as error: + except Exception: pass def process(): - """The main function that does all the work. - """ + """The main function that does all the work.""" #: Set up secrets, tempdir, supervisor, and logging start = datetime.now() @@ -151,113 +147,116 @@ def process(): gis = arcgis.gis.GIS(config.AGOL_ORG, secrets.AGOL_USER, secrets.AGOL_PASSWORD) #: Do the work - module_logger.info('Loading data from Google Sheets...') + module_logger.info("Loading data from Google Sheets...") combined_df = _parse_from_google_sheets(secrets) - module_logger.info('Adding county names from SGID county boundaries...') + module_logger.info("Adding county names from SGID county boundaries...") with_counties_df = _get_county_names(combined_df, gis) - module_logger.info('Preparing data for truncate and load...') + module_logger.info("Preparing data for truncate and load...") proj_df = with_counties_df.copy() proj_df.spatial.project(4326) - proj_df.spatial.set_geometry('SHAPE') - proj_df.spatial.sr = {'wkid': 4326} - proj_df['last_updated'] = date.today() - proj_df = transform.DataCleaning.switch_to_datetime(proj_df, ['last_updated']) + proj_df.spatial.set_geometry("SHAPE") + proj_df.spatial.sr = {"wkid": 4326} + proj_df["last_updated"] = date.today() + proj_df = transform.DataCleaning.switch_to_datetime(proj_df, ["last_updated"]) proj_df = transform.DataCleaning.switch_to_float( - proj_df, [ - 'latitude', - 'longitude', - 'tons_of_material_diverted_from_', - 'gallons_of_used_oil_collected_for_recycling_last_year', - ] + proj_df, + [ + "latitude", + "longitude", + "tons_of_material_diverted_from_", + "gallons_of_used_oil_collected_for_recycling_last_year", + ], ) - module_logger.info('Truncating and loading...') + module_logger.info("Truncating and loading...") updater = load.FeatureServiceUpdater(gis, config.FEATURE_LAYER_ITEMID, tempdir) load_count = updater.truncate_and_load_features(proj_df) end = datetime.now() summary_message = MessageDetails() - summary_message.subject = f'{config.SKID_NAME} Update Summary' + summary_message.subject = f"{config.SKID_NAME} Update Summary" summary_rows = [ f'{config.SKID_NAME} update {start.strftime("%Y-%m-%d")}', - '=' * 20, - '', + "=" * 20, + "", f'Start time: {start.strftime("%H:%M:%S")}', f'End time: {end.strftime("%H:%M:%S")}', - f'Duration: {str(end-start)}', - '', - f'Rows loaded: {load_count}', + f"Duration: {str(end-start)}", + "", + f"Rows loaded: {load_count}", ] - summary_message.message = '\n'.join(summary_rows) + summary_message.message = "\n".join(summary_rows) summary_message.attachments = tempdir_path / log_name skid_supervisor.notify(summary_message) #: Remove file handler so the tempdir will close properly - loggers = [logging.getLogger(config.SKID_NAME), logging.getLogger('palletjack')] + loggers = [logging.getLogger(config.SKID_NAME), logging.getLogger("palletjack")] _remove_log_file_handlers(log_name, loggers) def _parse_from_google_sheets(secrets): - #: Get individual sheets gsheet_extractor = extract.GSheetLoader(secrets.SERVICE_ACCOUNT_JSON) - sw_df = gsheet_extractor.load_specific_worksheet_into_dataframe(secrets.SHEET_ID, 'SW Facilities', by_title=True) - uocc_df = gsheet_extractor.load_specific_worksheet_into_dataframe(secrets.SHEET_ID, 'UOCCs', by_title=True) + sw_df = gsheet_extractor.load_specific_worksheet_into_dataframe(secrets.SHEET_ID, "SW Facilities", by_title=True) + uocc_df = gsheet_extractor.load_specific_worksheet_into_dataframe(secrets.SHEET_ID, "UOCCs", by_title=True) #: Fix columns - sw_df.drop(columns=[''], inplace=True) #: Drop empty columns that don't have a name + sw_df.drop(columns=[""], inplace=True) #: Drop empty columns that don't have a name sw_df.rename( - columns={'Accept Material\n Dropped \n Off by the Public': 'Accept Material Dropped Off by the Public'}, - inplace=True + columns={"Accept Material\n Dropped \n Off by the Public": "Accept Material Dropped Off by the Public"}, + inplace=True, ) uocc_df.rename( columns={ - 'Type': 'Class', - 'Accept Material\n Dropped \n Off by the Public': 'Accept Material Dropped Off by the Public' + "Type": "Class", + "Accept Material\n Dropped \n Off by the Public": "Accept Material Dropped Off by the Public", }, - inplace=True + inplace=True, ) combined_df = pd.concat([sw_df, uocc_df]).query('Status in ["Open", "OPEN"]') - renamed_df = transform.DataCleaning.rename_dataframe_columns_for_agol(combined_df).rename(columns=str.lower).rename( - columns={ - 'longitude_': 'longitude', - 'accept_material_dropped_off_by_the_public': 'accept_material_dropped_off_by_', - 'tons_of_material_diverted_from_landfills_last_year': 'tons_of_material_diverted_from_' - } + renamed_df = ( + transform.DataCleaning.rename_dataframe_columns_for_agol(combined_df) + .rename(columns=str.lower) + .rename( + columns={ + "longitude_": "longitude", + "accept_material_dropped_off_by_the_public": "accept_material_dropped_off_by_", + "tons_of_material_diverted_from_landfills_last_year": "tons_of_material_diverted_from_", + } + ) ) return renamed_df def _get_county_names(input_df, gis): - #: Load counties from open data feature service counties_df = pd.DataFrame.spatial.from_layer( arcgis.features.FeatureLayer.fromitem(gis.content.get(config.COUNTIES_ITEMID)) ) counties_df.spatial.project(26912) counties_df.reset_index(inplace=True) - counties_df = counties_df.reindex(columns=['SHAPE', 'NAME']) #: We only care about the county name - counties_df.spatial.set_geometry('SHAPE') - counties_df.spatial.sr = {'wkid': 26912} + counties_df = counties_df.reindex(columns=["SHAPE", "NAME"]) #: We only care about the county name + counties_df.spatial.set_geometry("SHAPE") + counties_df.spatial.sr = {"wkid": 26912} #: Convert dataframe to spatial - spatial_df = pd.DataFrame.spatial.from_xy(input_df, x_column='longitude', y_column='latitude') + spatial_df = pd.DataFrame.spatial.from_xy(input_df, x_column="longitude", y_column="latitude") spatial_df.reset_index(drop=True, inplace=True) spatial_df.spatial.project(26912) - spatial_df.spatial.set_geometry('SHAPE') - spatial_df.spatial.sr = {'wkid': 26912} + spatial_df.spatial.set_geometry("SHAPE") + spatial_df.spatial.sr = {"wkid": 26912} #: Perform the join, clean up the output - joined_points_df = spatial_df.spatial.join(counties_df, 'left', 'within') - joined_points_df.drop(columns=['index_right'], inplace=True) - joined_points_df.rename(columns={'NAME': 'county_name'}, inplace=True) - joined_points_df['county_name'] = joined_points_df['county_name'].str.title() + joined_points_df = spatial_df.spatial.join(counties_df, "left", "within") + joined_points_df.drop(columns=["index_right"], inplace=True) + joined_points_df.rename(columns={"NAME": "county_name"}, inplace=True) + joined_points_df["county_name"] = joined_points_df["county_name"].str.title() return joined_points_df @@ -293,5 +292,5 @@ def main(event, context): # pylint: disable=unused-argument #: Putting this here means you can call the file via `python main.py` and it will run. Useful for pre-GCF testing. -if __name__ == '__main__': +if __name__ == "__main__": process() From b95a20f5c8225342b053bf6d9f3c1f53890f4b00 Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Thu, 23 May 2024 08:21:26 -0600 Subject: [PATCH 06/19] ci: fix memory specification --- .github/workflows/push.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 0ed7347..0fe5a12 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -11,7 +11,7 @@ on: - dev env: - CLOUD_FUNCTION_MEMORY: 512MB + CLOUD_FUNCTION_MEMORY: 512 CLOUD_FUCNTION_RUN_TIMEOUT: 9m SCHEDULE_NAME: first-of-the-month SCHEDULE_CRON: 0 9 1 * * From 25759b3aa5ebe88903dbaac370a4d3bcdc8f8d3f Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Thu, 23 May 2024 08:21:26 -0600 Subject: [PATCH 07/19] ci: use context vars instead of runner vars --- .github/workflows/push.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 0fe5a12..4c076b3 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -12,7 +12,7 @@ on: env: CLOUD_FUNCTION_MEMORY: 512 - CLOUD_FUCNTION_RUN_TIMEOUT: 9m + CLOUD_FUNCTION_RUN_TIMEOUT: 9m SCHEDULE_NAME: first-of-the-month SCHEDULE_CRON: 0 9 1 * * @@ -84,10 +84,10 @@ jobs: source_dir: src/wmrc service_account_email: cloud-function-sa@${{ secrets.PROJECT_ID }}.iam.gserviceaccount.com event_trigger_type: providers/cloud.pubsub/eventTypes/topic.publish - event_trigger_resource: projects/${{ secrets.PROJECT_ID }}/topics/$SCHEDULE_NAME-topic + event_trigger_resource: projects/${{ secrets.PROJECT_ID }}/topics/${{ env.SCHEDULE_NAME }}-topic deploy_timeout: 600 - memory_mb: $CLOUD_FUNCTION_MEMORY - timeout: $CLOUD_FUNCTION_RUN_TIMEOUT + memory_mb: ${{ env.CLOUD_FUNCTION_MEMORY }} + timeout: ${{ env.CLOUD_FUNCTION_RUN_TIMEOUT }} env_vars: STORAGE_BUCKET=${{secrets.STORAGE_BUCKET}} secret_volumes: | /secrets/app/secrets.json=${{secrets.PROJECT_ID}}/skid-secrets @@ -156,10 +156,10 @@ jobs: source_dir: src/wmrc service_account_email: cloud-function-sa@${{ secrets.PROJECT_ID }}.iam.gserviceaccount.com event_trigger_type: providers/cloud.pubsub/eventTypes/topic.publish - event_trigger_resource: projects/${{ secrets.PROJECT_ID }}/topics/$SCHEDULE_NAME-topic + event_trigger_resource: projects/${{ secrets.PROJECT_ID }}/topics/${{ env.SCHEDULE_NAME }}-topic deploy_timeout: 600 - memory_mb: $CLOUD_FUNCTION_MEMORY - timeout: $CLOUD_FUNCTION_RUN_TIMEOUT + memory_mb: ${{ env.CLOUD_FUNCTION_MEMORY }} + timeout: ${{ env.CLOUD_FUNCTION_RUN_TIMEOUT }} env_vars: STORAGE_BUCKET=${{secrets.STORAGE_BUCKET}} secret_volumes: | /secrets/app/secrets.json=${{secrets.PROJECT_ID}}/skid-secrets From 5c0cfee4061340bd972cd3b72a35fb28ac9fa9a0 Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Thu, 23 May 2024 08:21:26 -0600 Subject: [PATCH 08/19] ci: schedule description --- .github/workflows/push.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 4c076b3..eb7429a 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -15,6 +15,7 @@ env: CLOUD_FUNCTION_RUN_TIMEOUT: 9m SCHEDULE_NAME: first-of-the-month SCHEDULE_CRON: 0 9 1 * * + SCHEDULE_DESCRIPTION: Trigger the wmrc-skid bot at the beginning of the month concurrency: group: "${{ github.head_ref || github.ref }}" @@ -102,7 +103,7 @@ jobs: run: | if [ ! "$(gcloud scheduler jobs list --location=us-central1 | grep $SCHEDULE_NAME)" ]; then gcloud scheduler jobs create pubsub $SCHEDULE_NAME \ - --description="Trigger the wmrc-skid bot once a week on monday morning" \ + --description="$SCHEDULE_DESCRIPTION" \ --schedule="$SCHEDULE_CRON" \ --time-zone=America/Denver \ --location=us-central1 \ @@ -111,7 +112,7 @@ jobs: --quiet else gcloud scheduler jobs update pubsub $SCHEDULE_NAME \ - --description="Trigger the wmrc-skid bot once a week on monday morning" \ + --description="$SCHEDULE_DESCRIPTION" \ --schedule="$SCHEDULE_CRON" \ --time-zone=America/Denver \ --location=us-central1 \ @@ -174,7 +175,7 @@ jobs: run: | if [ ! "$(gcloud scheduler jobs list --location=us-central1 | grep $SCHEDULE_NAME)" ]; then gcloud scheduler jobs create pubsub $SCHEDULE_NAME \ - --description="Trigger the wmrc-skid bot on the first day of the month" \ + --description="$SCHEDULE_DESCRIPTION" \ --schedule="$SCHEDULE_CRON" \ --time-zone=America/Denver \ --location=us-central1 \ @@ -183,7 +184,7 @@ jobs: --quiet else gcloud scheduler jobs update pubsub $SCHEDULE_NAME \ - --description="Trigger the wmrc-skid bot on the first day of the month" \ + --description="$SCHEDULE_DESCRIPTION" \ --schedule="$SCHEDULE_CRON" \ --time-zone=America/Denver \ --location=us-central1 \ From 516884f486a6e3e657c26a85e9f0c92e831bf1ac Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Thu, 23 May 2024 08:21:26 -0600 Subject: [PATCH 09/19] chore: double qoutes --- src/wmrc/config.py | 22 +++++++++++----------- src/wmrc/version.py | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/wmrc/config.py b/src/wmrc/config.py index a3138c4..2a8bf69 100644 --- a/src/wmrc/config.py +++ b/src/wmrc/config.py @@ -6,12 +6,12 @@ import socket import urllib.request -SKID_NAME = 'wmrc' +SKID_NAME = "wmrc" #: Try to get project id from GCP metadata server for hostname. If it's empty or errors out, revert to local hostname try: - url = 'http://metadata.google.internal/computeMetadata/v1/project/project-id' + url = "http://metadata.google.internal/computeMetadata/v1/project/project-id" req = urllib.request.Request(url) - req.add_header('Metadata-Flavor', 'Google') + req.add_header("Metadata-Flavor", "Google") project_id = urllib.request.urlopen(req).read().decode() if not project_id: raise ValueError @@ -19,17 +19,17 @@ except Exception: HOST_NAME = socket.gethostname() -AGOL_ORG = 'https://utahdeq.maps.arcgis.com' +AGOL_ORG = "https://utahdeq.maps.arcgis.com" SENDGRID_SETTINGS = { #: Settings for SendGridHandler - 'from_address': 'noreply@utah.gov', - 'to_addresses': ['jdadams@utah.gov', 'stevienorcross@utah.gov', 'gerardorodriguez@utah.gov'], - 'prefix': f'{SKID_NAME} on {HOST_NAME}: ', + "from_address": "noreply@utah.gov", + "to_addresses": ["jdadams@utah.gov", "stevienorcross@utah.gov", "gerardorodriguez@utah.gov"], + "prefix": f"{SKID_NAME} on {HOST_NAME}: ", } LOG_LEVEL = logging.DEBUG -LOG_FILE_NAME = 'log' +LOG_FILE_NAME = "log" -COUNTIES_ITEMID = '90431cac2f9f49f4bcf1505419583753' +COUNTIES_ITEMID = "90431cac2f9f49f4bcf1505419583753" # FEATURE_LAYER_ITEMID = '4df06137fb0a45459e49107a5f47a326' #: Beta version -FEATURE_LAYER_ITEMID = '056bbc52ff3240f6b69666750a61aeff' #: Live version -JOIN_COLUMN = 'id_' +FEATURE_LAYER_ITEMID = "056bbc52ff3240f6b69666750a61aeff" #: Live version +JOIN_COLUMN = "id_" diff --git a/src/wmrc/version.py b/src/wmrc/version.py index 3720cdc..22b96d0 100644 --- a/src/wmrc/version.py +++ b/src/wmrc/version.py @@ -3,4 +3,4 @@ This must only include a single line: __version__ = 'x.y.z' """ -__version__ = '1.1.0' +__version__ = "1.1.0" From e553308574728e6f1cc6eb7a82495c68a799bf42 Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Thu, 23 May 2024 08:21:26 -0600 Subject: [PATCH 10/19] fix: don't bomb out on missing empty columns --- src/wmrc/main.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/wmrc/main.py b/src/wmrc/main.py index 5b4592a..21c8ea5 100644 --- a/src/wmrc/main.py +++ b/src/wmrc/main.py @@ -205,7 +205,11 @@ def _parse_from_google_sheets(secrets): uocc_df = gsheet_extractor.load_specific_worksheet_into_dataframe(secrets.SHEET_ID, "UOCCs", by_title=True) #: Fix columns - sw_df.drop(columns=[""], inplace=True) #: Drop empty columns that don't have a name + try: + sw_df.drop(columns=[""], inplace=True) #: Drop empty columns that don't have a name + except KeyError: + pass + sw_df.rename( columns={"Accept Material\n Dropped \n Off by the Public": "Accept Material Dropped Off by the Public"}, inplace=True, From 8c09619e8ebfd3fffa86836e2c36aa193aae92aa Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Thu, 23 May 2024 08:21:26 -0600 Subject: [PATCH 11/19] chore: fix fields changed in sheet --- src/wmrc/main.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/wmrc/main.py b/src/wmrc/main.py index 21c8ea5..d19a6fc 100644 --- a/src/wmrc/main.py +++ b/src/wmrc/main.py @@ -164,11 +164,13 @@ def process(): [ "latitude", "longitude", - "tons_of_material_diverted_from_", + # "tons_of_material_diverted_from_", "gallons_of_used_oil_collected_for_recycling_last_year", ], ) + proj_df.drop(columns=["local_health_department", "uocc_email_address"], inplace=True) + module_logger.info("Truncating and loading...") updater = load.FeatureServiceUpdater(gis, config.FEATURE_LAYER_ITEMID, tempdir) load_count = updater.truncate_and_load_features(proj_df) From 08c3da29c294c47cee327d2d8bc41df0ab7a5e09 Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Thu, 23 May 2024 08:21:26 -0600 Subject: [PATCH 12/19] chore: supervisor error handling in gcp? --- src/wmrc/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wmrc/main.py b/src/wmrc/main.py index d19a6fc..58e40e2 100644 --- a/src/wmrc/main.py +++ b/src/wmrc/main.py @@ -93,7 +93,7 @@ def _initialize(log_path, sendgrid_api_key): logging.captureWarnings(True) skid_logger.debug("Creating Supervisor object") - skid_supervisor = Supervisor(handle_errors=False) + skid_supervisor = Supervisor(handle_errors=True) sendgrid_settings = config.SENDGRID_SETTINGS sendgrid_settings["api_key"] = sendgrid_api_key skid_supervisor.add_message_handler( From 87199fb48c8494cf1479115c9406c5f8056b965d Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Thu, 23 May 2024 08:21:27 -0600 Subject: [PATCH 13/19] chore: nope --- src/wmrc/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wmrc/main.py b/src/wmrc/main.py index 58e40e2..d19a6fc 100644 --- a/src/wmrc/main.py +++ b/src/wmrc/main.py @@ -93,7 +93,7 @@ def _initialize(log_path, sendgrid_api_key): logging.captureWarnings(True) skid_logger.debug("Creating Supervisor object") - skid_supervisor = Supervisor(handle_errors=True) + skid_supervisor = Supervisor(handle_errors=False) sendgrid_settings = config.SENDGRID_SETTINGS sendgrid_settings["api_key"] = sendgrid_api_key skid_supervisor.add_message_handler( From 9078c76ddbe27374e4bcd0afa66b4df3688227c2 Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Thu, 23 May 2024 08:21:27 -0600 Subject: [PATCH 14/19] ci: memory issue? --- .github/workflows/push.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index eb7429a..2feebb2 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -11,7 +11,7 @@ on: - dev env: - CLOUD_FUNCTION_MEMORY: 512 + CLOUD_FUNCTION_MEMORY: 1024 CLOUD_FUNCTION_RUN_TIMEOUT: 9m SCHEDULE_NAME: first-of-the-month SCHEDULE_CRON: 0 9 1 * * From 863c7403abbe6d565e30ade8f05258af32c31d6b Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Thu, 23 May 2024 08:21:27 -0600 Subject: [PATCH 15/19] chore: pin requests to fix arcgis issue --- setup.py | 2 +- src/wmrc/config.py | 2 +- src/wmrc/requirements.txt | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 7c5c997..a492975 100644 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ }, keywords=["gis"], install_requires=[ - "ugrc-palletjack>=4.1,<4.4", + "ugrc-palletjack>=4.1,<4.5", "agrc-supervisor==3.0.*", ], extras_require={ diff --git a/src/wmrc/config.py b/src/wmrc/config.py index 2a8bf69..7b126af 100644 --- a/src/wmrc/config.py +++ b/src/wmrc/config.py @@ -30,6 +30,6 @@ COUNTIES_ITEMID = "90431cac2f9f49f4bcf1505419583753" -# FEATURE_LAYER_ITEMID = '4df06137fb0a45459e49107a5f47a326' #: Beta version +# FEATURE_LAYER_ITEMID = "4df06137fb0a45459e49107a5f47a326" #: Beta version FEATURE_LAYER_ITEMID = "056bbc52ff3240f6b69666750a61aeff" #: Live version JOIN_COLUMN = "id_" diff --git a/src/wmrc/requirements.txt b/src/wmrc/requirements.txt index 08900e5..9936c1a 100644 --- a/src/wmrc/requirements.txt +++ b/src/wmrc/requirements.txt @@ -1,3 +1,4 @@ agrc-supervisor==3.0.* -ugrc-palletjack==4.3.* -google-cloud-storage==2.3.* +ugrc-palletjack==4.4.* +google-cloud-storage==2.16.* +requests==2.31.* From 8972681d1b7468ab0e64887a9ada45ef719e07c2 Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Thu, 23 May 2024 08:21:27 -0600 Subject: [PATCH 16/19] chore: get the versions right --- src/wmrc/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/wmrc/requirements.txt b/src/wmrc/requirements.txt index 9936c1a..f9eae26 100644 --- a/src/wmrc/requirements.txt +++ b/src/wmrc/requirements.txt @@ -1,4 +1,4 @@ -agrc-supervisor==3.0.* +agrc-supervisor==3.0.3 ugrc-palletjack==4.4.* google-cloud-storage==2.16.* -requests==2.31.* +requests<2.32 From 70675e16833862590f063fbc63bcd3418d2c7c3c Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Thu, 23 May 2024 08:21:27 -0600 Subject: [PATCH 17/19] chore: test on beta layer --- src/wmrc/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/wmrc/config.py b/src/wmrc/config.py index 7b126af..38b492c 100644 --- a/src/wmrc/config.py +++ b/src/wmrc/config.py @@ -30,6 +30,6 @@ COUNTIES_ITEMID = "90431cac2f9f49f4bcf1505419583753" -# FEATURE_LAYER_ITEMID = "4df06137fb0a45459e49107a5f47a326" #: Beta version -FEATURE_LAYER_ITEMID = "056bbc52ff3240f6b69666750a61aeff" #: Live version +FEATURE_LAYER_ITEMID = "4df06137fb0a45459e49107a5f47a326" #: Beta version +# FEATURE_LAYER_ITEMID = "056bbc52ff3240f6b69666750a61aeff" #: Live version JOIN_COLUMN = "id_" From b1f066dcb3cfe0a23b828e557b5d365a0acacb5c Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Thu, 23 May 2024 08:21:27 -0600 Subject: [PATCH 18/19] chore: ready to point to live --- src/wmrc/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/wmrc/config.py b/src/wmrc/config.py index 38b492c..7b126af 100644 --- a/src/wmrc/config.py +++ b/src/wmrc/config.py @@ -30,6 +30,6 @@ COUNTIES_ITEMID = "90431cac2f9f49f4bcf1505419583753" -FEATURE_LAYER_ITEMID = "4df06137fb0a45459e49107a5f47a326" #: Beta version -# FEATURE_LAYER_ITEMID = "056bbc52ff3240f6b69666750a61aeff" #: Live version +# FEATURE_LAYER_ITEMID = "4df06137fb0a45459e49107a5f47a326" #: Beta version +FEATURE_LAYER_ITEMID = "056bbc52ff3240f6b69666750a61aeff" #: Live version JOIN_COLUMN = "id_" From 0b3f38062e1a50c219d4db6ecbb5ef3c8d6eff1c Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Thu, 23 May 2024 09:13:44 -0600 Subject: [PATCH 19/19] ci: reduce memory --- .github/workflows/push.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 2feebb2..eb7429a 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -11,7 +11,7 @@ on: - dev env: - CLOUD_FUNCTION_MEMORY: 1024 + CLOUD_FUNCTION_MEMORY: 512 CLOUD_FUNCTION_RUN_TIMEOUT: 9m SCHEDULE_NAME: first-of-the-month SCHEDULE_CRON: 0 9 1 * *