diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index aa43f59..eb7429a 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -10,6 +10,13 @@ on: - main - dev +env: + CLOUD_FUNCTION_MEMORY: 512 + CLOUD_FUNCTION_RUN_TIMEOUT: 9m + SCHEDULE_NAME: first-of-the-month + SCHEDULE_CRON: 0 9 1 * * + SCHEDULE_DESCRIPTION: Trigger the wmrc-skid bot at the beginning of the month + concurrency: group: "${{ github.head_ref || github.ref }}" cancel-in-progress: true @@ -20,22 +27,21 @@ jobs: runs-on: ubuntu-latest steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + show-progress: false + - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.11" - # cache: pip - # cache-dependency-path: setup.py + python-version: 3.11 + cache: pip + cache-dependency-path: setup.py - name: Install libkrb5 for Kerberos on Linux run: | sudo apt install -y libkrb5-dev - pip install requests-kerberos - - - name: Checkout code - uses: actions/checkout@v4 - with: - show-progress: false - name: Install module run: pip install .[tests] @@ -79,38 +85,38 @@ jobs: source_dir: src/wmrc service_account_email: cloud-function-sa@${{ secrets.PROJECT_ID }}.iam.gserviceaccount.com event_trigger_type: providers/cloud.pubsub/eventTypes/topic.publish - event_trigger_resource: projects/${{ secrets.PROJECT_ID }}/topics/monday-morning-topic + event_trigger_resource: projects/${{ secrets.PROJECT_ID }}/topics/${{ env.SCHEDULE_NAME }}-topic deploy_timeout: 600 - memory_mb: 512 - timeout: "9m" + memory_mb: ${{ env.CLOUD_FUNCTION_MEMORY }} + timeout: ${{ env.CLOUD_FUNCTION_RUN_TIMEOUT }} env_vars: STORAGE_BUCKET=${{secrets.STORAGE_BUCKET}} secret_volumes: | /secrets/app/secrets.json=${{secrets.PROJECT_ID}}/skid-secrets - name: 📥 Create PubSub topic run: | - if [ ! "$(gcloud pubsub topics list | grep monday-morning-topic)" ]; then - gcloud pubsub topics create monday-morning-topic --quiet + if [ ! "$(gcloud pubsub topics list | grep $SCHEDULE_NAME-topic)" ]; then + gcloud pubsub topics create $SCHEDULE_NAME-topic --quiet fi - name: 🕰️ Create Cloud Scheduler run: | - if [ ! "$(gcloud scheduler jobs list --location=us-central1 | grep monday-morning)" ]; then - gcloud scheduler jobs create pubsub monday-morning \ - --description="Trigger the wmrc-skid bot once a week on monday morning" \ - --schedule="0 9 * * 1" \ + if [ ! "$(gcloud scheduler jobs list --location=us-central1 | grep $SCHEDULE_NAME)" ]; then + gcloud scheduler jobs create pubsub $SCHEDULE_NAME \ + --description="$SCHEDULE_DESCRIPTION" \ + --schedule="$SCHEDULE_CRON" \ --time-zone=America/Denver \ --location=us-central1 \ - --topic=monday-morning-topic \ + --topic=$SCHEDULE_NAME-topic \ --message-body='{"run": "now"}' \ --quiet else - gcloud scheduler jobs update pubsub monday-morning \ - --description="Trigger the wmrc-skid bot once a week on monday morning" \ - --schedule="0 9 * * 1" \ + gcloud scheduler jobs update pubsub $SCHEDULE_NAME \ + --description="$SCHEDULE_DESCRIPTION" \ + --schedule="$SCHEDULE_CRON" \ --time-zone=America/Denver \ --location=us-central1 \ - --topic=monday-morning-topic \ + --topic=$SCHEDULE_NAME-topic \ --message-body='{"run": "now"}' \ --quiet fi @@ -151,38 +157,38 @@ jobs: source_dir: src/wmrc service_account_email: cloud-function-sa@${{ secrets.PROJECT_ID }}.iam.gserviceaccount.com event_trigger_type: providers/cloud.pubsub/eventTypes/topic.publish - event_trigger_resource: projects/${{ secrets.PROJECT_ID }}/topics/first-of-the-month-topic + event_trigger_resource: projects/${{ secrets.PROJECT_ID }}/topics/${{ env.SCHEDULE_NAME }}-topic deploy_timeout: 600 - memory_mb: 512 - timeout: "9m" + memory_mb: ${{ env.CLOUD_FUNCTION_MEMORY }} + timeout: ${{ env.CLOUD_FUNCTION_RUN_TIMEOUT }} env_vars: STORAGE_BUCKET=${{secrets.STORAGE_BUCKET}} secret_volumes: | /secrets/app/secrets.json=${{secrets.PROJECT_ID}}/skid-secrets - name: 📥 Create PubSub topic run: | - if [ ! "$(gcloud pubsub topics list | grep first-of-the-month-topic)" ]; then - gcloud pubsub topics create first-of-the-month-topic --quiet + if [ ! "$(gcloud pubsub topics list | grep $SCHEDULE_NAME-topic)" ]; then + gcloud pubsub topics create $SCHEDULE_NAME-topic --quiet fi - name: 🕰️ Create Cloud Scheduler run: | - if [ ! "$(gcloud scheduler jobs list --location=us-central1 | grep first-of-the-month)" ]; then - gcloud scheduler jobs create pubsub first-of-the-month \ - --description="Trigger the wmrc-skid bot on the first day of the month" \ - --schedule="0 9 1 * *" \ + if [ ! "$(gcloud scheduler jobs list --location=us-central1 | grep $SCHEDULE_NAME)" ]; then + gcloud scheduler jobs create pubsub $SCHEDULE_NAME \ + --description="$SCHEDULE_DESCRIPTION" \ + --schedule="$SCHEDULE_CRON" \ --time-zone=America/Denver \ --location=us-central1 \ - --topic=first-of-the-month-topic \ + --topic=$SCHEDULE_NAME-topic \ --message-body='{"run": "now"}' \ --quiet else - gcloud scheduler jobs update pubsub first-of-the-month \ - --description="Trigger the wmrc-skid bot on the first day of the month" \ - --schedule="0 9 1 * *" \ + gcloud scheduler jobs update pubsub $SCHEDULE_NAME \ + --description="$SCHEDULE_DESCRIPTION" \ + --schedule="$SCHEDULE_CRON" \ --time-zone=America/Denver \ --location=us-central1 \ - --topic=first-of-the-month-topic \ + --topic=$SCHEDULE_NAME-topic \ --message-body='{"run": "now"}' \ --quiet fi diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..e984426 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,17 @@ +{ + // See https://go.microsoft.com/fwlink/?LinkId=827846 to learn about workspace recommendations. + // Extension identifier format: ${publisher}.${name}. Example: vscode.csharp + // List of extensions which should be recommended for users of this workspace. + "recommendations": [ + "editorconfig.editorconfig", + "njpwerner.autodocstring", + "ms-python.black-formatter", + "ms-python.vscode-pylance", + "ms-python.python", + "donjayamanne.python-environment-manager", + "charliermarsh.ruff", + "tamasfe.even-better-toml" + ], + // List of extensions recommended by VS Code that should not be recommended for users of this workspace. + "unwantedRecommendations": [] +} diff --git a/.vscode/settings.json b/.vscode/settings.json index 11f28e8..af7aea1 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -12,5 +12,27 @@ "ugrc", "wkid", "wmrc" - ] -} + ], + "editor.formatOnSave": true, + "editor.rulers": [ + 120 + ], + "coverage-gutters.showGutterCoverage": false, + "coverage-gutters.showLineCoverage": true, + "coverage-gutters.showRulerCoverage": false, + "coverage-gutters.highlightdark": "rgb(61, 153, 112, .05)", + "coverage-gutters.noHighlightDark": "rgb(255, 65, 54, .05)", + "coverage-gutters.partialHighlightDark": "rgb(255, 133, 27, .05)", + "python.languageServer": "Pylance", + "python.testing.pytestEnabled": true, + "python.testing.pytestArgs": [ + "--no-cov" + ], + "editor.codeActionsOnSave": { + "source.organizeImports": "explicit" + }, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true + } +} \ No newline at end of file diff --git a/pylintrc b/pylintrc deleted file mode 100644 index d2b7bad..0000000 --- a/pylintrc +++ /dev/null @@ -1,6 +0,0 @@ -[MASTER] -load-plugins=pylint_quotes -max-line-length=120 -disable=broad-except -ignore-patterns=test_.*?py -generated-members=arcpy.da.SearchCursor,arcpy.da.UpdateCursor,arcpy.da.InsertCursor,arcpy.da.Describe,arcpy.env.scratchFolder,arcpy.env.scratchGDB diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..f5040b5 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,11 @@ +[tool.ruff] +line-length = 120 +ignore = ["E501"] +[tool.black] +line-length = 120 +[tool.pytest.ini_options] +minversion = "6.0" +testpaths = ["tests", "src"] +norecursedirs = [".env", "data", "maps", ".github", ".vscode"] +console_output_style = "count" +addopts = "--ruff --cov-branch --cov=wmrc --cov-report term --cov-report xml:cov.xml --instafail" diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 963546f..0000000 --- a/setup.cfg +++ /dev/null @@ -1,19 +0,0 @@ -[yapf] -based_on_style=google -ALLOW_SPLIT_BEFORE_DICT_VALUE=False -COLUMN_LIMIT=120 -COALESCE_BRACKETS=True -DEDENT_CLOSING_BRACKETS=True -EACH_DICT_ENTRY_ON_SEPARATE_LINE=True -INDENT_DICTIONARY_VALUE=False -SPLIT_BEFORE_DOT=True -[tool:isort] -line_length=120 -multi_line_output=5 -[tool:pytest] -testpaths = tests src -norecursedirs = .env data maps -show_capture = True -minversion = 3.5 -console_output_style = count -addopts = --cov-branch --cov=wmrc --cov-report term --cov-report xml:cov.xml --instafail diff --git a/setup.py b/setup.py index 3f625d3..a492975 100644 --- a/setup.py +++ b/setup.py @@ -4,62 +4,62 @@ setup.py A module that installs the wmrc skid as a module """ -from glob import glob -from os.path import basename, splitext +from pathlib import Path from setuptools import find_packages, setup #: Load version from source file version = {} -with open('src/wmrc/version.py') as fp: - exec(fp.read(), version) +version_file = Path(__file__).parent / "src" / "wmrc" / "version.py" +exec(version_file.read_text(), version) + setup( - name='wmrc-skid', - version=version['__version__'], - license='MIT', - description='Update the wmrc data from Google Sheets via GCF', - author='Jacob Adams', - author_email='jdadms@utah.gov', - url='https://github.com/agrc/wmrc-skid', - packages=find_packages('src'), - package_dir={'': 'src'}, - py_modules=[splitext(basename(path))[0] for path in glob('src/*.py')], + name="wmrc-skid", + version=version["__version__"], + license="MIT", + long_description=(Path(__file__).parent / "README.md").read_text(), + long_description_content_type="text/markdown", + author="Jacob Adams", + author_email="jdadms@utah.gov", + url="https://github.com/agrc/wmrc-skid", + packages=find_packages("src"), + package_dir={"": "src"}, include_package_data=True, zip_safe=True, classifiers=[ # complete classifier list: http://pypi.python.org/pypi?%3Aaction=list_classifiers - 'Development Status :: 5 - Production/Stable', - 'Intended Audience :: Developers', - 'Topic :: Utilities', + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Topic :: Utilities", ], project_urls={ - 'Issue Tracker': 'https://github.com/agrc/wmrc-skid/issues', + "Issue Tracker": "https://github.com/agrc/wmrc-skid/issues", }, - keywords=['gis'], + keywords=["gis"], install_requires=[ - 'ugrc-palletjack>=4.1,<4.4', - 'agrc-supervisor==3.0.*', + "ugrc-palletjack>=4.1,<4.5", + "agrc-supervisor==3.0.*", ], extras_require={ - 'tests': [ - 'pylint-quotes~=0.2', - 'pylint>=2.15,<4.0', - 'pytest-cov>=4,<6', - 'pytest-instafail~=0.4', - 'pytest-isort>=3.1,<5.0', - 'pytest-pylint~=0.19', - 'pytest-watch~=4.2', - 'pytest>=7.2,<9.0', - 'yapf~=0.32', - 'pytest-mock>=3.10,<3.15', - 'functions-framework~=3.3', + "tests": [ + "pytest-cov>=3,<5", + "pytest-instafail==0.5.*", + "pytest-mock==3.*", + "pytest-ruff==0.*", + "pytest-watch==4.*", + "pytest>=6,<8", + "black>=23.3,<23.12", + "ruff==0.0.*", + "functions-framework==3.4.*", ] }, setup_requires=[ - 'pytest-runner', + "pytest-runner", ], - entry_points={'console_scripts': [ - 'wmrc = wmrc.main:process', - ]}, + entry_points={ + "console_scripts": [ + "wmrc = wmrc.main:process", + ] + }, ) diff --git a/src/wmrc/config.py b/src/wmrc/config.py index a3138c4..7b126af 100644 --- a/src/wmrc/config.py +++ b/src/wmrc/config.py @@ -6,12 +6,12 @@ import socket import urllib.request -SKID_NAME = 'wmrc' +SKID_NAME = "wmrc" #: Try to get project id from GCP metadata server for hostname. If it's empty or errors out, revert to local hostname try: - url = 'http://metadata.google.internal/computeMetadata/v1/project/project-id' + url = "http://metadata.google.internal/computeMetadata/v1/project/project-id" req = urllib.request.Request(url) - req.add_header('Metadata-Flavor', 'Google') + req.add_header("Metadata-Flavor", "Google") project_id = urllib.request.urlopen(req).read().decode() if not project_id: raise ValueError @@ -19,17 +19,17 @@ except Exception: HOST_NAME = socket.gethostname() -AGOL_ORG = 'https://utahdeq.maps.arcgis.com' +AGOL_ORG = "https://utahdeq.maps.arcgis.com" SENDGRID_SETTINGS = { #: Settings for SendGridHandler - 'from_address': 'noreply@utah.gov', - 'to_addresses': ['jdadams@utah.gov', 'stevienorcross@utah.gov', 'gerardorodriguez@utah.gov'], - 'prefix': f'{SKID_NAME} on {HOST_NAME}: ', + "from_address": "noreply@utah.gov", + "to_addresses": ["jdadams@utah.gov", "stevienorcross@utah.gov", "gerardorodriguez@utah.gov"], + "prefix": f"{SKID_NAME} on {HOST_NAME}: ", } LOG_LEVEL = logging.DEBUG -LOG_FILE_NAME = 'log' +LOG_FILE_NAME = "log" -COUNTIES_ITEMID = '90431cac2f9f49f4bcf1505419583753' +COUNTIES_ITEMID = "90431cac2f9f49f4bcf1505419583753" -# FEATURE_LAYER_ITEMID = '4df06137fb0a45459e49107a5f47a326' #: Beta version -FEATURE_LAYER_ITEMID = '056bbc52ff3240f6b69666750a61aeff' #: Live version -JOIN_COLUMN = 'id_' +# FEATURE_LAYER_ITEMID = "4df06137fb0a45459e49107a5f47a326" #: Beta version +FEATURE_LAYER_ITEMID = "056bbc52ff3240f6b69666750a61aeff" #: Live version +JOIN_COLUMN = "id_" diff --git a/src/wmrc/main.py b/src/wmrc/main.py index 431fa6a..d19a6fc 100644 --- a/src/wmrc/main.py +++ b/src/wmrc/main.py @@ -4,10 +4,7 @@ Run the wmrc script as a cloud function. """ import json -import locale import logging -import os -import pprint import sys from datetime import date, datetime from pathlib import Path @@ -17,8 +14,8 @@ import arcgis import google.auth import pandas as pd -from arcgis.features import GeoAccessor, GeoSeriesAccessor -from palletjack import extract, transform, load +from arcgis.features import GeoAccessor, GeoSeriesAccessor # noqa: F401 +from palletjack import extract, load, transform from supervisor.message_handlers import SendGridHandler from supervisor.models import MessageDetails, Supervisor @@ -41,21 +38,21 @@ def _get_secrets(): dict: The secrets .json loaded as a dictionary """ - secret_folder = Path('/secrets') + secret_folder = Path("/secrets") #: Try to get the secrets from the Cloud Function mount point if secret_folder.exists(): - secrets_dict = json.loads(Path('/secrets/app/secrets.json').read_text(encoding='utf-8')) + secrets_dict = json.loads(Path("/secrets/app/secrets.json").read_text(encoding="utf-8")) credentials, _ = google.auth.default() - secrets_dict['SERVICE_ACCOUNT_JSON'] = credentials + secrets_dict["SERVICE_ACCOUNT_JSON"] = credentials return secrets_dict #: Otherwise, try to load a local copy for local development - secret_folder = (Path(__file__).parent / 'secrets') + secret_folder = Path(__file__).parent / "secrets" if secret_folder.exists(): - return json.loads((secret_folder / 'secrets.json').read_text(encoding='utf-8')) + return json.loads((secret_folder / "secrets.json").read_text(encoding="utf-8")) - raise FileNotFoundError('Secrets folder not found; secrets not loaded.') + raise FileNotFoundError("Secrets folder not found; secrets not loaded.") def _initialize(log_path, sendgrid_api_key): @@ -71,17 +68,17 @@ def _initialize(log_path, sendgrid_api_key): skid_logger = logging.getLogger(config.SKID_NAME) skid_logger.setLevel(config.LOG_LEVEL) - palletjack_logger = logging.getLogger('palletjack') + palletjack_logger = logging.getLogger("palletjack") palletjack_logger.setLevel(config.LOG_LEVEL) cli_handler = logging.StreamHandler(sys.stdout) cli_handler.setLevel(config.LOG_LEVEL) formatter = logging.Formatter( - fmt='%(levelname)-7s %(asctime)s %(name)15s:%(lineno)5s %(message)s', datefmt='%Y-%m-%d %H:%M:%S' + fmt="%(levelname)-7s %(asctime)s %(name)15s:%(lineno)5s %(message)s", datefmt="%Y-%m-%d %H:%M:%S" ) cli_handler.setFormatter(formatter) - log_handler = logging.FileHandler(log_path, mode='w') + log_handler = logging.FileHandler(log_path, mode="w") log_handler.setLevel(config.LOG_LEVEL) log_handler.setFormatter(formatter) @@ -95,10 +92,10 @@ def _initialize(log_path, sendgrid_api_key): #: (all log messages were duplicated if put at beginning) logging.captureWarnings(True) - skid_logger.debug('Creating Supervisor object') + skid_logger.debug("Creating Supervisor object") skid_supervisor = Supervisor(handle_errors=False) sendgrid_settings = config.SENDGRID_SETTINGS - sendgrid_settings['api_key'] = sendgrid_api_key + sendgrid_settings["api_key"] = sendgrid_api_key skid_supervisor.add_message_handler( SendGridHandler( sendgrid_settings=sendgrid_settings, client_name=config.SKID_NAME, client_version=version.__version__ @@ -122,13 +119,12 @@ def _remove_log_file_handlers(log_name, loggers): if log_name in handler.stream.name: logger.removeHandler(handler) handler.close() - except Exception as error: + except Exception: pass def process(): - """The main function that does all the work. - """ + """The main function that does all the work.""" #: Set up secrets, tempdir, supervisor, and logging start = datetime.now() @@ -151,113 +147,122 @@ def process(): gis = arcgis.gis.GIS(config.AGOL_ORG, secrets.AGOL_USER, secrets.AGOL_PASSWORD) #: Do the work - module_logger.info('Loading data from Google Sheets...') + module_logger.info("Loading data from Google Sheets...") combined_df = _parse_from_google_sheets(secrets) - module_logger.info('Adding county names from SGID county boundaries...') + module_logger.info("Adding county names from SGID county boundaries...") with_counties_df = _get_county_names(combined_df, gis) - module_logger.info('Preparing data for truncate and load...') + module_logger.info("Preparing data for truncate and load...") proj_df = with_counties_df.copy() proj_df.spatial.project(4326) - proj_df.spatial.set_geometry('SHAPE') - proj_df.spatial.sr = {'wkid': 4326} - proj_df['last_updated'] = date.today() - proj_df = transform.DataCleaning.switch_to_datetime(proj_df, ['last_updated']) + proj_df.spatial.set_geometry("SHAPE") + proj_df.spatial.sr = {"wkid": 4326} + proj_df["last_updated"] = date.today() + proj_df = transform.DataCleaning.switch_to_datetime(proj_df, ["last_updated"]) proj_df = transform.DataCleaning.switch_to_float( - proj_df, [ - 'latitude', - 'longitude', - 'tons_of_material_diverted_from_', - 'gallons_of_used_oil_collected_for_recycling_last_year', - ] + proj_df, + [ + "latitude", + "longitude", + # "tons_of_material_diverted_from_", + "gallons_of_used_oil_collected_for_recycling_last_year", + ], ) - module_logger.info('Truncating and loading...') + proj_df.drop(columns=["local_health_department", "uocc_email_address"], inplace=True) + + module_logger.info("Truncating and loading...") updater = load.FeatureServiceUpdater(gis, config.FEATURE_LAYER_ITEMID, tempdir) load_count = updater.truncate_and_load_features(proj_df) end = datetime.now() summary_message = MessageDetails() - summary_message.subject = f'{config.SKID_NAME} Update Summary' + summary_message.subject = f"{config.SKID_NAME} Update Summary" summary_rows = [ f'{config.SKID_NAME} update {start.strftime("%Y-%m-%d")}', - '=' * 20, - '', + "=" * 20, + "", f'Start time: {start.strftime("%H:%M:%S")}', f'End time: {end.strftime("%H:%M:%S")}', - f'Duration: {str(end-start)}', - '', - f'Rows loaded: {load_count}', + f"Duration: {str(end-start)}", + "", + f"Rows loaded: {load_count}", ] - summary_message.message = '\n'.join(summary_rows) + summary_message.message = "\n".join(summary_rows) summary_message.attachments = tempdir_path / log_name skid_supervisor.notify(summary_message) #: Remove file handler so the tempdir will close properly - loggers = [logging.getLogger(config.SKID_NAME), logging.getLogger('palletjack')] + loggers = [logging.getLogger(config.SKID_NAME), logging.getLogger("palletjack")] _remove_log_file_handlers(log_name, loggers) def _parse_from_google_sheets(secrets): - #: Get individual sheets gsheet_extractor = extract.GSheetLoader(secrets.SERVICE_ACCOUNT_JSON) - sw_df = gsheet_extractor.load_specific_worksheet_into_dataframe(secrets.SHEET_ID, 'SW Facilities', by_title=True) - uocc_df = gsheet_extractor.load_specific_worksheet_into_dataframe(secrets.SHEET_ID, 'UOCCs', by_title=True) + sw_df = gsheet_extractor.load_specific_worksheet_into_dataframe(secrets.SHEET_ID, "SW Facilities", by_title=True) + uocc_df = gsheet_extractor.load_specific_worksheet_into_dataframe(secrets.SHEET_ID, "UOCCs", by_title=True) #: Fix columns - sw_df.drop(columns=[''], inplace=True) #: Drop empty columns that don't have a name + try: + sw_df.drop(columns=[""], inplace=True) #: Drop empty columns that don't have a name + except KeyError: + pass + sw_df.rename( - columns={'Accept Material\n Dropped \n Off by the Public': 'Accept Material Dropped Off by the Public'}, - inplace=True + columns={"Accept Material\n Dropped \n Off by the Public": "Accept Material Dropped Off by the Public"}, + inplace=True, ) uocc_df.rename( columns={ - 'Type': 'Class', - 'Accept Material\n Dropped \n Off by the Public': 'Accept Material Dropped Off by the Public' + "Type": "Class", + "Accept Material\n Dropped \n Off by the Public": "Accept Material Dropped Off by the Public", }, - inplace=True + inplace=True, ) combined_df = pd.concat([sw_df, uocc_df]).query('Status in ["Open", "OPEN"]') - renamed_df = transform.DataCleaning.rename_dataframe_columns_for_agol(combined_df).rename(columns=str.lower).rename( - columns={ - 'longitude_': 'longitude', - 'accept_material_dropped_off_by_the_public': 'accept_material_dropped_off_by_', - 'tons_of_material_diverted_from_landfills_last_year': 'tons_of_material_diverted_from_' - } + renamed_df = ( + transform.DataCleaning.rename_dataframe_columns_for_agol(combined_df) + .rename(columns=str.lower) + .rename( + columns={ + "longitude_": "longitude", + "accept_material_dropped_off_by_the_public": "accept_material_dropped_off_by_", + "tons_of_material_diverted_from_landfills_last_year": "tons_of_material_diverted_from_", + } + ) ) return renamed_df def _get_county_names(input_df, gis): - #: Load counties from open data feature service counties_df = pd.DataFrame.spatial.from_layer( arcgis.features.FeatureLayer.fromitem(gis.content.get(config.COUNTIES_ITEMID)) ) counties_df.spatial.project(26912) counties_df.reset_index(inplace=True) - counties_df = counties_df.reindex(columns=['SHAPE', 'NAME']) #: We only care about the county name - counties_df.spatial.set_geometry('SHAPE') - counties_df.spatial.sr = {'wkid': 26912} + counties_df = counties_df.reindex(columns=["SHAPE", "NAME"]) #: We only care about the county name + counties_df.spatial.set_geometry("SHAPE") + counties_df.spatial.sr = {"wkid": 26912} #: Convert dataframe to spatial - spatial_df = pd.DataFrame.spatial.from_xy(input_df, x_column='longitude', y_column='latitude') + spatial_df = pd.DataFrame.spatial.from_xy(input_df, x_column="longitude", y_column="latitude") spatial_df.reset_index(drop=True, inplace=True) spatial_df.spatial.project(26912) - spatial_df.spatial.set_geometry('SHAPE') - spatial_df.spatial.sr = {'wkid': 26912} + spatial_df.spatial.set_geometry("SHAPE") + spatial_df.spatial.sr = {"wkid": 26912} #: Perform the join, clean up the output - joined_points_df = spatial_df.spatial.join(counties_df, 'left', 'within') - joined_points_df.drop(columns=['index_right'], inplace=True) - joined_points_df.rename(columns={'NAME': 'county_name'}, inplace=True) - joined_points_df['county_name'] = joined_points_df['county_name'].str.title() + joined_points_df = spatial_df.spatial.join(counties_df, "left", "within") + joined_points_df.drop(columns=["index_right"], inplace=True) + joined_points_df.rename(columns={"NAME": "county_name"}, inplace=True) + joined_points_df["county_name"] = joined_points_df["county_name"].str.title() return joined_points_df @@ -293,5 +298,5 @@ def main(event, context): # pylint: disable=unused-argument #: Putting this here means you can call the file via `python main.py` and it will run. Useful for pre-GCF testing. -if __name__ == '__main__': +if __name__ == "__main__": process() diff --git a/src/wmrc/requirements.txt b/src/wmrc/requirements.txt index 1014d7b..f9eae26 100644 --- a/src/wmrc/requirements.txt +++ b/src/wmrc/requirements.txt @@ -1,3 +1,4 @@ -agrc-supervisor==3.0.* -ugrc-palletjack==4.2.* -google-cloud-storage==2.3.* +agrc-supervisor==3.0.3 +ugrc-palletjack==4.4.* +google-cloud-storage==2.16.* +requests<2.32 diff --git a/src/wmrc/version.py b/src/wmrc/version.py index 3720cdc..22b96d0 100644 --- a/src/wmrc/version.py +++ b/src/wmrc/version.py @@ -3,4 +3,4 @@ This must only include a single line: __version__ = 'x.y.z' """ -__version__ = '1.1.0' +__version__ = "1.1.0"