From aee1f209d2bf2cdb141f090a40f2c17e468bc826 Mon Sep 17 00:00:00 2001
From: Ben Young
Date: Fri, 15 Sep 2023 16:41:50 -0400
Subject: [PATCH] update to use esupy for requests

---
 stewi/DMR.py     | 37 ++++++++++++-------------------------
 stewi/GHGRP.py   |  4 ++--
 stewi/NEI.py     | 13 ++-----------
 stewi/globals.py | 10 ++++++----
 4 files changed, 22 insertions(+), 42 deletions(-)

diff --git a/stewi/DMR.py b/stewi/DMR.py
index cfa6651..7dfc2b0 100644
--- a/stewi/DMR.py
+++ b/stewi/DMR.py
@@ -18,7 +18,6 @@
 2014-2021
 """
 
-import requests
 import pandas as pd
 import argparse
 import urllib
@@ -27,6 +26,7 @@
 from io import BytesIO
 
 from esupy.processed_data_mgmt import read_source_metadata
+from esupy.remote import make_url_request
 from stewi.globals import unit_convert,\
     DATA_PATH, lb_kg, write_metadata, get_reliability_table_for_source,\
     log, compile_source_metadata, config, store_inventory, set_stewi_meta,\
@@ -134,31 +134,18 @@ def download_data(url_params, filepath: Path) -> str:
     df = pd.DataFrame()
     url = generate_url(url_params)
     log.debug(url)
-    for attempt in range(3):
-        try:
-            r = requests.get(url)
-            r.raise_for_status()
-            # When more than 100,000 records, need to split queries
-            if ((len(r.content) < 1000) and
-                ('Maximum number of records' in str(r.content))):
-                for x in ('NGP', 'GPC', 'NPD'):
-                    split_url = f'{url}&p_permit_type={x}'
-                    r = requests.get(split_url)
-                    r.raise_for_status()
-                    df_sub = pd.read_csv(BytesIO(r.content), low_memory=False)
-                    if len(df_sub) < 3: continue
-                    df = pd.concat([df, df_sub], ignore_index=True)
-            else:
-                df = pd.read_csv(BytesIO(r.content), low_memory=False)
-            break
-        except (requests.exceptions.HTTPError,
-                requests.exceptions.ConnectionError) as err:
-            log.info(err)
-            time.sleep(20)
-            pass
+    r = make_url_request(url)
+    # When more than 100,000 records, need to split queries
+    if ((len(r.content) < 1000) and
+        ('Maximum number of records' in str(r.content))):
+        for x in ('NGP', 'GPC', 'NPD'):
+            split_url = f'{url}&p_permit_type={x}'
+            r = make_url_request(split_url)
+            df_sub = pd.read_csv(BytesIO(r.content), low_memory=False)
+            if len(df_sub) < 3: continue
+            df = pd.concat([df, df_sub], ignore_index=True)
     else:
-        log.warning("exceeded max attempts")
-        return 'other_error'
+        df = pd.read_csv(BytesIO(r.content), low_memory=False)
     log.debug(f"saving to {filepath}")
     pd.to_pickle(df, filepath)
     return 'success'
diff --git a/stewi/GHGRP.py b/stewi/GHGRP.py
index 6bfed61..1ae13cd 100644
--- a/stewi/GHGRP.py
+++ b/stewi/GHGRP.py
@@ -28,7 +28,6 @@
 
 import pandas as pd
 import numpy as np
-import requests
 import time
 import argparse
 import warnings
@@ -37,6 +36,7 @@
 from xml.parsers.expat import ExpatError
 
 from esupy.processed_data_mgmt import read_source_metadata
+from esupy.remote import make_url_request
 from stewi.globals import download_table, write_metadata, import_table, \
     DATA_PATH, get_reliability_table_for_source, set_stewi_meta, config,\
     store_inventory, paths, log, \
@@ -119,7 +119,7 @@ def get_row_count(table, report_year):
     count_url += f'/REPORTING_YEAR/=/{report_year}'
     count_url += '/COUNT'
     try:
-        count_request = requests.get(count_url)
+        count_request = make_url_request(count_url)
         count_xml = minidom.parseString(count_request.text)
         table_count = count_xml.getElementsByTagName('TOTALQUERYRESULTS')
         table_count = int(table_count[0].firstChild.nodeValue)
diff --git a/stewi/NEI.py b/stewi/NEI.py
index ef0f643..a265f0c 100644
--- a/stewi/NEI.py
+++ b/stewi/NEI.py
@@ -27,10 +27,10 @@
 
 import numpy as np
 import pandas as pd
-import requests
 
 from esupy.processed_data_mgmt import download_from_remote,\
     read_source_metadata
+from esupy.remote import make_url_request
 from esupy.util import strip_file_extension
 from stewi.globals import DATA_PATH, write_metadata, USton_kg, lb_kg,\
     log, store_inventory, config, assign_secondary_context,\
@@ -128,16 +128,7 @@ def generate_national_totals(year):
         url = build_url.replace('__year__', year)
         url = url.replace('__file__', file)
 
-        # make http request
-        r = []
-        try:
-            r = requests.Session().get(url, verify=False)
-        except requests.exceptions.ConnectionError:
-            log.error(f"URL Connection Error for {url}")
-        try:
-            r.raise_for_status()
-        except requests.exceptions.HTTPError:
-            log.error('Error in URL request!')
+        r = make_url_request(url, verify=False)
 
         # extract data from zip archive
         z = zipfile.ZipFile(io.BytesIO(r.content))
diff --git a/stewi/globals.py b/stewi/globals.py
index 713ef61..fd9cd02 100644
--- a/stewi/globals.py
+++ b/stewi/globals.py
@@ -11,6 +11,9 @@
 import time
 import urllib
 import copy
+import shutil
+import zipfile
+import io
 
 from datetime import datetime
 from pathlib import Path
@@ -22,6 +25,7 @@
     write_df_to_file, write_metadata_to_file,\
     download_from_remote
 from esupy.dqi import get_weighted_average
+from esupy.remote import make_url_request
 from esupy.util import get_git_hash
 
 import stewi.exceptions
@@ -97,12 +101,10 @@ def download_table(filepath: Path, url: str, get_time=False):
     """Download file at url to Path if it does not exist."""
     if not filepath.exists():
        if url.lower().endswith('zip'):
-            import zipfile, requests, io
-            table_request = requests.get(url).content
-            zip_file = zipfile.ZipFile(io.BytesIO(table_request))
+            r = make_url_request(url)
+            zip_file = zipfile.ZipFile(io.BytesIO(r.content))
            zip_file.extractall(filepath)
        elif 'xls' in url.lower() or url.lower().endswith('excel'):
-            import shutil
            try:
                with urllib.request.urlopen(url) as response, open(filepath, 'wb') as out_file:
                    shutil.copyfileobj(response, out_file)
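
The patch routes all four call sites through esupy.remote.make_url_request.
From those call sites alone, the helper takes a URL plus requests-style
keyword arguments (e.g. verify=False) and returns a response object exposing
.content and .text. Below is a minimal sketch of the retry-and-raise pattern
this centralizes, assuming requests semantics and borrowing illustrative
parameters (3 attempts, 20 s wait) from the retry loop removed from DMR.py;
the actual esupy implementation may differ.

    import time
    import requests

    def make_url_request_sketch(url, *, max_attempts=3, wait=20, **kwargs):
        """Fetch url, retrying transient HTTP and connection errors."""
        for attempt in range(1, max_attempts + 1):
            try:
                r = requests.get(url, **kwargs)  # kwargs forwards e.g. verify=False
                r.raise_for_status()             # raise on 4xx/5xx status codes
                return r
            except (requests.exceptions.HTTPError,
                    requests.exceptions.ConnectionError):
                if attempt == max_attempts:
                    raise                        # attempts exhausted; surface the error
                time.sleep(wait)                 # back off before retrying

Calling code then matches the new call sites in the patch, for example
r = make_url_request_sketch(url, verify=False) followed by
pd.read_csv(BytesIO(r.content)).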