Skip to content

Commit

Permalink
update to use esupy for requests
Browse files (browse the repository at this point in the history)
  • Loading branch information
bl-young committed Sep 15, 2023
1 parent 9374f29 commit aee1f20
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 42 deletions.
37 changes: 12 additions & 25 deletions stewi/DMR.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
2014-2021
"""

import requests
import pandas as pd
import argparse
import urllib
Expand All @@ -27,6 +26,7 @@
from io import BytesIO

from esupy.processed_data_mgmt import read_source_metadata
from esupy.remote import make_url_request
from stewi.globals import unit_convert,\
DATA_PATH, lb_kg, write_metadata, get_reliability_table_for_source,\
log, compile_source_metadata, config, store_inventory, set_stewi_meta,\
Expand Down Expand Up @@ -134,31 +134,18 @@ def download_data(url_params, filepath: Path) -> str:
df = pd.DataFrame()
url = generate_url(url_params)
log.debug(url)
for attempt in range(3):
try:
r = requests.get(url)
r.raise_for_status()
# When more than 100,000 records, need to split queries
if ((len(r.content) < 1000) and
('Maximum number of records' in str(r.content))):
for x in ('NGP', 'GPC', 'NPD'):
split_url = f'{url}&p_permit_type={x}'
r = requests.get(split_url)
r.raise_for_status()
df_sub = pd.read_csv(BytesIO(r.content), low_memory=False)
if len(df_sub) < 3: continue
df = pd.concat([df, df_sub], ignore_index=True)
else:
df = pd.read_csv(BytesIO(r.content), low_memory=False)
break
except (requests.exceptions.HTTPError,
requests.exceptions.ConnectionError) as err:
log.info(err)
time.sleep(20)
pass
r = make_url_request(url)
# When more than 100,000 records, need to split queries
if ((len(r.content) < 1000) and
('Maximum number of records' in str(r.content))):
for x in ('NGP', 'GPC', 'NPD'):
split_url = f'{url}&p_permit_type={x}'
r = make_url_request(split_url)
df_sub = pd.read_csv(BytesIO(r.content), low_memory=False)
if len(df_sub) < 3: continue
df = pd.concat([df, df_sub], ignore_index=True)
else:
log.warning("exceeded max attempts")
return 'other_error'
df = pd.read_csv(BytesIO(r.content), low_memory=False)
log.debug(f"saving to {filepath}")
pd.to_pickle(df, filepath)
return 'success'
Expand Down
4 changes: 2 additions & 2 deletions stewi/GHGRP.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@

import pandas as pd
import numpy as np
import requests
import time
import argparse
import warnings
Expand All @@ -37,6 +36,7 @@
from xml.parsers.expat import ExpatError

from esupy.processed_data_mgmt import read_source_metadata
from esupy.remote import make_url_request
from stewi.globals import download_table, write_metadata, import_table, \
DATA_PATH, get_reliability_table_for_source, set_stewi_meta, config,\
store_inventory, paths, log, \
Expand Down Expand Up @@ -119,7 +119,7 @@ def get_row_count(table, report_year):
count_url += f'/REPORTING_YEAR/=/{report_year}'
count_url += '/COUNT'
try:
count_request = requests.get(count_url)
count_request = make_url_request(count_url)
count_xml = minidom.parseString(count_request.text)
table_count = count_xml.getElementsByTagName('TOTALQUERYRESULTS')
table_count = int(table_count[0].firstChild.nodeValue)
Expand Down
13 changes: 2 additions & 11 deletions stewi/NEI.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@

import numpy as np
import pandas as pd
import requests

from esupy.processed_data_mgmt import download_from_remote,\
read_source_metadata
from esupy.remote import make_url_request
from esupy.util import strip_file_extension
from stewi.globals import DATA_PATH, write_metadata, USton_kg, lb_kg,\
log, store_inventory, config, assign_secondary_context,\
Expand Down Expand Up @@ -128,16 +128,7 @@ def generate_national_totals(year):
url = build_url.replace('__year__', year)
url = url.replace('__file__', file)

# make http request
r = []
try:
r = requests.Session().get(url, verify=False)
except requests.exceptions.ConnectionError:
log.error(f"URL Connection Error for {url}")
try:
r.raise_for_status()
except requests.exceptions.HTTPError:
log.error('Error in URL request!')
r = make_url_request(url, verify=False)

# extract data from zip archive
z = zipfile.ZipFile(io.BytesIO(r.content))
Expand Down
10 changes: 6 additions & 4 deletions stewi/globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
import time
import urllib
import copy
import shutil
import zipfile
import io
from datetime import datetime
from pathlib import Path

Expand All @@ -22,6 +25,7 @@
write_df_to_file, write_metadata_to_file,\
download_from_remote
from esupy.dqi import get_weighted_average
from esupy.remote import make_url_request
from esupy.util import get_git_hash
import stewi.exceptions

Expand Down Expand Up @@ -97,12 +101,10 @@ def download_table(filepath: Path, url: str, get_time=False):
"""Download file at url to Path if it does not exist."""
if not filepath.exists():
if url.lower().endswith('zip'):
import zipfile, requests, io
table_request = requests.get(url).content
zip_file = zipfile.ZipFile(io.BytesIO(table_request))
r = make_url_request(url)
zip_file = zipfile.ZipFile(io.BytesIO(r.content))
zip_file.extractall(filepath)
elif 'xls' in url.lower() or url.lower().endswith('excel'):
import shutil
try:
with urllib.request.urlopen(url) as response, open(filepath, 'wb') as out_file:
shutil.copyfileobj(response, out_file)
Expand Down

0 comments on commit aee1f20

Please sign in to comment.