-
Notifications
You must be signed in to change notification settings - Fork 6
/
oecd_json_get_timedout.py
67 lines (55 loc) · 2.32 KB
/
oecd_json_get_timedout.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import requests
import pandas as pd
from tqdm import tqdm
import logging
import datetime
import os
# OECD SDMX-JSON endpoint pattern:
#   http://stats.oecd.org/sdmx-json/data/<id>/all/all
# Get JSON datasets for all key families (dataset ids).

# Where to save or read.
LOG_DIR = 'logs'
STORE_DIR = 'OECD_json_datasets'
LOGFILE = os.path.join(LOG_DIR, 'timedout.log')

# exist_ok=True makes creation a single step: there is no window between an
# "exists?" check and the mkdir in which another process could create the
# directory and make this script fail.
os.makedirs(LOG_DIR, exist_ok=True)
os.makedirs(STORE_DIR, exist_ok=True)

# CSV file holding the dataset ids to request.
# NOTE(review): judging by its name this is presumably the list of ids whose
# earlier requests timed out - confirm against the script that produces it.
keyNamesFile = 'error_reports/timedout.csv'
# Logging: the log file is reopened in 'w' mode, so each run starts with an
# empty file; the DEBUG threshold lets every message below through.
logging.basicConfig(
    level=logging.DEBUG,
    filename=LOGFILE,
    filemode='w',
)
# %s formatting applies str() to the timestamp, so no explicit cast is needed.
logging.debug("Log started at %s", datetime.datetime.now())
# Read in the list of dataset ids.
datasourceUrl = 'http://stats.oecd.org/sdmx-json/data/'

# Force the id column to str: the ids are concatenated into URLs and file
# names later in the script, so a column that happens to contain only digits
# must not be parsed as integers.
dataset_ids_df = pd.read_csv(keyNamesFile, dtype={'KeyFamilyId': str})

# Blank cells are read as NaN (a float), which cannot be joined into a URL;
# drop them rather than crash halfway through the download loop.
dataset_ids = dataset_ids_df['KeyFamilyId'].dropna().tolist()

# Number of datasets successfully fetched and written to disk.
success_count = 0
# Fetch every dataset through one shared HTTP session. A failed request is
# printed and logged, and the loop then carries on with the next id, so one
# bad dataset never aborts the whole run.
with requests.Session() as s:
    for dataset_id in tqdm(dataset_ids):
        # Ids parsed from CSV are not guaranteed to be str (e.g. a purely
        # numeric column); normalise once before building URL and file name.
        dataset_name = str(dataset_id)
        try:
            r = s.get(datasourceUrl + dataset_name + '/all/all', timeout=61)
        except requests.exceptions.ReadTimeout:
            # Must precede Timeout: ReadTimeout is a subclass of it.
            print(dataset_id, ": OECD data request read timed out")
            logging.debug('%s: OECD data request read timed out', dataset_id)
        except requests.exceptions.Timeout:
            print(dataset_id, ": OECD data request timed out")
            logging.debug('%s: OECD data request timed out', dataset_id)
        except requests.exceptions.HTTPError:
            print(dataset_id, ": HTTP error")
            logging.debug('%s: HTTP error', dataset_id)
        except requests.exceptions.ConnectionError:
            print(dataset_id, ": Connection error")
            logging.debug('%s: Connection error', dataset_id)
        except requests.exceptions.RequestException as exc:
            # Catch-all for the remaining requests failures (too many
            # redirects, broken chunked body, invalid URL, ...). Without it
            # such an error would propagate and end the run early.
            print(dataset_id, ": Request failed:", exc)
            logging.debug('%s: Request failed: %s', dataset_id, exc)
        else:
            if r.status_code == 200:
                # Save the JSON file - don't prettify, to save space.
                target = os.path.join(STORE_DIR, dataset_name + ".json")
                with open(target, 'w', encoding='utf-8') as f:
                    f.write(r.text)
                success_count += 1
            else:
                # A response arrived but with a non-200 status; record the
                # code and do not write a file.
                print(dataset_id, 'HTTP Failed with code', r.status_code)
                logging.debug('%s HTTP Failed with code %d', dataset_id, r.status_code)
# Final report: how many ids were attempted and how many were saved.
print("completed ...")
for count, label in (
    (len(dataset_ids), " Dataset Ids"),
    (success_count, " datasets retrieved"),
):
    print(count, label)
# %s formatting applies str() to the timestamp, so no explicit cast is needed.
logging.debug("Log ended at %s", datetime.datetime.now())