-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathscottish_parliament.py
110 lines (91 loc) · 3.58 KB
/
scottish_parliament.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
from processor import Processor
# Landing page of the portal's dataset listing; recorded as the "pageURL" of
# every dataset this processor emits.
DATASETS_LINK = "https://data.parliament.scot/#/datasets"
# Licence label recorded as the "licence" of every dataset this processor emits.
DATASETS_LICENCE = "Scottish Parliament Copyright Policy"
class ProcessorScottishParliament(Processor):
    """Processor for Scottish Parliament's open data portal."""

    def __init__(self):
        """Initialise the base processor with this portal's type identifier."""
        super().__init__(type="bespoke_ScottishParliament")

    def build_dataset_resources(self, xml_link, json_link, csv_link, date_updated):
        """Build one resource entry for each download link that is present.

        Args:
            xml_link: URL of the XML download, or None if there is none.
            json_link: URL of the JSON download, or None if there is none.
            csv_link: URL of the CSV download, or None if there is none.
            date_updated: Value stored as "dateUpdated" on every resource.

        Returns:
            A list of resource dicts, in XML, JSON, CSV order. Links that are
            None are skipped, so the list may be empty. The format label is
            used for both "fileName" and "fileType"; size, creation date and
            record count are always None.
        """
        links_by_format = (
            ("XML", xml_link),
            ("JSON", json_link),
            ("CSV", csv_link),
        )
        return [
            {
                "fileName": file_format,
                "fileSize": None,
                "fileSizeUnit": None,
                "fileType": file_format,
                "assetUrl": link,
                "dateCreated": None,
                "dateUpdated": date_updated,
                "numRecords": None,
            }
            for file_format, link in links_by_format
            if link is not None
        ]

    def get_datasets(self, owner, url, fname):
        """Fetch the portal's dataset list and write the prepared records out.

        The list is requested from ``{url}api/datasetjson`` via
        ``self.get_json`` and the prepared records are handed to
        ``self.write_json``; neither method is defined in this class
        (presumably both come from ``Processor`` - confirm against the base
        class).

        Args:
            owner: Value stored as the "owner" of every dataset.
            url: Portal base URL. It is concatenated directly with
                "api/datasetjson", so it needs to end with "/".
            fname: Output target passed through to ``write_json``.
        """
        print(f"Processing {url}")
        datasets = self.get_json(f"{url}api/datasetjson")
        print(f"Found {len(datasets)} datasets")

        prepped_datasets = []
        for dataset in datasets:
            date_updated = dataset.get("LastUpdated", "")
            prepped_datasets.append(
                {
                    "title": dataset.get("Title", ""),
                    "owner": owner,
                    # The same listing page and licence label are recorded
                    # for every dataset.
                    "pageURL": DATASETS_LINK,
                    "dateCreated": None,
                    "dateUpdated": date_updated,
                    "licence": DATASETS_LICENCE,
                    "description": dataset.get("Description", ""),
                    "tags": [],
                    "resources": self.build_dataset_resources(
                        dataset.get("XmlLink"),
                        dataset.get("JsonLink"),
                        dataset.get("CsvLink"),
                        date_updated,
                    ),
                }
            )

        print(fname)
        # Write through this instance rather than the module-level
        # `processor` global, so the method works for any instance and does
        # not depend on that global having been created.
        self.write_json(fname, prepped_datasets)
# Module-level processor instance, created when this module is imported.
processor = ProcessorScottishParliament()

if __name__ == "__main__":
    # Script entry point: run the processor, passing "json" to process().
    processor.process("json")