-
Notifications
You must be signed in to change notification settings - Fork 3
/
betydb2geojson.py
executable file
·178 lines (147 loc) · 6.74 KB
/
betydb2geojson.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
#!/usr/bin/env python3
"""Python script for converting BETYdb plot outlines to GeoJSON format
"""
import os
import argparse
import json
import sys
import requests
from osgeo import ogr
# Name of the environment variable consulted for the BETYdb URL when none is given explicitly
ENV_BETYDB_URL_NAME = 'BETYDB_URL'
def get_arguments() -> argparse.Namespace:
    """Builds the command line parser and parses the command line
    Return:
        Returns the parsed arguments
    Notes:
        The BETYdb URL defaults to the value of the environment variable named by ENV_BETYDB_URL_NAME.
        When neither the command line nor the environment provides a URL, the parser reports an error.
        The output file is opened for writing by argparse as part of parsing the arguments.
    """
    parser = argparse.ArgumentParser(description="BETYdb plots to GeoJSON",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Use the shared constant for the default lookup so the help text and the
    # environment variable actually consulted cannot drift apart
    parser.add_argument('-u', '--betydb_url',
                        help='the URL of BETYdb instance to query (defaults to ' + ENV_BETYDB_URL_NAME +
                        ' environment variable)', metavar='str', type=str,
                        default=os.getenv(ENV_BETYDB_URL_NAME))

    parser.add_argument('-f', '--filter', nargs='*',
                        help='partial or full string filter for sitename values returned', metavar='str',
                        type=str, default='')

    parser.add_argument('-o', '--outfile', help='the output file to write GeoJSON to', metavar='FILE',
                        type=argparse.FileType('wt'),
                        default='out.txt')

    args = parser.parse_args()

    # The URL has no usable default when the environment variable is not set
    if not args.betydb_url:
        parser.error('--betydb_url is required')

    return args
def query_betydb_experiments(betydb_url: str = None) -> dict:
    """Requests the experiment information from a BETYdb instance
    Arguments:
        betydb_url: the base URL of the BETYdb instance to query
    Return:
        The JSON-decoded body of the response to the experiments API request
    Exceptions:
        A RuntimeError is raised if the BETYdb URL can't be determined.
        Other exceptions may be thrown by the requests.get() or raise_for_status() calls
    Notes:
        If betydb_url is None or otherwise evaluates to False, the environment variable named by
        ENV_BETYDB_URL_NAME is used instead. It's an error for both to be undefined
    """
    # Prefer the caller's URL and fall back to the environment
    base_url = betydb_url or os.getenv(ENV_BETYDB_URL_NAME, None)
    if not base_url:
        raise RuntimeError("Unable to resolve BETYdb URL. Please ensure it's defined and try again.")

    # Ask for the experiments along with their full association information, without a result limit
    response = requests.get(base_url.rstrip('/') + '/api/v1/experiments',
                            params={'associations_mode': 'full_info', 'limit': 'none'},
                            timeout=300)
    response.raise_for_status()

    return response.json()
def get_experiment_site_geometries(experiments_json: dict, site_filter: str = None) -> dict:
    """Returns all the found sites by name with their associated geometries as Well Known Text (WKT)
    Arguments:
        experiments_json: the JSON containing the experiment data retrieved from BETYdb
        site_filter: optional filter string to apply on sitenames; a site is kept when the filter
                     is a substring of its sitename
    Return:
        A dict with the found site names (plot names) as the keys with the geometry as the values
    Exceptions:
        Raises RuntimeError if the top-level "data" key is not found in the passed in JSON
        Other exceptions can be raised by misconfigured JSON
    Notes:
        Sites whose sitename or geometry is null or empty are skipped.
        When several sites share a sitename, the last one found is the one returned
    """
    if 'data' not in experiments_json:
        raise RuntimeError('Missing top-level "data" key from JSON: "%s"' % str(experiments_json))

    plots = {}

    # Find all the sites in all the returned experiments
    for one_exp in experiments_json['data']:
        if 'experiment' not in one_exp or 'sites' not in one_exp['experiment']:
            continue

        for one_site in one_exp['experiment']['sites']:
            if 'site' not in one_site:
                continue
            site = one_site['site']
            if 'geometry' not in site or 'sitename' not in site:
                continue

            sitename = site['sitename']
            geometry = site['geometry']

            # A null sitename can't be matched against the filter (a substring test on None
            # raises TypeError) and a null geometry holds no WKT to convert later on
            if not sitename or not geometry:
                continue

            # Check if there's a filter
            if not site_filter or site_filter in sitename:
                plots[sitename] = geometry

    return plots
def sites_to_geojson(sites: dict) -> dict:
    """Converts the site geometries to GeoJSON format
    Arguments:
        sites: the dict of site names with their geometries in WKT (Well Known Text) format
    Return:
        Returns a dict with the geometries converted into GeoJSON format as a dict. The sites parameter is not altered
    Exceptions:
        Raises RuntimeError if a geometry can't be created from a site's WKT
        Other exceptions may be raised from OGR library calls
    Notes:
        Sites with a null or empty geometry are left out of the returned dict
    """
    plots_geo = {}

    # Loop through converting the geometry format. We leave off CRS information since it's in WGS 84 lat-lon format
    # (which is the assumed CRS of GeoJSON)
    for site_name, wkt in sites.items():
        # Nothing to convert for sites without a geometry
        if not wkt:
            continue

        geom = ogr.CreateGeometryFromWkt(wkt)
        # OGR can return None rather than raise for WKT it can't parse (presumably when
        # OGR exceptions aren't enabled); report which site is at fault instead of
        # failing later with an AttributeError on None
        if geom is None:
            raise RuntimeError('Unable to create a geometry from the WKT of site "%s": "%s"' %
                               (str(site_name), str(wkt)))

        plots_geo[site_name] = json.loads(geom.ExportToJson())

    return plots_geo
def write_geojson(out_file, geojson_plots: dict) -> None:
    """Writes the plots to the output file as a GeoJSON FeatureCollection
    Arguments:
        out_file: where to write GeoJSON to (supports .write() as a file-like object)
        geojson_plots: a dictionary of plot names and their associated geometry
    Notes:
        To reduce the memory footprint of writing the GeoJSON, the plots are written one at a time.
        Each feature's id is its one-based position in the dictionary, written as a string
    """
    # Open the collection; the features are streamed into its array below
    out_file.write('{"type": "FeatureCollection","name": "BETYdb Sites","features": [')

    for feature_num, (name, geometry) in enumerate(geojson_plots.items(), start=1):
        feature = {
            'type': 'Feature',
            'properties': {
                'id': str(feature_num),
                'observationUnitName': name,
            },
            'geometry': geometry,
        }
        # Every feature after the first needs a comma separating it from its predecessor
        leading = ',' if feature_num > 1 else ''
        out_file.write(leading + json.dumps(feature))

    # Close the features array and the collection
    out_file.write(']}')
def convert() -> None:
    """Performs the BETYdb to GeoJSON conversion
    Return:
        No return is defined
    Exceptions:
        Raises RuntimeError if there's no output file or if no plots are found.
        Other exceptions may be raised by the query, conversion, and writing steps
    Notes:
        The output file is closed before returning, whether or not the conversion succeeded,
        unless it's the standard output stream
    """
    # Get the command line parameters
    args = get_arguments()
    if not args.outfile:
        raise RuntimeError("An output file must be specified to receive the GeoJSON plot information")

    try:
        # The filter arrives as a list of words; combine them into a single string to match on
        site_filter = ' '.join(args.filter or [])

        # Get the list of sites (plots) from the JSON returned
        experiments_json = query_betydb_experiments(args.betydb_url)
        sites = get_experiment_site_geometries(experiments_json, site_filter)
        if not sites:
            raise RuntimeError("No plots were found in the data returned from BETYdb")

        # Format each of the plots to their GeoJSON equivalents
        geojson_plots = sites_to_geojson(sites)

        # Write out the GeoJSON
        write_geojson(args.outfile, geojson_plots)
    finally:
        # The file was opened while parsing the arguments, so closing it is up to us.
        # Standard output is left open for the rest of the process
        if args.outfile is not sys.stdout:
            args.outfile.close()
if __name__ == "__main__":
    # convert() has no return value, so a completed conversion exits with the default (success) status
    sys.exit(convert())