-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathENCODETools.py
331 lines (302 loc) · 12.1 KB
/
ENCODETools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
import sys
import csv
import json
import jsonschema
import requests
import gdata.spreadsheet.service
# Default HTTP headers for the request helpers in this file: declares the
# content type of the request as JSON.
HEADERS = {'content-type': 'application/json'}
def get_ENCODE(obj_id, keys):
    '''
    GET an ENCODE object as JSON and return it decoded.

    obj_id: path of the object; it is appended to keys['server'] and
            followed by '?limit=all'.
    keys:   dict providing 'server', 'authid' and 'authpw'.

    The decoded JSON body is returned whatever the status code is; for any
    status other than 200 the raw response text is also written to stderr.
    '''
    url = keys['server'] + obj_id + '?limit=all'
    response = requests.get(url, auth=(keys['authid'], keys['authpw']), headers=HEADERS)
    if response.status_code != 200:
        # sys.stderr.write works on both Python 2 and 3, unlike `print >>`.
        sys.stderr.write(response.text + '\n')
    return response.json()
def GetENCODE(object_id, keys):
    '''
    GET an ENCODE object as JSON and return it decoded.

    object_id: path of the object (must be a str); it is appended to
               keys['server'] and followed by '?limit=all'.
    keys:      dict providing 'server', 'authid' and 'authpw'.

    Returns the decoded JSON body, or None when object_id is not a str or
    when the request itself raised. For any status other than 200 the raw
    response text is written to stderr and the body is still decoded.
    '''
    if not isinstance(object_id, str):
        return None
    url = keys['server'] + object_id + '?limit=all'
    try:
        response = requests.get(url, auth=(keys['authid'], keys['authpw']), headers=HEADERS)
    except Exception as e:
        # Best-effort fetch: report the failure and carry on with None
        # rather than aborting the caller.
        print("Get request failed:")
        sys.stderr.write('%r\n' % (e,))
        return None
    if response.status_code != 200:
        sys.stderr.write(response.text + '\n')
    return response.json()
def patch_ENCODE(obj_id, patch_json, keys):
    '''
    PATCH an existing ENCODE object and return the response JSON.

    obj_id:     path of the object; it is appended to keys['server'].
    patch_json: JSON-serializable object holding the properties to send.
    keys:       dict providing 'server', 'authid' and 'authpw'.

    Prints "Patch:" and the status code; for any status other than 200 the
    raw response text is also written to stderr. The decoded response body
    is returned in every case.
    '''
    url = keys['server'] + obj_id
    json_payload = json.dumps(patch_json)
    # Send the JSON content-type header, as the POST helper does, so the
    # payload is declared as JSON.
    response = requests.patch(url, auth=(keys['authid'], keys['authpw']), headers=HEADERS, data=json_payload)
    print("Patch:")
    print(response.status_code)
    if response.status_code != 200:
        sys.stderr.write(response.text + '\n')
    return response.json()
def new_ENCODE(collection_id, object_json, keys):
    '''
    POST an ENCODE object as JSON and return the response JSON.

    collection_id: name of the collection; the request goes to
                   keys['server'] + '/' + collection_id + '/'.
    object_json:   JSON-serializable object to create.
    keys:          dict providing 'server', 'authid' and 'authpw'.

    Prints the status code; for any status other than 201 the raw response
    text is also written to stderr. The decoded response body is returned
    in every case.
    '''
    url = keys['server'] + '/' + collection_id + '/'
    json_payload = json.dumps(object_json)
    response = requests.post(url, auth=(keys['authid'], keys['authpw']), headers=HEADERS, data=json_payload)
    print(response.status_code)
    if response.status_code != 201:
        sys.stderr.write(response.text + '\n')
    return response.json()
def KeyENCODE(key_file, user_name, server_name):
    '''
    get keys from file

    key_file:    path of a tab-delimited file with a header row; the
                 columns read are 'Server', 'User', 'ID' and 'PW'.
    user_name:   value the 'User' column must equal.
    server_name: value the 'Server' column must equal.

    Returns a dict with 'user', 'server' (built as
    'http://<Server>.encodedcc.org'), 'authid' and 'authpw'. When several
    rows match, the last one wins. Raises ValueError when no row matches.
    '''
    key_info = None
    # The context manager closes the file even if reading a row fails.
    with open(key_file) as key_open:
        for key in csv.DictReader(key_open, delimiter='\t'):
            if key.get('Server') == server_name and key.get('User') == user_name:
                key_info = {
                    'user': key.get('User'),
                    'server': 'http://' + key.get('Server') + '.encodedcc.org',
                    'authid': key.get('ID'),
                    'authpw': key.get('PW'),
                }
                print('Identity confirmed')
    if key_info is None:
        raise ValueError('No key found in %s for user %r on server %r'
                         % (key_file, user_name, server_name))
    return key_info
def ReadJSON(json_file):
    '''
    read json objects from file

    json_file: path of a file holding a single JSON document.

    Returns a list: the document itself when it is already a list,
    otherwise a one-element list wrapping the document.
    '''
    # The context manager closes the file even when parsing fails.
    with open(json_file) as json_load:
        json_read = json.load(json_load)
    # if the returned json object is not a list, put it in one
    if isinstance(json_read, list):
        return json_read
    return [json_read]
def WriteJSON(new_object, object_file):
    '''
    write new json object.

    new_object:  JSON-serializable object to store.
    object_file: path of the file to (over)write.
    '''
    # SHOULD BE MODIFIED TO CUSTOM OUTPUT FORMAT (FOR HUMAN VIEWING)
    # The with-statement closes the file; no explicit close() is needed.
    with open(object_file, 'w') as outfile:
        json.dump(new_object, outfile)
def ValidJSON(object_type, object_id, new_object, keys):
    '''
    check json object for validity

    Fetches '/profiles/<object_type>.json' through GetENCODE and validates
    new_object against it with jsonschema. Prints the outcome (naming
    object_id) and returns True on success, False when validation raised.
    '''
    # SHOULD ONLY NEED OBJECT. NEED DEF TO EXTRACT VALUE (LIKE TYPE) FROM JSON OBJECT GRACEFULLY.
    schema_path = '/profiles/' + object_type + '.json'
    schema = GetENCODE(schema_path, keys)
    # SHOULD HANDLE ERRORS GRACEFULLY
    try:
        jsonschema.validate(new_object, schema)
    except Exception as error:
        # any exception raised by the validator counts as a failed validation
        print('Validation of ' + object_id + ' failed.')
        print(error)
        return False
    print('Validation of ' + object_id + ' succeeded.')
    return True
def CleanJSON(new_object, object_schema, action):
    '''
    intended to fix invalid JSON. removes unexpected or unpatchable properties

    new_object:    dict to clean; it is modified in place.
    object_schema: schema dict; its 'properties' entry is consulted.
    action:        request method name looked up in a property's
                   'requestMethod' list.

    A key is removed when the schema has no (or an empty) entry for it, or
    when that entry has a 'requestMethod' list that does not contain
    action (an empty list therefore always removes the key).
    Returns new_object.
    '''
    # DOES NOT REMOVE ITEMS THAT CAN ONLY BE POSTED
    # Iterate over a snapshot of the keys: entries are deleted inside the loop.
    for key in list(new_object):
        property_schema = object_schema[u'properties'].get(key)
        if not property_schema:
            del new_object[key]
        elif (u'requestMethod' in property_schema
              and action not in property_schema[u'requestMethod']):
            del new_object[key]
    return new_object
def _flatten_reference(value):
    '''Return the '@id' (preferred) or 'href' of a dict; anything else is returned unchanged.'''
    if isinstance(value, dict):
        if u'@id' in value:
            return value[u'@id']
        if u'href' in value:
            return value[u'href']
    return value


def FlatJSON(json_object, keys):
    '''
    flatten embedded json objects to their ID

    json_object: dict to flatten; it is modified in place. Only its direct
                 values, and the direct entries of list values, are
                 examined.
    keys:        unused here; kept so existing callers keep working.

    A dict value (or dict list entry) is replaced by its '@id', or failing
    that its 'href'; dicts with neither are left as they are.
    Returns json_object.
    '''
    # RATE LIMITING STEP: this should be changed to check whether it is needed or not
    for key, value in json_object.items():
        if isinstance(value, list):
            json_object[key] = [_flatten_reference(entry) for entry in value]
        else:
            json_object[key] = _flatten_reference(value)
    return json_object
def _fetch_embedded(candidate, keys):
    '''Fetch the object a path-like text value points at; None when not applicable.'''
    try:
        text_type = unicode  # Python 2: only unicode values are treated as paths
    except NameError:
        text_type = str      # Python 3: all text is str
    if not isinstance(candidate, text_type):
        return None
    # A reference is text longer than one character that starts with '/'.
    if len(candidate) <= 1 or candidate[0] != '/':
        return None
    fetched = GetENCODE(str(candidate), keys)
    return fetched if isinstance(fetched, dict) else None


def EmbedJSON(json_object, keys):
    '''
    expand json object

    json_object: dict to expand; it is modified in place.
    keys:        passed through to GetENCODE.

    A text value that looks like a path (longer than one character and
    starting with '/') is replaced by the dict GetENCODE returns for it.
    A list value is replaced only when every one of its entries could be
    fetched this way; otherwise the list is left untouched.
    Returns json_object.
    '''
    for key, value in json_object.items():
        if isinstance(value, list):
            values_embed = []
            for entry in value:
                embedded = _fetch_embedded(entry, keys)
                if embedded is not None:
                    values_embed.append(embedded)
            # Compare lengths by value: `is` on ints only happens to work
            # for small numbers.
            if len(values_embed) == len(value):
                json_object[key] = values_embed
        else:
            embedded = _fetch_embedded(value, keys)
            if embedded is not None:
                json_object[key] = embedded
    return json_object
def FindSets(jsonobjects, query, returnset):
    '''
    Find a set of objects that contain a particular key value pair in any part of the set.
    Input
        jsonobjects: a list of JSON objects that will be searched.
                     This can either be a uniform collection or not, but each object
                     will be treated as a set. Entries that are not dicts, or that
                     have no '@id', are skipped.
        query: a dict with key:value pair(s) to search for.
               A pair matches when its key is a substring of str(property name)
               and its value is a substring of str(property value).
               Currently, only works as an 'OR' search.
        returnset: a string to indicate how to return values
                   'original': returns only root object
                   'only': returns only objects containing the match
                   'all': returns all objects from the set with the match
    Output
        foundobjects: a list of JSON objects that match the search parameters.
        otherobjects: a list of JSON objects that don't match.
        Both lists hold at most one object per '@id'.
    '''
    foundobjects = []
    otherobjects = []
    for jsonobject in jsonobjects:
        if not isinstance(jsonobject, dict) or u'@id' not in jsonobject:
            continue
        subfoundobjects = []
        subotherobjects = []
        foundobject = False
        for key, value in jsonobject.items():
            # Embedded objects (a dict, or a list whose first entry is a
            # dict) are searched recursively, one object at a time.
            if isinstance(value, dict):
                embedded = [value]
            elif value and isinstance(value, list) and isinstance(value[0], dict):
                embedded = value
            else:
                embedded = None
            if embedded is not None:
                for item in embedded:
                    sfobjs, soobjs = FindSets([item], query, returnset)
                    subfoundobjects.extend(sfobjs)
                    subotherobjects.extend(soobjs)
                continue
            # Everything else is compared through its string form.
            for searchkey, searchvalue in query.items():
                if searchkey in str(key) and searchvalue in str(value):
                    # CURRENTLY ONLY CHECKS FOR ANY HIT. WORKS LIKE 'OR' INSTEAD OF 'AND'.
                    foundobject = True
        # Classify the object itself.
        if foundobject:
            foundobjects.append(jsonobject)
        elif subfoundobjects and returnset in ('all', 'original'):
            foundobjects.append(jsonobject)
        else:
            otherobjects.append(jsonobject)
        # Classify what was collected from its embedded objects.
        if subfoundobjects and returnset in ('all', 'only'):
            foundobjects.extend(subfoundobjects)
        if subfoundobjects and subotherobjects and returnset == 'all':
            foundobjects.extend(subotherobjects)
        else:
            otherobjects.extend(subotherobjects)
    # Keep one object per '@id' (the last one seen) and always hand back
    # real lists, not dict views.
    if foundobjects:
        foundobjects = list({foundobj[u'@id']: foundobj for foundobj in foundobjects}.values())
    if otherobjects:
        otherobjects = list({otherobj[u'@id']: otherobj for otherobj in otherobjects}.values())
    return foundobjects, otherobjects
def LoginGSheet(email, password):
    '''
    start a connection

    Creates a gdata SpreadsheetsService, sets its email and password,
    calls ProgrammaticLogin() on it and returns the service object.
    '''
    client = gdata.spreadsheet.service.SpreadsheetsService()
    client.email, client.password = email, password
    client.ProgrammaticLogin()
    return client
def FindGSpreadSheet(sheetclient, spreadname):
    '''
    find a specific spreadsheet and get the id

    Queries the spreadsheets feed for an exact title match on spreadname.
    Returns (spreadid, spreadsheet) for the first entry, where spreadid is
    the text after the last '/' of the entry id, or ('', '') when the feed
    has no entries.
    '''
    title_query = gdata.spreadsheet.service.DocumentQuery()
    title_query.title = spreadname
    title_query.title_exact = 'true'
    entries = sheetclient.GetSpreadsheetsFeed(query=title_query).entry
    if len(entries) < 1:
        return ('', '')
    first_entry = entries[0]
    entry_id = first_entry.id.text.rsplit('/', 1)[1]
    return (entry_id, first_entry)
def FindGWorkSheet(sheetclient, spreadid, workname):
    '''
    find a specific worksheet and get the id

    Queries the worksheets feed of spreadsheet spreadid for an exact title
    match on workname. Returns (workid, worksheet) for the first entry,
    where workid is the text after the last '/' of the entry id, or
    ('', '') when the feed has no entries.
    '''
    title_query = gdata.spreadsheet.service.DocumentQuery()
    title_query.title = workname
    title_query.title_exact = 'true'
    entries = sheetclient.GetWorksheetsFeed(spreadid, query=title_query).entry
    if len(entries) < 1:
        return ('', '')
    first_entry = entries[0]
    entry_id = first_entry.id.text.rsplit('/', 1)[1]
    return (entry_id, first_entry)
def FindGSheetCells(sheetclient, spreadid, workid):
    '''
    find specified cells (currently returns all, including empty)

    Requests the cells feed of worksheet workid in spreadsheet spreadid
    with return_empty set to "true" and returns the feed's entries.
    '''
    cell_query = gdata.spreadsheet.service.CellQuery()
    cell_query.return_empty = "true"
    cell_feed = sheetclient.GetCellsFeed(spreadid, workid, query=cell_query)
    return cell_feed.entry