# utils.py
import re
import json
def get_identified_verbs(pred):
"""This function returns a list of verbs that are identified in the prediction
:pred is the output of predictor.predict()
"""
verbs=[x['verb'] for x in pred['verbs']]
return verbs
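# A minimal sketch of the prediction format these helpers assume (an AllenNLP-style SRL
# predictor output); the example values below are hypothetical:
#   pred = {'words': ['The', 'dog', 'chased', 'the', 'cat'],
#           'verbs': [{'verb': 'chased',
#                      'description': '[ARG0: The dog] [V: chased] [ARG1: the cat]',
#                      'tags': ['B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1']}]}
#   get_identified_verbs(pred)  # -> ['chased']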
def evaluate_PI_Polysem_DIR(predictor,data,verbose=True):
"""
This function return the failure rates percentage on a test set.
:predictor is predictor object
:data is dictionary in the shape {'verb':[s1,s2]}
WHERE s1 contains the wordd as a noun/adjective, S2 as verb
returns failure rate
"""
fail=0
for v in data:
s1,s2=data[v]
pred1=predictor.predict(s1)
pred2=predictor.predict(s2)
v1=get_identified_verbs(pred1)
v2=get_identified_verbs(pred2)
        if v in v1: # failure: the word should NOT be tagged as a verb in s1 (noun/adjective use)
fail+=1
if verbose:
print(f"Failed for: {s1} ... [{v}] found as a verb ")
continue
if v not in v2:
fail+=1
if verbose:
print(f"Failed for: {s2} ... [{v}] not found as a verb")
continue
return fail/len(data)*100
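# Hypothetical usage sketch (the sentences and predictor are illustrative, not from the test data):
#   polysemy_data = {'play': ("The play was a big success", "The children play in the garden")}
#   evaluate_PI_Polysem_DIR(predictor, polysemy_data)  # -> failure rate in percent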
def evaluate_PI_contractions_INV(predictor,data,verbose=True):
    """
    This function returns a failure rate (percentage) counting how many sentence pairs
    show an inconsistency in the tagging of a contracted predicate. The idea is that a
    predictor should still identify a verb even when it is contracted.
    Matching is TAG BASED: the model must have predicted a verb at the given position
    (the inflected verb); if not, it is a failure.
    :predictor
    :data is a list of lists where each nested list is a pair of sentences and the index to be compared
    returns a failure rate
    """
fail=0
for c,i in data:
s1,s2,index= c[0],c[1],i
pred1=predictor.predict(s1)
pred2=predictor.predict(s2)
v1=get_identified_verbs(pred1)
v2=get_identified_verbs(pred2)
if pred1['words'][index] not in v1 and pred2['words'][index] not in v2:
fail+=1
            if verbose:
print(f"{pred1['words'][index]} or {pred2['words'][index]} not found as verb in {v1=} or {v2=}")
return fail/len(data)*100
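# Hypothetical data sketch: each entry is a (sentence pair, token index) couple, where the
# index points at the inflected verb in both tokenizations (values below are illustrative only):
#   contraction_data = [[("She is not ready", "She isn't ready"), 1]]
#   evaluate_PI_contractions_INV(predictor, contraction_data)  # -> failure rate in percent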
def evaluate_PI_inflections_MFT(predictor,data):
"""
This functions tests MFT for inflected verb forms. Models needs to be able to detect them as verbs, if not, fail
:predictor
:data is a dict {verb:sentence}
output is failure rate
"""
fails=0
for v in data:
s=data[v]
pred=predictor.predict(s)
#print(pred)
verbs=get_identified_verbs(pred)
        if v not in verbs: # failure: the inflected form was not detected as a verb
fails+=1
print(f"Failed for: {s} did not detect {v}\n")
#print(v,verbs)
continue
return (fails)/len(data)*100
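# Hypothetical usage sketch: keys are the inflected surface forms that must show up in the verbs list:
#   inflection_data = {'ran': "She ran to the station", 'eaten': "The cake was eaten quickly"}
#   evaluate_PI_inflections_MFT(predictor, inflection_data)  # -> failure rate in percent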
def evaluate_INV_alltags(pred1,pred2,verbose=False):
"""
    This function matches up the verbs and then compares their tags across two predictions. The verbs themselves do not have to be identical.
"""
if len(pred1['verbs'])!=len(pred2['verbs']) or len(pred1['verbs'])==0 or len(pred2['verbs'])==0:
if verbose:
print("Not the same number of verbs identified in the sentences! Might also be 0!")
print(pred1['words'],pred2['words'])
print("Verbs found: ",[x['verb'] for x in pred1['verbs']],[x['verb'] for x in pred2['verbs']],"\n")
return False
for v in zip(pred1['verbs'],pred2['verbs']):
v1,v2=v
if v1['tags']!=v2['tags']:
if verbose:
print(f"missmatch in tags for verbs [{v1['verb']} and {v2['verb']}]")
print(f"sentences: {v1['description']} and {v2['description']}")
print(f"tags: {v1['tags']} and {v2['tags']}")
print("\n\n")
return False
continue
return True
def validate_INV_allverbs_ARGset(pred1,pred2,verbose=True):
"""
    This function matches up the verbs and checks the unique set of arguments of each verb in two predictions.
"""
#assert pred1['words']==pred2['words'],f"ERROR, Comparing two different sentences"
if len(pred1['verbs'])!=len(pred2['verbs']):
if verbose:
print("not the same number of verbs were found!")
print(pred1['words'],pred2['words'])
print([x['verb'] for x in pred1['verbs']],[x['verb'] for x in pred2['verbs']],"\n")
return False
for v in zip(pred1['verbs'],pred2['verbs']):
v1,v2=v
        if v1['verb']!=v2['verb']: # the verbs should not change when proper nouns change
            if verbose:
                print(f"mismatch in verb identification: {v1['verb']} and {v2['verb']}\n")
return False
unique_tag1=get_unique_args(v1['tags'])
unique_tag2=get_unique_args(v2['tags'])
if unique_tag1!=unique_tag2:
if verbose:
print(f"missmatch in the arguments found tags of verb '{v1['verb']}'\n{v1['description']}\n{v2['description']}\n")
return False
continue
return True
def get_unique_args(s):
"""
Returns the unique arguments in a list of BIO tags.
"""
return set([x[2:] for x in s if x!='O'])
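# For example, with hypothetical BIO tags:
#   get_unique_args(['B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'O'])  # -> {'ARG0', 'V', 'ARG1'}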
def get_dict_args(description):
"""
Returns a dictionary of the arguments found in the description of a verb
Key is the argument, value is the span"""
    args=re.findall(r'\[.*?\]',description) # take the strings within brackets (arguments and V)
argsdict={}
for x in args:
l=(x[1:-1].split(":"))#split the string to get the key and value
key=l[0]
value=l[1].strip()
argsdict[key]=value
return argsdict
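# For example, with a hypothetical description string:
#   get_dict_args('[ARG0: The dog] [V: chased] [ARG1: the cat]')
#   # -> {'ARG0': 'The dog', 'V': 'chased', 'ARG1': 'the cat'}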
def get_main_verb(pred):
    """
    Returns the predicted verb entry with the most arguments, assumed to be the main verb.
    """
    best_verb=None
    max_args=0
    for x in pred['verbs']:
        n_args=len(get_dict_args(x['description']))
        if n_args>max_args:
            best_verb=x
            max_args=n_args
    return best_verb
def evaluate_INV_sameArgs(preda,predp,verbose=True):
"""
This functions checks if the arguments of the main verb are the same in two predictions. focus on main verb.
Crefull, it only works for sentences with one verb and where the verb is regularly reabsformed with auxiliaries
"""
best_verba = get_main_verb(preda)
best_verbp = get_main_verb(predp)
da=get_dict_args(best_verba['description'])
dp=get_dict_args(best_verbp['description'])
dp.pop("V")
da.pop("V")
if da.keys()!=dp.keys():
print("different arguments")
return False
for k in da.keys():
words1 = set(da[k].lower().split())
words2 = set(dp[k].lower().split())
# Find the common words using the intersection method
common_words = words1.intersection(words2)
if (common_words):
#print(common_words)
continue
else:
return False
return True
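# Hypothetical active/passive usage sketch (sentences are illustrative only):
#   pred_active = predictor.predict("The dog chased the cat")
#   pred_passive = predictor.predict("The cat was chased by the dog")
#   evaluate_INV_sameArgs(pred_active, pred_passive)  # -> True if the main verb's arguments overlap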
def check_NER_tags(pred,golden):
    """
    Takes a list of golden tags and a prediction and returns True if the prediction is correct.
    Note: this check is specific to test sentences whose only predicate is 'saw'.
    """
    v=[x['verb'] for x in pred['verbs']]
    if v==['saw'] and pred['verbs'][0]['tags']==golden:
        return True
    return False
def eval_NER(sentences,labels,predictor):
    """
    Returns the failure rate (percentage) of check_NER_tags over a list of sentences.
    """
fails=0
for s in sentences:
pred=predictor.predict(s)
success=check_NER_tags(pred,labels)
if not success:
fails+=1
return fails/len(sentences)*100
def find_roleset_MFT(sents,predictor,verbose=False):
    """Takes a dict where every key is the noun/adj that introduces a roleset and the value is the sentence.
    The model should detect it in the predicted verbs list, in line with PropBank."""
fail=0
for x,s in sents.items():
roleset=x
pred=predictor.predict(s)
rolesets_found=[x['verb'] for x in pred['verbs']]
if roleset not in rolesets_found:
            if verbose:
print(f"[{x}] not detected from '{s}', only {rolesets_found} were found")
fail+=1
return fail/len(sents)*100
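# Hypothetical usage sketch: keys are noun/adjective rolesets that should appear among the predicted predicates:
#   roleset_sents = {'decision': "Their decision to leave surprised everyone"}
#   find_roleset_MFT(roleset_sents, predictor)  # -> failure rate in percent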
def eval_full_sent_BIOtags(sents,labels,predictor,verb_indx=0,verbose=True): #verb_indx defaults to 0, as there is often only one predicate
    """
    This function evaluates the predictor by comparing the predicted BIO tags against the expected labels.
    """
    fails=0
    for s in sents:
        #print(s)
        pred = predictor.predict(s)
        if not pred['verbs']: #no verb found
            fails+=1
            if verbose:
                print("\n FAILED FOR Sentence: ",s)
                print("no verbs found")
            continue
        pred_labels=pred['verbs'][verb_indx]['tags']
        if pred_labels != labels: #wrong prediction
            fails+=1
            if verbose:
                print("\n FAILED FOR Sentence: ",s)
                print("Predicted BIO tags: ",pred_labels)
                print("True BIO tags: ",labels)
    return (fails/len(sents)*100)
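# Hypothetical usage sketch: all sentences share the same template, so one gold tag sequence applies
# (sentences and gold tags below are illustrative only):
#   sents = ["John saw Mary", "Anna saw Tom"]
#   gold = ['B-ARG0', 'B-V', 'B-ARG1']
#   eval_full_sent_BIOtags(sents, gold, predictor)  # -> failure rate in percent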
def eval_full_sent_BIOtags_INV(sentences,labels1,labels2,predictor,verbose=False):
"""
    This function evaluates the performance of the model on a list of sentence pairs for an INV test.
:sentences is a dict where key is s1 and value is s2
:labels1 is the list of BIO tags for the first sentence
:labels2 is the list of BIO tags for the second sentence
:predictor is the predictor
returns the rate of failure
"""
fails=0
for a,p in sentences.items():
predA=predictor.predict(a)
predP=predictor.predict(p)
        if predA['verbs'][0]['tags']!=labels1 or predP['verbs'][1]['tags']!=labels2: # index 1 for the second sentence because the auxiliary occupies position 0
            fails+=1
            if verbose:
                print("Error")
                print(predA['verbs'][0]['description'],"!=",labels1)
                print(predP['verbs'][1]['description'],"!=",labels2)
                print("\n\n")
return fails/len(sentences)*100
def eval_PP_MFT(di,predictor,verbose=False):
"""This function evaluates a big datasets of known PP attachment.
They all refer to the noun thus the label to be predicted must be ['I-ARG1', 'I-ARG1']
structure of the sentence is always the same. 4 words where last two is PP"""
failure=0
count=0
for c in di:
pred=predictor.predict(c)
if not pred['verbs']:
continue
count+=1
pp_pred=pred['verbs'][0]['tags'][-3:] #takes tag of last 3 words
args=[x.split("-")[0] if "-" in x else x for x in pp_pred]
if args[0]!='B' or args[1]!='I' or args[2]!='I':
failure+=1
if verbose:
print(f"Input sentence: {c}")
print(f"Predicted labels for nound and PP: {pp_pred}. Should have been B-I-I")
continue
return failure/count*100, count
def eval_PP_INV(sentences,predictor,verbose=False):
"""
    Evaluate the model on PP attachment based on partial tags. The labels given are in fact only those of the PP.
"""
failure=0
for c in sentences:
s1,s2=c.keys()
pred1=predictor.predict(s1)
pred2=predictor.predict(s2)
labels1=c[s1]
labels2=c[s2]
ll1=len(labels1)
ll2=len(labels2)
if pred1['verbs'][0]['tags'][-ll1:]!=labels1:
failure+=1
if verbose:
print(f"Input sentences: {s1}")
print(f"Predicted labels for PP: {pred1['verbs'][0]['tags'][-ll1:]} but should have been {labels1}")
continue
if pred2['verbs'][0]['tags'][-ll2:]!=labels2:
failure+=1
if verbose:
print(f"Input sentences: {s2}")
print(f"Predicted labels for PP: {pred2['verbs'][0]['tags'][-ll2:]} but should have been {labels2}")
return failure/len(sentences)*100
def eval_spanDetection(sents,start_indx,end_indx,predictor,verbose=False):
"""
    This function evaluates the span detection task. It takes as input a list of sentences, the start and end indices of the span, and a predictor.
    If the elements between the indices are not detected within the same span for at least one predicate, it is a failure.
    returns failure rate.
"""
fails=0
for s in sents:
pred=predictor.predict(s)
preds=pred['verbs']
found=False
for p in preds: #looking for every predicate in the sentence
            #print(p)
            span=p['tags'][start_indx:end_indx]
            #print(span)
            span=[x.split('-',1)[1] if x!='O' else 'O' for x in span] # strip only the B/I prefix, keep the full label
            if len(set(span))==1 and span[0]!='O': #if the whole span carries the same (non-O) label
found=True
else:
continue
if found==False:
fails+=1
if verbose:
print("\nThe span was never detected")
print(pred)
return (fails/len(sents)*100)
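
if __name__ == "__main__":
    # Minimal usage sketch, assuming an AllenNLP SRL predictor; the model archive below is an
    # assumption and may need to be swapped for the checkpoint actually used in this project
    # (requires the allennlp and allennlp-models packages).
    from allennlp.predictors.predictor import Predictor

    predictor = Predictor.from_path(
        "https://storage.googleapis.com/allennlp-public-models/structured-prediction-srl-bert.2020.12.15.tar.gz"
    )
    # Illustrative MFT on inflected verb forms (sentences are hypothetical examples).
    inflection_data = {'ran': "She ran to the station", 'eaten': "The cake was eaten quickly"}
    print(evaluate_PI_inflections_MFT(predictor, inflection_data))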