-
Notifications
You must be signed in to change notification settings - Fork 34
/
evaluate_morphotags.py
86 lines (75 loc) · 2.82 KB
/
evaluate_morphotags.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
'''
Takes <gold, observed> annotation pairs, each a set of <attribute, value> entries, and produces macro or micro F1 scores,
either by attribute alone (pooled over values) or by attribute-value combination.
'''
from numpy import average
__author__ = "Yuval Pinter, November 2016"
def f1(corr, gold, obs):
    '''
    F1 score (harmonic mean of recall and precision) from raw counts.
    :param corr: count of correct items
    :param gold: count of gold items (recall denominator)
    :param obs: count of observed items (precision denominator)
    :return: F1 as a float; 0.0 when any of the three counts is zero or negative
    '''
    if gold <= 0 or obs <= 0 or corr <= 0:
        # Degenerate counts: recall/precision would be undefined or both zero,
        # making rec + pre below a zero divisor. Return a float so callers
        # always receive the same numeric type.
        return 0.0
    rec = corr / gold
    pre = corr / obs
    return (2 * rec * pre) / (rec + pre)
class Evaluator(object):
    '''
    Aggregates and evaluates attribute scores in several available modes:
    att - pool scores by attribute over values
    att_val - separate scores for each <attribute, value> pair
    exact - only compute accuracy for full tag (all attributes in instance)
    '''

    def __init__(self, m='att'):
        '''
        :param m: evaluation mode - 'att', 'att_val' or 'exact'
        '''
        self.instance_count = 0
        self.exact_match = 0
        # Per-key counters read by mic_f1 / mac_f1; keys are produced by _key().
        self.correct = {}
        self.gold = {}
        self.observed = {}
        self.mode = m

    def add_instance(self, g, o):
        '''
        Record one <gold, observed> pair in the running counts.
        :param g: - gold annotation for instance (attribute -> value mapping)
        :param o: - observed (inferred) annotation for instance (attribute -> value mapping)
        '''
        self.instance_count += 1
        if self.mode == 'exact':
            if g == o:  # order-insensitive
                self.exact_match += 1
            return

        for k, v in g.items():
            key = self._key(k, v)
            # Explicit membership test rather than a placeholder default value,
            # so no legitimate gold value can be mistaken for "attribute missing".
            if k in o and o[k] == v:
                self.correct[key] = self.correct.get(key, 0) + 1  # for macro-micro
            self.gold[key] = self.gold.get(key, 0) + 1  # mac-mic

        for k, v in o.items():
            key = self._key(k, v)
            self.observed[key] = self.observed.get(key, 0) + 1  # mac-mic

    def _key(self, k, v):
        '''
        Build the counter key for an <attribute, value> entry under the current mode.
        :param k: attribute
        :param v: value
        :return: k in 'att' mode, (k, v) in 'att_val' mode, None for any other mode
        '''
        if self.mode == 'att':
            return k
        if self.mode == 'att_val':
            return (k, v)
        # Any other mode pools every entry under a single None key.
        return None

    def mic_f1(self, att=None):
        '''
        Micro F1
        :param att: get f1 for specific attribute (exact match against the counter key)
        :return: f1 over the counts of the given key, or over the summed counts of all keys
        '''
        if att is not None:
            return f1(self.correct.get(att, 0), self.gold.get(att, 0), self.observed.get(att, 0))
        return f1(sum(self.correct.values()), sum(self.gold.values()), sum(self.observed.values()))

    def mac_f1(self, att=None):
        '''
        Macro F1 - unweighted average of the per-key f1 scores.
        :param att: restrict the average to one attribute. In att_val mode this averages
                    over all values seen for that attribute; in other modes it selects
                    the key equal to att.
        :return: average f1 over the selected keys, or 0.0 when no key is selected
        '''
        all_keys = set(self.gold) | set(self.observed)
        if att is None:
            keys = all_keys
        elif self.mode == 'att_val':
            # Keys are (attribute, value) tuples; match on the attribute part.
            keys = [k for k in all_keys if k[0] == att]
        else:
            # Keys are the attributes themselves; indexing k[0] here would only
            # compare the first character of a string attribute name.
            keys = [k for k in all_keys if k == att]
        if not keys:
            # Averaging an empty list yields nan (with a warning); report zero
            # instead, in line with mic_f1 and acc on empty counts.
            return 0.0
        return average([f1(self.correct.get(k, 0), self.gold.get(k, 0), self.observed.get(k, 0)) for k in keys])

    def acc(self):
        '''
        Accuracy for 'exact' mode
        :return: fraction of instances whose gold and observed annotations were equal,
                 or 0.0 when no instance has been added
        '''
        if self.instance_count <= 0:
            return 0.0
        return self.exact_match / self.instance_count