-
Notifications
You must be signed in to change notification settings - Fork 4
/
extract_confidence_result.py
232 lines (216 loc) · 13.7 KB
/
extract_confidence_result.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
import csv
import numpy as np
import pandas as pd
# Read both result files fully into memory as lists of CSV rows.
# Each row is: entry id, followed by '<ec>/<value>' items.
with open('results/merge_new_price_maxsep.csv', 'r') as pred_file:
    pred = list(csv.reader(pred_file))
with open('results/merge_new_price_maxsep_confidence.csv', 'r') as conf_file:
    conf = list(csv.reader(conf_file))

# pred_conf_dict[entry][ec] -> [prediction value, confidence, ...]
# Index 0 comes from the prediction file, index 1 onwards from the
# confidence file.
pred_conf_dict = dict()
for row in pred:
    per_ec = {}
    for item in row[1:]:
        fields = item.split('/')
        # Wrap in a list so the confidence can be appended next to it below.
        per_ec[fields[0]] = [fields[1]]
    pred_conf_dict[row[0]] = per_ec

for row in conf:
    for item in row[1:]:
        fields = item.split('/')
        values = pred_conf_dict[row[0]][fields[0]]
        # Only the part before the first '_' of the confidence item is kept.
        values.append(fields[1].split('_')[0])
# One container per cumulative bucket 'confidence_0_0.1' ... 'confidence_0_1.0':
#   results[bucket]   -> dict, later filled as entry -> list of predictions
#   data_conf[bucket] -> list, later filled with matching rows of the source table
results = dict()
data_conf = dict()
for upper in np.linspace(0.1, 1, 10):
    bucket = 'confidence_0_{:.1f}'.format(upper)
    results[bucket] = dict()
    data_conf[bucket] = []
# Tab-separated source table; the rest of the script selects its rows by the
# 'Entry' column and rewrites its 'EC number' column.
data = pd.read_csv('data/merge_new_price.csv', sep='\t')

# Two-way split at confidence 0.5 (low bucket first, then high bucket):
#   results_lvl_3_4[bucket]   -> dict, later filled as entry -> list of predictions
#   data_conf_lvl_3_4[bucket] -> list, later filled with matching rows of `data`
results_lvl_3_4 = dict()
data_conf_lvl_3_4 = dict()
for bucket in ('confidence_0_0.5', 'confidence_0.5_1.0'):
    results_lvl_3_4[bucket] = dict()
    data_conf_lvl_3_4[bucket] = []
# Upper bound of every cumulative bucket below 1.0, paired with its key in
# `results` / `data_conf`.  The bounds are written as literals (not derived
# from np.linspace) so the comparisons use exactly 0.1, 0.2, ... 0.9.
_CUMULATIVE_BUCKETS = (
    (0.1, 'confidence_0_0.1'),
    (0.2, 'confidence_0_0.2'),
    (0.3, 'confidence_0_0.3'),
    (0.4, 'confidence_0_0.4'),
    (0.5, 'confidence_0_0.5'),
    (0.6, 'confidence_0_0.6'),
    (0.7, 'confidence_0_0.7'),
    (0.8, 'confidence_0_0.8'),
    (0.9, 'confidence_0_0.9'),
)

# Distribute every (entry, EC) prediction over the confidence buckets.
#   * results / data_conf: cumulative buckets -- a prediction with confidence
#     c lands in 'confidence_0_U' for every upper bound U with c <= U, and
#     always in 'confidence_0_1.0'.
#   * results_lvl_3_4 / data_conf_lvl_3_4: a two-way split at 0.5; in the low
#     bucket the last dot-separated field of the EC is replaced by '-'.
for ent in pred_conf_dict:
    # Every entry gets a (possibly empty) list in every bucket; the writers
    # further down skip the empty ones.
    for k in results:
        results[k][ent] = list()
    results_lvl_3_4['confidence_0.5_1.0'][ent] = list()
    results_lvl_3_4['confidence_0_0.5'][ent] = list()

    # Rows of the source table for this entry.  The selection depends only on
    # `ent`, so it is done once here instead of once per bucket per EC.
    ent_rows = data.loc[data['Entry'] == ent]

    for ec in pred_conf_dict[ent]:
        # Index 1 is the confidence (parsed once), index 0 the prediction value.
        confidence = float(pred_conf_dict[ent][ec][1])
        pred_value = pred_conf_dict[ent][ec][0]
        pred_label = f'{ec}/{pred_value}'

        # The widest bucket receives every prediction unconditionally.
        results['confidence_0_1.0'][ent].append(pred_label)
        data_conf['confidence_0_1.0'].append(ent_rows)

        for upper, bucket in _CUMULATIVE_BUCKETS:
            # `not >` rather than `<=`: a NaN confidence fails every `>` test
            # and therefore still ends up in all buckets.
            if not confidence > upper:
                results[bucket][ent].append(pred_label)
                data_conf[bucket].append(ent_rows)

        if confidence > 0.5:
            results_lvl_3_4['confidence_0.5_1.0'][ent].append(pred_label)
            data_conf_lvl_3_4['confidence_0.5_1.0'].append(ent_rows)
        else:
            # Low confidence: blank out the last EC field ('a.b.c.d' -> 'a.b.c.-').
            ec_lvs = ec.split('.')
            ec_lvs[-1] = '-'
            new_ec = '.'.join(ec_lvs)
            pred_ec = f'{new_ec}/{pred_value}'
            results_lvl_3_4['confidence_0_0.5'][ent].append(pred_ec)
            data_conf_lvl_3_4['confidence_0_0.5'].append(ent_rows)
# Per bucket, build the CSV rows: [entry, prediction, prediction, ...].
# Entries that received no prediction in a bucket are left out of its file.
write_results = {
    bucket: [[ent] + preds for ent, preds in per_entry.items() if preds]
    for bucket, per_entry in results.items()
}

# One comma-separated file per bucket.
for key, rows in write_results.items():
    out_path = 'results/merge_new_price_maxsep_' + key + '.csv'
    with open(out_path, 'w', newline='') as out_file:
        csv.writer(out_file, delimiter=',').writerows(rows)
# Collapse each bucket's list of row selections into one de-duplicated table
# and write it out tab-separated (one file per bucket).
for key in data_conf:
    if data_conf[key]:
        data_conf[key] = pd.concat(data_conf[key]).drop_duplicates()
    else:
        # pd.concat raises ValueError on an empty list.  A bucket that got no
        # predictions is exported as a header-only table instead of aborting
        # the remaining exports.
        data_conf[key] = data.iloc[0:0].copy()
    data_conf[key].to_csv('data/merge_new_price_maxsep_' + key + '.csv', sep='\t', index=False)
# Per bucket of the 0.5 split, build the CSV rows:
# [entry, prediction, prediction, ...].  Entries without any prediction in a
# bucket are left out of its file.
write_results_lvl_3_4 = {
    bucket: [[ent] + preds for ent, preds in per_entry.items() if preds]
    for bucket, per_entry in results_lvl_3_4.items()
}

# One comma-separated file per bucket.
for key, rows in write_results_lvl_3_4.items():
    out_path = 'results/merge_new_price_maxsep_lv34_' + key + '.csv'
    with open(out_path, 'w', newline='') as out_file:
        csv.writer(out_file, delimiter=',').writerows(rows)
def f(x):
    """Replace the last dot-separated field of each ';'-separated item with '-'.

    For example '1.2.3.4;5.6.7.8' becomes '1.2.3.-;5.6.7.-'.  An item that
    contains no '.' (including the empty string) becomes '-'.
    """
    def _mask_last(ec):
        # Keep everything up to and including the final '.', drop the rest.
        head, dot, _ = ec.rpartition('.')
        return head + dot + '-'

    return ';'.join(_mask_last(ec) for ec in x.split(';'))
# Collapse each bucket of the 0.5 split into one de-duplicated table and write
# it out tab-separated (one file per bucket).
for key in data_conf_lvl_3_4:
    if data_conf_lvl_3_4[key]:
        data_conf_lvl_3_4[key] = pd.concat(data_conf_lvl_3_4[key]).drop_duplicates()
    else:
        # pd.concat raises ValueError on an empty list, which happens whenever
        # all confidences fall on one side of 0.5.  Export a header-only table
        # for the empty side instead of aborting.
        data_conf_lvl_3_4[key] = data.iloc[0:0].copy()
    if '0_0.5' in key:
        # Low-confidence bucket: blank out the last field of every EC in the
        # 'EC number' column, matching the truncated predictions written for
        # this bucket.
        data_conf_lvl_3_4[key]['EC number'] = data_conf_lvl_3_4[key]['EC number'].apply(f)
    data_conf_lvl_3_4[key].to_csv('data/merge_new_price_maxsep_lv34_' + key + '.csv', sep='\t', index=False)