-
Notifications
You must be signed in to change notification settings - Fork 56
/
evaluation.py
257 lines (215 loc) · 9.83 KB
/
evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
# from https://github.com/chainer/chainercv/blob/master/chainercv/evaluations/eval_detection_voc.py
from collections import defaultdict
import itertools
import numpy as np
from collections import defaultdict
import itertools
import numpy as np
import six
def jaccard(a, b):
    """Return the pairwise Jaccard index (IoU) between two sets of boxes.

    Args:
        a: Array-like of shape ``(N, 4)``. Columns 0-1 hold the minimum
            corner of each box and columns 2-3 the maximum corner.
        b: Array-like of shape ``(M, 4)`` with the same column layout.

    Returns:
        numpy.ndarray: An ``(N, M)`` array whose element ``[i, j]`` is the
        intersection-over-union of ``a[i]`` and ``b[j]``.
    """
    # Accept plain nested lists as well as ndarrays.
    a = np.asarray(a)
    b = np.asarray(b)

    # Corners of the intersection rectangle for every (a[i], b[j]) pair,
    # obtained by broadcasting a to shape (N, 1, 2) against b of shape (M, 2).
    lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
    rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])

    # Non-overlapping pairs give a negative extent; clamp it to zero.
    inter = np.clip(rb - lt, 0, None)
    area_i = np.prod(inter, axis=2)

    area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
    area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
    area_u = area_a[:, np.newaxis] + area_b - area_i

    # Lower-bound the union so degenerate (zero-area) pairs do not divide by 0.
    return area_i / np.clip(area_u, 1e-7, None)  # len(a) x len(b)
def eval_voc_detection(
        pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels,
        gt_difficults=None,
        iou_thresh=0.5, use_07_metric=False):
    """Compute per-class average precision and its mean for a dataset.

    This is a thin wrapper that chains
    :func:`calc_detection_voc_prec_rec` and :func:`calc_detection_voc_ap`.

    Args:
        pred_bboxes: Iterable of per-image predicted boxes.
        pred_labels: Iterable of per-image predicted labels.
        pred_scores: Iterable of per-image prediction scores.
        gt_bboxes: Iterable of per-image ground truth boxes.
        gt_labels: Iterable of per-image ground truth labels.
        gt_difficults: Optional iterable of per-image boolean arrays
            flagging difficult ground truth boxes. Passed through
            unchanged to :func:`calc_detection_voc_prec_rec`.
        iou_thresh (float): IoU threshold forwarded to
            :func:`calc_detection_voc_prec_rec`.
        use_07_metric (bool): Forwarded to :func:`calc_detection_voc_ap`.

    Returns:
        dict: ``{'ap': ap, 'map': mean}`` where ``ap`` is the array returned
        by :func:`calc_detection_voc_ap` and ``mean`` is its
        :func:`numpy.nanmean`, i.e. classes whose AP is NaN are skipped
        when averaging.
    """
    prec, rec = calc_detection_voc_prec_rec(
        pred_bboxes, pred_labels, pred_scores,
        gt_bboxes, gt_labels, gt_difficults,
        iou_thresh=iou_thresh)

    ap = calc_detection_voc_ap(prec, rec, use_07_metric=use_07_metric)

    return {'ap': ap, 'map': np.nanmean(ap)}
def calc_detection_voc_prec_rec(
        pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels,
        gt_difficults=None,
        iou_thresh=0.5):
    """Calculate precision and recall based on evaluation code of PASCAL VOC.

    This function calculates precision and recall of
    predicted bounding boxes obtained from a dataset which has :math:`N`
    images.
    The code is based on the evaluation code used in PASCAL VOC Challenge.

    Args:
        pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N`
            sets of bounding boxes.
            Its index corresponds to an index for the base dataset.
            Each element of :obj:`pred_bboxes` is a set of coordinates
            of bounding boxes. This is an array whose shape is :math:`(R, 4)`,
            where :math:`R` corresponds
            to the number of bounding boxes, which may vary among images.
            The second axis corresponds to :obj:`y_min, x_min, y_max, x_max`
            of a bounding box.
        pred_labels (iterable of numpy.ndarray): An iterable of labels.
            Similar to :obj:`pred_bboxes`, its index corresponds to an
            index for the base dataset. Its length is :math:`N`.
        pred_scores (iterable of numpy.ndarray): An iterable of confidence
            scores for predicted bounding boxes. Similar to :obj:`pred_bboxes`,
            its index corresponds to an index for the base dataset.
            Its length is :math:`N`.
        gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth
            bounding boxes
            whose length is :math:`N`. An element of :obj:`gt_bboxes` is a
            bounding box whose shape is :math:`(R, 4)`. Note that the number of
            bounding boxes in each image does not need to be same as the number
            of corresponding predicted boxes.
        gt_labels (iterable of numpy.ndarray): An iterable of ground truth
            labels which are organized similarly to :obj:`gt_bboxes`.
        gt_difficults (iterable of numpy.ndarray): An iterable of boolean
            arrays which is organized similarly to :obj:`gt_bboxes`.
            This tells whether the
            corresponding ground truth bounding box is difficult or not.
            By default, this is :obj:`None`. In that case, this function
            considers all bounding boxes to be not difficult. An individual
            element may also be :obj:`None`, with the same meaning for that
            image.
        iou_thresh (float): A prediction is matched to a ground truth box
            if their Intersection over Union is at least this value.

    Returns:
        tuple of two lists:
        This function returns two lists: :obj:`prec` and :obj:`rec`.
        Both are empty when the inputs contain no labels at all.

        * :obj:`prec`: A list of arrays. :obj:`prec[l]` is precision
          for class :math:`l`. If class :math:`l` does not exist in
          either :obj:`pred_labels` or :obj:`gt_labels`, :obj:`prec[l]` is
          set to :obj:`None`.
        * :obj:`rec`: A list of arrays. :obj:`rec[l]` is recall
          for class :math:`l`. If class :math:`l` that is not marked as
          difficult does not exist in
          :obj:`gt_labels`, :obj:`rec[l]` is
          set to :obj:`None`.

    Raises:
        ValueError: If the input iterables do not all have the same length.
    """
    streams = [pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels]
    if gt_difficults is not None:
        streams.append(gt_difficults)

    n_pos = defaultdict(int)    # class -> number of non-difficult GT boxes
    score = defaultdict(list)   # class -> scores of all predictions
    match = defaultdict(list)   # class -> 1 (TP), 0 (FP) or -1 (ignored)

    # zip_longest with a unique sentinel detects a length mismatch no matter
    # which of the iterables is the shorter one. A plain zip silently drops
    # one surplus element from the iterables listed before the short one.
    missing = object()
    for items in itertools.zip_longest(*streams, fillvalue=missing):
        if any(item is missing for item in items):
            raise ValueError('Length of input iterables need to be same.')

        pred_bbox, pred_label, pred_score, gt_bbox, gt_label = (
            np.asarray(item) for item in items[:5])
        gt_difficult = items[5] if len(items) > 5 else None
        if gt_difficult is None:
            gt_difficult = np.zeros(gt_bbox.shape[0], dtype=bool)
        else:
            gt_difficult = np.asarray(gt_difficult, dtype=bool)

        labels = np.unique(
            np.concatenate((pred_label, gt_label)).astype(int))
        for l in labels:
            l = int(l)

            pred_mask_l = pred_label == l
            pred_bbox_l = pred_bbox[pred_mask_l]
            pred_score_l = pred_score[pred_mask_l]
            # Process predictions of this image in descending score order so
            # that the highest scoring one claims a ground truth box first.
            order = pred_score_l.argsort()[::-1]
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask_l = gt_label == l
            gt_bbox_l = gt_bbox[gt_mask_l]
            gt_difficult_l = gt_difficult[gt_mask_l]

            n_pos[l] += np.logical_not(gt_difficult_l).sum()
            score[l].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                # Nothing to match against: every prediction is a false
                # positive.
                match[l].extend((0,) * pred_bbox_l.shape[0])
                continue

            # NOTE: the original VOC code adds 1 to the max corner
            # (pixel-inclusive integer boxes). That adjustment is disabled
            # here, so boxes are compared as given.
            iou = jaccard(pred_bbox_l, gt_bbox_l)
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < iou_thresh] = -1
            del iou

            selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx < 0:
                    match[l].append(0)
                    continue
                if gt_difficult_l[gt_idx]:
                    # Matches to difficult boxes are neither TP nor FP.
                    match[l].append(-1)
                elif selec[gt_idx]:
                    # Ground truth already claimed by a better prediction.
                    match[l].append(0)
                else:
                    match[l].append(1)
                selec[gt_idx] = True

    if not n_pos:
        # No image or no label at all: there is no class to report.
        return [], []

    n_fg_class = max(n_pos) + 1
    prec = [None] * n_fg_class
    rec = [None] * n_fg_class

    for l in n_pos:
        score_l = np.array(score[l])
        match_l = np.array(match[l], dtype=np.int8)

        order = score_l.argsort()[::-1]
        match_l = match_l[order]

        tp = np.cumsum(match_l == 1)
        fp = np.cumsum(match_l == 0)

        # If an element of fp + tp is 0,
        # the corresponding element of prec[l] is nan. This is intended, so
        # the floating point warning is silenced.
        with np.errstate(divide='ignore', invalid='ignore'):
            prec[l] = tp / (fp + tp)
        # If n_pos[l] is 0, rec[l] is None.
        if n_pos[l] > 0:
            rec[l] = tp / n_pos[l]

    return prec, rec
def calc_detection_voc_ap(prec, rec, use_07_metric=False):
    """Calculate average precisions based on evaluation code of PASCAL VOC.

    This function calculates average precisions
    from given precisions and recalls.
    The code is based on the evaluation code used in PASCAL VOC Challenge.

    Args:
        prec (list of numpy.array): A list of arrays.
            :obj:`prec[l]` indicates precision for class :math:`l`.
            If :obj:`prec[l]` is :obj:`None`, this function returns
            :obj:`numpy.nan` for class :math:`l`.
        rec (list of numpy.array): A list of arrays.
            :obj:`rec[l]` indicates recall for class :math:`l`.
            If :obj:`rec[l]` is :obj:`None`, this function returns
            :obj:`numpy.nan` for class :math:`l`.
        use_07_metric (bool): Whether to use PASCAL VOC 2007 evaluation metric
            for calculating average precision. The default value is
            :obj:`False`.

    Returns:
        ~numpy.ndarray:
        This function returns an array of average precisions.
        The :math:`l`-th value corresponds to the average precision
        for class :math:`l`. If :obj:`prec[l]` or :obj:`rec[l]` is
        :obj:`None`, the corresponding value is set to :obj:`numpy.nan`.
    """
    n_fg_class = len(prec)
    ap = np.empty(n_fg_class)
    for l in range(n_fg_class):
        if prec[l] is None or rec[l] is None:
            ap[l] = np.nan
            continue

        # NaN precision (no TP and no FP counted yet) is treated as 0.
        prec_l = np.nan_to_num(prec[l])
        rec_l = np.asarray(rec[l])

        if use_07_metric:
            # 11 point metric: average of the best precision reachable at
            # recall >= t for t = 0.0, 0.1, ..., 1.0
            ap[l] = 0
            for t in np.arange(0., 1.1, 0.1):
                reached = rec_l >= t
                if np.sum(reached) == 0:
                    p = 0
                else:
                    p = np.max(prec_l[reached])
                ap[l] += p / 11
        else:
            # correct AP calculation
            # first append sentinel values at the end
            mpre = np.concatenate(([0], prec_l, [0]))
            mrec = np.concatenate(([0], rec_l, [1]))

            # Make precision monotonically non-increasing in recall.
            mpre = np.maximum.accumulate(mpre[::-1])[::-1]

            # to calculate area under PR curve, look for points
            # where X axis (recall) changes value
            i = np.where(mrec[1:] != mrec[:-1])[0]

            # and sum (delta recall) * prec
            ap[l] = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])

    return ap