forked from yjh0410/YOLO-Nano
-
Notifications
You must be signed in to change notification settings - Fork 0
/
tools.py
280 lines (232 loc) · 11.7 KB
/
tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from data.config import IGNORE_THRESH
# IoU threshold taken from data.config.  In multi_gt_creator, anchors whose
# shape IoU with a ground-truth box exceeds this value but which are not the
# best match get objectness -1, i.e. they are ignored by the objectness loss.
ignore_thresh = IGNORE_THRESH
class MSEWithLogitsLoss(nn.Module):
    """
    Squared-error objectness loss computed on sigmoid(logits).

    Entries whose mask is 1.0 are positives (weighted by 5.0), entries whose
    mask is 0.0 are negatives (weighted by 1.0, pushed towards 0).  Any other
    mask value (e.g. -1.0) contributes nothing to the loss.
    """

    def __init__(self, reduction='mean'):
        super().__init__()
        self.reduction = reduction

    def forward(self, logits, targets, mask):
        """
        Input:
            logits : tensor -> raw (pre-sigmoid) predictions.
            targets : tensor -> regression targets for the positive entries.
            mask : tensor -> 1.0 for positives, 0.0 for negatives, anything else is ignored.
        Output:
            With reduction == 'mean': the summed loss divided by the batch
            size (logits.size(0)) -- note this is not a mean over elements.
            Otherwise: the element-wise weighted loss.
        """
        probs = torch.sigmoid(logits)

        is_positive = (mask == 1.0).float()
        is_negative = (mask == 0.0).float()

        positive_term = is_positive * (probs - targets) ** 2
        negative_term = is_negative * (probs) ** 2
        weighted = 5.0 * positive_term + 1.0 * negative_term

        if self.reduction != 'mean':
            return weighted

        return torch.sum(weighted) / logits.size(0)
def compute_iou(anchor_boxes, gt_box):
    """
    Compute the IoU between every anchor box and one ground-truth box.

    Input:
        anchor_boxes : ndarray -> [[c_x, c_y, w, h], ..., [c_x, c_y, w, h]], shape [m, 4].
        gt_box : ndarray -> [c_x, c_y, w, h] or [[c_x, c_y, w, h]] (exactly one box).
    Output:
        iou : ndarray -> [iou_1, iou_2, ..., iou_m], and m is equal to the number of anchor boxes.
    Raises:
        ValueError : if gt_box holds more than one box.
    """
    anchors = np.asarray(anchor_boxes, dtype=np.float64)

    # Accept both the documented 1-D form and the [1, 4] form used by callers.
    gt = np.atleast_2d(np.asarray(gt_box, dtype=np.float64))
    if gt.shape[0] != 1:
        raise ValueError("compute_iou expects exactly one ground-truth box")
    gt = gt[0]

    # [c_x, c_y, w, h] -> corner coordinates.
    w_ab, h_ab = anchors[:, 2], anchors[:, 3]
    ab_xmin = anchors[:, 0] - w_ab / 2
    ab_ymin = anchors[:, 1] - h_ab / 2
    ab_xmax = anchors[:, 0] + w_ab / 2
    ab_ymax = anchors[:, 1] + h_ab / 2

    # The single gt box broadcasts against all anchors, no explicit repeat needed.
    w_gt, h_gt = gt[2], gt[3]
    gt_xmin = gt[0] - w_gt / 2
    gt_ymin = gt[1] - h_gt / 2
    gt_xmax = gt[0] + w_gt / 2
    gt_ymax = gt[1] + h_gt / 2

    # Clamp at zero: without it, disjoint boxes give a negative extent, and two
    # negative extents multiply into a bogus positive intersection area.
    I_w = np.maximum(np.minimum(gt_xmax, ab_xmax) - np.maximum(gt_xmin, ab_xmin), 0.0)
    I_h = np.maximum(np.minimum(gt_ymax, ab_ymax) - np.maximum(gt_ymin, ab_ymin), 0.0)

    S_I = I_h * I_w
    # The tiny epsilon avoids 0/0 when both boxes have zero area.
    U = w_gt * h_gt + w_ab * h_ab - S_I + 1e-20
    return S_I / U
def set_anchors(anchor_size):
    """
    Build origin-centred anchor boxes from a list of anchor sizes.

    Input:
        anchor_size : list -> [[w_1, h_1], [w_2, h_2], ..., [w_n, h_n]]
                      (each entry is unpacked as width first, then height).
    Output:
        anchor_boxes : ndarray -> [[0, 0, anchor_w, anchor_h],
                                   [0, 0, anchor_w, anchor_h],
                                   ...
                                   [0, 0, anchor_w, anchor_h]], shape [n, 4].
    """
    boxes = np.zeros([len(anchor_size), 4])
    # The centre columns stay at zero; only width and height are filled in.
    for row, (anchor_w, anchor_h) in zip(boxes, anchor_size):
        row[2] = anchor_w
        row[3] = anchor_h
    return boxes
def _locate_cell(c_x, c_y, stride):
    """Return the box centre in grid units and the integer cell containing it."""
    c_x_s = c_x / stride
    c_y_s = c_y / stride
    return c_x_s, c_y_s, int(c_x_s), int(c_y_s)


def _cell_in_map(gt_map, grid_y, grid_x):
    """Tell whether (grid_y, grid_x) is a valid cell of a [B, H, W, A, D] target map."""
    return 0 <= grid_y < gt_map.shape[1] and 0 <= grid_x < gt_map.shape[2]


def multi_gt_creator(input_size, strides, label_lists, anchor_size):
    """
    Create the multi-scale ground-truth tensor used for training.

    Input:
        input_size : int -> side length of the (square) network input.
        strides : list -> one stride per detection scale.
        label_lists : list -> per image, a list of [xmin, ymin, xmax, ymax, class].
                      Coordinates are multiplied by input_size here, so they
                      are expected to be relative to the image size.
        anchor_size : list -> [[w, h], ...] for all scales; the first
                      len(anchor_size) // len(strides) entries belong to
                      strides[0], the next group to strides[1], and so on.
    Output:
        tensor -> float tensor of shape [B, sum_s(H_s * W_s * A), 11], where the
        last axis is [obj, class, tx, ty, tw, th, weight, xmin, ymin, xmax, ymax].
        obj is 1.0 for the best-matching anchor, -1.0 for anchors that are
        ignored, and 0.0 elsewhere.
    """
    batch_size = len(label_lists)
    h = w = input_size
    num_scale = len(strides)
    anchor_number = len(anchor_size) // num_scale
    target_dim = 1 + 1 + 4 + 1 + 4

    # One empty target map per scale.
    gt_tensor = [
        np.zeros([batch_size, h // s, w // s, anchor_number, target_dim])
        for s in strides
    ]

    # The anchors do not depend on the label, so build them once.
    anchor_boxes = set_anchors(anchor_size)

    for batch_index, labels in enumerate(label_lists):
        for gt_label in labels:
            gt_class = int(gt_label[-1])
            xmin, ymin, xmax, ymax = gt_label[:-1]

            # Centre, width and height in input pixels.
            c_x = (xmax + xmin) / 2 * w
            c_y = (ymax + ymin) / 2 * h
            box_w = (xmax - xmin) * w
            box_h = (ymax - ymin) * h

            if box_w < 1. or box_h < 1.:
                # Degenerate box (smaller than one pixel): skip it.
                continue

            # Shape-only IoU: both the anchors and the gt box sit at the origin.
            gt_box = np.array([[0, 0, box_w, box_h]])
            iou = compute_iou(anchor_boxes, gt_box)

            # The best anchor is always the positive one: either nothing passes
            # the threshold (fall back to the best), or the best one passes it.
            best_index = int(np.argmax(iou))

            # Every other anchor above the threshold is marked as ignored
            # (objectness -1), so the objectness loss skips it.
            # NOTE(review): this can overwrite a positive written earlier for
            # another gt box in the same cell/anchor, as in the original code.
            for index in np.flatnonzero(iou > ignore_thresh):
                if index == best_index:
                    continue
                s_indx, ab_ind = divmod(int(index), anchor_number)
                _, _, grid_x, grid_y = _locate_cell(c_x, c_y, strides[s_indx])
                gt_map = gt_tensor[s_indx]
                # Same bounds check as for the positive anchor; without it an
                # out-of-range centre raised IndexError here.
                if _cell_in_map(gt_map, grid_y, grid_x):
                    gt_map[batch_index, grid_y, grid_x, ab_ind, 0] = -1.0
                    gt_map[batch_index, grid_y, grid_x, ab_ind, 6] = -1.0

            # Positive assignment for the best anchor.
            s_indx, ab_ind = divmod(best_index, anchor_number)
            p_w, p_h = anchor_boxes[best_index, 2], anchor_boxes[best_index, 3]
            c_x_s, c_y_s, grid_x, grid_y = _locate_cell(c_x, c_y, strides[s_indx])

            tx = c_x_s - grid_x
            ty = c_y_s - grid_y
            tw = np.log(box_w / p_w)
            th = np.log(box_h / p_h)
            # Smaller boxes get a larger regression weight.
            weight = 2.0 - (box_w / w) * (box_h / h)

            gt_map = gt_tensor[s_indx]
            # Negative cells are rejected too, so they no longer wrap around.
            if _cell_in_map(gt_map, grid_y, grid_x):
                gt_map[batch_index, grid_y, grid_x, ab_ind, 0] = 1.0
                gt_map[batch_index, grid_y, grid_x, ab_ind, 1] = gt_class
                gt_map[batch_index, grid_y, grid_x, ab_ind, 2:6] = np.array([tx, ty, tw, th])
                gt_map[batch_index, grid_y, grid_x, ab_ind, 6] = weight
                gt_map[batch_index, grid_y, grid_x, ab_ind, 7:] = np.array([xmin, ymin, xmax, ymax])

    # Flatten every scale to [B, H*W*A, D] and concatenate along the anchor axis.
    gt_tensor = [gt.reshape(batch_size, -1, target_dim) for gt in gt_tensor]
    gt_tensor = np.concatenate(gt_tensor, 1)
    return torch.from_numpy(gt_tensor).float()
def iou_score(bboxes_a, bboxes_b, batch_size):
    """
    Element-wise IoU between two aligned sets of corner-format boxes.

    Input:
        bboxes_a : tensor -> [B*N, 4] = [x1, y1, x2, y2]
        bboxes_b : tensor -> [B*N, 4] = [x1, y1, x2, y2]
        batch_size : int -> B, used to reshape the result.
    Output:
        iou : tensor -> [B, N, 1]; 0 where the boxes do not overlap, including
              the case where both boxes have zero area.
    """
    # Corners of the intersection rectangle.
    tl = torch.max(bboxes_a[:, :2], bboxes_b[:, :2])
    br = torch.min(bboxes_a[:, 2:], bboxes_b[:, 2:])

    area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1)
    area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1)

    # 1 where the boxes overlap on both axes, 0 otherwise; this zeroes the
    # (otherwise meaningless) product of negative extents.
    overlaps = (tl < br).to(tl.dtype).prod(dim=1)
    area_i = torch.prod(br - tl, 1) * overlaps

    # Same epsilon as compute_iou: avoids 0/0 = NaN when the union is empty
    # and is far too small to alter the result for any realistic box area.
    union = area_a + area_b - area_i + 1e-20
    iou = area_i / union
    return iou.view(batch_size, -1, 1)
def loss(pred_conf, pred_cls, pred_txtytwth, pred_iou, label):
    """
    Compute the objectness, class, box-regression and IoU losses.

    Input:
        pred_conf : tensor -> [B, N, 1] objectness logits.
        pred_cls : tensor -> [B, N, C] class logits.
        pred_txtytwth : tensor -> [B, N, 4] box regression outputs (tx, ty, tw, th).
        pred_iou : tensor -> [B, N, 1] predicted IoU.
        label : tensor -> [B, N, >=8]; the last axis is read as
                [conf, obj, class, tx, ty, tw, th, weight, ...].
                NOTE(review): this layout has one more leading column than the
                output of multi_gt_creator -- presumably the caller prepends a
                confidence target; confirm against the training script.
    Output:
        (conf_loss, cls_loss, bbox_loss, iou_loss), each summed over anchors
        and divided by the batch size.
    """
    batch_size = pred_conf.size(0)

    # ----- targets -----
    gt_conf = label[:, :, 0].float()
    gt_obj = label[:, :, 1].float()
    gt_cls = label[:, :, 2].long()
    gt_txty = label[:, :, 3:5].float()
    gt_twth = label[:, :, 5:7].float()
    box_weight = label[:, :, 7].float()

    # Only anchors with positive objectness contribute to class / box losses.
    pos_mask = (gt_obj > 0.).float()
    iou_target = pos_mask.clone()

    # ----- objectness -----
    # gt_obj doubles as the mask: 1 -> positive, 0 -> negative, -1 -> ignored.
    objectness_criterion = MSEWithLogitsLoss(reduction='mean')
    conf_loss = objectness_criterion(pred_conf[:, :, 0], gt_conf, mask=gt_obj)

    # ----- classification -----
    # CrossEntropyLoss wants the class axis second: [B, C, N].
    per_anchor_cls = nn.CrossEntropyLoss(reduction='none')(pred_cls.permute(0, 2, 1), gt_cls)
    cls_loss = torch.sum(per_anchor_cls * pos_mask) / batch_size

    # ----- box regression -----
    per_anchor_txty = torch.sum(
        nn.BCEWithLogitsLoss(reduction='none')(pred_txtytwth[:, :, :2], gt_txty), dim=-1
    )
    per_anchor_twth = torch.sum(
        nn.MSELoss(reduction='none')(pred_txtytwth[:, :, 2:], gt_twth), dim=-1
    )
    txty_loss = torch.sum(per_anchor_txty * box_weight * pos_mask) / batch_size
    twth_loss = torch.sum(per_anchor_twth * box_weight * pos_mask) / batch_size
    bbox_loss = txty_loss + twth_loss

    # ----- IoU -----
    per_anchor_iou = nn.SmoothL1Loss(reduction='none')(pred_iou[:, :, 0], iou_target)
    iou_loss = torch.sum(per_anchor_iou) / batch_size

    return conf_loss, cls_loss, bbox_loss, iou_loss
if __name__ == "__main__":
    # This module has no standalone behaviour; running it directly does nothing.
    pass