# Detector.py
import cv2 as cv
import numpy as np

# Bib detection model config
bd_configPath = 'Data/YOLO/bib_detector/RBNR2_custom-yolov4-tiny-detector.cfg'
bd_weightsPath = 'Data/YOLO/bib_detector/RBNR2_custom-yolov4-tiny-detector_best.weights'
bd_classes = ['bib']

# Number reader config
nr_configPath = 'Data/YOLO/num_reader/SVHN3_custom-yolov4-tiny-detector.cfg'
nr_weightsPath = 'Data/YOLO/num_reader/SVHN3_custom-yolov4-tiny-detector_best.weights'
nr_classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

class Detector:
    """
    Create a YOLO object detection model in OpenCV from a given config and
    weights file, and use the model to make predictions.

    Attributes
        classes (list): list of class names
        net (obj): OpenCV network object
        ln (list): names of the network's unconnected output layers
    """
    def __init__(self, cfg, wts, classes):
        """Initialize detector object

        Args
            cfg (str): path to model config file
            wts (str): path to model weights file
            classes (list): list of class names
        """
        self.classes = classes
        self.net = cv.dnn.readNetFromDarknet(cfg, wts)
        self.net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)

        # determine the output layer names; flatten() keeps this working
        # whether getUnconnectedOutLayers returns a 2-D or a 1-D array
        # (the return shape changed between OpenCV versions)
        self.ln = self.net.getLayerNames()
        self.ln = [self.ln[i - 1] for i in self.net.getUnconnectedOutLayers().flatten()]

    def detect(self, img, conf):
        """
        Make predictions and return classes and bounding boxes

        Args
            img (numpy array): image array from OpenCV .imread
            conf (float): prediction confidence threshold

        Returns
            List containing bounding box values and class names for detections
            in the form [<class name>, [x, y, width, height]]
        """
        # format image for detection
        blob = cv.dnn.blobFromImage(img, 1/255.0, (416, 416), swapRB=True, crop=False)

        # get detections
        self.net.setInput(blob)
        outputs = self.net.forward(self.ln)

        # initialize lists
        boxes = []
        confidences = []
        classIDs = []

        # initialize image dimensions
        h_img, w_img = img.shape[:2]

        for output in outputs:
            for detection in output:
                scores = detection[5:]
                classID = np.argmax(scores)
                confidence = scores[classID]

                # drop low confidence detections and scale the remaining
                # boxes from relative coordinates to pixel values
                if confidence > conf:
                    box = detection[:4] * np.array([w_img, h_img, w_img, h_img])
                    (centerX, centerY, width, height) = box.astype("int")

                    # convert from center coordinates to top-left corner
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))

                    box = [x, y, int(width), int(height)]
                    boxes.append(box)
                    confidences.append(float(confidence))
                    classIDs.append(classID)

        # apply non-maximum suppression to remove overlapping boxes
        # initialize lists
        self.boxes = []
        self.confidences = []
        self.detected_classes = []
        cls_and_box = []

        # get indices of final bounding boxes
        indices = cv.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
        if len(indices) > 0:
            for i in indices.flatten():
                self.boxes.append(boxes[i])
                self.confidences.append(confidences[i])
                self.detected_classes.append(self.classes[classIDs[i]])

                cls_and_box.append([self.classes[classIDs[i]], boxes[i]])

        return cls_and_box
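
# The Detector class can also be used on its own, outside the bib pipeline
# below. A minimal usage sketch (assuming the config/weights paths defined at
# the top of this file exist and 'sample.jpg' is a hypothetical test image):
#
#     bib_detector = Detector(bd_configPath, bd_weightsPath, bd_classes)
#     detections = bib_detector.detect(cv.imread('sample.jpg'), 0.25)
#     # detections -> e.g. [['bib', [x, y, w, h]], ...]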

def get_rbns(img, single=False):
    """
    Given an image, return bib numbers and bib bounding boxes for detected bibs

    Args
        img (numpy array): image array given by OpenCV .imread
        single (bool): whether one or many bib detections will be
            returned. If True, return the detection with the largest
            bounding box area.

    Returns
        List of detected bib numbers and corresponding bounding boxes in
        the format [<bib number>, [x, y, width, height]], or None if no
        bibs are detected
    """
    # Instantiate detectors
    bd = Detector(bd_configPath, bd_weightsPath, bd_classes)
    nr = Detector(nr_configPath, nr_weightsPath, nr_classes)

    # Make bib location predictions
    bib_detections = bd.detect(img, 0.25)

    if len(bib_detections) > 0:
        for obj in bib_detections:
            # crop out detected bib and record its area for later sorting
            (x, y, w, h) = obj[1]
            obj.append(w * h)
            crop_img = img[y:y+h, x:x+w]

            # detect numbers on the cropped bib
            num_detections = nr.detect(crop_img, 0.5)

            bib_digit_loc = []
            if len(num_detections) > 0:
                # get digits and their x locations within the crop
                for digit in num_detections:
                    (d_x, d_y, d_w, d_h) = digit[1]
                    bib_digit_loc.append((d_x, str(digit[0])))

                # sort detected digits left to right and join them
                bib_digit_loc.sort()
                rbn = int(''.join([i[1] for i in bib_digit_loc]))
                obj.append(rbn)
            else:
                obj.append(0)  # bib detected but no digits detected

        if single:
            if len(bib_detections) > 1:
                # keep only the bib with the largest bounding box area
                bib_detections.sort(key=lambda x: x[2], reverse=True)
            return [[bib_detections[0][3], bib_detections[0][1]]]
        else:
            final_bibs = []
            for bib in bib_detections:
                final_bibs.append([bib[3], bib[1]])
            return final_bibs
    else:
        return None

def annotate(img, annot, color):
    """
    Add a bib number and bib bounding box to an image

    Args
        img (numpy array): image array of the original from OpenCV .imread
        annot (list): a single bib number and bounding box in the
            form [<bib number>, [x, y, width, height]]
        color (tuple): BGR color tuple for the annotation color

    Returns
        Annotated image as numpy array
    """
    # draw bounding box on original image
    (x, y, w, h) = annot[1]
    annot_img = cv.rectangle(img, (x, y), (x + w, y + h), color, 5)

    # add bib number above the bounding box
    rbn = annot[0]
    cv.putText(annot_img, str(rbn), (x, y - 25), cv.FONT_HERSHEY_SIMPLEX, 2, color, 4)

    return annot_img
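
if __name__ == "__main__":
    # Minimal end-to-end sketch tying get_rbns and annotate together.
    # 'race_photo.jpg' and the output filename are placeholders, not files
    # shipped with this repo; point them at any local photo of runners.
    image = cv.imread('race_photo.jpg')
    if image is None:
        raise FileNotFoundError('Could not read race_photo.jpg')

    results = get_rbns(image)  # [[<bib number>, [x, y, w, h]], ...] or None
    if results is not None:
        for result in results:
            # draw each detected bib in green (OpenCV uses BGR channel order)
            image = annotate(image, result, (0, 255, 0))

    cv.imwrite('race_photo_annotated.jpg', image)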