-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathobject_detector.py
220 lines (188 loc) · 7.58 KB
/
object_detector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
import onnxruntime as ort
from flask import request, Flask, jsonify
from waitress import serve
from PIL import Image
import numpy as np
import cv2
# WSGI application object; routes are registered below and it is served by waitress in main().
app = Flask(__name__)
def main():
    """
    Launch the Waitress WSGI server hosting the Flask app.

    Listens on every network interface on port 8080 and blocks
    until the process is stopped.
    """
    bind_host = "0.0.0.0"
    bind_port = 8080
    serve(app, host=bind_host, port=bind_port)
@app.route("/")
def root():
    """
    Site main page handler function.

    :return: Content of index.html file
    """
    # Explicit encoding: the default is locale-dependent (PEP 597), which
    # can mis-decode the HTML file on non-UTF-8 systems.
    with open("index.html", encoding="utf-8") as file:
        return file.read()
@app.route("/detect", methods=["POST"])
def detect():
    """
    Handler of the /detect POST endpoint.

    Reads the uploaded file named "image_file", passes it through the
    YOLOv8 object detection network and returns the detected objects.

    :return: a JSON array of objects bounding boxes in format
        [[x1,y1,x2,y2,object_type,probability],..]
    """
    uploaded = request.files["image_file"]
    detections = detect_objects_on_image(uploaded.stream)
    return jsonify(detections)
def detect_objects_on_image(buf):
    """
    Run the full detection pipeline on one image.

    Prepares the image as a network input tensor, runs the YOLOv8
    network and post-processes the raw output into detections.

    :param buf: Input image file stream
    :return: Array of bounding boxes in format
        [[x1,y1,x2,y2,object_type,probability],..]
    """
    tensor, width, height = prepare_input(buf)
    raw_outputs = run_model(tensor)
    return process_output(raw_outputs, width, height)
def prepare_input(buf):
    """
    Convert an uploaded image into the tensor expected by YOLOv8.

    :param buf: Uploaded file input stream
    :return: Tuple (tensor, img_width, img_height): tensor is a float32
        NumPy array of shape (1, 3, 640, 640) with values scaled to [0, 1];
        img_width/img_height are the ORIGINAL image dimensions, kept so
        detections can be scaled back later.
    """
    img = Image.open(buf)
    img_width, img_height = img.size
    # The network has a fixed 640x640 input size.
    img = img.resize((640, 640))
    img = img.convert("RGB")
    # HWC uint8 -> normalized CHW float32 with a leading batch axis.
    # (Renamed from `input`, which shadowed the builtin.)
    tensor = np.array(img) / 255.0
    tensor = tensor.transpose(2, 0, 1)
    tensor = tensor.reshape(1, 3, 640, 640)
    return tensor.astype(np.float32), img_width, img_height
def run_model(input):
    """
    Pass the prepared input tensor through the YOLOv8 network.

    :param input: Float32 NumPy array of shape (1, 3, 640, 640)
    :return: Raw outputs of the YOLOv8 network (list of arrays:
        detections plus mask prototypes for the -seg model)
    """
    # Creating an InferenceSession parses the whole .onnx file, which is
    # expensive; cache it on the function so the model is loaded once
    # instead of on every request.
    session = getattr(run_model, "_session", None)
    if session is None:
        session = ort.InferenceSession("yolov8m-seg.onnx")
        run_model._session = session
    return session.run(None, {"images": input})
def process_output(outputs, img_width, img_height):
    """
    Convert RAW output from YOLOv8-seg into an array of detected objects.

    Each object contains the bounding box (scaled back to the original
    image), the class label, the probability and a segmentation polygon.

    :param outputs: Raw outputs of YOLOv8 network
    :param img_width: The width of original image
    :param img_height: The height of original image
    :return: Array of detected objects in a format
        [[x1,y1,x2,y2,label,probability,polygon],..]
    """
    output0 = outputs[0].astype("float")
    output1 = outputs[1].astype("float")
    # (1, 116, 8400) -> (8400, 116): one row per candidate detection.
    output0 = output0[0].transpose()
    output1 = output1[0]
    boxes = output0[:, 0:84]
    masks = output0[:, 84:]
    # Combine the 32 per-box mask coefficients with the 160x160 prototypes.
    output1 = output1.reshape(32, 160 * 160)
    masks = masks @ output1
    boxes = np.hstack((boxes, masks))
    objects = []
    for row in boxes:
        scores = row[4:84]  # one confidence per class; slice computed once
        prob = scores.max()
        if prob < 0.5:
            continue
        label = yolo_classes[scores.argmax()]
        xc, yc, w, h = row[:4]
        # Scale the 640x640-space box back to the original image size.
        x1 = (xc - w / 2) / 640 * img_width
        y1 = (yc - h / 2) / 640 * img_height
        x2 = (xc + w / 2) / 640 * img_width
        y2 = (yc + h / 2) / 640 * img_height
        mask = get_mask(row[84:25684], (x1, y1, x2, y2), img_width, img_height)
        polygon = get_polygon(mask)
        objects.append([x1, y1, x2, y2, label, prob, polygon])
    # Non-maximum suppression: repeatedly keep the most confident object
    # and discard any remaining one that overlaps it with IoU >= 0.5.
    # (Loop variable renamed from `object`, which shadowed the builtin.)
    objects.sort(key=lambda obj: obj[5], reverse=True)
    result = []
    while len(objects) > 0:
        best = objects[0]
        result.append(best)
        objects = [obj for obj in objects if iou(obj, best) < 0.5]
    return result
def get_mask(row, box, img_width, img_height):
    """
    Extract the segmentation mask for one detected object.

    :param row: Flattened 160x160 mask values for this object
    :param box: Bounding box of the object [x1,y1,x2,y2] in image coords
    :param img_width: Width of original image
    :param img_height: Height of original image
    :return: Segmentation mask as a NumPy array, cropped to the box and
        resized to the box dimensions
    """
    # Binarize the raw mask: sigmoid, then threshold at 0.5.
    probs = sigmoid(row.reshape(160, 160))
    binary = (probs > 0.5).astype("uint8") * 255
    x1, y1, x2, y2 = box
    # Map the box from image coordinates into the 160x160 mask grid.
    grid_x1 = round(x1 / img_width * 160)
    grid_y1 = round(y1 / img_height * 160)
    grid_x2 = round(x2 / img_width * 160)
    grid_y2 = round(y2 / img_height * 160)
    cropped = binary[grid_y1:grid_y2, grid_x1:grid_x2]
    # Scale the low-resolution crop up to the bounding-box size.
    resized = Image.fromarray(cropped, "L").resize(
        (round(x2 - x1), round(y2 - y1))
    )
    return np.array(resized)
def get_polygon(mask):
    """
    Calculate a bounding polygon based on a segmentation mask.

    :param mask: Segmentation mask as Numpy Array
    :return: List of [x, y] points of the first contour found, or an
        empty list when the mask contains no contour at all
    """
    contours, _ = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    # An all-background mask yields no contours; previously this raised
    # IndexError instead of returning an empty polygon.
    if not contours:
        return []
    # Each contour point has shape (1, 2): [[x, y]].
    return [[int(point[0][0]), int(point[0][1])] for point in contours[0]]
def sigmoid(z):
    """Logistic function: map any real value (or array) into (0, 1)."""
    exp_neg = np.exp(-z)
    return 1.0 / (1.0 + exp_neg)
def iou(box1, box2):
    """
    Calculate the "Intersection-over-union" coefficient for two boxes.
    https://pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/.

    :param box1: First box in format: [x1,y1,x2,y2,object_class,probability]
    :param box2: Second box in format: [x1,y1,x2,y2,object_class,probability]
    :return: Intersection over union ratio as a float number
    """
    overlap = intersection(box1, box2)
    return overlap / union(box1, box2)
def union(box1, box2):
    """
    Calculate the union area of two boxes.

    :param box1: First box in format [x1,y1,x2,y2,object_class,probability]
    :param box2: Second box in format [x1,y1,x2,y2,object_class,probability]
    :return: Area of the boxes union as a float number
    """
    ax1, ay1, ax2, ay2 = box1[:4]
    bx1, by1, bx2, by2 = box2[:4]
    area_a = (ax2 - ax1) * (ay2 - ay1)
    area_b = (bx2 - bx1) * (by2 - by1)
    # Inclusion-exclusion: the overlap is counted twice in the sum.
    return area_a + area_b - intersection(box1, box2)
def intersection(box1, box2):
    """
    Calculate the intersection area of two boxes.

    :param box1: First box in format [x1,y1,x2,y2,object_class,probability]
    :param box2: Second box in format [x1,y1,x2,y2,object_class,probability]
    :return: Area of intersection of the boxes as a float number
        (0 when the boxes do not overlap)
    """
    box1_x1, box1_y1, box1_x2, box1_y2 = box1[:4]
    box2_x1, box2_y1, box2_x2, box2_y2 = box2[:4]
    x1 = max(box1_x1, box2_x1)
    y1 = max(box1_y1, box2_y1)
    x2 = min(box1_x2, box2_x2)
    y2 = min(box1_y2, box2_y2)
    # Clamp each dimension at zero: for fully disjoint boxes both deltas
    # are negative and their product was previously a bogus POSITIVE area,
    # which corrupted IoU-based non-maximum suppression.
    return max(0, x2 - x1) * max(0, y2 - y1)
# COCO class labels in YOLOv8 output order: the class index produced by
# argmax over the 80 per-class scores in process_output indexes directly
# into this list, so the order must not be changed.
yolo_classes = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
    "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie",
    "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
    "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut",
    "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book",
    "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"
]
# Start the server only when executed as a script; a bare main() call
# would block on import (e.g. under a WSGI loader or in tests).
if __name__ == "__main__":
    main()