train_yolo.py
import os

import numpy as np
from PIL import Image
from ultralytics import YOLO

# Load the trained detector weights
model = YOLO('/nas_data/WTY/project/nlp_task1/runs/detect/train2/weights/best.pt')

# To (re)train the model on 2 GPUs instead, uncomment the following line:
# results = model.train(data='custom.yaml', epochs=100, imgsz=640, device=[1, 2])

# Run inference on an example image, keeping detections with confidence >= 0.5
results = model.predict(['1000_0003.jpg'], conf=0.5)
# Sort detected boxes into reading order: top-to-bottom rows, left-to-right within each row
def regularization_sorting(Lx, Ly, W, H, alpha, beta):
    Lx, Ly, W, H = list(Lx), list(Ly), list(W), list(H)
    sorted_Lx, sorted_Ly, sorted_W, sorted_H = [], [], [], []
    while len(Lx):
        # Step 3: average the y-coordinates lying within alpha of the smallest Ly (the current top row)
        min_Ly = min(Ly)
        row_ys = [y for y in Ly if y <= min_Ly + alpha]
        M = sum(row_ys) / len(row_ys)
        # Step 4: collect the indices of boxes whose y-coordinate is within beta of the row mean M
        indices = [i for i, y in enumerate(Ly) if abs(y - M) <= beta]
        # Step 5: sort those indices by horizontal coordinate, left to right
        indices.sort(key=lambda i: Lx[i])
        # Step 6: append the sorted coordinates to sorted_Lx, sorted_Ly, sorted_W, sorted_H
        for i in indices:
            sorted_Lx.append(Lx[i])
            sorted_Ly.append(Ly[i])
            sorted_W.append(W[i])
            sorted_H.append(H[i])
        # Step 7: remove the boxes already taken from Lx, Ly, W, H
        Lx = [x for i, x in enumerate(Lx) if i not in indices]
        Ly = [y for i, y in enumerate(Ly) if i not in indices]
        W = [w for i, w in enumerate(W) if i not in indices]
        H = [h for i, h in enumerate(H) if i not in indices]
    return sorted_Lx, sorted_Ly, sorted_W, sorted_H
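
# A minimal sanity check with made-up coordinates (not from any real image):
# four boxes forming two rows. With alpha=45 and beta=30, the two boxes near
# y=10 come out first (left to right), then the two near y=100:
#   Lx, Ly = [300, 10, 320, 5], [12, 10, 98, 102]
#   W,  H  = [30, 30, 30, 30], [40, 40, 40, 40]
#   regularization_sorting(Lx, Ly, W, H, alpha=45, beta=30)
#   -> ([10, 300, 5, 320], [10, 12, 102, 98], [30, 30, 30, 30], [40, 40, 40, 40])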
# Convert normalized YOLO boxes (x_center, y_center, w, h in [0, 1]) to pixel (left, top, w, h)
def convert_boxes(boxes, size):
    for box in boxes:
        # scale normalized coordinates to pixels
        box[0] = box[0] * size[0]
        box[1] = box[1] * size[1]
        box[2] = box[2] * size[0]
        box[3] = box[3] * size[1]
        # shift the box center to its top-left corner
        box[0] = box[0] - box[2] / 2
        box[1] = box[1] - box[3] / 2
    return boxes
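
# A minimal sanity check with made-up numbers: a centered box on a 640x480 image.
#   convert_boxes(np.array([[0.5, 0.5, 0.25, 0.5]]), (640, 480))
#   -> array([[240., 120., 160., 240.]])   # left=240, top=120, w=160, h=240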
# e.g. crop each detected box and save the crops in reading order
os.makedirs('results', exist_ok=True)  # make sure the output directory exists
for r in results:
    img = Image.open('1000_0003.jpg')
    size = img.size
    # normalized (x_center, y_center, w, h) boxes -> pixel (left, top, w, h)
    boxes = r.boxes.xywhn.cpu().numpy()
    boxes = convert_boxes(boxes, size)
    # sort the boxes into reading order
    sb1, sb2, sb3, sb4 = regularization_sorting(boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3], alpha=45, beta=30)
    sb1, sb2, sb3, sb4 = (np.expand_dims(s, axis=1) for s in (sb1, sb2, sb3, sb4))
    boxes = np.concatenate([sb1, sb2, sb3, sb4], axis=1)
    # crop and save each box
    for i, box in enumerate(boxes):
        x, y, w, h = box
        print(x, y, w, h)
        crop = img.crop((x, y, x + w, y + h))
        crop.save(f'results/{i}.jpg')
# Optional: draw the sorted boxes on the image for a quick visual check
# from PIL import ImageDraw
# draw = ImageDraw.Draw(img)
# for box in boxes:
#     x, y, w, h = box
#     draw.rectangle([x, y, x + w, y + h], outline='red')
# img.save('result.jpg')
# print(boxes)

# Optional: inspect the raw prediction results
# for result in results:
#     boxes = result.boxes          # Boxes object for bounding box outputs
#     masks = result.masks          # Masks object for segmentation mask outputs
#     keypoints = result.keypoints  # Keypoints object for pose outputs
#     probs = result.probs          # Probs object for classification outputs
#     # result.show()               # display to screen
#     result.save(filename='result.jpg')  # save to disk