-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMusicScoreProc.py
379 lines (341 loc) · 14.5 KB
/
MusicScoreProc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
import cv2 as cv
import numpy as np
import math
import random
# Rotate according to the angle of the detected horizontal lines.
# Should not exceed +- 5 degrees
# start_resolution and angle_percision are params of the HoughLineP function
# Higher Start Resolution decreases runtime
# Higher Angle Percision increases runtime
# Please refer to HoughLineP function for more information
# text_mode determines the mode for additional rotation.
# 0 is never use text mode.
# 1 is use text mode when angle == 0
# 2 is always use text mode
def _near_level_angles(lines, skip_zero=False):
    """Collect the tilt (in degrees) of near-horizontal Hough segments.

    `lines` is the value returned by cv.HoughLinesP: either None or an array
    whose rows hold (x1, y1, x2, y2). Only tilts strictly between -5 and +5
    degrees are kept. The returned list always starts with a 0.0 entry so
    that a median can be taken even when nothing was detected.

    Vertical segments (x1 == x2) are skipped up front; dividing by their zero
    run would only produce a NumPy divide-by-zero warning and a +-90 degree
    (or NaN) angle that the +-5 degree filter rejects anyway.
    """
    angles = [0.0]
    if lines is None:
        return angles
    segments = np.asarray(lines, dtype=np.float64).reshape(-1, 4)
    run = segments[:, 2] - segments[:, 0]
    rise = segments[:, 3] - segments[:, 1]
    sloped = run != 0
    tilt = np.arctan(rise[sloped] / run[sloped]) / np.pi * 180
    keep = (tilt < 5) & (tilt > -5)
    if skip_zero:
        keep &= tilt != 0
    angles.extend(tilt[keep].tolist())
    return angles


def RotateByStraightLine(img, start_resolution=2, angle_percision=1440, text_mode=0):
    """Deskew `img` using the median tilt of long, nearly horizontal lines.

    Parameters:
        img: image array; its first two shape entries are (height, width).
        start_resolution: distance resolution handed to cv.HoughLinesP when
            the image is wider than 2400 px (such images are first halved);
            narrower images always use a resolution of 1.
        angle_percision: the Hough angular resolution is pi/angle_percision
            (pi/(2*angle_percision) in the text pass).
        text_mode: 0 never runs the additional short-line ("text") pass,
            1 runs it only when the first pass yields an angle of 0,
            2 skips the first pass and always runs the text pass.

    Returns:
        (image, rot_angle). The image is rotated about its centre by
        rot_angle degrees with a white border fill, or returned unchanged
        when rot_angle is 0.
    """
    (h, w) = img.shape[:2]
    center = (w // 2, h // 2)
    rot_angle = 0
    if text_mode != 2:
        if w > 2400:
            # Large scans: detect on a half-size copy to save time.
            temp_img = cv.resize(img, None, fx=0.5, fy=0.5,
                                 interpolation=cv.INTER_NEAREST)
            edges = cv.Canny(temp_img, 100, 150, apertureSize=3)
            lines = cv.HoughLinesP(edges, start_resolution, np.pi/angle_percision,
                                   100, minLineLength=750, maxLineGap=50)
        else:
            edges = cv.Canny(img, 100, 150, apertureSize=3)
            lines = cv.HoughLinesP(edges, 1, np.pi/angle_percision,
                                   100, minLineLength=500, maxLineGap=50)
        if lines is None:
            print("Warning: Hough Line Algorithm can't detect lines")
        rot_angle = np.median(_near_level_angles(lines))
    if (rot_angle == 0 and text_mode == 1) or text_mode == 2:
        # Additional pass on much shorter segments (intended for text lines).
        if text_mode == 1:
            print("\tAngle=0, preforming additional detection")
        temp_img = cv.resize(img, None, fx=0.5, fy=0.5,
                             interpolation=cv.INTER_NEAREST)
        edges = cv.Canny(temp_img, 100, 150, apertureSize=3)
        lines = cv.HoughLinesP(edges, 1, np.pi/(angle_percision*2),
                               100, minLineLength=6, maxLineGap=20)
        if lines is None:
            print("Warning: Additional Hough Line Algorithm can't detect lines")
        # Exactly level segments are ignored here so they cannot swamp the median.
        rot_angle = np.median(_near_level_angles(lines, skip_zero=True))
    if rot_angle != 0:
        M = cv.getRotationMatrix2D(center, rot_angle, 1.0)
        img = cv.warpAffine(img, M, (w, h),
                            flags=cv.INTER_CUBIC, borderValue=(255, 255, 255))
    return (img, rot_angle)
# Shrink edges until 4 edges are white
# Uses 2 criteria
# Mean is the mean value of 1 edge (out of 4 in the image)
# Min is the minimum value of 1 edge
# These 2 values are to detect whether the current edge is white enough
# Step Size controls the update pace of the cropping edge
# Smaller step size is slower, and may capture some unwanted noise.
def CropWhite(img, H, W, Mean_Thresh=240, Min_Thresh=120, Step_Size=10):
    """Shrink the crop rectangle until all four border lines look white.

    Two phases run in sequence. First the border line with the lowest mean
    brightness is moved inwards by Step_Size until the lowest mean reaches
    Mean_Thresh. Then the border line holding the darkest single pixel is
    moved inwards until the darkest pixel reaches Min_Thresh. The mean is not
    re-checked during the second phase. Ties are resolved in the order
    top, left, bottom, right.

    A border line is only moved when its measurement is actually below the
    threshold; previously one extra step was taken after each phase had
    already succeeded, so even a completely white page lost 2*Step_Size rows.

    Parameters:
        img: single-channel image array indexed as img[row, column].
        H, W: reference height/width; shrinking stops as soon as the
            rectangle is no taller than H/3 or no wider than W/3.
        Mean_Thresh: minimum acceptable mean brightness of a border line.
        Min_Thresh: minimum acceptable darkest pixel on a border line.
        Step_Size: number of pixels a border moves per step.

    Returns:
        (cropped, x0, x1, y0, y1) where cropped is img[y0:y1, x0:x1].
        x1 and y1 start at width-1 and height-1 and are used as exclusive
        slice ends, so the last tested column and row are not part of the
        returned crop.
    """
    x0 = 0
    y0 = 0
    x1 = img.shape[1] - 1
    y1 = img.shape[0] - 1
    mean_done = False
    min_done = False
    while (not (mean_done and min_done)
           and x1 - x0 > W / 3 and y1 - y0 > H / 3):
        # Current border lines in tie-break order: top, left, bottom, right.
        borders = (img[y0, x0:x1], img[y0:y1, x0],
                   img[y1, x0:x1], img[y0:y1, x1])
        if not mean_done:
            scores = [np.mean(line) for line in borders]
            limit = Mean_Thresh
        else:
            scores = [np.min(line) for line in borders]
            limit = Min_Thresh
        worst = int(np.argmin(scores))  # first minimum wins on ties
        if scores[worst] >= limit:
            # This phase is satisfied; do not crop any further for it.
            if mean_done:
                min_done = True
            else:
                mean_done = True
            continue
        if worst == 0:
            y0 += Step_Size
        elif worst == 1:
            x0 += Step_Size
        elif worst == 2:
            y1 -= Step_Size
        else:
            x1 -= Step_Size
    return (img[y0:y1, x0:x1], x0, x1, y0, y1)
# Patch with size less than or equal to these threshold will be removed
# May dramatically improve quality on old printed books
# But not much on new books
def _erase_small_components(binary, max_area):
    """Zero every small 4-connected non-zero component of `binary` in place.

    Components whose pixel count is <= max_area are erased. Returns how many
    components were erased. A max_area of 0 disables the pass.
    """
    if max_area == 0:
        return 0
    _, labels, stats, _ = cv.connectedComponentsWithStats(binary, connectivity=4)
    # Lookup table indexed by label: True where the component must go.
    doomed = stats[:, cv.CC_STAT_AREA] <= max_area
    doomed[0] = False  # label 0 is the background, never a speckle
    binary[doomed[labels]] = 0
    return int(np.count_nonzero(doomed))


def DespecklePatch(img, Despeckle_White_Size=5, Despeckle_Black_Size=10):
    """Remove small white and small black patches from an image.

    Non-zero patches of at most Despeckle_White_Size pixels are set to 0.
    The image is then inverted (255 - value) and the same is done for patches
    of at most Despeckle_Black_Size pixels, which fills small dark specks
    once the image is inverted back. Patches are 4-connected components.

    The work is done on a copy. Previously the caller's array was modified in
    place and left colour-inverted when the function returned.

    Parameters:
        img: image accepted by cv.connectedComponentsWithStats
            (presumably an 8-bit single-channel 0/255 image - confirm
            against callers).
        Despeckle_White_Size: largest white patch area to erase; 0 disables.
        Despeckle_Black_Size: largest black patch area to erase; 0 disables.

    Returns:
        (cleaned_image, white_patches_removed, black_patches_removed)
    """
    work = img.copy()
    White_Counter = _erase_small_components(work, Despeckle_White_Size)
    # Invert so that dark specks become non-zero components.
    work[:] = 255 - work
    Black_Counter = _erase_small_components(work, Despeckle_Black_Size)
    # Invert back to the original polarity.
    return (255 - work, White_Counter, Black_Counter)
# Generate a B&W image using normal threshold
# Then generate a stronger B&W image using a higher threshold
# Apply despeckle to the stronger image, and combine these using bitwise and.
# Will partially restore blurry scans.
# Do not use on normal scans.
def StrongEnhance(img, Normal_Thresh=160, Strong_Thresh=200, Despeckle_Black_Size=50):
    """Combine a normal and a stricter binarisation of `img`.

    The image is binarised at Strong_Thresh and despeckled (white patches up
    to 5 pixels, black patches up to Despeckle_Black_Size pixels). `img`
    itself is then overwritten in place with its binarisation at
    Normal_Thresh, and the bitwise AND of both masks is returned.
    """
    _, strict_mask = cv.threshold(img, Strong_Thresh, 255, cv.THRESH_BINARY)
    strict_mask[:] = DespecklePatch(strict_mask, 5, Despeckle_Black_Size)[0]
    _, normal_mask = cv.threshold(img, Normal_Thresh, 255, cv.THRESH_BINARY)
    # The caller's array is deliberately overwritten with the normal mask.
    img[:] = normal_mask
    return cv.bitwise_and(img, strict_mask)
# Similar to StrongEnhance()
# But uses a checkerboard pattern instead of a stronger despeckle to merge
# Also integrates despeckle, so don't use despeckle after this
# Convolution (if turned on) only applies to high level.
def TwoLvFilter(img, Normal_Thresh=160, Strong_Thresh=200,
                Despeckle_Black_Size=16, Despeckle_White_Size=8, Convolve=False):
    """Merge a low-threshold and a high-threshold binarisation of `img`.

    Both binarisations are despeckled with the same size limits. The
    high-threshold one is additionally passed through RemoveCheckBoard
    before the two are combined with a bitwise AND. When Convolve is true,
    the high-threshold image is computed from a neighbour-averaged copy of
    `img` (3x3 kernel with a zero centre); the low-threshold image always
    uses `img` as given.
    """
    if Convolve:
        neighbour_kernel = np.array([[0.1, 0.15, 0.1],
                                     [0.15, 0, 0.15],
                                     [0.1, 0.15, 0.1]])
        high_source = cv.filter2D(img, -1, neighbour_kernel,
                                  borderType=cv.BORDER_CONSTANT)
    else:
        high_source = img
    _, low_mask = cv.threshold(img, Normal_Thresh, 255, cv.THRESH_BINARY)
    _, high_mask = cv.threshold(high_source, Strong_Thresh, 255, cv.THRESH_BINARY)
    low_mask = DespecklePatch(low_mask, Despeckle_White_Size, Despeckle_Black_Size)[0]
    high_mask = DespecklePatch(high_mask, Despeckle_White_Size, Despeckle_Black_Size)[0]
    high_mask = RemoveCheckBoard(high_mask)
    return cv.bitwise_and(high_mask, low_mask)
# Remove half of the pixels:
# mode = 0: remove interlaced pixels like a chessboard
# mode = 1: remove every pixel randomly if a random number is less than randparam.
def RemoveCheckBoard(img, mode = 0, randparam = 0.5):
    """Overwrite a subset of the pixels of `img` with the value 1, in place.

    Parameters:
        img: image array indexed as img[row, column]; modified in place.
        mode: 0 overwrites every pixel whose row + column index is odd
            (a chessboard pattern, pixel (0, 0) is kept);
            1 overwrites each pixel independently when a fresh
            random.random() draw is below randparam;
            any other value leaves the image untouched.
        randparam: overwrite probability used by mode 1.

    Returns:
        The same array object that was passed in.

    The chessboard used to be produced by toggling one flag across the whole
    image, which degenerated into vertical stripes whenever the number of
    columns was even. It is now derived from the pixel coordinates.

    NOTE(review): "removed" pixels are written as 1, not 255 - confirm that
    this is the value downstream code expects.
    """
    rows, cols = img.shape[:2]
    if mode == 0:
        img[0::2, 1::2] = 1  # even rows, odd columns
        img[1::2, 0::2] = 1  # odd rows, even columns
    elif mode == 1:
        # Draw in row-major order from the `random` module so results under
        # a given random.seed() match the per-pixel loop this replaces.
        draws = np.array([random.random() for _ in range(rows * cols)])
        img[draws.reshape(rows, cols) < randparam] = 1
    return img
# Fit the image to the given canvas size.
# May pad or crop edges.
# If cropping must be done, avoid cropping black parts
# by calculating black areas on both sides
def _crop_start(img, target, axis):
    """Choose where a window of `target` lines should start along `axis`.

    Called only when the image is longer than `target` along `axis`. The
    excess is split into a leading and a trailing strip, and the darkness
    (255 - mean) of each strip decides how much is cut from each end:
    a purely white strip absorbs the whole cut, two dark strips share it in
    inverse proportion to their darkness, and otherwise the cut is centred.
    """
    extent = img.shape[axis]
    excess = extent - target
    if excess == 1:
        # A single surplus line is always dropped from the trailing end.
        return 0
    lead = excess // 2
    trail = excess - lead
    lines = np.moveaxis(img, axis, 0)  # view with the axis of interest first
    dark_lead = 255 - np.mean(lines[:lead])
    # Measure the strip that would really be cut off: the last `trail` lines.
    dark_trail = 255 - np.mean(lines[extent - trail:])
    if dark_lead == 0 and dark_trail != 0:
        return excess
    if dark_trail == 0 and dark_lead != 0:
        return 0
    if (dark_lead != 0 and dark_trail != 0
            and not np.isnan(dark_lead) and not np.isnan(dark_trail)):
        # The darker the trailing strip, the more is cut from the leading end.
        return int(excess * (dark_trail / (dark_lead + dark_trail)))
    return lead


def FitToCanvas(img, Tgt_W, Tgt_H, Step_Size=10):
    """Trim white margins, then crop or pad `img` to Tgt_W x Tgt_H.

    Steps:
      1. From each side, step inwards by Step_Size while the outermost
         column/row consists only of 255.
      2. Give back 20 % of the trimmed margin on every side and crop.
      3. Per axis (width first, then height): if the image is larger than
         the target, crop it, preferring to cut white rather than dark
         areas; if it is smaller, pad it with 255 via cv.copyMakeBorder,
         split evenly with the odd pixel on the right/bottom; if it already
         matches, leave it as it is.

    The strip inspected on the right/bottom side used to be addressed in
    target coordinates instead of being taken from the end of the image, so
    dark content at that end could be cut off unnoticed. It is now the
    actual trailing strip.

    Parameters:
        img: image array indexed as img[row, column]; white is 255.
        Tgt_W, Tgt_H: target width and height in pixels.
        Step_Size: step used while searching for the white margins.

    Returns:
        The fitted image. Crop-only results are slices of the input array.
    """
    # Part of the white margin kept during trimming, from 0.00 to 1.00.
    Keep_White = 0.2
    last_col = img.shape[1] - 1
    last_row = img.shape[0] - 1
    x0 = 0
    y0 = 0
    x1 = last_col
    y1 = last_row
    while x0 < x1 and np.all(img[:, x0] == 255):
        x0 += Step_Size
    while x1 > x0 and np.all(img[:, x1] == 255):
        x1 -= Step_Size
    while y0 < y1 and np.all(img[y0, :] == 255):
        y0 += Step_Size
    while y1 > y0 and np.all(img[y1, :] == 255):
        y1 -= Step_Size
    # Move each edge back towards the original border by Keep_White.
    x0 = math.floor(x0*(1-Keep_White))
    y0 = math.floor(y0*(1-Keep_White))
    x1 = math.floor(last_col*Keep_White+x1*(1-Keep_White))
    y1 = math.floor(last_row*Keep_White+y1*(1-Keep_White))
    img = img[y0:y1, x0:x1]

    # Width: crop or pad.
    pad_w = Tgt_W - img.shape[1]
    if pad_w < 0:
        start = _crop_start(img, Tgt_W, 1)
        img = img[:, start:start + Tgt_W]
    elif pad_w > 0:
        left = pad_w // 2
        img = cv.copyMakeBorder(
            img, 0, 0, left, pad_w - left, cv.BORDER_CONSTANT, value=255)

    # Height: crop or pad (measured on the width-fitted image).
    pad_h = Tgt_H - img.shape[0]
    if pad_h < 0:
        start = _crop_start(img, Tgt_H, 0)
        img = img[start:start + Tgt_H, :]
    elif pad_h > 0:
        top = pad_h // 2
        img = cv.copyMakeBorder(
            img, top, pad_h - top, 0, 0, cv.BORDER_CONSTANT, value=255)
    return (img)
# Divide the 2 edges into N parts.
# Check the thumbnail (in grayscale) of the resulting NxN matrix.
# Center the black part if possible.
# Center_Edge_Part is the N parts mentioned before
# Max LR Pixel is the max allowed distance of horizontal offset
# Max TB Pixel is ... of vertical offset
# You may not want to set these values too big
# Because some pages have contents that are not centered
def _blank_run(profile):
    """Return 1 + the index of the first non-zero entry of `profile`.

    When every entry is zero the length of `profile` is returned instead.
    """
    hits = np.flatnonzero(profile)
    return int(hits[0]) + 1 if hits.size else len(profile)


def _centering_shift(lead, trail, extent, parts, max_shift):
    """Translate a margin imbalance (in thumbnail cells) into a pixel shift.

    Imbalances of 3 cells or fewer are ignored. Otherwise half the imbalance
    is scaled to image pixels, capped at max_shift, truncated to an int and
    signed so that content moves towards the larger margin's opposite side
    (negative when the leading margin is the larger one).
    """
    gap = abs(lead - trail)
    if gap <= 3:
        return 0
    shift = int(min(gap / 2 * extent / parts, max_shift))
    return -shift if lead > trail else shift


def CenterImg(img, Center_Edge_Part=100, Max_LR_Pixels=280, Max_TB_Pixels=200):
    """Shift the image so that its non-white content is roughly centred.

    The image is resized to a Center_Edge_Part x Center_Edge_Part thumbnail
    and inverted. From each side, rows/columns are counted up to and
    including the first one that contains anything non-zero. The difference
    between opposite counts gives the translation, which is applied with
    cv.warpAffine and a white border fill.

    Parameters:
        img: image array; its first two shape entries are (height, width).
        Center_Edge_Part: thumbnail side length, i.e. the number of cells
            each edge is divided into.
        Max_LR_Pixels: upper bound of the horizontal shift magnitude.
        Max_TB_Pixels: upper bound of the vertical shift magnitude.

    Returns:
        (shifted_image, LR_Offset, TB_Offset) with integer pixel offsets.

    NOTE(review): a half-size thumbnail thresholded with THRESH_TOZERO at 200
    used to be computed here and was then discarded, because the thumbnail
    was rebuilt directly from `img`. That unused work has been removed;
    confirm whether the thresholded copy was meant to be the resize source.
    """
    (Tgt_H, Tgt_W) = img.shape[:2]
    parts = Center_Edge_Part
    # Inverted thumbnail: white becomes 0, anything darker is non-zero.
    ink = 255 - cv.resize(img, (parts, parts))
    row_ink = ink.reshape(parts, -1).mean(axis=1)
    col_ink = np.moveaxis(ink, 1, 0).reshape(parts, -1).mean(axis=1)

    top_blocks = _blank_run(row_ink)
    bottom_blocks = _blank_run(row_ink[::-1])
    left_blocks = _blank_run(col_ink)
    right_blocks = _blank_run(col_ink[::-1])

    LR_Offset = _centering_shift(left_blocks, right_blocks,
                                 Tgt_W, parts, Max_LR_Pixels)
    TB_Offset = _centering_shift(top_blocks, bottom_blocks,
                                 Tgt_H, parts, Max_TB_Pixels)
    M = np.float32([[1, 0, LR_Offset], [0, 1, TB_Offset]])
    return (cv.warpAffine(img, M, (Tgt_W, Tgt_H), borderValue=(255, 255, 255)),
            LR_Offset, TB_Offset)