-
Notifications
You must be signed in to change notification settings - Fork 1
/
img_utils.py
475 lines (415 loc) · 17.7 KB
/
img_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
import mozjpeg_lossless_optimization
import io
import pickle
from PIL import Image, ImageCms, ExifTags
import cv2
import gzip
import numpy as np
import math
import logging
def rotate_warp_perspective(pil_img, rect):
    """
    Straighten the rotated rectangle `rect` out of `pil_img` with a
    perspective warp, based on
    https://jdhao.github.io/2019/02/23/crop_rotated_rectangle_opencv/

    Of the rotation strategies in this module this one returns the most
    blurry results.

    :param pil_img: source PIL image
    :param rect: cv2.minAreaRect tuple ((cx, cy), (w, h), angle)
    :returns: a PIL image of size (w, h) containing the straightened crop
    """
    opencv_img = np.array(pil_img)
    center, (width, height), angle = rect
    box = cv2.boxPoints(rect)
    # np.int0 was removed in NumPy 2.0; np.intp is the equivalent alias
    box = np.intp(box)
    src_pts = box.astype("float32")
    # coordinates of the points in box points after the rectangle has been
    # straightened
    dst_pts = np.array([[0, int(height)-1],
                        [0, 0],
                        [int(width)-1, 0],
                        [int(width)-1, int(height)-1]], dtype="float32")
    # the perspective transformation matrix
    M = cv2.getPerspectiveTransform(src_pts, dst_pts)
    # BUGFIX: the interpolation constant must be passed as `flags=`; the 4th
    # positional parameter of warpPerspective is `dst`, so the original call
    # never actually selected Lanczos interpolation
    warped = cv2.warpPerspective(opencv_img, M, (int(width), int(height)),
                                 flags=cv2.INTER_LANCZOS4)
    return Image.fromarray(warped)
def sanitize_for_postprocessing(pil_img, force_apply_icc=False):
    """
    Make an image safe for the OpenCV-based post-processing steps.

    Images already in mode "1", "L" or "RGB" pass through untouched unless
    force_apply_icc is set; anything else gets its ICC profile applied and is
    converted to RGB.

    :returns: (image, changed) where changed is True when a new image was built
    """
    if pil_img.mode not in ("1", "L", "RGB"):
        # opencv rotation can only happen on one or 3 channels,
        # so everything else gets converted
        return apply_icc(pil_img).convert('RGB'), True
    if force_apply_icc:
        return apply_icc(pil_img), True
    return pil_img, False
def rotate_warp_affine(pil_img, rect):
    """
    PIL wrapper around rotate_warp_affine_cv2: straighten the rotated
    rectangle `rect` out of `pil_img` with an affine warp (acceptable
    visual quality).
    """
    rotated = rotate_warp_affine_cv2(np.array(pil_img), rect)
    return Image.fromarray(rotated)
def rotate_warp_affine_cv2(opencv_img, rect):
    """
    Straighten the rotated rectangle `rect` out of an OpenCV image using
    cv2.warpAffine followed by cv2.getRectSubPix; returns acceptable results.

    :param opencv_img: numpy image (boolean arrays are handled specially)
    :param rect: cv2.minAreaRect tuple ((cx, cy), (w, h), angle)
    :returns: numpy image of size (w, h), same dtype as the input
    """
    was_bool = opencv_img.dtype == bool
    if was_bool:
        # warpAffine does not accept boolean matrices, so work on uint8
        opencv_img = opencv_img.astype(np.uint8)
    center, (width, height), angle = rect
    if angle > 45:
        # normalize the minAreaRect so we rotate by the smaller angle
        angle = angle - 90
        width, height = height, width
    rotation = cv2.getRotationMatrix2D(center, angle, 1.0)
    full_size = (opencv_img.shape[1], opencv_img.shape[0])
    # cv2.INTER_LANCZOS4 would also work, but CUBIC looks slightly better(?)
    rotated = cv2.warpAffine(opencv_img, rotation, full_size, flags=cv2.INTER_CUBIC)
    cropped = cv2.getRectSubPix(rotated, (int(width), int(height)), center)
    return cropped.astype(bool) if was_bool else cropped
def get_bounding_box(min_area_rect, width, height):
    """
    Return the smallest axis-aligned bounding box containing the minAreaRect,
    clamped to the image dimensions.

    :param min_area_rect: ((cx, cy), (w, h), angle) as from cv2.minAreaRect
    :param width: image width used to clamp the box
    :param height: image height used to clamp the box
    :returns: [x, y, w, h]
    """
    corners = cv2.boxPoints(min_area_rect)
    xs, ys = corners[:, 0], corners[:, 1]
    # clamp the extrema to the image
    x_min = max(0, int(xs.min()))
    y_min = max(0, int(ys.min()))
    x_max = min(width, int(xs.max()))
    y_max = min(height, int(ys.max()))
    return [x_min, y_min, x_max - x_min, y_max - y_min]
def extract_img(img_orig, ann_info, dst_fname = "", rotate=False):
    """
    Extract the subset of img_orig described by ann_info (type AnnotationInfo).

    With rotate=False (the default) an axis-aligned crop of ann_info.bbox is
    returned; with rotate=True the minAreaRect is straightened instead.
    Subjectively, warp_affine gives a more crisp result than warp_perspective;
    the best results are with Gimp's "noHalo" interpolation, but there seems
    to be no way to use it in Python.
    """
    if ann_info is None:
        return img_orig
    if rotate:
        #return rotate_warp_perspective(img_orig, ann_info.minAreaRect)
        return rotate_warp_affine(img_orig, ann_info.minAreaRect)
    x, y, w, h = ann_info.bbox
    return img_orig.crop((x, y, x + w, y + h))
def is_color_image(image: "Image", colorness_threshold: int = 30, color_pixel_ratio_threshold: float = 0.01) -> bool:
    """
    Determine if an image should be kept in RGB to avoid loss of information.

    Args:
        image (Image): An RGB image in Pillow format.
        colorness_threshold (int): Threshold to determine if a pixel is colored based on its "colorness".
        color_pixel_ratio_threshold (float): Threshold ratio of color pixels to total pixels to decide if the image is colored.

    Returns:
        bool: True if the image contains significant color information, False otherwise.
    """
    # Convert image to numpy array
    img_array = np.array(image)
    # "colorness" of a pixel = the largest deviation of any channel from the
    # pixel's mean (0 for pure grays)
    colorness = np.max(np.abs(img_array - np.mean(img_array, axis=2, keepdims=True)), axis=2)
    # Determine colored pixels
    colored_pixels = colorness > colorness_threshold
    # BUGFIX: the ratio must be over the number of pixels (H*W, i.e.
    # colored_pixels.size); img_array.size also counts the channels, which
    # made the documented threshold 3x stricter than intended
    color_pixel_ratio = np.sum(colored_pixels) / colored_pixels.size
    # Determine if the image is color based on the threshold
    return color_pixel_ratio > color_pixel_ratio_threshold
def encode_img_uncompressed(img, try_grayscale=False) -> (bytes, str):
    """
    Returns the bytes of the uncompressed (deflate) tiff image AND the
    expected file extension.

    :param img: PIL image; RGBA is flattened to RGB, and when try_grayscale
        is True, RGB images with no significant color are converted to "L"
    :returns: (tiff_bytes, ".tiff"), or (None, ".tiff") when encoding failed
    """
    with io.BytesIO() as output:
        if img.mode == "RGBA":
            img = img.convert("RGB")
        if img.mode == "RGB" and try_grayscale and not is_color_image(img):
            img = img.convert("L")
        try:
            img.save(output, icc_profile=img.info.get('icc_profile'), format="TIFF", compression="tiff_deflate")
        except Exception:
            try:
                # workaround for https://github.com/python-pillow/Pillow/issues/7892
                # (the double 180 rotation forces Pillow to re-encode the pixel data);
                # the stray debug print("oops") is now a proper log message
                logging.warning("TIFF encoding failed, retrying with rotation workaround")
                img = img.rotate(180).rotate(180)
                img.save(output, icc_profile=img.info.get('icc_profile'), format="TIFF", compression="tiff_deflate")
            except Exception:
                logging.exception("TIFF encoding failed")
                return None, ".tiff"
        return output.getvalue(), ".tiff"
def encode_img(img, target_mode=None, mozjpeg_optimize=True, shrink_factor=1.0, lum_factor=1.0, quality=85):
    """
    Returns the bytes of the encoded image (jpg, or g4 tiff if binary).

    :param img: PIL image; its metadata is stripped and its ICC profile applied
    :param target_mode: Pillow mode to encode in (defaults to get_best_mode)
    :param mozjpeg_optimize: run mozjpeg lossless optimization on the JPEG
    :param shrink_factor: optional resize factor applied before encoding
    :param lum_factor: optional luminance multiplier (JPEG path only)
    :param quality: JPEG quality
    :returns: (encoded_bytes, extension) with extension ".jpg" or ".tif"
    """
    cleanup_exif(img)
    img = apply_icc(img)
    if target_mode is None:
        target_mode = get_best_mode(img)
    if img.mode != target_mode:
        img = img.convert(target_mode)
    if shrink_factor != 1.0:
        new_size = (int(img.width * shrink_factor), int(img.height * shrink_factor))
        img = img.resize(new_size, Image.Resampling.LANCZOS)
    if target_mode == "1":
        # binary images are encoded as group-4 TIFF
        with io.BytesIO() as output:
            img.save(output, format="TIFF", compression="group4")
            return output.getvalue(), ".tif"
    if lum_factor != 1.0:
        img = multiply_linear_srgb_one_pil(img, lum_factor)
    with io.BytesIO() as output:
        img.save(output, format="JPEG", quality=quality, optimize=True, progressive=True, subsampling="4:2:2", comment="")
        jpg_bytes = output.getvalue()
    if mozjpeg_optimize:
        jpg_bytes = mozjpeg_lossless_optimization.optimize(jpg_bytes)
    return jpg_bytes, ".jpg"
def encode_thumbnail_img(img, target_mode=None, mozjpeg_optimize=True, shrink_factor=1.0, quality=85):
    """
    Returns the bytes of the encoded thumbnail (jpg, or png if binary).

    :param img: PIL image; its metadata is stripped and its ICC profile applied
    :param target_mode: Pillow mode to encode in (defaults to get_best_thumbnail_mode)
    :param mozjpeg_optimize: run mozjpeg lossless optimization on the JPEG
    :param shrink_factor: optional resize factor applied before encoding
    :param quality: JPEG quality
    :returns: (encoded_bytes, extension) with extension ".jpg" or ".png"
    """
    cleanup_exif(img)
    img = apply_icc(img)
    if target_mode is None:
        target_mode = get_best_thumbnail_mode(img)
    if img.mode != target_mode:
        img = img.convert(target_mode)
    if shrink_factor != 1.0:
        new_size = (int(img.width * shrink_factor), int(img.height * shrink_factor))
        img = img.resize(new_size, Image.Resampling.LANCZOS)
    if target_mode == "1":
        # binary thumbnails are stored losslessly as PNG
        with io.BytesIO() as output:
            img.save(output, format="PNG")
            return output.getvalue(), ".png"
    with io.BytesIO() as output:
        img.save(output, format="JPEG", quality=quality, optimize=True, progressive=True, subsampling="4:2:2", comment="")
        jpg_bytes = output.getvalue()
    if mozjpeg_optimize:
        jpg_bytes = mozjpeg_lossless_optimization.optimize(jpg_bytes)
    return jpg_bytes, ".jpg"
def cleanup_exif(img):
    """Strip all metadata entries (exif, icc_profile, ...) from img.info, in place."""
    for key in list(img.info):
        del img.info[key]
def get_best_mode(img):
    """
    Returns the best Pillow mode for an image: its own mode when already
    one of "1", "L" or "RGB", otherwise "RGB".
    """
    return img.mode if img.mode in ("1", "L", "RGB") else "RGB"
def get_best_thumbnail_mode(img):
    """
    Returns the best Pillow mode for a thumbnail: "1" stays binary,
    everything else becomes grayscale ("L").
    """
    return "1" if img.mode == "1" else "L"
def is_grayscale(img):
    # NOTE(review): unimplemented stub — always reports non-grayscale, so
    # callers never downconvert based on this check.
    # Intended approach (not implemented): compare the RGB channels,
    # https://stackoverflow.com/a/23661373/2560906
    # but having a threshold in the differences
    return False
def apply_icc(img):
    """
    Convert PIL image to sRGB color space (if possible).

    Only acts when the image carries an icc_profile and is not binary ("1")
    or grayscale ("L"): RGB/RGBA images are transformed in place (same object
    returned), other modes produce a new RGB image. Images without a profile
    are returned unchanged.
    """
    icc = img.info.get('icc_profile', '')
    if icc and img.mode not in ["1", "L"]:
        io_handle = io.BytesIO(icc)  # virtual file
        src_profile = ImageCms.ImageCmsProfile(io_handle)
        dst_profile = ImageCms.createProfile('sRGB')
        if img.mode in ["RGB", "RGBA"]:
            # same mode in and out: transform the pixels in place
            ImageCms.profileToProfile(img, src_profile, dst_profile, inPlace=True)
        else:
            # mode change required (e.g. CMYK): build a new RGB image
            img = ImageCms.profileToProfile(img, src_profile, dst_profile, outputMode='RGB')
    return img
def apply_exif_rotation(img):
    """
    Apply the rotation recorded in the image's exif data.
    https://stackoverflow.com/a/26928142/2560906

    :param img: PIL image
    :returns: a rotated copy, or img itself when no rotation applies
    """
    # find the numeric tag id of the "Orientation" exif field
    for orientation in ExifTags.TAGS.keys():
        if ExifTags.TAGS[orientation] == 'Orientation':
            break
    exif = None
    try:
        exif = img._getexif()
    except Exception:
        # some formats have no exif support at all
        return img
    if exif is None:
        return img
    # BUGFIX: use .get() — exif dicts frequently omit the Orientation tag,
    # which previously raised a KeyError here
    orientation_value = exif.get(orientation)
    if orientation_value == 3:
        return img.rotate(180, expand=True)
    elif orientation_value == 6:
        return img.rotate(270, expand=True)
    elif orientation_value == 8:
        return img.rotate(90, expand=True)
    return img
def extract_encode_img(img_orig, sam_annotation, dst_fname, rotate=False):
    """Crop img_orig to sam_annotation (see extract_img) and encode the result (see encode_img)."""
    return encode_img(extract_img(img_orig, sam_annotation, dst_fname, rotate))
# Palette cycled through by get_debug_img_bytes to outline each annotation.
# NOTE(review): these are drawn with cv2 on an array obtained via RGB2BGR,
# so at draw time the tuples are interpreted as (blue, green, red) —
# confirm the intended channel order.
COLORS = [
    (0,255,0),
    (255,0,0),
    (0,0,255),
    (255,255,0),
    (0,255,255),
    (255,0,255),
    (0,127,0),
    (127,0,0),
    (0,0,127),
    (127,127,0),
    (0,127,127),
    (127,0,127)
]
def get_debug_img_bytes(img_orig, image_anns, max_size_px=256, draw_rotated=True, line_thickness=2):
    """
    Return an encoded jpg with the image annotations represented as rectangles.
    The image is converted to RGB and resized so that its smaller dimension
    equals max_size_px.

    :param img_orig: PIL image
    :param image_anns: iterable of annotations with a minAreaRect attribute
    :param max_size_px: target size for the smaller image dimension
    :param draw_rotated: unused (rotated rectangles are always drawn)
    :param line_thickness: outline thickness in pixels
    :returns: (jpg_bytes, ".jpg") as produced by encode_img
    """
    # resize factor
    rf = max(max_size_px/float(img_orig.size[0]), max_size_px/float(img_orig.size[1]))
    # Image.BICUBIC is deprecated; Image.Resampling matches the rest of this module
    img_orig = img_orig.resize((int(img_orig.size[0]*rf), int(img_orig.size[1]*rf)),
                               Image.Resampling.BICUBIC)
    # BUGFIX: draw directly on the RGB array. The previous RGB2BGR conversion
    # was never undone before Image.fromarray, so the encoded debug image had
    # its red and blue channels swapped.
    new_img = np.array(img_orig.convert('RGB'))
    img_orig = None  # allow the resized copy to be gc'd (was a typo: img_org)
    for i, image_ann in enumerate(image_anns):
        (oldcx, oldcy), (oldw, oldh), angle = image_ann.minAreaRect
        # scale the annotation rectangle into the resized coordinate system
        new_rect = ((oldcx*rf, oldcy*rf), (oldw*rf, oldh*rf), angle)
        # np.int0 was removed in NumPy 2.0; np.intp is the equivalent alias
        box = np.intp(cv2.boxPoints(new_rect))
        color = COLORS[i % len(COLORS)]
        cv2.drawContours(new_img, [box], 0, color, line_thickness)
    return encode_img(Image.fromarray(new_img), target_mode="RGB", mozjpeg_optimize=False)
def sRGB_inverse_gamma(value):
    """Decode a normalized sRGB value to linear light (inverse gamma correction)."""
    if value > 0.04045:
        return ((value + 0.055) / 1.055) ** 2.4
    return value / 12.92
def sRGB_gamma(value):
    """Encode a linear-light value to normalized sRGB (forward gamma correction)."""
    if value > 0.0031308:
        return 1.055 * value ** (1.0 / 2.4) - 0.055
    return 12.92 * value
def multiply_linear_srgb_one_pil(img, lum_factor, no_crop=True):
    """
    Wrapper for multiply_linear_srgb_one accepting and returning PIL images.
    """
    scaled = multiply_linear_srgb_one(np.asarray(img), lum_factor, no_crop)
    return Image.fromarray(scaled)
def multiply_linear_srgb_one(img, factor, no_crop=True):
    """
    Multiply every channel of the image by `factor` in linear sRGB.

    The image is decoded to linear light via a lookup table, scaled by the
    same factor on each channel, and re-encoded to sRGB.
    If no_crop is True, the factor is reduced so the highest output value
    still fits in the representable range (instead of saturating).

    :param img: numpy image with an integer dtype
    :param factor: linear-light multiplier
    :returns: numpy image of the same dtype and shape
    """
    bit_depth = img.dtype.itemsize * 8  # Determine bit depth per channel
    num_channels = img.shape[2] if len(img.shape) > 2 else 1  # Determine number of channels
    lut_size = 1 << bit_depth
    max_code = lut_size - 1  # highest representable code value for this dtype
    # Build the LUT in float *without* clipping yet, so the no_crop rescale
    # below can observe values that overflow the representable range.
    lut = np.zeros((lut_size, 1), dtype=np.float32)
    for i in range(lut_size):
        normalized_val = i / float(max_code)  # Normalize to [0, 1]
        # Apply inverse sRGB gamma, multiply by factor, and apply sRGB gamma
        lut[i] = sRGB_gamma(sRGB_inverse_gamma(normalized_val) * factor) * max_code
    if no_crop:
        max_val = float(lut.max())
        # BUGFIX: the overflow check used to run *after* np.clip and compared
        # against a hard-coded 255, so it could never fire for 8-bit images
        # (values already saturated) and mis-scaled 16-bit images down to 255;
        # compare the unclipped peak against the dtype's own maximum instead
        if max_val > max_code:
            adjustment_factor = max_code / max_val
            logging.error("had to adjust lum_factor by %f" % adjustment_factor)
            lut *= adjustment_factor
    lut = np.clip(lut, 0, max_code).astype(img.dtype)
    # Apply the LUT to each channel of the image
    if num_channels > 1:
        result_img = cv2.merge([cv2.LUT(img[:, :, c], lut) for c in range(num_channels)])
    else:
        result_img = cv2.LUT(img, lut)
    return result_img
def multiply_linear_srgb(img, factors):
    """
    Multiply each color channel of the image by its own factor in linear sRGB.

    Each channel is decoded to linear light, scaled by factors[c] and
    re-encoded to sRGB, using one lookup table per channel.

    :param img: numpy image with an integer dtype
    :param factors: per-channel multipliers, one per image channel
    :returns: numpy image of the same dtype and shape
    """
    bit_depth = img.dtype.itemsize * 8  # bits per channel
    num_channels = img.shape[2] if len(img.shape) > 2 else 1
    lut_size = 1 << bit_depth
    max_code = lut_size - 1

    def build_lut(factor):
        # one LUT entry per representable code value
        table = np.zeros((lut_size, 1), dtype=np.float32)
        for code in range(lut_size):
            # inverse gamma -> scale in linear light -> forward gamma
            linear = sRGB_inverse_gamma(code / float(max_code))
            encoded = sRGB_gamma(linear * factor) * max_code
            table[code] = np.clip(encoded, 0, max_code)
        return table.astype(img.dtype)

    luts = [build_lut(factors[c]) for c in range(num_channels)]
    if num_channels > 1:
        return cv2.merge([cv2.LUT(img[:, :, c], luts[c]) for c in range(num_channels)])
    return cv2.LUT(img, luts[0])
def srgb_to_lnsrgb(rgb_array, bps):
    """
    Convert an sRGB color to linear sRGB.

    :param rgb_array: iterable of sRGB channel values
    :param bps: bits per sample of the input values; 0 means the values are
        already normalized to [0, 1]
    :returns: numpy array of linear-light channel values
    """
    res = []
    for v in rgb_array:
        if bps:
            # BUGFIX: '^' is bitwise xor in Python ('2^8' == 10, not 256);
            # normalize by the channel's maximum code value (2**bps - 1,
            # e.g. 255 for 8 bits) so full scale maps to 1.0
            v = v / (2**bps - 1)
        v = sRGB_inverse_gamma(v)
        res.append(v)
    return np.array(res)
def get_linear_factors(srgb_img, bbox, expected_nsRGB):
    """
    Compute per-channel linear-sRGB scale factors that map the median color
    of a reference patch of srgb_img to expected_nsRGB.

    :param srgb_img: numpy image in sRGB (8 or 16 bits per channel)
    :param bbox: (x, y, w, h) of the reference (white) patch
    :param expected_nsRGB: expected normalized sRGB color of the patch
    :returns: numpy array of per-channel factors, usable with multiply_linear_srgb
    """
    x_start, y_start, bbox_w, bbox_h = bbox
    white_patch = srgb_img[y_start:(y_start+bbox_h), x_start:(x_start+bbox_w)]
    median_srgb = np.median(white_patch.reshape(-1, 3), axis=0)
    # BUGFIX: this line referenced an undefined name `img` (NameError at runtime)
    median_lnsrgb = srgb_to_lnsrgb(median_srgb, 16 if srgb_img.dtype.itemsize == 2 else 8)
    expected_lnsrgb = srgb_to_lnsrgb(expected_nsRGB, 0)
    scale_factors = expected_lnsrgb / median_lnsrgb
    return scale_factors
def apply_scale_factors_pil(pil_img, linear_rgb_factors):
    """
    Apply per-channel linear-sRGB factors to pil_img, modifying it in place.

    :param pil_img: PIL image, updated in place via paste()
    :param linear_rgb_factors: per-channel multipliers (see multiply_linear_srgb)
    """
    # BUGFIX: this line referenced an undefined name `img` (NameError at runtime)
    np_img = np.array(pil_img)
    np_transformed_img = multiply_linear_srgb(np_img, linear_rgb_factors)
    pil_img.paste(Image.fromarray(np_transformed_img))
def rotate_mar(rect, n, image_width, image_height):
    """
    Rotates a cv2.minAreaRect around the center of the image by n degrees,
    where n can be 90, 180, or 270. Adjusts the position and orientation of
    the rectangle accordingly.

    Parameters:
    - rect: The cv2.minAreaRect to rotate, in the form ((center_x, center_y), (width, height), angle).
    - n: The rotation angle, which can be 90, 180, or 270 degrees.
    - image_width: Width of the image.
    - image_height: Height of the image.

    Returns:
    - A rotated cv2.minAreaRect in the same format.

    Raises:
    - ValueError when n is not one of 90, 180, 270.
    """
    if n not in (90, 180, 270):
        raise ValueError("Rotation must be 90, 180, or 270 degrees")
    ((center_x, center_y), (width, height), angle) = rect
    # rotate the rectangle center around the image center
    image_center = np.array([image_width / 2.0, image_height / 2.0])
    offset = np.array([center_x, center_y]) - image_center
    theta = np.radians(n)
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rotation = np.array([[cos_t, -sin_t], [sin_t, cos_t]])
    new_center = rotation.dot(offset) + image_center
    # a quarter turn swaps width and height; a half turn keeps them
    if n in (90, 270):
        new_size = (height, width)
    else:
        new_size = (width, height)
    # shift the angle and fold it back towards the cv2.minAreaRect convention
    new_angle = angle - n
    if new_angle <= -90:
        new_angle += 180
    return ((new_center[0], new_center[1]), new_size, new_angle)