LabelRecogniser.m
classdef LabelRecogniser
%LabelRecogniser Expiry Date Detection and Recognition from an image.
%
% Enables the detection and recognition of expiry dates from images
% of food packaging for use within a date verification system.
%
% LabelRecogniser(image) initialises the class with an image. This
% can be a file path to an image or a uint8 image matrix.
%
% This class utilises the Image Processing and Computer Vision
% Toolboxes.
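%
% Example (illustrative usage only; assumes an image file 'label.jpg'
% exists in the current folder):
%   lr = LabelRecogniser('label.jpg');
%   [dates, boxes] = lr.recogniseDates();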
properties
image; % Input image matrix used for recognition
thresholdDelta = 2.5; % Step size between MSER intensity thresholds (default: 2.5)
strokeWidthVariationThreshold = 0.4; % Maximum variation in the width of letter strokes (default: 0.4)
orientationThreshold = 7.5; % Angle that needs to be exceeded to perform orientation correction (default: 7.5)
end
properties (Access = private)
height % height of input image
width % width of input image
end
methods
function obj = LabelRecogniser(img)
%LabelRecogniser Construct an instance of this class from a filepath or uint8 image matrix.
if nargin > 0
obj.image = img;
end
end
function [text, bboxes] = recogniseDates(obj)
%recogniseDates Identify the position and textual representation of the dates shown within the image.
%
% text = recogniseDates() provides all of the recognised dates.
% Returns an empty string array if none are found.
%
% [text, bboxes] = recogniseDates() provides all of the recognised
% dates and the bounding boxes of identified date regions.
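%
% Illustrative output (not actual results): text is a string array of
% recognised dates, e.g. ["25 DEC 2024"], and bboxes is an Mx4 matrix
% of [x y w h] boxes around candidate text regions.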
%Check that there is a valid image
if isempty(obj.image)
error("LabelRecogniser:NoImage", "No image was provided for recognition.");
end
%Perform steps to detect and recognise expiry dates from an image
img = convertGrey(obj, obj.image);
grey = preProcess(obj, img);
img = mser(obj, grey);
img = postProcess(obj, img);
img = connectedComponentEnhance(obj, img, grey);
img = geometricFilter(obj, img);
img = strokeWidthTransform(obj, img);
[img, bboxes] = textGrouping(obj, img);
allText = characterRecognition(obj, img, bboxes);
text = dateMatching(obj, allText);
end
function obj = set.image(obj, value)
%Setter for image property
%Handle the value differently depending on its data type
switch class(value)
case {'char', 'string'} %filepath
try
%Attempt to read image from filepath
obj.image = imread(value);
catch
error("LabelRecogniser:BadImageFile", "Cannot read image due to an invalid file type.");
end
obj = obj.updateImageDimensions();
case 'uint8' %image matrix
%Check that dimensions are sufficient to be an image
if ~isvector(value)
obj.image = value;
obj = obj.updateImageDimensions();
else
error("LabelRecogniser:BadImageDimensions", "Image dimensions are too small.");
end
otherwise %else
error("LabelRecogniser:BadImage", "Invalid data type. Must be a file location or uint8 array.");
end
end
function obj = set.thresholdDelta(obj, value)
%Setter for thresholdDelta property
%Ensure that value falls within range supported by MSER
if value > 0 && value <= 100
obj.thresholdDelta = value;
else
error('LabelRecogniser:ExceededRange', 'The chosen value exceeds the acceptable range of [0 100].');
end
end
function obj = set.strokeWidthVariationThreshold(obj, value)
%Setter for strokeWidthVariationThreshold property
%Ensure that value falls within range of possible SWT values
if value > 0 && value <= 1
obj.strokeWidthVariationThreshold = value;
else
error('LabelRecogniser:ExceededRange', 'The chosen value exceeds the acceptable range of [0 1].');
end
end
function obj = set.orientationThreshold(obj, value)
%Setter for orientationThreshold property
%Ensure that value falls within range of possible angles
if value >= 0 && value <= 90
obj.orientationThreshold = value;
else
error('LabelRecogniser:ExceededRange', 'The chosen value exceeds the acceptable range of [0 90].');
end
end
end
methods (Access = private)
function obj = updateImageDimensions(obj)
%updateImageDimensions Update the height and width properties according to image dimensions
[obj.height, obj.width, ~] = size(obj.image);
end
function gImage = convertGrey(~, image)
%convertGrey Convert an RGB image to greyscale
if size(image, 3) == 3
gImage = rgb2gray(image);
else
gImage = image;
end
end
function pImage = preProcess(~, image)
%preProcess Perform image enhancements to improve text clarity
%Perform Spatial Filtering to eliminate noise
noise = wiener2(image, [3 3]);
%Perform Linear Contrast Stretching
contrast = imadjust(noise);
%Use Unsharp Masking to increase image sharpness
pImage = imsharpen(contrast);
end
function mserImage = mser(obj, image)
%mser Detect text candidates using Maximally Stable Extremal Regions
%Initialise logical image with necessary dimensions
mserImage = false(obj.height, obj.width);
%Perform MSER text detection
mserRegions = detectMSERFeatures(image, 'RegionAreaRange', [150 1500], ...
'ThresholdDelta', obj.thresholdDelta, 'MaxAreaVariation', 0.2);
% Ensure that text has been found with provided MSER parameters
if mserRegions.Count > 1
% Concatenate pixel coordinates from cell to Nx2 matrix
pixels = cell2mat(mserRegions.PixelList);
elseif mserRegions.Count == 1
% Handle PixelList in matrix form
pixels = mserRegions.PixelList;
else
error('LabelRecogniser:InvalidMSERParams', 'No text detected by MSER. Ensure parameters are suitable.');
end
%Convert image coordinates to linear image indices
ind = sub2ind([obj.height, obj.width], pixels(:,2), pixels(:,1));
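%For example (illustrative), in a 100x200 image a pixel at x = 5, y = 10
%maps to linear index sub2ind([100 200], 10, 5) = 410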
%Set matching pixels to white
mserImage(ind) = true;
end
function pImage = postProcess(~, image)
%postProcess Cleanup MSER binary image
%Define 3x3 square structuring element
element = strel('square', 3);
%Perform opening to remove protrusions and small joins
open = imopen(image, element);
%Remove blobs with an area less than 100 px
clearNoise = bwareaopen(open, 100);
%Fill small holes
pImage = ~bwareaopen(~clearNoise, 3);
end
function fImage = connectedComponentEnhance(obj, image, greyscale)
%connectedComponentEnhance Perform connected component enhancement to clean up the binary MSER image
stats = regionprops(image, 'Image', 'BoundingBox');
numObjects = size(stats, 1);
adjusted = false(obj.height, obj.width);
for i = 1:numObjects
%Get bounding box and image for current object
bbox = ceil(stats(i).BoundingBox - [0, 0, 1, 1]);
imageSegment = stats(i).Image;
%Threshold the cropped greyscale image using Otsu's Method
binarySegment = imbinarize(imcrop(greyscale, bbox));
%Identify connected components in the intersection between binary image
%and thresholded image
ccRegularImage = bwconncomp(imageSegment & binarySegment);
ccInverseImage = bwconncomp(imageSegment & ~binarySegment);
%Check which combination (regular or inverse) produces the fewest
%connected components above zero, as it likely used the appropriate threshold
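%For example (illustrative), with dark text on a light label imbinarize
%marks the background as true, so imageSegment & ~binarySegment retains
%the text pixels while imageSegment & binarySegment is nearly empty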
if ccRegularImage.NumObjects == 0 && ccInverseImage.NumObjects ~= 0
%Use inverse image
keepImage = imageSegment & ~binarySegment;
elseif ccInverseImage.NumObjects == 0 && ccRegularImage.NumObjects ~= 0
%Use regular image
keepImage = imageSegment & binarySegment;
elseif ccRegularImage.NumObjects < ccInverseImage.NumObjects
%Use regular image
keepImage = imageSegment & binarySegment;
elseif ccInverseImage.NumObjects < ccRegularImage.NumObjects
%Use inverse image
keepImage = imageSegment & ~binarySegment;
%Edge case where they could have matching number of objects
elseif ccInverseImage.NumObjects == ccRegularImage.NumObjects
%Concatenate list to handle multiple objects in an image
ccRegularSize = size(cat(1, ccRegularImage.PixelIdxList{:}), 1);
ccInverseSize = size(cat(1, ccInverseImage.PixelIdxList{:}), 1);
%Choose threshold that produces the most pixels
if ccRegularSize < ccInverseSize
keepImage = imageSegment & ~binarySegment;
else
keepImage = imageSegment & binarySegment;
end
else
%Use regular image as last resort
keepImage = imageSegment & binarySegment;
end
%Replace existing image locations with new enhanced images
adjusted(bbox(2):bbox(2) + bbox(4), bbox(1):bbox(1) ...
+ bbox(3)) = keepImage;
end
%Remove blobs with an area below 20 pixels
clearNoise = bwareaopen(adjusted, 20);
%Fill small holes by inverting image so the background becomes foreground
fImage = ~bwareaopen(~clearNoise, 3);
end
function fImage = geometricFilter(~, image)
%geometricFilter Remove unlikely text candidates using geometric properties
%Label the image and get the properties of each object
label = bwlabel(image);
stats = regionprops(image, 'BoundingBox', 'Eccentricity', ...
'EulerNumber', 'Extent', 'Solidity');
%Calculate the maximum aspect ratio for horizontal and vertical direction
bboxes = vertcat(stats.BoundingBox);
bbWidths = bboxes(:, 3).';
bbHeights = bboxes(:, 4).';
aspectRatio = max(bbWidths ./ bbHeights, bbHeights ./ bbWidths);
%Keep blobs that have at most 3 holes in them (allowing for some noise)
validEulerNo = [stats.EulerNumber] >= -2;
%Keep blobs that are not perfect lines (e.g. barcodes)
validEccentricity = [stats.Eccentricity] < 0.99;
%Keep blobs whose Extent (ratio of area to bounding box area) falls within a typical range for text
validExtent = [stats.Extent] > 0.25 & [stats.Extent] < 0.9;
%Keep blobs that do not have significant convex areas
validSolidity = [stats.Solidity] > 0.4;
%Keep blobs that are not extremely elongated
validAspectRatio = aspectRatio < 3;
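%For example (illustrative), a 30x10 px bounding box has an aspect ratio
%of 3 and is therefore rejected by the < 3 rule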
%Find the index of all objects that have valid properties
keep = find(validEulerNo & validEccentricity & validExtent & ...
validSolidity & validAspectRatio);
%Return pixels that are at the valid indexes for the image
fImage = ismember(label, keep);
end
function fImage = strokeWidthTransform(obj, image)
%strokeWidthTransform Remove unlikely text candidates based upon the width of the characters
%Label the image and get the smallest image encapsulating each object
label = bwlabel(image);
stats = regionprops(image, 'Image');
%Initialise variables that hold the number of objects and their stroke
%width variations
numObjects = size(stats, 1);
swVariations = zeros(1, numObjects);
%Loop through all objects in image
for i = 1:numObjects
%Get the image containing just the object
imageSegment = stats(i).Image;
%Pad the image with 0's to avoid stroke width being affected by boundary
paddedimage = padarray(imageSegment, [1 1]);
%Perform Distance Transform
distanceTransform = bwdist(~paddedimage);
%Perform thinning until Skeleton is created
skeletonTransform = bwmorph(paddedimage, 'thin', inf);
%Retrieve the stroke width values for the image
strokeWidths = distanceTransform(skeletonTransform);
%Calculate the variation in stroke widths
swVariations(i) = std(strokeWidths)/mean(strokeWidths);
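%For example (illustrative), strokeWidths = [2 2 3 2] gives
%std/mean = 0.5/2.25 ~ 0.22, which is kept under the default 0.4 threshold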
end
%Find valid stroke widths that are below the variation threshold
validStrokeWidths = swVariations < obj.strokeWidthVariationThreshold;
%Find the index of these objects
keep = find(validStrokeWidths);
%Create an image made of objects that are below the variation threshold
fImage = ismember(label, keep);
end
function [image, boxes] = textGrouping(obj, image)
%textGrouping Establish bounding boxes around potential dates using a set of rules
%Ensure that there are 2 output arguments
nargoutchk(2, 2);
%Get the bounding box for each object and convert to usable coordinates
stats = regionprops(image, 'BoundingBox');
ROIs = vertcat(stats.BoundingBox);
if isempty(ROIs)
error('LabelRecogniser:NoBoundingBoxes', 'Bounding Boxes could not be created since no text was detected. Try relaxing the filtering parameters.');
end
%Separate the bounding boxes into separate variables for manipulation
roiX = ROIs(:, 1);
roiY = ROIs(:, 2);
roiW = ROIs(:, 3);
roiH = ROIs(:, 4);
%Expand ROI by 2/3 the character width in horizontal direction
expandedX = roiX - (roiW * (2/3));
expandedW = roiW + ((roiW * (2/3)) * 2);
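%For example (illustrative), a 30 px wide character box is widened by
%20 px on each side, giving an expanded width of 70 px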
%Ensure that ROI is within bounds of the image
expandedX = max(expandedX, 1);
expandedW = min(expandedW, obj.width - expandedX);
%Create expanded bounding boxes
expandedROI = [expandedX, roiY, expandedW, roiH];
%Calculate the overlap ratio (intersection over union) between bounding boxes
overlapRatio = bboxOverlapRatio(expandedROI, expandedROI, 'Union');
overlapSize = size(overlapRatio, 1);
%Remove each bounding box's overlap with itself by zeroing the diagonal
overlapRatio(1:overlapSize + 1:overlapSize^2) = 0;
%Create node graph of connected Bounding Boxes & find the index of boxes
%that intersect
labelledROI = conncomp(graph(overlapRatio));
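%For example (illustrative), if boxes 1 and 2 overlap but box 3 does not,
%conncomp returns labelledROI = [1 1 2]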
%Find the total number of connected bounding box groups
totalComponents = max(labelledROI);
%Loop through each group of connected bounding boxes
for i = 1:totalComponents
%find the index of connected bounding boxes
connectedBoxes = find(labelledROI == i);
%Get their heights
boxHeight = roiH(connectedBoxes);
%Calculate the average height of connected bounding boxes
meanHeight = mean(boxHeight);
meanError = meanHeight/2;
%Find all bounding boxes that have a height that matches the criteria
validBoxes = boxHeight > meanHeight - meanError & boxHeight < meanHeight + meanError;
%Get the index of all bounding boxes that don't match the criteria
invalidHeight = connectedBoxes(validBoxes == 0);
%Check that invalidHeights is not empty
if (~isempty(invalidHeight))
%Loop through invalid indexes
for j = 1:size(invalidHeight, 2)
%Get the id of the bounding box with an invalid height
id = invalidHeight(j);
%Separate the component into new indices by creating
%a new 'label' above the max value
labelledROI(id) = max(labelledROI) + 1;
end
end
end
%Find the minimum X & Y and the maximum X+W & Y+H for each group of
%intersecting bounding boxes to form encompassing bounding boxes
labelledROI = labelledROI.';
x1 = accumarray(labelledROI, expandedX, [], @min); % |------------(x2,y2)
y1 = accumarray(labelledROI, roiY, [], @min); % | |
x2 = accumarray(labelledROI, expandedX + expandedW, [], @max); % | |
y2 = accumarray(labelledROI, roiY + roiH, [], @max); % (x1,y1)------------|
%Create merged bounding boxes in [X Y W H] format
mergedTextROI = [x1, y1, x2 - x1, y2 - y1];
%Calculate size of labels after updating connected bounding boxes
maxLabel = max(labelledROI);
labelSizes = histcounts(labelledROI.', maxLabel, ...
'BinMethod', 'integers', 'BinLimits', [0.5, maxLabel + 0.5]);
% labelSizes = histcounts(labelledROI.', max(labelledROI), ...
% 'BinMethod', 'integers');
%Remove single, unconnected bounding boxes
wordCandidates = labelSizes > 1;
filteredTextROI = mergedTextROI(wordCandidates, :);
%Get the bounding boxes of removed objects
removePixelsROI = mergedTextROI(~wordCandidates, :);
%Ensure the bounding box isn't empty
removePixelsROI(all(~removePixelsROI, 2), :) = [];
%Get the [X Y W H] values of the bounding box, rounded to whole pixel coordinates
removeMinX = ceil(removePixelsROI(:, 1));
removeMaxX = round(removeMinX + removePixelsROI(:, 3));
removeMinY = ceil(removePixelsROI(:, 2));
removeMaxY = round(removeMinY + removePixelsROI(:, 4));
%Remove single, unconnected bounding boxes for binary image
for i = 1:size(removePixelsROI, 1)
%Set pixels to 0 inside bounding boxes that need to be removed
image(removeMinY(i):removeMaxY(i), removeMinX(i):removeMaxX(i)) = 0;
end
%Expand the bounding box vertically to fully contain the text's height
expansionValue = 2;
expandedY = filteredTextROI(:, 2) - expansionValue;
expandedH = filteredTextROI(:, 4) + (2 * expansionValue);
%Ensure that ROI is within bounds of the image
expandedY = max(expandedY, 1);
expandedH = min(expandedH, obj.height - expandedY);
%Create expanded bounding boxes in appropriate format
boxes = [filteredTextROI(:, 1), expandedY, filteredTextROI(:, 3), ...
expandedH];
end
function roi = orientationCorrection(obj, roi, letters)
%orientationCorrection Ensure that each word has a horizontal orientation
%Number of points used to estimate orientation
samplingPoints = 10;
%Get the centre of the bounding boxes
centreX = round(mean([letters(:, 1), letters(:, 1) + letters(:, 3)], 2));
centreY = round(mean([letters(:, 2), letters(:, 2) + letters(:, 4)], 2));
%Calculate line of best fit coefficients using the centre of letters
bestFit = polyfit(centreX, centreY, 1);
%Create linearly spaced vector of 10 sample points from 1 to width of
%the image
xPositions = linspace(1, size(roi, 2), samplingPoints);
%Estimate the values of y at the x values using the best fit coefficients
%to create a line of best fit
yPositions = polyval(bestFit, xPositions);
%Calculate the line of best fit's angle
orientation = atan2d(yPositions(samplingPoints) - yPositions(1), ...
xPositions(samplingPoints) - xPositions(1));
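%For example (illustrative), a best fit line with a rise of 10 px over a
%100 px run gives atan2d(10, 100) ~ 5.7 degrees, below the default
%7.5 degree threshold, so no rotation is applied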
%Don't correct the image if it is within the orientation threshold (default 7.5 degrees) of 0
%OCR is capable of reading letters most accurately at offsets below roughly 10 degrees
if (orientation > obj.orientationThreshold || orientation < -obj.orientationThreshold)
%Rotate the image by the angle using nearest neighbour interpolation
roi = imrotate(roi, orientation);
end
end
function text = characterRecognition(obj, image, bboxes)
%characterRecognition Recognise characters within bounding boxes
%Initialise the number of bounding boxes and pre-allocate a string
%array to hold the OCR result for each bounding box
numBboxes = size(bboxes, 1);
text = strings(numBboxes, 1);
%Loop through text lines
for i = 1:numBboxes
%Crop binary image using expanded bounding boxes
roi = imcrop(image, bboxes(i, :));
%Remove pixels touching the edge since they are probably not related to text
roi = imclearborder(roi);
%Get minimum Bounding Boxes for each letter
stats = regionprops(roi, 'BoundingBox');
%Check that there is more than one object in image
if (size(stats, 1) > 1)
%Convert to 4xN matrix for further operations
letters = vertcat(stats.BoundingBox);
roi = orientationCorrection(obj, roi, letters);
else
continue
end
%Perform OCR on the image using MATLAB's Tesseract-OCR 3.02 implementation
%Specifying 'Block' will ensure that it looks for one or more horizontal text lines
%Specifying the character set will reduce the chance of confusion with
%characters that cannot be found in dates
ocrOutput = ocr(roi, 'TextLayout', 'Block', 'CharacterSet', ...
'1234567890ABCDEFGHIJLMNOPRSTUVYabcdefghijlmnoprstuvy/.-:');
%Store the detected text
text(i) = ocrOutput.Text;
end
end
function dates = dateMatching(~, text)
%dateMatching Identify and isolate common date formats found within text
regexTextDate = '(\d{1,2})([\/\\\-\. ]|)(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)([\/\\\-\. ]|)((?:\d{2}){0,2})';
regexTextYear = '^(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)([\/\\\-\. ]|)(?:\d{2}){1,2}';
regexNumeric = '(\d{1,2})([\/\\\-\.])(\d{1,2})(\2)((?:\d{2}){1,2})';
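%For example (illustrative), these patterns match dates such as
%'25 Dec 2024' (regexTextDate), 'Dec 2024' (regexTextYear) and
%'25/12/24' (regexNumeric)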
%Remove newline characters from the end of the text
text = strip(text, newline);
%Perform case-insensitive regular expression matching for dates, returning
%a cell array of matching text
validTextDate = regexpi(text, regexTextDate, 'match');
validTextYear = regexpi(text, regexTextYear, 'match');
validNumeric = regexpi(text, regexNumeric, 'match');
%Concatenate matching text into a single string array
dates = string(vertcat(validTextDate{:}, validTextYear{:}, validNumeric{:}));
end
end
end
%{
TODO:
Could maybe do regionprops on the whole image then segment into regions
%}