diff --git a/dedoc/readers/pdf_reader/pdf_image_reader/pdf_image_reader.py b/dedoc/readers/pdf_reader/pdf_image_reader/pdf_image_reader.py index 3ee5a5d4..c4b8f454 100644 --- a/dedoc/readers/pdf_reader/pdf_image_reader/pdf_image_reader.py +++ b/dedoc/readers/pdf_reader/pdf_image_reader/pdf_image_reader.py @@ -5,7 +5,7 @@ import cv2 import numpy as np -from dedocutils.preprocessing import AdaptiveBinarizer +from dedocutils.preprocessing import AdaptiveBinarizer, SkewCorrector from dedoc.config import get_config from dedoc.extensions import recognized_extensions, recognized_mimes @@ -15,7 +15,6 @@ from dedoc.readers.pdf_reader.pdf_base_reader import ParametersForParseDoc, PdfBaseReader from dedoc.readers.pdf_reader.pdf_image_reader.columns_orientation_classifier.columns_orientation_classifier import ColumnsOrientationClassifier from dedoc.readers.pdf_reader.pdf_image_reader.ocr.ocr_line_extractor import OCRLineExtractor -from dedoc.readers.pdf_reader.pdf_image_reader.scan_rotator import ScanRotator from dedoc.train_dataset.train_dataset_utils import save_page_with_bbox from dedoc.utils import supported_image_types @@ -47,7 +46,7 @@ def __init__(self, *, config: dict) -> None: :param config: configuration of the reader, e.g. logger for logging """ super().__init__(config=config) - self.scan_rotator = ScanRotator(config=config) + self.scew_corrector = SkewCorrector() self.column_orientation_classifier = ColumnsOrientationClassifier(on_gpu=False, checkpoint_path=get_config()["resources_path"], config=config) @@ -77,7 +76,7 @@ def _process_one_page(self, # --- Step 2: do binarization --- if parameters.need_binarization: - rotated_image = self.binarizer.binarize(rotated_image) + rotated_image = self.binarizer.preprocess(rotated_image) if self.config.get("debug_mode"): cv2.imwrite(os.path.join(self.config["path_debug"], f"{datetime.now().strftime('%H-%M-%S')}_result_binarization.jpg"), rotated_image) @@ -121,7 +120,9 @@ def _detect_column_count_and_orientation(self, image: np.ndarray, parameters: Pa angle = angle if parameters.document_orientation is None else 0 self.logger.info(f"Final orientation angle = {angle}, is_one_column_document = {is_one_column_document}") - rotated_image, result_angle = self.scan_rotator.auto_rotate(image, angle) + rotated_image, result_angle = self.scew_corrector.preprocess(image, {"orientation_angle": angle}) + result_angle = result_angle["rotated_angle"] + if self.config.get("debug_mode"): img_path = os.path.join(self.config["path_debug"], f"{datetime.now().strftime('%H-%M-%S')}_result_orientation.jpg") self.logger.info(f"Save image to {img_path}") diff --git a/dedoc/readers/pdf_reader/pdf_image_reader/scan_rotator.py b/dedoc/readers/pdf_reader/pdf_image_reader/scan_rotator.py deleted file mode 100644 index 897c26a0..00000000 --- a/dedoc/readers/pdf_reader/pdf_image_reader/scan_rotator.py +++ /dev/null @@ -1,47 +0,0 @@ -import logging - -import cv2 -import numpy as np - -from dedoc.utils.image_utils import rotate_image - - -class ScanRotator: - """ - Class corrects document's skew. - """ - def __init__(self, *, config: dict) -> None: - self.delta = 1 # step - self.limit = 45 # max angle - self.config = config - self.logger = config.get("logger", logging.getLogger()) - - def determine_score(self, arr: np.ndarray, angle: int) -> (np.ndarray, float): - data = rotate_image(arr, angle) - histogram = np.sum(data, axis=1, dtype=float) - score = np.sum((histogram[1:] - histogram[:-1]) ** 2, dtype=float) - return score - - def auto_rotate(self, image: np.ndarray, orientation_angle: float = 0.) -> (np.ndarray, float): - if orientation_angle: - image = rotate_image(image, orientation_angle) - - gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1] - - angles = np.arange(-self.limit, self.limit + self.delta, self.delta) - scores = [self.determine_score(thresh, angle) for angle in angles] - - max_idx = scores.index(max(scores)) - if max_idx >= 2 and scores[max_idx - 2] > scores[max_idx] * 0.98: - # if there are 2 approximately equal scores +- 1 step by max_score it will utilize angle between them - best_angle = angles[max_idx - 1] - elif max_idx < len(scores) - 2 and scores[max_idx + 2] > scores[max_idx] * 0.98: - best_angle = angles[max_idx + 1] - else: - best_angle = angles[scores.index(max(scores))] - - rotated = rotate_image(image, best_angle) - if self.config.get("debug_mode"): - self.logger.debug(f"Best angle: {best_angle}, orientation angle: {orientation_angle}") - return rotated, best_angle + orientation_angle diff --git a/tests/data/scan_rotator/rotated_1.jpg b/tests/data/skew_corrector/rotated_1.jpg similarity index 100% rename from tests/data/scan_rotator/rotated_1.jpg rename to tests/data/skew_corrector/rotated_1.jpg diff --git a/tests/data/scan_rotator/rotated_2.jpg b/tests/data/skew_corrector/rotated_2.jpg similarity index 100% rename from tests/data/scan_rotator/rotated_2.jpg rename to tests/data/skew_corrector/rotated_2.jpg diff --git a/tests/data/scan_rotator/rotated_3.jpg b/tests/data/skew_corrector/rotated_3.jpg similarity index 100% rename from tests/data/scan_rotator/rotated_3.jpg rename to tests/data/skew_corrector/rotated_3.jpg diff --git a/tests/data/scan_rotator/rotated_4.jpg b/tests/data/skew_corrector/rotated_4.jpg similarity index 100% rename from tests/data/scan_rotator/rotated_4.jpg rename to tests/data/skew_corrector/rotated_4.jpg diff --git a/tests/data/scan_rotator/short_lines-1.png b/tests/data/skew_corrector/short_lines-1.png similarity index 100% rename from tests/data/scan_rotator/short_lines-1.png rename to tests/data/skew_corrector/short_lines-1.png diff --git a/tests/data/scan_rotator/short_lines-2.png b/tests/data/skew_corrector/short_lines-2.png similarity index 100% rename from tests/data/scan_rotator/short_lines-2.png rename to tests/data/skew_corrector/short_lines-2.png diff --git a/tests/data/scan_rotator/short_lines-3.png b/tests/data/skew_corrector/short_lines-3.png similarity index 100% rename from tests/data/scan_rotator/short_lines-3.png rename to tests/data/skew_corrector/short_lines-3.png diff --git a/tests/data/scan_rotator/short_lines-4.png b/tests/data/skew_corrector/short_lines-4.png similarity index 100% rename from tests/data/scan_rotator/short_lines-4.png rename to tests/data/skew_corrector/short_lines-4.png diff --git a/tests/data/scan_rotator/short_lines-5.png b/tests/data/skew_corrector/short_lines-5.png similarity index 100% rename from tests/data/scan_rotator/short_lines-5.png rename to tests/data/skew_corrector/short_lines-5.png diff --git a/tests/unit_tests/test_format_pdf_reader.py b/tests/unit_tests/test_format_pdf_reader.py index 5ab863d8..4ed742c5 100644 --- a/tests/unit_tests/test_format_pdf_reader.py +++ b/tests/unit_tests/test_format_pdf_reader.py @@ -6,11 +6,11 @@ from typing import List import cv2 +from dedocutils.preprocessing import SkewCorrector from dedoc.data_structures.line_with_meta import LineWithMeta from dedoc.readers.pdf_reader.pdf_image_reader.columns_orientation_classifier.columns_orientation_classifier import ColumnsOrientationClassifier from dedoc.readers.pdf_reader.pdf_image_reader.pdf_image_reader import PdfImageReader -from dedoc.readers.pdf_reader.pdf_image_reader.scan_rotator import ScanRotator from dedoc.readers.pdf_reader.pdf_txtlayer_reader.pdf_txtlayer_reader import PdfTxtlayerReader from tests.test_utils import get_test_config @@ -27,8 +27,8 @@ def _split_lines_on_pages(self, lines: List[LineWithMeta]) -> List[List[str]]: return lines_by_page def test_scan_rotator(self) -> None: - scan_rotator = ScanRotator(config=get_test_config()) - imgs_path = [f"../data/scan_rotator/rotated_{i}.jpg" for i in range(1, 5)] + skew_corrector = SkewCorrector() + imgs_path = [f"../data/skew_corrector/rotated_{i}.jpg" for i in range(1, 5)] angles = [0.061732858955328755, -0.017535263190370427, 0.12228411148417097, 0] for i in range(len(imgs_path)): @@ -36,11 +36,12 @@ def test_scan_rotator(self) -> None: image = cv2.imread(path) _, orientation = self.orientation_classifier.predict(image) angle_predict = self.orientation_classifier.classes[2 + orientation] - rotated, angle = scan_rotator.auto_rotate(image, angle_predict) + rotated, angle = skew_corrector.preprocess(image, {"orientation_angle": angle_predict}) + angle = angle["rotated_angle"] self.assertAlmostEqual(angle, angles[i], delta=8) def test_scan_orientation(self) -> None: - scan_rotator = ScanRotator(config=get_test_config()) + skew_corrector = SkewCorrector() imgs_path = [f"../data/scanned/orient_{i}.png"for i in range(1, 9)] angles = [90.0, 90.0, 270.0, 270.0, 180.0, 270.0, 180.0, 270.0] max_delta = 10.0 @@ -48,7 +49,8 @@ def test_scan_orientation(self) -> None: path = os.path.join(os.path.dirname(__file__), imgs_path[i]) image = cv2.imread(path) _, angle_predict = self.orientation_classifier.predict(image) - rotated, angle = scan_rotator.auto_rotate(image, angle_predict) + rotated, angle = skew_corrector.preprocess(image, {"orientation_angle": angle_predict}) + angle = angle["rotated_angle"] self.assertTrue(abs(angle - angles[i]) < max_delta) def test_header_footer_search(self) -> None: diff --git a/tests/unit_tests/test_module_scan_rotator.py b/tests/unit_tests/test_module_scan_rotator.py index 8de869dc..89a52259 100644 --- a/tests/unit_tests/test_module_scan_rotator.py +++ b/tests/unit_tests/test_module_scan_rotator.py @@ -2,21 +2,20 @@ import unittest import cv2 - -from dedoc.readers.pdf_reader.pdf_image_reader.scan_rotator import ScanRotator -from tests.test_utils import get_test_config +from dedocutils.preprocessing import SkewCorrector class TestScanRotator(unittest.TestCase): - rotator = ScanRotator(config=get_test_config()) + skew_corrector = SkewCorrector() def _get_abs_path(self, file_name: str) -> str: data_directory_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data")) - return os.path.join(data_directory_path, "scan_rotator", file_name) + return os.path.join(data_directory_path, "skew_corrector", file_name) def test_documents_with_short_lines(self) -> None: for i in range(1, 6): file_name = f"short_lines-{i}.png" img = cv2.imread(self._get_abs_path(file_name)) - image, angle = self.rotator.auto_rotate(img) + image, angle = self.skew_corrector.preprocess(img) + angle = angle['orientation_angle'] self.assertEqual(0, angle)