From b61354b4212921c2b41dc3a54917f2a05dc01612 Mon Sep 17 00:00:00 2001 From: Belyaeva Oksana Date: Thu, 28 Sep 2023 17:00:28 +0300 Subject: [PATCH] ESL-470 fixed rotation operation of table word boxes rotates a table image and saving image.shape during rotation. It is important for word bounding box extraction --- .../table_recognizer/table_utils/img_processing.py | 5 +++-- dedoc/scripts/test_words_bbox_extraction.py | 10 ++++++---- dedoc/utils/image_utils.py | 3 ++- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_utils/img_processing.py b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_utils/img_processing.py index 9f85bc5d..b24c1a53 100644 --- a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_utils/img_processing.py +++ b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_utils/img_processing.py @@ -4,12 +4,12 @@ from typing import Any, List, Tuple import cv2 +import imutils import numpy as np from dedoc.config import get_config from dedoc.readers.pdf_reader.data_classes.tables.table_tree import TableTree from dedoc.readers.pdf_reader.data_classes.tables.table_type import TableTypeAdditionalOptions -from dedoc.utils.image_utils import rotate_image logger = get_config().get("logger", logging.getLogger()) logger = logger if logger else logging.getLogger("TableRecognizer.detect_tables_by_contours") @@ -17,13 +17,14 @@ def rotate_with_threshold(img: np.ndarray, angle: float, threshold: float = None, *, config: dict) -> np.ndarray: + """Rotates a table image while preserving image.shape during rotation.
It is important for word bounding box extraction""" if threshold is None: threshold = config["rotate_threshold"] rotated = img if abs(angle) > threshold: if config.get("debug_mode", False): logger.debug("rotated image") - rotated = rotate_image(img, angle) + rotated = imutils.rotate(img, angle) return rotated diff --git a/dedoc/scripts/test_words_bbox_extraction.py b/dedoc/scripts/test_words_bbox_extraction.py index 1f6394f7..6515e6fe 100644 --- a/dedoc/scripts/test_words_bbox_extraction.py +++ b/dedoc/scripts/test_words_bbox_extraction.py @@ -118,7 +118,7 @@ def draw_word_annotations(self, image: np.ndarray, word_annotations: List[BboxWi p2 = (int((bbox["x_top_left"] + bbox["width"]) * bbox["page_width"]), int((bbox["y_top_left"] + bbox["height"]) * bbox["page_height"])) if angle != 0.0: - p1 = self.rotate_coordinate(p1[0], p1[1], x_c, y_c, angle) # TODO x_c, y_c нужен четкий + p1 = self.rotate_coordinate(p1[0], p1[1], x_c, y_c, angle) p2 = self.rotate_coordinate(p2[0], p2[1], x_c, y_c, angle) cv2.rectangle(image, p1, p2, (0, 255, 0) if ann.text_type == "typewritten" else (255, 0, 0)) @@ -155,10 +155,12 @@ def test_tabby_document(self): cv2.imwrite(os.path.join(output_path, f"{os.path.split(file_name)[1]}.png"), image) def test_table_word_extraction(self): - output_path = os.path.join(self.output_path) + output_path = os.path.join(self.output_path, 'tables') os.makedirs(output_path, exist_ok=True) file_names = ["tables/example_with_table5.png", "tables/example_with_table3.png", "tables/example_with_table4.jpg", - "tables/example_with_table6.png", "tables/example_with_table_horizontal_union.jpg"] + "tables/example_with_table6.png", "tables/example_with_table_horizontal_union.jpg", + "scanned/orient_1.png"] + for file_name in file_names: result = self._send_request(file_name, data=dict()) table0 = result["content"]["tables"][0] @@ -170,5 +172,5 @@ def test_table_word_extraction(self): image = rotate_image(image, page_angle) image = self.draw_word_annotations(image, 
word_annotations, angle=table_angle) - cv2.imwrite(os.path.join(output_path, file_name), image) + cv2.imwrite(os.path.join(output_path, file_name.split('/')[-1]), image) diff --git a/dedoc/utils/image_utils.py b/dedoc/utils/image_utils.py index f7bd62ad..1628064b 100644 --- a/dedoc/utils/image_utils.py +++ b/dedoc/utils/image_utils.py @@ -39,7 +39,8 @@ def get_bbox_from_image(image: Image, bbox: BBox, resize: Tuple[int, int] = (300 def rotate_image(image: np.ndarray, angle: float, color_bound: Tuple[int, int, int] = (255, 255, 255)) -> np.ndarray: """ - Rotates an image (angle in degrees) and expands image to avoid cropping + Rotates an image (angle in degrees) and expands the image to avoid cropping (the added bounds are filled with color_bound). + Changes the width and height of the image (image.shape != rotated_image.shape). """ height, width = image.shape[:2] image_center = (width / 2, height / 2)