Fix coordinate conversion when drawing bounding boxes

ispras · Sep 19, 2023 · 5df1964 · 5df1964
1 parent 3063f0d
commit 5df1964
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 9 deletions.
diff --git a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdfminer_reader/pdfminer_utils.py b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdfminer_reader/pdfminer_utils.py
@@ -40,10 +40,10 @@ def draw_annotation(image: np.ndarray, annotations: List[BBoxAnnotation]) -> Non
 
 
 def convert_coordinates_pdf_to_image(lobj: LTContainer, k_w: float, k_h: float, height_page: int) -> BBox:
-    x0 = int(lobj.x0)
-    x1 = int(lobj.x1)
-    y0 = int((height_page - lobj.y1))
-    y1 = int((height_page - lobj.y0))
+    x0 = int(lobj.x0 * k_w)
+    x1 = int(lobj.x1 * k_w)
+    y0 = int((height_page - lobj.y1) * k_h)
+    y1 = int((height_page - lobj.y0) * k_h)
 
     return BBox(x0, y0, x1 - x0, y1 - y0)
 

diff --git a/dedoc/scripts/test_words_bbox_extraction.py b/dedoc/scripts/test_words_bbox_extraction.py
@@ -85,14 +85,14 @@ def __normalize_font_thickness(self, image: np.ndarray) -> Tuple[float, int]:
     def __draw_word_annotations(self, image: np.ndarray, word_annotations: List[BboxWithConfsType]) -> np.ndarray:
 
         font_scale, thickness = self.__normalize_font_thickness(image)
-        page_height, page_width, *_ = image.shape
+
         for ann in word_annotations:
             bbox = json.loads(ann.bbox)
-            p1 = (int(bbox["x_top_left"] * page_width), int(bbox["y_top_left"] * page_height))
-            p2 = (int((bbox["x_top_left"] + bbox["width"]) * page_width), int((bbox["y_top_left"] + bbox["height"]) * page_height))
+            p1 = (int(bbox["x_top_left"] * bbox["page_width"]), int(bbox["y_top_left"] * bbox["page_height"]))
+            p2 = (int((bbox["x_top_left"] + bbox["width"]) * bbox["page_width"]), int((bbox["y_top_left"] + bbox["height"]) * bbox["page_height"]))
             cv2.rectangle(image, p1, p2, (0, 255, 0) if ann.text_type == "typewritten" else (255, 0, 0))
             text = ",".join(ann.confs) if ann.confs != [] else "None"
-            cv2.putText(image, text, (int(bbox["x_top_left"] * page_width), int(bbox["y_top_left"] * page_height)),
+            cv2.putText(image, text, (int(bbox["x_top_left"] * bbox["page_width"]), int(bbox["y_top_left"] * bbox["page_height"])),
                         cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 255), thickness)
         return image
 
@@ -113,8 +113,12 @@ def test_tabby_document(self):
         file_name = "pdf_with_text_layer/english_doc.pdf"
         result = self._send_request(file_name, data=dict(pdf_with_text_layer="tabby"))
         structure = result["content"]["structure"]
-        word_annotations = self.__get_words_annotation(structure)
         image = np.asarray(get_page_image(self._get_abs_path(file_name), 0))
+        word_annotations = self.__get_words_annotation(structure)
+        ann = word_annotations[0]
+        if ann is not None:
+            bbox = json.loads(ann.bbox)
+            image = cv2.resize(image, dsize=(bbox["page_width"], bbox["page_height"]), interpolation=cv2.INTER_CUBIC)
 
         image = self.__draw_word_annotations(image, word_annotations)
         cv2.imwrite(os.path.join(output_path, f"{os.path.split(file_name)[1]}.png"), image)