From a3faf675c05cff6d7a9679307b71ba72a9648718 Mon Sep 17 00:00:00 2001 From: Belyaeva Oksana Date: Thu, 5 Oct 2023 18:09:11 +0300 Subject: [PATCH] TLDR-490 fixes after review --- dedoc/scripts/test_words_bbox_extraction.py | 8 +++--- .../test_api_format_pdf_with_text.py | 27 ++++++++++++++----- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/dedoc/scripts/test_words_bbox_extraction.py b/dedoc/scripts/test_words_bbox_extraction.py index e8f6ecf8..a2a07bcc 100644 --- a/dedoc/scripts/test_words_bbox_extraction.py +++ b/dedoc/scripts/test_words_bbox_extraction.py @@ -174,9 +174,10 @@ def test_table_word_extraction(self): image = self.__draw_word_annotations(image, word_annotations, angle=table_angle) cv2.imwrite(os.path.join(output_path, file_name.split('/')[-1]), image) - def test_document_pipeline_reader(self) -> None: + def test_document_image_reader(self) -> None: filename_to_parameters = { - "scanned/scan_orient_1.jpg": {} + "scanned/scan_orient_1.jpg": {}, + "skew_corrector/rotated_2.jpg": {} } output_path = os.path.join(self.output_path, "document_pipeline_readers") os.makedirs(output_path, exist_ok=True) @@ -188,6 +189,3 @@ def test_document_pipeline_reader(self) -> None: image = rotate_image(image, result["metadata"]["other_fields"].get("rotated_page_angles", [0.])[0]) image = self.__draw_word_annotations(image, word_annotations) cv2.imwrite(os.path.join(output_path, filename.split("/")[-1]), image) - - - diff --git a/tests/api_tests/test_api_format_pdf_with_text.py b/tests/api_tests/test_api_format_pdf_with_text.py index 5f266e5f..fa9ef429 100644 --- a/tests/api_tests/test_api_format_pdf_with_text.py +++ b/tests/api_tests/test_api_format_pdf_with_text.py @@ -15,14 +15,29 @@ def __filter_by_name(self, annotations: List[dict], name: str) -> List[dict]: def __get_annotation_names(self, annotations: List[dict]) -> List[str]: return [annotation["name"] for annotation in annotations] - def test_pdf_tables_uuid(self) -> None: - file_name = "example.pdf" - parameters = [dict(pdf_with_text_layer="true"), dict(pdf_with_text_layer="tabby"), dict(pdf_with_text_layer="false")] - for param in parameters: - result = self._send_request(file_name, param) + def __extract_node_with_annotation(self, tree: dict, node_id: str, ann_name: str) -> List[dict]: + node_with_annotation = self._get_by_tree_path(tree["content"]["structure"], node_id) + return self.__filter_by_name(node_with_annotation["annotations"], ann_name) + + def test_ref_tables(self) -> None: + result = self._send_request("example.pdf", dict(pdf_with_text_layer="true")) + tables_uids = [table["metadata"]["uid"] for table in result["content"]["tables"]] + self.assertEqual(len(tables_uids), 2) + ref0 = self.__extract_node_with_annotation(result, "0.2.2", "table")[0]["value"] + ref1 = self.__extract_node_with_annotation(result, "0.2.2.0", "table")[0]["value"] + self.assertEqual(ref0, tables_uids[0]) + self.assertEqual(ref1, tables_uids[1]) + + params = [dict(pdf_with_text_layer="tabby"), dict(pdf_with_text_layer="false")] + for param in params: + result = self._send_request("example.pdf", param) tables_uids = [table["metadata"]["uid"] for table in result["content"]["tables"]] self.assertEqual(len(tables_uids), 2) - self.assertTrue(tables_uids[0] != tables_uids[1]) + annotations = self.__extract_node_with_annotation(result, "0.2.2", "table") + ref0 = annotations[0]["value"] + ref1 = annotations[1]["value"] + self.assertEqual(ref0, tables_uids[0]) + self.assertEqual(ref1, tables_uids[1]) def test_pdf_with_text_style(self) -> None: file_name = "diff_styles.pdf"