Skip to content

Commit

Permalink
TLDR-490 fixes after review
Browse files Browse the repository at this point in the history
  • Loading branch information
oksidgy committed Oct 6, 2023
1 parent 178c1cd commit a3faf67
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 11 deletions.
8 changes: 3 additions & 5 deletions dedoc/scripts/test_words_bbox_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,10 @@ def test_table_word_extraction(self):
image = self.__draw_word_annotations(image, word_annotations, angle=table_angle)
cv2.imwrite(os.path.join(output_path, file_name.split('/')[-1]), image)

def test_document_pipeline_reader(self) -> None:
def test_document_image_reader(self) -> None:
filename_to_parameters = {
"scanned/scan_orient_1.jpg": {}
"scanned/scan_orient_1.jpg": {},
"skew_corrector/rotated_2.jpg": {}
}
output_path = os.path.join(self.output_path, "document_pipeline_readers")
os.makedirs(output_path, exist_ok=True)
Expand All @@ -188,6 +189,3 @@ def test_document_pipeline_reader(self) -> None:
image = rotate_image(image, result["metadata"]["other_fields"].get("rotated_page_angles", [0.])[0])
image = self.__draw_word_annotations(image, word_annotations)
cv2.imwrite(os.path.join(output_path, filename.split("/")[-1]), image)



27 changes: 21 additions & 6 deletions tests/api_tests/test_api_format_pdf_with_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,29 @@ def __filter_by_name(self, annotations: List[dict], name: str) -> List[dict]:
def __get_annotation_names(self, annotations: List[dict]) -> List[str]:
    """
    Collect the "name" field of every annotation, preserving input order.

    :param annotations: annotation dicts; each one is indexed by the "name" key
    :return: list with one name per annotation (empty for empty input)
    """
    names = []
    for item in annotations:
        names.append(item["name"])
    return names

def test_pdf_tables_uuid(self) -> None:
file_name = "example.pdf"
parameters = [dict(pdf_with_text_layer="true"), dict(pdf_with_text_layer="tabby"), dict(pdf_with_text_layer="false")]
for param in parameters:
result = self._send_request(file_name, param)
def __extract_node_with_annotation(self, tree: dict, node_id: str, ann_name: str) -> List[dict]:
    """
    Return the annotations named `ann_name` attached to one node of a result tree.

    :param tree: result dict; the node is looked up inside tree["content"]["structure"]
    :param node_id: tree path of the node, passed as-is to self._get_by_tree_path
    :param ann_name: annotation name passed as-is to self.__filter_by_name
    :return: whatever self.__filter_by_name yields for the node's "annotations" list
    """
    structure = tree["content"]["structure"]
    node = self._get_by_tree_path(structure, node_id)
    node_annotations = node["annotations"]
    return self.__filter_by_name(node_annotations, ann_name)

def test_ref_tables(self) -> None:
    """
    Check that "table" annotations in the document tree reference the uids of the extracted tables.

    For pdf_with_text_layer="true" the two references are read from nodes "0.2.2" and "0.2.2.0";
    for "tabby" and "false" both references are read from node "0.2.2".
    """
    file_name = "example.pdf"

    result = self._send_request(file_name, dict(pdf_with_text_layer="true"))
    tables_uids = [table["metadata"]["uid"] for table in result["content"]["tables"]]
    self.assertEqual(len(tables_uids), 2)
    ref0 = self.__extract_node_with_annotation(result, "0.2.2", "table")[0]["value"]
    ref1 = self.__extract_node_with_annotation(result, "0.2.2.0", "table")[0]["value"]
    self.assertEqual(ref0, tables_uids[0])
    self.assertEqual(ref1, tables_uids[1])

    params = [dict(pdf_with_text_layer="tabby"), dict(pdf_with_text_layer="false")]
    for param in params:
        # subTest reports each reader mode separately, so a failure in the first mode
        # does not hide the outcome of the second one.
        with self.subTest(**param):
            result = self._send_request(file_name, param)
            tables_uids = [table["metadata"]["uid"] for table in result["content"]["tables"]]
            self.assertEqual(len(tables_uids), 2)
            # assertNotEqual prints both values on failure, unlike assertTrue(a != b).
            self.assertNotEqual(tables_uids[0], tables_uids[1])
            annotations = self.__extract_node_with_annotation(result, "0.2.2", "table")
            # Fail with a readable message instead of an IndexError when a reference is missing.
            self.assertGreaterEqual(len(annotations), 2)
            ref0 = annotations[0]["value"]
            ref1 = annotations[1]["value"]
            self.assertEqual(ref0, tables_uids[0])
            self.assertEqual(ref1, tables_uids[1])

def test_pdf_with_text_style(self) -> None:
file_name = "diff_styles.pdf"
Expand Down

0 comments on commit a3faf67

Please sign in to comment.