Skip to content

Commit

Permalink
TLDR-483 fixed box extraction from cropped cells (#343)
Browse files Browse the repository at this point in the history
  • Loading branch information
oksidgy authored Oct 2, 2023
1 parent 58ce902 commit 840bed2
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 5 deletions.
3 changes: 2 additions & 1 deletion dedoc/data_structures/line_with_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@

from dedoc.data_structures.annotation import Annotation
from dedoc.data_structures.line_metadata import LineMetadata
from dedoc.data_structures.serializable import Serializable
from dedoc.utils.annotation_merger import AnnotationMerger


class LineWithMeta(Sized):
class LineWithMeta(Sized, Serializable):
"""
Structural unit of document - line (or paragraph) of text and its metadata.
One LineWithMeta should not contain text from different logical parts of the document
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ def get_cells_text(self, page_image: np.ndarray, tree_nodes: List["TableTree"],
word.bbox.y_top_left -= chunk_boxes[chunk_index].y_top_left
word.bbox.x_top_left -= chunk_boxes[chunk_index].x_top_left
# convert to absolute coordinates on src_image (inside src_image)
word.bbox.y_top_left += nodes_batch[chunk_index].cell_box.y_top_left
word.bbox.x_top_left += nodes_batch[chunk_index].cell_box.x_top_left
word.bbox.y_top_left += nodes_batch[chunk_index].crop_text_box.y_top_left
word.bbox.x_top_left += nodes_batch[chunk_index].crop_text_box.x_top_left

originalbox_to_fastocrbox[nodes_batch[chunk_index].cell_box].append(line.words)

Expand Down
3 changes: 1 addition & 2 deletions dedoc/scripts/test_words_bbox_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def test_table_word_extraction(self):
os.makedirs(output_path, exist_ok=True)
file_names = ["tables/example_with_table5.png", "tables/example_with_table3.png", "tables/example_with_table4.jpg",
"tables/example_with_table6.png", "tables/example_with_table_horizontal_union.jpg",
"scanned/orient_1.png"]
"scanned/orient_1.png", "tables/rotated_table.png"]

for file_name in file_names:
result = self._send_request(file_name, data=dict())
Expand All @@ -173,4 +173,3 @@ def test_table_word_extraction(self):

image = self.draw_word_annotations(image, word_annotations, angle=table_angle)
cv2.imwrite(os.path.join(output_path, file_name.split('/')[-1]), image)

Binary file added tests/data/tables/rotated_table.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 840bed2

Please sign in to comment.