Skip to content

Commit

Permalink
TLDR-490 changed uuid1 to uuid4; fixed bug in tabby's table uuid (#345)
Browse files Browse the repository at this point in the history
* TLDR-490 changed uuid1 to uuid4; fixed bug in tabby's table uuid

* TLDR-490 fixes after review
  • Loading branch information
oksidgy authored and sunveil committed Oct 11, 2023
1 parent fc3effc commit 81daef9
Showing 1 changed file with 6 additions and 5 deletions.
11 changes: 6 additions & 5 deletions dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_tabby_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import math
import os
import subprocess
import uuid
from typing import List, Optional, Tuple

import numpy as np
Expand Down Expand Up @@ -118,19 +119,19 @@ def __extract(self, path: str, start_page: int = None, end_page: int = None) ->
page_lines = self.__get_lines_with_location(page, file_hash)
if page_lines:
all_lines.extend(page_lines)
page_tables, table_on_images = self.__get_tables(page, file_hash)
page_tables, table_on_images = self.__get_tables(page)
assert len(page_tables) == len(table_on_images)
if page_tables:
all_tables.extend(page_tables)
all_tables_on_images.extend(table_on_images)

return all_lines, all_tables, all_tables_on_images

def __get_tables(self, page: dict, file_hash: str) -> Tuple[List[Table], List[ScanTable]]:
def __get_tables(self, page: dict) -> Tuple[List[Table], List[ScanTable]]:
tables = []
tables_on_image = []
page_number = page["number"]
for table_num, table in enumerate(page["tables"]):
for table in page["tables"]:
x_top_left = table["x_top_left"]
y_top_left = table["y_top_left"]
x_bottom_right = x_top_left + table["width"]
Expand All @@ -152,8 +153,8 @@ def __get_tables(self, page: dict, file_hash: str) -> Tuple[List[Table], List[Sc

result_cells.append(result_row)
table_bbox = BBox.from_two_points((x_top_left, y_top_left), (x_bottom_right, y_bottom_right)) # noqa TODO add table location into TableMetadata
tables.append(Table(cells=result_cells, metadata=TableMetadata(page_id=page_number)))
table_name = file_hash + str(page_number) + str(table_num)
table_name = str(uuid.uuid4())
tables.append(Table(cells=result_cells, metadata=TableMetadata(page_id=page_number, uid=table_name)))
tables_on_image.append(ScanTable(page_number=page_number, matrix_cells=None, bbox=table_bbox, name=table_name, order=order))

return tables, tables_on_image
Expand Down

0 comments on commit 81daef9

Please sign in to comment.