Skip to content

Commit

Permalink
TLDR-474 remove is_inserted attribute
Browse files (browse the repository at this point in the history)
  • Loading branch information
NastyBoget committed Sep 28, 2023
1 parent 78d8f6a commit 436bb36
Show file tree
Hide file tree
Showing 11 changed files with 53 additions and 64 deletions.
1 change: 0 additions & 1 deletion dedoc/api/static/html_eng/format_description.html
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ <h4 id="TableMetadata"> TableMetadata</h4>
<ol>
<li><strong>uid</strong>: <a> str (required field) </a> - unique identifier. </li>
<li><strong>page_id</strong>: <a> integer </a> (optional field) - page number on which the table begins. Can be null.</li>
<li><strong>is_inserted</strong>: <a> bool </a> (optional field) - was table inserted into document.</li>
</ol>

<h3 id="TreeNode"> TreeNode</h3>
Expand Down
1 change: 0 additions & 1 deletion dedoc/api/static/html_rus/format_description.html
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ <h4 id="TableMetadata"> TableMetadata. Метаинформация таблиц
<ol>
<li><strong>uid</strong>: <a> str (обязательное поле) </a> - уникальный идентификатор таблицы.</li>
<li><strong>page_id</strong>: <a> int </a> (необязательное поле) - номер страницы, на которой начинается таблица.</li>
<li><strong>is_inserted</strong>: <a> bool </a> (необязательное поле) - была ли таблица встроена в тело документа.</li>
</ol>

<h3 id="TreeNode"> TreeNode. Древовидная структура документа.</h3>
Expand Down
6 changes: 1 addition & 5 deletions dedoc/data_structures/table_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,20 @@ class TableMetadata(Serializable):
"""
This class holds the information about table unique identifier, rotation angle (if table has been rotated - for images) and so on.
"""
def __init__(self, page_id: Optional[int], uid: Optional[str] = None, is_inserted: bool = False, rotated_angle: float = 0.0) -> None:
def __init__(self, page_id: Optional[int], uid: Optional[str] = None, rotated_angle: float = 0.0) -> None:
"""
:param page_id: number of the page where table starts
:param uid: unique identifier of the table
:param is_inserted: indicator if table was already inserted into paragraphs list
:param rotated_angle: value of the rotation angle by which the table was rotated during recognition
"""
self.page_id = page_id
self.uid = str(uuid.uuid1()) if not uid else uid
self.is_inserted = is_inserted
self.rotated_angle = rotated_angle

def to_dict(self) -> dict:
res = OrderedDict()
res["uid"] = self.uid
res["page_id"] = self.page_id
res["is_inserted"] = self.is_inserted
res["rotated_angle"] = self.rotated_angle
return res

Expand All @@ -36,6 +33,5 @@ def get_api_dict(api: Api) -> Model:
return api.model("TableMetadata", {
"page_id": fields.Integer(readonly=False, description="table start page number"),
"uid": fields.String(description="table unique id"),
"is_inserted": fields.Boolean(description="was the table inserted into document body"),
"rotated_angle": fields.Float(readonly=False, description="At what angle should the table be rotated to use boxes")
})
2 changes: 1 addition & 1 deletion dedoc/readers/docx_reader/data_structures/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def to_table(self) -> Table:
result_row.append(cell)
result_cells_with_meta.append(result_row)

return Table(cells=result_cells_with_meta, metadata=TableMetadata(page_id=None, uid=self.uid, is_inserted=False))
return Table(cells=result_cells_with_meta, metadata=TableMetadata(page_id=None, uid=self.uid))

def __get_cell_text(self, cell: Tag) -> str:
cell_text = ""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def __get_tables(self, page: dict, file_hash: str) -> Tuple[List[Table], List[Sc

result_cells.append(result_row)
table_bbox = BBox.from_two_points((x_top_left, y_top_left), (x_bottom_right, y_bottom_right)) # noqa TODO add table location into TableMetadata
tables.append(Table(cells=result_cells, metadata=TableMetadata(page_id=page_number, is_inserted=False)))
tables.append(Table(cells=result_cells, metadata=TableMetadata(page_id=page_number)))
table_name = file_hash + str(page_number) + str(table_num)
tables_on_image.append(ScanTable(page_number=page_number, matrix_cells=None, bbox=table_bbox, name=table_name, order=order))

Expand Down
13 changes: 6 additions & 7 deletions docs/source/_static/json_format_examples/basic_example.json
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@
"start": 0,
"end": 14,
"name": "attachment",
"value": "attach_fa1143ae-5d3c-11ee-b518-0242ac120002"
"value": "attach_75af2486-5df1-11ee-bfc1-0242ac120002"
}
],
"metadata": {
Expand Down Expand Up @@ -420,20 +420,19 @@
"metadata": {
"uid": "3a327789721e09b3fa6fd9560f3ee263",
"page_id": null,
"is_inserted": false,
"rotated_angle": 0.0
}
}
]
},
"metadata": {
"uid": "doc_uid_auto_fa1f6786-5d3c-11ee-b518-0242ac120002",
"uid": "doc_uid_auto_75c93394-5df1-11ee-bfc1-0242ac120002",
"file_name": "example_return_format.docx",
"temporary_file_name": "1695822696_268.docx",
"temporary_file_name": "1695900213_314.docx",
"size": 21270,
"modified_time": 1695822696,
"created_time": 1695822696,
"access_time": 1695822696,
"modified_time": 1695900213,
"created_time": 1695900213,
"access_time": 1695900213,
"file_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"document_subject": "",
"keywords": "",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@
"start": 0,
"end": 14,
"name": "attachment",
"value": "attach_fa23fd78-5d3c-11ee-b518-0242ac120002"
"value": "attach_75d13b70-5df1-11ee-bfc1-0242ac120002"
}
],
"metadata": {
Expand Down Expand Up @@ -504,20 +504,19 @@
"metadata": {
"uid": "3a327789721e09b3fa6fd9560f3ee263",
"page_id": null,
"is_inserted": false,
"rotated_angle": 0.0
}
}
]
},
"metadata": {
"uid": "doc_uid_auto_fa309d08-5d3c-11ee-b518-0242ac120002",
"uid": "doc_uid_auto_75e45e94-5df1-11ee-bfc1-0242ac120002",
"file_name": "example_return_format.docx",
"temporary_file_name": "1695822697_827.docx",
"temporary_file_name": "1695900214_259.docx",
"size": 21270,
"modified_time": 1695822697,
"created_time": 1695822697,
"access_time": 1695822697,
"modified_time": 1695900213,
"created_time": 1695900213,
"access_time": 1695900214,
"file_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"document_subject": "",
"keywords": "",
Expand Down
23 changes: 11 additions & 12 deletions docs/source/_static/json_format_examples/with_attachments.json
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@
"start": 0,
"end": 14,
"name": "attachment",
"value": "attach_fa355abe-5d3c-11ee-b518-0242ac120002"
"value": "attach_75ea598e-5df1-11ee-bfc1-0242ac120002"
}
],
"metadata": {
Expand Down Expand Up @@ -420,20 +420,19 @@
"metadata": {
"uid": "3a327789721e09b3fa6fd9560f3ee263",
"page_id": null,
"is_inserted": false,
"rotated_angle": 0.0
}
}
]
},
"metadata": {
"uid": "doc_uid_auto_fa4285e0-5d3c-11ee-b518-0242ac120002",
"uid": "doc_uid_auto_75fac01c-5df1-11ee-bfc1-0242ac120002",
"file_name": "example_return_format.docx",
"temporary_file_name": "1695822697_953.docx",
"temporary_file_name": "1695900214_51.docx",
"size": 21270,
"modified_time": 1695822697,
"created_time": 1695822697,
"access_time": 1695822697,
"modified_time": 1695900214,
"created_time": 1695900214,
"access_time": 1695900214,
"file_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"document_subject": "",
"keywords": "",
Expand Down Expand Up @@ -476,13 +475,13 @@
"tables": []
},
"metadata": {
"uid": "attach_fa355abe-5d3c-11ee-b518-0242ac120002",
"uid": "attach_75ea598e-5df1-11ee-bfc1-0242ac120002",
"file_name": "image1.png",
"temporary_file_name": "1695822697_181.png",
"temporary_file_name": "1695900214_864.png",
"size": 14874,
"modified_time": 1695822697,
"created_time": 1695822697,
"access_time": 1695822697,
"modified_time": 1695900214,
"created_time": 1695900214,
"access_time": 1695900214,
"file_type": "image/png",
"other_fields": {}
},
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@
"start": 0,
"end": 14,
"name": "attachment",
"value": "attach_fa5c54ac-5d3c-11ee-b518-0242ac120002"
"value": "attach_7619127e-5df1-11ee-bfc1-0242ac120002"
}
],
"metadata": {
Expand Down Expand Up @@ -420,20 +420,19 @@
"metadata": {
"uid": "3a327789721e09b3fa6fd9560f3ee263",
"page_id": null,
"is_inserted": false,
"rotated_angle": 0.0
}
}
]
},
"metadata": {
"uid": "doc_uid_auto_fa7fdbc0-5d3c-11ee-b518-0242ac120002",
"uid": "doc_uid_auto_7630e7b4-5df1-11ee-bfc1-0242ac120002",
"file_name": "example_return_format.docx",
"temporary_file_name": "1695822697_469.docx",
"temporary_file_name": "1695900214_402.docx",
"size": 21270,
"modified_time": 1695822697,
"created_time": 1695822697,
"access_time": 1695822697,
"modified_time": 1695900214,
"created_time": 1695900214,
"access_time": 1695900214,
"file_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"document_subject": "",
"keywords": "",
Expand Down Expand Up @@ -913,13 +912,13 @@
"tables": []
},
"metadata": {
"uid": "attach_fa5c54ac-5d3c-11ee-b518-0242ac120002",
"uid": "attach_7619127e-5df1-11ee-bfc1-0242ac120002",
"file_name": "image1.png",
"temporary_file_name": "1695822697_301.png",
"temporary_file_name": "1695900214_972.png",
"size": 14874,
"modified_time": 1695822697,
"created_time": 1695822697,
"access_time": 1695822697,
"modified_time": 1695900214,
"created_time": 1695900214,
"access_time": 1695900214,
"file_type": "image/png",
"rotated_page_angles": [
0
Expand Down
10 changes: 5 additions & 5 deletions docs/source/dedoc_api_usage/return_format.rst
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,13 @@ The beginning of the document's metadata:

.. literalinclude:: ../_static/json_format_examples/basic_example.json
:language: json
:lines: 429-437
:lines: 428-436

The document's attachments:

.. literalinclude:: ../_static/json_format_examples/basic_example.json
:language: json
:lines: 459
:lines: 458

As we see, the `attachments` field is empty because the option
`with_attachments` is set to `"false"` by default (see :ref:`table_parameters`).
Expand Down Expand Up @@ -118,7 +118,7 @@ Unlike the previous examples, in this case we have `attachments` field filled:

.. literalinclude:: ../_static/json_format_examples/with_attachments.json
:language: json
:lines: 459-491
:lines: 458-490

Example with base64 attachments
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand All @@ -136,7 +136,7 @@ The only difference is in the attachment's metadata: attachment's content is enc

.. literalinclude:: ../_static/json_format_examples/with_base64_attachments.json
:language: json
:lines: 459-494
:lines: 458-493

Example with parsed attachments
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand All @@ -156,4 +156,4 @@ The beginning of the document's attachments:

.. literalinclude:: ../_static/json_format_examples/with_parsed_attachments.json
:language: json
:lines: 459-484
:lines: 458-483

0 comments on commit 436bb36

Please sign in to comment.