From d752ad2e4f8f517bfe2266cbc075c541cfe191a0 Mon Sep 17 00:00:00 2001 From: Nasty Date: Thu, 28 Sep 2023 16:27:44 +0300 Subject: [PATCH] TLDR-475 fix table documentation (#338) * TLDR-475 fix table documentation * Small fixes --- dedoc/data_structures/cell_with_meta.py | 13 +- dedoc/data_structures/table.py | 4 +- dedoc/data_structures/table_metadata.py | 5 +- .../code_examples/dedoc_usage_tutorial.py | 12 +- .../json_format_examples/basic_example.json | 168 +++--- .../linear_structure_type.json | 168 +++--- .../with_attachments.json | 179 ++++--- .../with_base64_attachments.json | 179 ++++--- .../with_inserted_table.json | 168 +++--- .../with_parsed_attachments.json | 506 +++++++++++++++--- docs/source/dedoc_api_usage/return_format.rst | 12 +- docs/source/getting_started/usage.rst | 10 +- examples/README.md | 13 +- examples/create_structured_document.py | 3 +- examples/create_unstructured_document.py | 48 +- examples/example_doc_parser.py | 14 +- examples/example_img_parser.py | 14 +- examples/example_manager_input.py | 3 +- examples/example_pdf_parser.py | 22 +- examples/example_post.py | 3 +- 20 files changed, 1018 insertions(+), 526 deletions(-) diff --git a/dedoc/data_structures/cell_with_meta.py b/dedoc/data_structures/cell_with_meta.py index ab800f87..2ccbdd38 100644 --- a/dedoc/data_structures/cell_with_meta.py +++ b/dedoc/data_structures/cell_with_meta.py @@ -9,20 +9,23 @@ class CellWithMeta: """ - This class holds the information about the cell information: text of the cell, text annotations and cell properties (rowspan, colspan, invisible). + This class holds the information about the cell: list of lines and cell properties (rowspan, colspan, invisible). """ def __init__(self, lines: List[LineWithMeta], colspan: int = 1, rowspan: int = 1, invisible: bool = False) -> None: """ - :param lines: text lines (LineWithMeta) of the cell - :param colspan: The value of the rowspan attribute represents the number of columns to span. Like HTML format. 
- :param rowspan: The value of the rowspan attribute represents the number of rows to span. Like HTML format. - :param invisible: Display or hide cell values + :param lines: textual lines of the cell + :param colspan: number of columns to span like in HTML format + :param rowspan: number of rows to span like in HTML format + :param invisible: indicator for displaying or hiding cell text """ self.lines = lines self.colspan = colspan self.rowspan = rowspan self.invisible = invisible + def __repr__(self) -> str: + return f"CellWithMeta({self.get_text()[:65]})" + def get_text(self) -> str: return "\n".join([line.line for line in self.lines]) diff --git a/dedoc/data_structures/table.py b/dedoc/data_structures/table.py index f04a04e4..c92125ea 100644 --- a/dedoc/data_structures/table.py +++ b/dedoc/data_structures/table.py @@ -16,8 +16,8 @@ class Table(Serializable): """ def __init__(self, cells: List[List[CellWithMeta]], metadata: TableMetadata) -> None: """ - :param cells: a list of lists of cells (cell has text, colspan and rowspan attributes). - :param metadata: some table metadata, as location, size and so on. + :param cells: a list of lists of cells (cell has text, colspan and rowspan attributes) + :param metadata: some table metadata as location, size and so on """ self.metadata = metadata self.cells = cells diff --git a/dedoc/data_structures/table_metadata.py b/dedoc/data_structures/table_metadata.py index 0ea8d1c3..3c9ea615 100644 --- a/dedoc/data_structures/table_metadata.py +++ b/dedoc/data_structures/table_metadata.py @@ -9,15 +9,14 @@ class TableMetadata(Serializable): """ - This class holds the information about the table location in the document and information about cell properties. + This class holds the information about table unique identifier, rotation angle (if table has been rotated - for images) and so on. 
""" def __init__(self, page_id: Optional[int], uid: Optional[str] = None, is_inserted: bool = False, rotated_angle: float = 0.0) -> None: """ :param page_id: number of the page where table starts :param uid: unique identifier of the table :param is_inserted: indicator if table was already inserted into paragraphs list - :param rotated_angle: the value of the rotation angle by which the table was rotated during recognition. Extracted boxes from a table will need to - be rotated by this angle. + :param rotated_angle: value of the rotation angle by which the table was rotated during recognition """ self.page_id = page_id self.uid = str(uuid.uuid1()) if not uid else uid diff --git a/docs/source/_static/code_examples/dedoc_usage_tutorial.py b/docs/source/_static/code_examples/dedoc_usage_tutorial.py index 176c7fe5..3e0df596 100644 --- a/docs/source/_static/code_examples/dedoc_usage_tutorial.py +++ b/docs/source/_static/code_examples/dedoc_usage_tutorial.py @@ -48,13 +48,13 @@ document.lines[3].annotations[7] # Italic(6:12, True) document.lines[3].annotations[8] # Size(14:19, 10.0) -document.tables[0].cells[0][0].get_text() # N -document.tables[0].cells[1][3].get_text() # Cell3 -document.tables[1].cells[3][0].get_text() # 'Text 3' +cell = document.tables[0].cells[0][0] +cell # CellWithMeta(N) +cell.get_text() # N +cell.rowspan, cell.colspan, cell.invisible # (1, 1, False) document.tables[0].metadata.uid # f2f08354fc2dbcb5ded8885479f498a6 -document.tables[0].cells[0][0].colspan # 1 -document.tables[0].cells[0][0].rowspan # 1 -document.tables[0].cells[0][0].invisible # False +document.tables[0].metadata.page_id # None +document.tables[0].metadata.rotated_angle # 0.0 document.tables[1].cells[0][0].invisible # False document.tables[1].cells[0][1].invisible # True document.tables[1].cells[0][0].colspan # 2 diff --git a/docs/source/_static/json_format_examples/basic_example.json b/docs/source/_static/json_format_examples/basic_example.json index fafb554a..a470920e 100644 --- 
a/docs/source/_static/json_format_examples/basic_example.json +++ b/docs/source/_static/json_format_examples/basic_example.json @@ -1,5 +1,5 @@ { - "version": "2023.05.26", + "version": "0.11.2", "warnings": [], "content": { "structure": { @@ -298,7 +298,7 @@ "start": 0, "end": 14, "name": "attachment", - "value": "image1.png" + "value": "attach_fa1143ae-5d3c-11ee-b518-0242ac120002" } ], "metadata": { @@ -321,87 +321,119 @@ { "cells": [ [ - "Table header", - "Table header" + { + "lines": [ + { + "text": "Table header", + "annotations": [] + } + ], + "colspan": 2, + "rowspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "Table header", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": true + } ], [ - "Vertically merged cells", - "Text 1" + { + "lines": [ + { + "text": "Vertically merged cells", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 2, + "invisible": false + }, + { + "lines": [ + { + "text": "Text 1", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + } ], [ - "Vertically merged cells", - "Text 2" + { + "lines": [ + { + "text": "Vertically merged cells", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": true + }, + { + "lines": [ + { + "text": "Text 2", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + } ], [ - "Text 3", - "Text 4" + { + "lines": [ + { + "text": "Text 3", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "Text 4", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + } ] ], "metadata": { "uid": "3a327789721e09b3fa6fd9560f3ee263", "page_id": null, "is_inserted": false, - "cell_properties": [ - [ - { - "colspan": 2, - "rowspan": 1, - "invisible": false - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": true - } - ], - [ - { - "colspan": 1, - "rowspan": 2, - "invisible": false - }, - { - "colspan": 
1, - "rowspan": 1, - "invisible": false - } - ], - [ - { - "colspan": 1, - "rowspan": 1, - "invisible": true - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": false - } - ], - [ - { - "colspan": 1, - "rowspan": 1, - "invisible": false - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": false - } - ] - ] + "rotated_angle": 0.0 } } ] }, "metadata": { - "uid": "doc_uid_auto_5cbfdc00-0e90-11ee-8789-4549ad8e7206", + "uid": "doc_uid_auto_fa1f6786-5d3c-11ee-b518-0242ac120002", "file_name": "example_return_format.docx", + "temporary_file_name": "1695822696_268.docx", "size": 21270, - "modified_time": 1687172368, - "created_time": 1687172368, - "access_time": 1687172368, + "modified_time": 1695822696, + "created_time": 1695822696, + "access_time": 1695822696, "file_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "document_subject": "", "keywords": "", @@ -409,8 +441,8 @@ "comments": "", "author": "", "last_modified_by": "", - "created_date": 1568736411, - "modified_date": 1686923436, + "created_date": 1568725611, + "modified_date": 1686912636, "last_printed_date": null, "other_fields": { "document_subject": "", @@ -419,8 +451,8 @@ "comments": "", "author": "", "last_modified_by": "", - "created_date": 1568736411, - "modified_date": 1686923436, + "created_date": 1568725611, + "modified_date": 1686912636, "last_printed_date": null } }, diff --git a/docs/source/_static/json_format_examples/linear_structure_type.json b/docs/source/_static/json_format_examples/linear_structure_type.json index c7a971e3..848053fb 100644 --- a/docs/source/_static/json_format_examples/linear_structure_type.json +++ b/docs/source/_static/json_format_examples/linear_structure_type.json @@ -1,5 +1,5 @@ { - "version": "2023.05.26", + "version": "0.11.2", "warnings": [], "content": { "structure": { @@ -388,7 +388,7 @@ "start": 0, "end": 14, "name": "attachment", - "value": "image1.png" + "value": "attach_fa23fd78-5d3c-11ee-b518-0242ac120002" } ], "metadata": { 
@@ -405,87 +405,119 @@ { "cells": [ [ - "Table header", - "Table header" + { + "lines": [ + { + "text": "Table header", + "annotations": [] + } + ], + "colspan": 2, + "rowspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "Table header", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": true + } ], [ - "Vertically merged cells", - "Text 1" + { + "lines": [ + { + "text": "Vertically merged cells", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 2, + "invisible": false + }, + { + "lines": [ + { + "text": "Text 1", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + } ], [ - "Vertically merged cells", - "Text 2" + { + "lines": [ + { + "text": "Vertically merged cells", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": true + }, + { + "lines": [ + { + "text": "Text 2", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + } ], [ - "Text 3", - "Text 4" + { + "lines": [ + { + "text": "Text 3", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "Text 4", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + } ] ], "metadata": { "uid": "3a327789721e09b3fa6fd9560f3ee263", "page_id": null, "is_inserted": false, - "cell_properties": [ - [ - { - "colspan": 2, - "rowspan": 1, - "invisible": false - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": true - } - ], - [ - { - "colspan": 1, - "rowspan": 2, - "invisible": false - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": false - } - ], - [ - { - "colspan": 1, - "rowspan": 1, - "invisible": true - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": false - } - ], - [ - { - "colspan": 1, - "rowspan": 1, - "invisible": false - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": false - } - ] - ] + "rotated_angle": 0.0 } } ] }, "metadata": { - "uid": 
"doc_uid_auto_5ce69246-0e90-11ee-8789-4549ad8e7206", + "uid": "doc_uid_auto_fa309d08-5d3c-11ee-b518-0242ac120002", "file_name": "example_return_format.docx", + "temporary_file_name": "1695822697_827.docx", "size": 21270, - "modified_time": 1687172369, - "created_time": 1687172369, - "access_time": 1687172369, + "modified_time": 1695822697, + "created_time": 1695822697, + "access_time": 1695822697, "file_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "document_subject": "", "keywords": "", @@ -493,8 +525,8 @@ "comments": "", "author": "", "last_modified_by": "", - "created_date": 1568736411, - "modified_date": 1686923436, + "created_date": 1568725611, + "modified_date": 1686912636, "last_printed_date": null, "other_fields": { "document_subject": "", @@ -503,8 +535,8 @@ "comments": "", "author": "", "last_modified_by": "", - "created_date": 1568736411, - "modified_date": 1686923436, + "created_date": 1568725611, + "modified_date": 1686912636, "last_printed_date": null } }, diff --git a/docs/source/_static/json_format_examples/with_attachments.json b/docs/source/_static/json_format_examples/with_attachments.json index dc7bb9d7..053791ce 100644 --- a/docs/source/_static/json_format_examples/with_attachments.json +++ b/docs/source/_static/json_format_examples/with_attachments.json @@ -1,5 +1,5 @@ { - "version": "2023.05.26", + "version": "0.11.2", "warnings": [], "content": { "structure": { @@ -298,7 +298,7 @@ "start": 0, "end": 14, "name": "attachment", - "value": "image1.png" + "value": "attach_fa355abe-5d3c-11ee-b518-0242ac120002" } ], "metadata": { @@ -321,87 +321,119 @@ { "cells": [ [ - "Table header", - "Table header" + { + "lines": [ + { + "text": "Table header", + "annotations": [] + } + ], + "colspan": 2, + "rowspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "Table header", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": true + } ], [ - "Vertically merged cells", - "Text 1" + { + 
"lines": [ + { + "text": "Vertically merged cells", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 2, + "invisible": false + }, + { + "lines": [ + { + "text": "Text 1", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + } ], [ - "Vertically merged cells", - "Text 2" + { + "lines": [ + { + "text": "Vertically merged cells", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": true + }, + { + "lines": [ + { + "text": "Text 2", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + } ], [ - "Text 3", - "Text 4" + { + "lines": [ + { + "text": "Text 3", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "Text 4", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + } ] ], "metadata": { "uid": "3a327789721e09b3fa6fd9560f3ee263", "page_id": null, "is_inserted": false, - "cell_properties": [ - [ - { - "colspan": 2, - "rowspan": 1, - "invisible": false - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": true - } - ], - [ - { - "colspan": 1, - "rowspan": 2, - "invisible": false - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": false - } - ], - [ - { - "colspan": 1, - "rowspan": 1, - "invisible": true - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": false - } - ], - [ - { - "colspan": 1, - "rowspan": 1, - "invisible": false - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": false - } - ] - ] + "rotated_angle": 0.0 } } ] }, "metadata": { - "uid": "doc_uid_auto_5d374ec0-0e90-11ee-8789-4549ad8e7206", + "uid": "doc_uid_auto_fa4285e0-5d3c-11ee-b518-0242ac120002", "file_name": "example_return_format.docx", + "temporary_file_name": "1695822697_953.docx", "size": 21270, - "modified_time": 1687172369, - "created_time": 1687172369, - "access_time": 1687172369, + "modified_time": 1695822697, + "created_time": 1695822697, + "access_time": 1695822697, "file_type": 
"application/vnd.openxmlformats-officedocument.wordprocessingml.document", "document_subject": "", "keywords": "", @@ -409,8 +441,8 @@ "comments": "", "author": "", "last_modified_by": "", - "created_date": 1568736411, - "modified_date": 1686923436, + "created_date": 1568725611, + "modified_date": 1686912636, "last_printed_date": null, "other_fields": { "document_subject": "", @@ -419,14 +451,14 @@ "comments": "", "author": "", "last_modified_by": "", - "created_date": 1568736411, - "modified_date": 1686923436, + "created_date": 1568725611, + "modified_date": 1686912636, "last_printed_date": null } }, "attachments": [ { - "version": "2023.05.26", + "version": "0.11.2", "warnings": [], "content": { "structure": { @@ -444,12 +476,13 @@ "tables": [] }, "metadata": { - "uid": "attach_5d36badc-0e90-11ee-8789-4549ad8e7206", + "uid": "attach_fa355abe-5d3c-11ee-b518-0242ac120002", "file_name": "image1.png", + "temporary_file_name": "1695822697_181.png", "size": 14874, - "modified_time": 1687172369, - "created_time": 1687172369, - "access_time": 1687172369, + "modified_time": 1695822697, + "created_time": 1695822697, + "access_time": 1695822697, "file_type": "image/png", "other_fields": {} }, diff --git a/docs/source/_static/json_format_examples/with_base64_attachments.json b/docs/source/_static/json_format_examples/with_base64_attachments.json index afdf1f5f..4fa1a2d3 100644 --- a/docs/source/_static/json_format_examples/with_base64_attachments.json +++ b/docs/source/_static/json_format_examples/with_base64_attachments.json @@ -1,5 +1,5 @@ { - "version": "2023.05.26", + "version": "0.11.2", "warnings": [], "content": { "structure": { @@ -298,7 +298,7 @@ "start": 0, "end": 14, "name": "attachment", - "value": "image1.png" + "value": "attach_fa48dd8c-5d3c-11ee-b518-0242ac120002" } ], "metadata": { @@ -321,87 +321,119 @@ { "cells": [ [ - "Table header", - "Table header" + { + "lines": [ + { + "text": "Table header", + "annotations": [] + } + ], + "colspan": 2, + "rowspan": 1, 
+ "invisible": false + }, + { + "lines": [ + { + "text": "Table header", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": true + } ], [ - "Vertically merged cells", - "Text 1" + { + "lines": [ + { + "text": "Vertically merged cells", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 2, + "invisible": false + }, + { + "lines": [ + { + "text": "Text 1", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + } ], [ - "Vertically merged cells", - "Text 2" + { + "lines": [ + { + "text": "Vertically merged cells", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": true + }, + { + "lines": [ + { + "text": "Text 2", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + } ], [ - "Text 3", - "Text 4" + { + "lines": [ + { + "text": "Text 3", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "Text 4", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + } ] ], "metadata": { "uid": "3a327789721e09b3fa6fd9560f3ee263", "page_id": null, "is_inserted": false, - "cell_properties": [ - [ - { - "colspan": 2, - "rowspan": 1, - "invisible": false - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": true - } - ], - [ - { - "colspan": 1, - "rowspan": 2, - "invisible": false - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": false - } - ], - [ - { - "colspan": 1, - "rowspan": 1, - "invisible": true - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": false - } - ], - [ - { - "colspan": 1, - "rowspan": 1, - "invisible": false - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": false - } - ] - ] + "rotated_angle": 0.0 } } ] }, "metadata": { - "uid": "doc_uid_auto_5d83dc9a-0e90-11ee-8789-4549ad8e7206", + "uid": "doc_uid_auto_fa562866-5d3c-11ee-b518-0242ac120002", "file_name": "example_return_format.docx", + "temporary_file_name": "1695822697_293.docx", "size": 
21270, - "modified_time": 1687172370, - "created_time": 1687172370, - "access_time": 1687172370, + "modified_time": 1695822697, + "created_time": 1695822697, + "access_time": 1695822697, "file_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "document_subject": "", "keywords": "", @@ -409,8 +441,8 @@ "comments": "", "author": "", "last_modified_by": "", - "created_date": 1568736411, - "modified_date": 1686923436, + "created_date": 1568725611, + "modified_date": 1686912636, "last_printed_date": null, "other_fields": { "document_subject": "", @@ -419,14 +451,14 @@ "comments": "", "author": "", "last_modified_by": "", - "created_date": 1568736411, - "modified_date": 1686923436, + "created_date": 1568725611, + "modified_date": 1686912636, "last_printed_date": null } }, "attachments": [ { - "version": "2023.05.26", + "version": "0.11.2", "warnings": [], "content": { "structure": { @@ -444,12 +476,13 @@ "tables": [] }, "metadata": { - "uid": "attach_5d834af0-0e90-11ee-8789-4549ad8e7206", + "uid": "attach_fa48dd8c-5d3c-11ee-b518-0242ac120002", "file_name": "image1.png", + "temporary_file_name": "1695822697_915.png", "size": 14874, - "modified_time": 1687172370, - "created_time": 1687172370, - "access_time": 1687172370, + "modified_time": 1695822697, + "created_time": 1695822697, + "access_time": 1695822697, "file_type": "image/png", "base64_encode": 
"iVBORw0KGgoAAAANSUhEUgAAAqsAAABqCAYAAACbMBjJAAAABHNCSVQICAgIfAhkiAAAABl0RVh0U29mdHdhcmUAZ25vbWUtc2NyZWVuc2hvdO8Dvz4AAAAuaVRYdENyZWF0aW9uIFRpbWUAAAAAANCf0YIgMTYg0LjRjtC9IDIwMjMgMTM6NTA6MDmtGTAQAAAgAElEQVR4nO3dd1QUZxcH4N/sLh2kWoiCioIgRUQEVETEFntXsGvssXexJ2qiiYlRE0WNsUXsvfcWjS12xS4WrKhIFXbnfn+ACri7DAvo6nefczieg/POvHPfOzOXqQIRERhjjDHGGNNDsk/dAcYYY4wxxjThYpUxxhhjjOktLlYZY4wxxpje4mKVMcYYY4zpLS5WGWOMMcaY3uJilTHGGGOM6S0uVhljjDHGmN7Sq2JVFEWIovipu8EYY4wxxvSEXhWr0dHRePDgwafuBmOMMcYY0xN6VawyxhhjjDGWGRerjDHGGGNMb3GxyhhjjDHG9BYXq4wxxhhjTG9xscoYY4wxxvQWF6uMMcYYY0xvcbHKGGOMMcb0FherjDHGGGNMb3GxyhhjjDHG9BYXq4wxxhhjTG9xscoYY4wxxvQWF6uMMcYYY0xvcbHKGGOMMcb0FherjDHGGGNMbynye4a3b9/WuW1cXBwKFy6cj71hjDHGGGOfs3wvVg0MDHRua2dnhxIlSuRjbxhjjDHG2OdMICL61J1gjDHGGGNMHb5nlTHGGGOM6S0uVhljjDHGmN7Sw2L1DdaHmcOwUDGULueBChUroZJXSRSSC5DblIGPry98fX3hW6kCnGwLI3TqFHRpGoCSpjLIDO3h07ADxm2+DxEA6Al2T+mMJn4OMJHJYFyiMhp1moTtMSq8PDobAzrWhouFDDIDG5R294CHhwfcy7vBpYwjiloaw9BlBP5Nk9JnQtyl9fixT1NU8/ZG5Wo1EFK7DkKqVYS7ZwDqhfbB91vupvcJpH7Z7m4o6/gVSrhWRYsh8/HvM9W7eee9r4QXK1vBWiaDcdBM3BF1j3fnrW8+mHfu+kd4cXAGerYOgpO5DIKggEOnDXia+WYUVRQih3dGM7/iMC7qjcYdhyHymirLMqXFO+uyZEYl4NekHXpN2Y6HItRSXl6GQR0awMtODkFuB8/67TFgyUUoAQAinp+Yj8FNq6CCd2UEVAlApfJl4VIxGC2nHMTrjL7F7p+mQ06q65CmXCmHMo7FYFekJDyqN0efqatwNlalpj0AJOPuntkY3CoIPhX9UT24JoKDAlDJpzpaDpqFPXeTNbQjvL6yEdO/bYZq3t7wCwxCzeBA+Fd0h5tnFdRt1QOz/43DrXVj8E2LyrBXCJBbu+PrjgOw4EymeapuY/247mhdrSRMC1dAo86DsexSWrYcMIDnuP8yYqyOiJu/VoepTIDc1gNft+uHRedTP1LeJeVx+2CMMZZnpHdSaF2HavT9ReW736juzaZgQ5Bxs78p/t1v0+j0mKrUbUsKUdopGu2mIFnRHrTrzYdzTP1nCJWRy6nUoCOUmvk/tLVL3kRdfEfQ8VTKQRJdnNucShlbk//AlXTxpTLT/4mUdO8IRXT2pNK991CWRahddgo92DeBaljLyKTCaDqWmE99FZ/T8uaFSABIMPSnadcy91GHeKuT2/6lnaLR7o5UztmCZPIS1HH9ExKzNXuzoxuVaJG5D0Q6xTuH/PiA+Iz+rG9EMKpHC56871XisXDytihJbRZdofi3vxbjKWpJGDl/HUGPM6+Arjmpjtp5iZT85Bxt+KEtlTeXkaJoTfru8IusMRSf0u5hvmRp4kodF/5HsZlCpXp1kZZ19yRzK18auit77FPoysLW5GRiS1WHrqErcarMM6XEe4fotxbOVG/eo/R2KWuotTHIsNoMuqUitd7s70tO9dTFyIBkMoHkjn1ob6L6tpRylIaWMyCZADKqm4s450ve5dP2wRhjTGd6eGYVgLwESpaQ5zQRijsWL7h
Tw4oScCyeUx+AhEPhaDVgM5RtFmHrr23hYZW5jQATh0B8MzwUbpLeu2CE4iHhmNq1NFIvROD3XQn50ld6vg2rj7ijdfNSkKX+h5WrriLLebiCjrem/skd0fnXSahh/hArBg7Dpmc5P+uXv/HOjWTsmzcPl78KxdCObjAX3i7SHOVahSLIUtDaOv8JMC5SAc1GReLY7nD4JBzExDa9sfrx2xgSnq7pj06/XoJz+Cr8+U1F2GQKlczSAx3mrsV3nlcxs2NfRD56H/ukY+PRuu96KEMXY8tPreBWSJZluaYOQejZtQbMVErk+elMhTtCahQDHqzG/K0v1MyPELtxNraWrgu/3L5oJL/yTh/2R4wx9n9MD/etRmi2cDnaW+U0nYBi3VdgbgOjgumG3BnNen0NR23HKHqEVdMW4jq545uhjWCnoV6Ru3TFz/19IO1Ya4ByHi5QUDxu3XoMTRd3pfeV8HTrahyr0BE/jAyFsyINF1etwoV311w/Qry19M/ApQ/+mBAEswcrMGDoJmitGwok3lK9wcsXSVA9vYbrsdk6adoYEcu/QdGPXa8CAARYVhmLWd+Wg+zJBkxbcDk9Z8TbWPbbBjwxqIZuXT3Ux0LhjI49asH02Wb8sijjDxh6gtU/zMUV8kT3IfVho2GdjILHYXrzwnnfgQjmqNm9I1xkL7BtwSo8yH43hOomFv9xFY37NdbYF43yJe/0ZH/EGGP/x/SwWAVkCoW0jskUUBTUGggWqFg/CF9pm3/8Yew8mgiZfRBqajuVZ2APNxcbSD3WqpQqAAJMTU2ktdHWV3qMLWtOolLbpihdKQxtyyugvLYakaff3+Ba4PHW1j/BAK79/sD4QFM8WDEAQzc903y2roDiLY05vHycIX+1Gf0b9sb8ow+R8n4lIC+wRJTCCJVaNUMZeRou79mHByJAL4/gwNk0yEv6wreopr4JsK7kBxdFGi7uO4BHIoCEw9hxOAEy++oIKqf5LzXB3BFl7POnMDOs1BXdKhsi6dBCLLma9c+zlGNzsVjeDb39dFhWPuWdXuyPGGPs/xjvWj+QhmOjmuO785of93hLde86bqYQ5CXLoGTOdwxIlIIL/12B0sAdwdWL5TBAOfdVjNmMNaf90LaJPQSFO0LDKsJAdQtrVx5HwT8KIjGWBuUxYO5YVDV5gBUDNF+WLZh4S6VAhYEzMbiiGeLOzEevoFKwd62DHj+sxtlnOedKgffOpTzKKQBV9C3cVQGq+3dwL40gsy+BYlqS6O3/q6JvIVoFqKKjcCOZIC/p9PFiLC+L9j1qw1x5HosXZspLeo4Ns7fDq29HlM5VX/I37xhjjH1aX1SxSrFr0N3TFa6uWX88wxbjvoanvz9s547WEXckXX4XE+KRKAKCqRlM8+M0XuoznP17MIYui0W53j/iW48Pj9C566uIh5vW4FzVtmhURAAgh0ubMPgbiri3LhJHND0Inge6xtLAfRDmjgmA8YO/MWCY+suy+R7vXBJsQjDtyFlsm9EbdZ3NEH99LxaGh8K3XFUM2f4Iap/n1zEnc83IFMYyAZSchGQCKCX9X8HIGEZaYiUYGcNYwLt2YkI8EkRAMPmYMRZg36IHmhUm3IlcgJ1x6b9V3ViMP6Iao1/jnM+SF2TeMcYY+7S+qGJVsG2NhRejEBWV9ediZBc4aFnTrO0uYmFrG0mBEUxM0g/0KclI/uAgR3i+/yf0aNMIdevURu069dC004/Yn+1+x3cH2XJOKGJVDJW67IHLtAM4NLOu2nv0ctVX8T42rrmE6m0bvru/U1aqFdoFGkOM2YjIA4kS1jJ3dI0lYAjPwXMR7meEB38PwPDNzz4o/vIj3nlmWgb1h8zFrqhHiD62AhNbucEs7hR+6zwAq55+uCxdczK3KDkBiSqCYG4BcwEQTM1hlhGrFC0heBtLwdQcpgIgmJjCRGOMC5BlPXQPKwXh6UYsWP8YhGQc/WMxFN16w1/CHQAFmXeMMcY+rS+qWM0fBnD
xr4pixjmfVlKULIeyRgJU9+7g/gencQTYhQzHgr/HosLdgzh4vRyG/DkKIbZZ5/vuIHvtOo78GAxL8SGuRafCSNLIaO+rGL0Ba06l4syUEFT09oa3tze8fRpjznUDyOkpNq/YnfFu0IIiPZYAAKMKGDp3FHwNH2B5/+HYnK34y4945xvBBMUDwjBh9TFs6e8GWewubD6aknO7AqK6ewvRKgEKp3JwUgAKxzIoZSRAFXMfMVpOMYox9xEjClCUdkZpRXqMyxgL724LkEaATCYAoqi50BNFiAoFNF/NN0a1bzrDUxGPvQuW4/qjDZizwwvfdnLSYSeVv3nHGGPs0+Ji9QMylPlmMnqVkwNIwatXWgoQy5poWMMc4sODOHBN85FdkHTMVKBc3whMrW2GyzO7YcQuda/xyU1fRdxevxa3W/yF85fO4dy59z+X/vsDjQsRYrdHYsfLgjww5yKWGYwqDsfcET4wfLAcAyfuR2Lm7uVrvN9TXf0LP21Ufxn/vWScWLoSV7IvVrBE1Tb1UVKWhuRkSV+QKAAi7h08gBsqQ1SsF5L+VoJCQajjZwzx3kmcUvvBAQAgxJ46gRtKA3jXCUYRAYBlTTQJtoD48AD2XdFyz2fyKSxfdSX9UrvMFnY2MqiePsJTtcNCSHgYg1TbIrDQMjZy987oHmiC1FPzMXrATEQ17ofGuX4FAJDveccYY+yT4mJVC/HufHQZuR+pmiYQ7BE29lt4yC5i/vQteb/fTVEWPedNQ33L21jQvT/WPZZ+QfKDvqpuYv26h2jYvg4KZe+2bUO0q28N4dUuRG59nvd3ZerSP42MUWnkXAzzNsCTO9FZL0Xnd7wBAITnR9fj4GPK4b7IN7i2aRH2qvn01ZunT/FK5gQPN9P86FCu0cvdmDbnOJRftcXIrs7pZy9lDmg3JAwllP9i0V/n1T9Mp4zC4gUHkFK0BYZ2cUlvJxRD27H94aW4ggU/rMdjDTFWRm3FX4cepherBhVRO9gWFL0Va46rubUk7QaWLj4Dv9p+MNS2IjJHtO1RH1biDWzcZoxv+gQgr+8byJe8Y4wx9klxsarF6+NHcNPEQmsRY1plEtYt6gTLjd3QeMhaRMVnPcqlPL6Nh7k4TSMv3Q1zf2kCu5iV+LbXEtyVeCk2e19V19dhfWxTtA9WU0AJVqgX1gh2Qjz2rtiksSDJT1Ji+Y5JZYyeOwReap4Myu94A6k4e+I8DMzNcu5b2ilEjPsTJ5+8L33SHu3ChO83wbDxaPTyzvcvEWhHyYj5dzH6f90Oi+IDMXHVb2j67uWzAqwbzkDkeD88mN4WXeefQeYvsoqvLmBJz9b4LsoTI1bMQasi79fexH8C1i/uCrsdPdGoXyQuxWUt0FMencCyWetxV6bIiJklGk2YggZ2t/B7uxYYu/I4bjx+iVexD3Dl0HKMbfY1fhQHYWLrIjnEWIBdkx5oZa+AbZN+6OiU991TfuUdY4yxT+hTf0JLq9fH6Pd+Hal5cDmykoFkdh5Ut1VnGvjXeUojIiKRYg/NpL5hNamsmUCCoQNVbd2TJm9/QCoiIvEp7fu5N4UGlSYTQSCT0tWpba8fafcjZdZ2CitycClH5cpl/LiUJSdHe7I2MSKPcf9lLEu7hFu7aNag1hRc2Y+q165LdUKqk79vRfKpUo/aDZxKi/bcePeJTuXV1TS2TxNyLyQjwbgM1eran8Ij/qHnb7/7qHpIK1rbk0xWiNwb9abh342hPlL7Kj6jg7/2pdaVi5KhQ3Xq2G8SbbiZ+ZOkaXQ5cjT1aetHReVIj1loH/p+y11S5RhvdcRcxlKk2AM/U/eW1aiUmSU5B7emHjMOUmz2b15SAh0b5UmFmmf/3Gpu4i3SiyOzaUDH2uRsLpBgYEfOPpWoUqX0n4pe5amsgw2ZyM0pbH36ZzLTLi2hAWH1yN1GRpDZUPm6YdR/8QVKo1S6tm48dW8SSF6uHuQbWINqVPejCt41qP2kjXQjSUM8JOekuu+
UihR7YAb1bF2dnMwEEuSF6KsyZahMmTJUxqkUORQvTk7etalj+CI69kjTR1vf0IND82hYmyCq6O1HgTWCqUagH3lXDKSWg2bTvmjNnwdNvL2bZg9uQzUrV6bAWnWp3tdfU+3qPuTuVZUadRxEv+zJWKe3S4reTTN6NyRfp8JkbqQgAxMrKuFZizp/v4GuZfmUqrYcSKWTYxrT8KPJGdMm0+mIvhRaz51sZSCZTXmqG9qXFp578/HzTqftgzHGWF4JRMQXvBhjjDHGmF7i2wAYY4wxxpje4mKVMcYYY4zpLS5WGWOMMcaY3uJilTHGGGOM6S0uVhljjDHGmN7iYpUxxhhjjOktLlYZY4wxxpje+qyLVVVCPJI/dScYY4wxxliB0f9iNfEGNo5vDNegH3A126dHk0/+gBatv8PuB2laZkB4eXQ2BnSsDRcLGWQGNijt7gEPDw+4l3dDubIlYW9jCiOXEfg3LWP6bX1QztYZPTbHQj++mKCPfcorCetEL3Fq6UR08beF3KQplsd97D5+6VS4u+MXjO5cBUXkxqgT8ThjHHKRbzxGjDHGCpj+FquUgKi14WhUtT76/rYdt5M+PGSah0xB5CAVptRriTkXkjTMSIB1YH/M+utHtHKQQ7BthYizl3Dp0iVcvnIV125G49Gj1ehg/f574KKohFKphFIlapjnx6ePfcqrHNdJsEblTmMxoK69Hifq50yOUvWHYPLopiglz/o/kvONx4gxxlgB09vji+raPhxQNsLco7swzFOhYSoBVtXGY1GfRIxv3h9bnul4zlHuiNIOb4/WAmwbL8CtuDv4q3lhCFob5oPX29Gr9licVmqbSEOfJLXVV1/iOn0p9HEb0OP5M8YYK1B6W6zKXZuiT2hVOJjldKiUo0zXYWiZsgxDf/hHt3tYFa74ZmoPlNdUExcYEXeW/oyVMbqcLc1LW32lbZ0EFHzV9H9AfIWLa77HyIUXkf+1my5jVNB5/CVuJ4wx9v9Fb4vVXDELQqMQC9xevhB7E3SZgSHsXUqjkAAor65CeMcAFJEbIeT3B3h/iFMhZvd3aBlQEf5Bwahe2QPu/gOx+YUS11cNR1N3SyjsumLtlZ34tU99eBaviqmXUzW0IYBe4MjM7mg35TDi7y5B12oBqBY6F1GqD3v3QZ9yapsWjW0TWsPf0xtVq/vCo0I9DPz7KpKhwrVlvRFc2gxykwB0HdUJtbwcYG1iBDP7img35yBOb56JQa2qwaWIGYwLOaL64M14qOk4r7qBdWNawsNCBnnRKug0ah6OPs84u618jcfPEjPud0xD1NpJGFDfGVYerTFx0028ye06CSm4tWUaejesCAdLY5gW9UaHBZeRonVcNY1ZRh/Fpzg6qwfq+FZC1RpB8PfygF/zkVh5JfFd+zzHS+NYaKFl/G6v/BZBpUwhtwhEj5EdEeLlCGsTQ5jZ+6Lb0utIzTyfN7ewYUxLVPOthroNGyDEzwvlnZwQ9FMSGrTwgKa/zdRvAznEUpcx0jmPRTzZPwM9qheHQqaAQ81vMf/kayhvbsD41l6wKV0H/eccwXNR+jbGGGNMj5G+U92iGdUMyaDSFLqi1DSRkq5PDyADeSkadCRV/SRpp2i0m4JkRXvQrjfvf/3mwGBq88tNUmVe5O1fKdDQkGrOuf/u9+KT5dTcpiiFrn5KIhGR+JI2dm9P8x6KRJRGZ8a6k0LhRF8PnU6LNmyhn1rWpe8PLdXShogoiVa1MiaF22g6lZZDGD7ok6a2CXRkWHkytmtE82+lEpGKHq8OJXtDZxp8OImI0uj8xAqkMKhAvZftpRMXrtLlE2tosJ8FCQpnCvt1Le05fo4uXzxEc0NLk1zuTMOPa4gpERG9ovXt7Uhu3ZpWvni7XiLFrmxNdmUG0eGUdwNAp0b7Uctlz+jtVNLWKSO2BpVpyIbjdPFmNEVf3U2TalqTrFAzWvZcJE20j1kynZpQicyLNKaI6xmdTHtA67s4kUGxlrT8wdu
Rz0u8choLdXIev7fxGLrpX7p8+z5FX9pEQ3zNSGYdSmvj3s4nhY6PdCMjm8a06H7GhpN0hsZ5G5BJlel0NdO2pLz6A1U2MKLa8x5pHBtJ+a/DGOmex0Tiix3Us7SCzAKm0vk3RKS8TQvbfE0TTyRImD9jjLHPxZdxZhVyFPuqKOSqR4i6Fqf16WWKXYPunq5wdU3/cQ/7Cw8kXCFU3buMqwmGsLQ0Tb/SKVih0c+/oW2RTNc9FR5oP244ujZrhEG/zkAz1dWc2+S31zvxx5/XUKhJX3RwMgAgQ9GGoahldhuRy47gzdvpZMXh3yAEfp6uKO/XEuP6BsEQxigb3BS1AyqgvEcQvhnQAqVxH7fupmpZoCXqtmuCwq93Y/WOF+mxp2fY8vdOxN5dhxVHMs4hKs9hw4HSaNnAVrer+XJ7VKwZAI8yjnB0rYMBXQNhmHQL1+9rPk2mdcxeb8cvs8+hUIsB6OxslN5AURxNwnujUuxm/LzoCrLMWZd4SR2LzKS2kdvDu4Y/ypcuAUf3RhjcxQ+KhJu4/jCj18pr2LnjBsizDmp9lXE/toknagUWQ+rZQ/g3Lnf3d0vKfx3GSCMJcRCs6+GHOV1Q+PRU9J35H/6bOQjbqv+KcD+z3C+PMcaY3vpCilXA0MwMBoKIF89joa32FGxbY+HFKERFpf+c/6MpLCRUTwrnaqhSOAYLO4Sg69QVOHo3AYKlLaw0XEtVOHjAwyd3bfKD8uZZnI+XoZRLGRi+/aWhPYoXEfDy9i3Eqq1RBBgZG0OACqpMdYVgaARDAVDl8ES4Wc0wNPsqAXtXb8dzAsSHG7A2tiL8zGKwMfIAEgEoz2/EIaeWqG+dP4W6oaEhkK2/2WkbM+XVEzjzWo4yrmVhkKmNvKQn3C1ViDp5BprrOWnx0mUsdB8/o6z9EeRQKAQgLS3TvamEtLQ0QG4ME4PcjUNu8x+QNkaaSIuDAJv6P2J2J1ucmFgLYWfaYFZf1yzjyRhj7PP3xRSrkMkgA5CWqu2dqx8ycvOHv51hzhNaNsCsnUsw2D8JWyZ2QFDZEvBo8xtOajtDpUubPKKEeCSSCtf/7IxawcEIDg5GcO1B2CmWgIOVIVQFsWiT6mjXsiSS9q3GtqdKRK/bCHmXCIxqaotnm//Grrg0XNh4GGVa1oPVx3xISkv8xdev8FoUYGJqkvVMr2AKMzMBYvxrJOQxVrqMRb6Nn9wFjZp6QH5+HSIvpN+Dq3qyAyt2x6JoszCEmOdyZT5yLkuOg2CL+qO+ha/qNZ48fYnkL+MlxIwxxjL56M+/FxQxJQWpAIxMjHPVTu7aB5NcpUwpwNyjPX7a0h5Tn1/Ctt+Hod/3w9DJqQou/eijQxu/Agm+YF4I5oICjn0jsX9QKTV/jSjxMt+XaoSAsNYoM2cOVm08gkdbzNEyshzqlmoO+xWrsGLzQbgedUbLEYXyfcnaaY7/uRbWsJQRkhKTQcj0EDslISGRIC9kBYs8/imX81jo0kaJZ5KWbgDv8PWYfaYOfurbBP8UNoWYIodD9zU4MKgpCuf6jwZd8l93kmMnPsDq73bDvX9T3PttPAb82RBbe5aGXNP0jDHGPjtfyJlVQmJCIkQoUKRY4QI5UIn392P35fR7EQ3sPNBs3DwMqypD9I3b6u89lNRGSC+SiHT4KpX6toqyFeBhrsL185eg6TMJBcHAJxRt3VJxYFovrCraBo3sBJgGtUerkgnYPmkAdpdtiVoWOc0lL/H4kLb4q9z84Wupwq2rN5D5XLwq+iKuxMnh5u+DvJbWuoxF/o0f4dXR3zHvWSesObwP2zdtwc5dG7EgvClcTHM/N13yX5q85LESNyIGYrnbdMyZPhvTmhhg75iBWHo/820r+ZtTjDHGPr4vpFhV4d6de1DJneBZvmAerhAf7cHkKWvxMOMGQIq/iesxCnj6ekHTudyc28hgYWkBijmL0/dSIaYmIlnyyy81tC1UD706OuFF5DB8u+g0nrwhACokv3i
ImJe5u0UiVxSeaBvqBTH6NbxDv4a1AMAoAO1DnZF25zW8WwUj55HJSzw+pDX+FvUxuL834jfMwpKbGeWWMgZbf4zAf4WbY3hXt7z/0aPLWOTX+KluYeHoWTh3fx9mj/8OkydPxuTJUzD1x18xf+0JPMxlhalL/kujex6nnJ2Ogbtr4dfhFWEsK452MyajjrgdowdHZnp9WP7mFGOMsU/gU7+OQJO0i8toWO/O1LZJEDlbykhmUYYCG7ehTj2H0qKzKVknFmNpaRMzkpceRIffqJvXEhrYPoTKmgkkGJYg/2YdqOfwZXTxg1fZqOju9uk0pJ0fFZbJqWhABxoyfQdFq4hUt1dQ7xqe5OoZQDVCgqmKT2VqOHQVXUtW0q1Nk6idjxXJ5A5Us2c4zTuS/nofzW3eLzHx5I9Uu4QpGVl+RW7Bg2hjjEpynzS2TblJG8a2Ir9SNmRiXIiKOXlQYIshtOxiPEVvn0IdfG1IJi9Bwb0m0NqoFLqxaTL1q12K5DIr8m4zguYdfUrPjkXQiDbeZCWTk2PtgTT3n1jS9gIiIiLVjZ8p0KkX7c70Viblxe+pUuk+tDc5y5QS12kgRSyemBFbR6rVfybtf6SkZ0d+p0F10/vr034q7biXPWYZS8kp/qrHdOiXrlTTx4eq1QyhQB8vCmgxmtZEJb7rZ57jpXEsEtX2Oafxe7B7GnXIEo80itn3C/UNLkFymS1V7vwT7XmoIiKRnuwZQ4GFZQQg24+MbOvNpRtKFd3ZMYNGdvQnO5mcitfoQWPm7KMT2z4cmzs3peR/7seISIc8vnCPjs4bRo1cbMk77DvadFNJRKl0KXIENXYxIkFWhAK6TKatd1Ta588YY+yzIBDRZ391jJ4tRwuXXngSfgZHhrvy/WqMIQnnZ4dh0M1+WDezDmze3qOa9gwHRoag7u/W+Pn6QQws+YVcXGGMMfbF+gIesErE8Z+m4bBbOPb040KVMQAQ7y/GkDG3Uf1QzfeFKgAY2MHLyxEKAzmscvyUMWOMMfbpfeanVZJw9c+u6HusPiI3jIaPyafuD2P6QVCYwET2FJcuPMr0ABkhIWolRkw/hrK9BqOZLRerjDHG9N9nfBsAIW7LSPQ8XgVTxjVHWbWxrHIAABlmSURBVC5UGctEhZh9MxA+dQ0uJhrDwlgGVWoS3hg7o3bXYRjW3gc2n/mfqowxxv4/fMbFKmOMMcYY+9LxuRXGGGOMMaa3uFhljDHGGGN6i4tVxhhjjDGmt7hYZYwxxhhjeuuzLlZVCfFI/tSdYIwxxlhWFI/zf/VH01rVUMm7OsYfLsDPfbMvnn4Wq/QKeyc2RhV3Zzi7V0Tlyl5wda+GNmNW42ri+8mST/6AFq2/w+4HUjYCEc9PzMfgplVQwbsyAqoEoFL5snCpGIyWUw7idYGtTD6jlzi9bAqGNiwLAwN3jDmj7UPnhJfb+qCcrTN6bI7Fl/HaBwnrRC9xaulEdPG3hdykKZbHfew+MpZbKtzd8QtGd66CInJj1Il4nJHbudyGv8jcJ7w4OAM9WwfByVwGmVEJ+DVph15TtuOh+Kn79iUSEb17FsY2LyfhGKMJ4eX2QWg2XYFhWw5jU29r3H/2fz5YX+S2+fHoabGaiOvHHyEw4hSuXT6LU6cu4MzKVng5tz1CukQiJiPnzUOmIHKQClPqtcScC0laZ5l0fBzq1JmKmGaL8M/ZU/j3+L84c/kctgz+ChePXkfy51LJCdbw7TgG33fylvT5MVFUQqlUQqn6cnYUOa6TYI3KncZiQF17PU1wxrKTo1T9IZg8uilKZfsMX6624S8y9wXYBA/F/MhfEOooh2DdAJPXrkDEmAYo/uWspB6RoWTdAQgP9dT9E5f0Cjv+XIMn7gGoZCpH8fa/YVx1g/zs5Ofni9w2Px79jJmggJVrQzT2s3rXQTPPXujX0AJPtszH+nd/TguwqjYei/okYnzz/tjyTFPFmYx98+bh8lehGNrRDeZvP9wjmKNcq1A
EWX6pX/IRYNt4AW7F3cFfzQvj3Vq+3o5etcfitC5/MH9yX+I6MaaJhnwvKAW9HenTdqpPffnSqG7hUlQKjM3MIAcgWJSGUxH9LDf+L32Gua+f2SMURbvfJiHIMPMvDWBX2Aoy1VM8znI5QY4yXYehZcoyDP3hHw33sL7ByxdJUD29huux2Qpa08aIWP4Nin5m9apMLtfxwCXiztKfsTLmyznTqn2dBBT8EZ4xHYivcHHN9xi58CIK5piR29wv6H2DPu179KkvXyBKREIiQZDxzle9T3lc+jxzXz+LVbVUePH8FURFcTjYZ7tOZhaERiEWuL18IfYmqGtrDi8fZ8hfbUb/hr0x/+hDpLz7PwFyRbYwpEVj24TW8Pf0RtXqvvCoUA8D/76atRCm1zi/bCiaBXjDNzAIgQFecHH2Qu0px9O/xS4+xdFZPVDHtxKq1giCv5cH/JqPxMoriQBUuL3yWwSVMoXcIhA9RnZEiJcjrE0MYWbvi25LryM1y6o/xqEZXRHs4QqvKsGo26gJWk37Bzndqau8ugrhHQNQRG6EkN8fQKQXODKzO9pNOYz4u0vQtVoAqoXORZQqp3VW4dqy3ggubQa5SQC6juqEWl4OsDYxgpl9RbSbcxCnN8/EoFbV4FLEDMaFHFF98GbN95OpbmDdmJbwsJBBXrQKOo2ah6PPM/6IUL7G42eJGffmpSFq7SQMqO8MK4/WmLjpJt7kZp0AQEjBrS3T0LthRThYGsO0qDc6LLicafzVdhAxu79Dy4CK8A8KRvXKHnD3H4jNLzL6qHVs8yleUnIwO01tVFFYObg2HBUyKOwD0W3sClxIBoAknF8xHj2CSqCQVxhmHXmeHneNy85l3maLaZ5jIiHu11cNR1N3SyjsumLtlZ34tU99eBaviqmXVfkb09zG4s0tbBjTEtV8q6FuwwYI8fNCeScnBP2UhAYtPDRebv1gG84UT605CuQu93PajjTGQcST/TPQo3pxKGQKONT8FvNPvoby5gaMb+0Fm9J10H/OETwXc5h/QdDU55zW9Z1PsR1rGlclri/pikpFDaEoFoz27WrAw9EWZqY2cKrWGb8dV3M/cw7LTvvne9SqUB5eATVRK9ADZTzqY+Tme9A0JPTiMKY1KAVTx2D0+H4L7miYMP7kPHwbOhzrnqjwettQBAUEoEq977FqhZZt82PsU/NrbCkepyK6wd9ODplRWdQf8AO23VEB9Bz/RIxEqI8ditfoh7/+U1uMpJOybUrIHc1jqGlf6Ieeo775uNthfqHPRdppCi+vILNaf9BdVfb/VNL16QFkIC9Fg46kqm0uxu6jET4WJAAEQUFW5WpT96mr6L+nadmmTKAjw8qTsV0jmn8rlYhU9Hh1KNkbOtPgw0kZ06TS5V+CycrMl0YdjqW33Uk4PJh8O2+iFEqmUxMqkXmRxhRxPSWj/w9ofRcnMijWkpY/UBFRGp0Z604Kg8o0dNO/dPn2fYq+tImG+JqRzDqU1sa97U8S/TvWm0zN/WnssZckZvw2cWVLMlaUp/DT2fufler2rxRoaEg159zP6GcSrWplTAq30XTqXVMp65xG5ydWIIVBBeq9bC+duHCVLp9YQ4P9LEhQOFPYr2tpz/FzdPniIZobWprkcmcaflz9WKR7Revb25HcujWtfPF2rUSKXdma7MoMosMpb6dLo1Oj/ajlsmfv1l3aOr2P75ANx+nizWiKvrqbJtW0JlmhZrTsuUiaiE+WU3ObohS6+mn6MsWXtLF7e5r3UCSSNLZ5jZeU8cguhzaqaPq9lhnJLJvS0ieZ1l18Titau1KXTXHS5iM5b9XJS0ykx/3MWHdSKJzo66HTadGGLfRTy7o0+WJc/sdUcixS6PhINzKyaUyL7ivTf5V0hsZ5G5BJlel0Vfl+icqrP1BlAyOqPe+RlnzPKUd1zX1125GUOBCJL3ZQz9IKMguYSuffEJHyNi1s8zVNPJEgYf4SpZ2i0W4KkhXtQbve5DRxTn2
W2pePux1rH9c3tKe3Pcmtm9H8G68oVVRS/O2dNLa6Ncmt61HE7cwHxpyXnbK2A9WeHkVviIjEZxTZyobkRbrStsT0OWQ/xqgerqfuNdrQwqgUylHqQervqCCbLlvp/dSats2Ej7BPze+xTaV/R5YjhbwsDTmaKRlVd+i3WpVpwjlNSSV125SWO9rHUFO84/O2HX4in0mxKtKLLd3IwcybxvybqHaK18ubkjGM6OuF74uaDyTepO0zelNdF0uSCyBAIJl1ZRq8LebdQYDi1lKYtZyKdNtO71IicSN1sJZTsR670je8+C3UpaicbMLWUZZj85tTNH/BSUqNW0dhNnL6qvceyrxZK69PpwADA/L+7iIp3yaScRNa9urtFCq6P6cmGRr40tQr6Ucw8eUqamMtJ7uOmyg+86rkZ7EqZZ3fbthGDeiv2PfF5YvFDclI4Unjzr7vR+qxoVRWbkwtIjMfqD6UsK0bFZNbUou/n2fsmJ/Q4sYWJMgdqPeejJ6knaLwgLa0Ivb9qOaqWM0SX6KXSxuTcbb+Zpd2ajS5GjpQr13v+6989ZxephGRpLHNY7wkjUc2ObYR6eX69lREZkyBv9x8l++qe39QvQrD6FiK1PlIy1sNkc1DTKTHPXv/0u5dpCv31hRATCXGIu08TfBSkGGNWRT9bkeTSgf7OZDcuGGmWEgvVrXmqM65r6GAk5SPIsVu606lFOZUbdoZOvNzE2o++yqlSpm/VLkpVnPscy6L1Y+0HWsf1/RiVZFt/VNPh1N5hYK8Jpyjd72RsGzl3RN08tHbjFLS1R8qk4FhTZpzP/13mY8x4otDFF6/Bc08p/74+wFtxeoH2+bagt+nqpW3+Smv/kCVDeVUoucuehsV5bVpFBQ8g258cEJNcwyI1GybEnNH+xhqiPfLxM+yWP0sbgMQH23EoMF74TNzJcb7m6qdxtDMDAaCiBfPY6HxzL9pGdQfMhe7oh4h+tgKTGzlBrO4U/it8wCsepp+EUV58yzOx8tQyqUM3t0ya2iP4kUEvLx9C7EEKK8cxfFYOVx9K8A8Syd80aN7ZQhXT+DMaznKuJZF5ucf5SU94W6pQtTJM4hT+yyYACNjIwhQQZVxWl556V+cei2He2VvqF/zvJOyzuoJMDI2ztJfABAMjWAoAKocnl42qxmGZl8lYO/q7XhOgPhwA9bGVoSfWQw2Rh5AIgDl+Y045NQS9a3z5wYfQ0NDIFt/s1M4V0OVwjFY2CEEXaeuwNG7CRAsbWGlAJQ6jy0gNV66jEfObQRYNeiLjmWU+HfBn/gvDQBUiFq2HIlh3eFnJHU+mtbL6IP1kkZiTPIQd4WDB5yfnSuAmGpan2yxEORQKAQgLS3TvamEtLQ0QG4ME4Pc57a2HNVESu6rIy0OAmzq/4jZnWxxYmIthJ1pg1l9XfGpnv/WfZ8mRcFtx7qMq4F7ACqZq3D9vwt4e+FZyrLlJf1QudjbEkCAgYECAgiUvV/JFzCrbRusdR2MXhXy9yikcPCAc8zJAt+nSid9fnKXDuhWwxgx65diVxwAqHB55SYUahcKp1xWVtm3Tam5I3kMMygcPOBm9XneR6zzmyk+Fordj1EtxiB+2HZEdi8HQ00TymSQAUhLlfDOVcEExQPCMGF1AwQProLas3Zh89EUhLUwASXEI5FUiPmzM2rtyDiCIxWvxBJwsDKEigAx7hXiRAGO5uZq75EWX7/Ca1FAKVOTrP8vmMLMTIAY/xoJEneW4qtYvBJl8LAsVGA3GEtZ5wK5GdykOtq1LImFC1dj29Mw1Fi3EfIuERh1pAZabf4bu+LqoNTGwyjTcgQ+6vZl2QCzdi6B7egf8dfEDlgyvhBcW0zC4gUD4C1xbG3y0F9dxkNSGyN/9OxZGXNGLUXEvnD4hlzCkvV26LrFBfLczOcTkLpNaYp7gcVUCrkLGjX1wNTp6xB5oRfGeJtB9WQHVuyORdFmYQgxz3kWH9CSo36WOsxPC8mxE2xRf9S38F0
+ClefvvykrwPUhzzWab+qy7jKzGFhJkBMjEeiCFjJpCxbhSf/LMBPc7fhcrwRCpnKkXjtBpTwyDpv8Tm2Dh0EC5WAW/NHYXb3AxhePn//BMnrtv3JyIqjzTcNMKbdFizZ/AzNQu9g5U5HdNia91dTScsdFZ4clTCGXwi9LlbFxzsxrPVEvB60FSvbOmkuVAGIKSlIBWBkYqzmf5NxYukmWLQPRfnMz2YJlqjapj5KzvkDyclpAEwgmBeCuaCAY99I7B9USm3SKa2sYSkT8erFK4goimyPe0FmaQ1LGSEpMRlZ9keUhIREgryQFSxkwHMJMRBMzGAiiEhMSIQIqwIpWKWsc8EwQkBYa5SZMwerNh7Boy3maBlZDnVLNYf9ilVYsfkgXI86o+WIQh+tR+kEmHu0x09b2mPq80vY9vsw9Pt+GDo5VcG5FtLGNk9L12E8pLWRw7lTXzSY0hlrIzYgnI5ir1d3jLEXcjEfJZ7pumJ5IHWb0qRgYio1FgbwDl+P2Wfq4Ke+TfBPYVOIKXI4dF+DA4OaorBOB2HNOXrpRx9dZqh5SVJjJz7A6u92w71/U9z7bTwG/NkQW3uW/mD/WFBUV//CL9e+xtBm9hL6nIzjBdwf3far2sbVW30TMR6vEwEDaxsUkklbtnj3D9StPwyJI45h7zhvmEHErRmB2BOefeapKNwuAltanUb7il0xZUAEmu/qh7L5OKh53bY/HQE2jb5BK/v1WL5kLW463MAhj44Iz4eqWkruiHcj0F7SGH4Z9DIFACDt7mr0bDEdxhM3Y34OhSpAGcWcAkWKFVazc3yDa5sWYa+aRwPfPH2KVzIneLilX95QlK0AD3MVrp+/BE2fGVC4BsDPWoUr+w/ikZqrDAo3f/haqnDr6o0sT+yroi/iSpwcbv4+kFp+KVy9UN5IiYv/nkZizpNLIKTvDIjePT0qZZ0LioFPKNq6peLAtF5YVbQNGtkJMA1qj1YlE7B90gDsLtsStSxymsuH65QX4v392H05/VluAzsPNBs3D8OqyhB94zZU+Ti2mugyHlLbCEVaoG9YCcTv/Bmdvj+LWj3qZunvp8wFbfK6TRVkTHNGeHX0d8x71glrDu/D9k1bsHPXRiwIbwoXHa+qasvRNzr3U/12JC0OStyIGIjlbtMxZ/psTGtigL1jBmLp/cw7SA3bacojXLlwT/LVJvUIz4+ux8HHBEFSn/N3n6GOLvmjy7gqr/+HC4kG8K5aGWYSl5124V+cTimGgJru79qoJfsK/oHOMCneDj9PCoFwcCIGLY7WfKudDvLzePnRmYWgW4eySD0SgW+nnEBAx1rQ5UJJdlJyR/IYfqDgc78g6GWxSk82oGet3jhdJhiFL67ArJkzMfPdz2ys/i8uW5BVuHfnHlRyJ3iW1zBsaacQMe5PnHzy/oUyaY92YcL3m2DYeDR6eWecZC5UD706OuFF5DB8u+g0nrwhACokv3iImJcZm5LF1xg6xB+y/ePQYex6XHiaAgJAqXF4eOcREs3rY3B/b8RvmIUlNzN2McoYbP0xAv8Vbo7hXd0kn22QfdUCPZsWwYs1YzF05RXEqQBV0iNcvv5Uxx2GDBaWFqCYszh9LxViaiKSTSWsc0FReKJtqBfE6NfwDv0a1gIAowC0D3VG2p3X8G4VLGFDVLNOeXhxpfhoDyZPWYuHGfOg+Ju4HqOAp68XjC3yb2w1kpKDOrcxRXCfb+CpuohTYit88/Zm1bws+2PIa9wLNKY5UN3CwtGzcO7+Pswe/x0mT56MyZOnYOqPv2L+2hN4qEN1qTVHcz+7DBq2IwlxSDk7HQN318KvwyvCWFYc7WZMRh1xO0YPjsz0+iB180/A1j4V4enthXZLHuXh4JmKsyfOw8DcLP1AnGOf83efoZYO+ZPrcU27h9VT/0TUV2EY0cnp/QE9h2XLi9qjKGJwaNMh3I+Lx7Mb/2D36ScaX1sFyOHUfSbC/VOwc8zQ/H1H58fYpxYYA/h26YK
KuIADT2qhYxWjnJtIISF3cj+Gb32E3C8In/oJL3XSzo0nT0V6/ffhjyFV+fn908xERCTG0tImZiQvPYgOq31KNJWurRtP3ZsEkperB/kG1qAa1f2ogncNaj9pI93I/haRlJu0YWwr8itlQybGhaiYkwcFthhCyy5mehJSjKWT8/tTQ28HsjI2JDM7R3LxqUPfzDpGL0QiUj2mQ790pZo+PlStZggF+nhRQIvRtCYqkYhU9GD3NOrgY0UyuSPV6j+T9j9Ko5h9v1Df4BIkl9lS5c4/0Z6H6Wspxp2h+b1qkkthUzI0LkT25WtRlw6BZCM3pVI1etOya+qewFbR3e3TaUg7Pyosk1PRgA40ZPoOilYRJZ78kWqXMCUjy6/ILXgQbYxR5bDOKorePoU6+NqQTF6CgntNoLVRKXRj02TqV7sUyWVW5N1mBM07+pSeHYugEW28yUomJ8faA2nuP7Ga387wtqc3fqZAp160O9M4KC9+T5VK96G9ybqs00CKWDyR2mWJr5KeHfmdBtVN769P+6m04576RzZVt1dQ7xqe5OoZQDVCgqmKT2VqOHQVXXvbF61jS/kTLyk5mJ3UNqr7FPG1PTVcFKN+bDTOJz5XeZttoXmPSY5xV9KtTZMyxt2BavYMp3lHnr5fx3yNaW5iIdKTPWMosLBMzf5MRrb15tINpYru7JhBIzv6k51MTsVr9KAxc/bRiW3q811zjmaOQe5zX+2+QVscLtyjo/OGUSMXW/IO+4423VQSUSpdihxBjV2MSJAVoYAuk2nrHZWG+SfT6SlVqbC1Jw3Zq+m9ZyK9ODKbBnSsTc7mAgkGduTsU4kqVUr/qehVnso62JCJ3JzC1md6njyH8da4rvmZs7nMOe37noxXV5mUoir161O9OiFUtUJ58m02itZeT/5wZtqWrbpPGwZUoxJmhmRkVZL82kyidbNDyVpuReUbjaFVW+bSgNolSS6zJO/2s+mflyJRwima1bAkyQUZFQnoRr8efKp2/6GK2U3TetQiJ2OBDEoEUOsew2n2wRi6qW3b/Bj71Pwe23ezuk9z61hT1Z+uk7Z3oWTdP0nYNnPKHa1jGE4L3x0DP4x3zrmvfwQiTc+NfT7o2XK0cOmFJ+FncGS4qx7/FcYY+/+ThPOzwzDoZj+sm1nn/YMiac9wYGQI6v5ujZ+vH8TAknp5oYvpjVTs7VMK9Tc3xc47c1FL+71x7GMRb2NmwyEwX7ge3YvzNlxQ9PoBK2kScfynaTjsFo49/bhQZYzpF/H+YgwZcxvVD9XM+kSzgR28vByhMJDDykzfHnVmekulknCplxUkZUoKYGwMBYDUM4uwuVgXrP+KC9WC9JlHNwlX/+yKvsfqI3LDaPiYfOr+MMZYVoLCBCayp7h04VGmB0gICVErMWL6MZTtNRjNbLlYZezzkIajE77FovtKvHmwA6OHnUT9YY0+7usV/w99xmdWCXFbJmLirTCs3dMcZblQZYzpIcG+E+ave4bwqS0QMNcYFsYyqFKT8MbYGbVH78Oh9j6w5AMd0yoVl5aPwsz9ryDG7cfP/UbiTpeR6FnVpkBegc20EWBqcgnjPawx2q4yuv6yGIPdP+NS6jPxRdyzyhhjjDHGvkyf+W0AjDHGGGPsS8bFKmOMMcYY01tcrDLGGGOMMb3FxSpjjDHGGNNbXKwyxhhjjDG9xcUqY4wxxhjTW1ysMsYYY4wxvcXFKmOMMcYY01tcrDLGGGOMMb3FxSpjjDHGGNNbXKwyxhhjjDG9xcUqY4wxxhjTW1ysMsYYY4wxvcXFKmOMMcYY01v/Ayf1VC/YtdR7AAAAAElFTkSuQmCC", "other_fields": { diff --git a/docs/source/_static/json_format_examples/with_inserted_table.json 
b/docs/source/_static/json_format_examples/with_inserted_table.json index c49e4316..9262be2b 100644 --- a/docs/source/_static/json_format_examples/with_inserted_table.json +++ b/docs/source/_static/json_format_examples/with_inserted_table.json @@ -1,5 +1,5 @@ { - "version": "2023.05.26", + "version": "0.11.2", "warnings": [], "content": { "structure": { @@ -491,7 +491,7 @@ "start": 0, "end": 14, "name": "attachment", - "value": "image1.png" + "value": "attach_fce0c064-5d3c-11ee-b518-0242ac120002" } ], "metadata": { @@ -514,87 +514,119 @@ { "cells": [ [ - "Table header", - "Table header" + { + "lines": [ + { + "text": "Table header", + "annotations": [] + } + ], + "colspan": 2, + "rowspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "Table header", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": true + } ], [ - "Vertically merged cells", - "Text 1" + { + "lines": [ + { + "text": "Vertically merged cells", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 2, + "invisible": false + }, + { + "lines": [ + { + "text": "Text 1", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + } ], [ - "Vertically merged cells", - "Text 2" + { + "lines": [ + { + "text": "Vertically merged cells", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": true + }, + { + "lines": [ + { + "text": "Text 2", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + } ], [ - "Text 3", - "Text 4" + { + "lines": [ + { + "text": "Text 3", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "Text 4", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + } ] ], "metadata": { "uid": "3a327789721e09b3fa6fd9560f3ee263", "page_id": null, "is_inserted": true, - "cell_properties": [ - [ - { - "colspan": 2, - "rowspan": 1, - "invisible": false - }, - { - "colspan": 1, - "rowspan": 
1, - "invisible": true - } - ], - [ - { - "colspan": 1, - "rowspan": 2, - "invisible": false - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": false - } - ], - [ - { - "colspan": 1, - "rowspan": 1, - "invisible": true - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": false - } - ], - [ - { - "colspan": 1, - "rowspan": 1, - "invisible": false - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": false - } - ] - ] + "rotated_angle": 0.0 } } ] }, "metadata": { - "uid": "doc_uid_auto_5fc70b1c-0e90-11ee-8789-4549ad8e7206", + "uid": "doc_uid_auto_fcedf964-5d3c-11ee-b518-0242ac120002", "file_name": "example_return_format.docx", + "temporary_file_name": "1695822701_99.docx", "size": 21270, - "modified_time": 1687172373, - "created_time": 1687172373, - "access_time": 1687172373, + "modified_time": 1695822701, + "created_time": 1695822701, + "access_time": 1695822701, "file_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "document_subject": "", "keywords": "", @@ -602,8 +634,8 @@ "comments": "", "author": "", "last_modified_by": "", - "created_date": 1568736411, - "modified_date": 1686923436, + "created_date": 1568725611, + "modified_date": 1686912636, "last_printed_date": null, "other_fields": { "document_subject": "", @@ -612,8 +644,8 @@ "comments": "", "author": "", "last_modified_by": "", - "created_date": 1568736411, - "modified_date": 1686923436, + "created_date": 1568725611, + "modified_date": 1686912636, "last_printed_date": null } }, diff --git a/docs/source/_static/json_format_examples/with_parsed_attachments.json b/docs/source/_static/json_format_examples/with_parsed_attachments.json index 8f404c96..2c519c3c 100644 --- a/docs/source/_static/json_format_examples/with_parsed_attachments.json +++ b/docs/source/_static/json_format_examples/with_parsed_attachments.json @@ -1,5 +1,5 @@ { - "version": "2023.05.26", + "version": "0.11.2", "warnings": [], "content": { "structure": { @@ -298,7 +298,7 @@ "start": 0, "end": 14, 
"name": "attachment", - "value": "image1.png" + "value": "attach_fa5c54ac-5d3c-11ee-b518-0242ac120002" } ], "metadata": { @@ -321,87 +321,119 @@ { "cells": [ [ - "Table header", - "Table header" + { + "lines": [ + { + "text": "Table header", + "annotations": [] + } + ], + "colspan": 2, + "rowspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "Table header", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": true + } ], [ - "Vertically merged cells", - "Text 1" + { + "lines": [ + { + "text": "Vertically merged cells", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 2, + "invisible": false + }, + { + "lines": [ + { + "text": "Text 1", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + } ], [ - "Vertically merged cells", - "Text 2" + { + "lines": [ + { + "text": "Vertically merged cells", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": true + }, + { + "lines": [ + { + "text": "Text 2", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + } ], [ - "Text 3", - "Text 4" + { + "lines": [ + { + "text": "Text 3", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "Text 4", + "annotations": [] + } + ], + "colspan": 1, + "rowspan": 1, + "invisible": false + } ] ], "metadata": { "uid": "3a327789721e09b3fa6fd9560f3ee263", "page_id": null, "is_inserted": false, - "cell_properties": [ - [ - { - "colspan": 2, - "rowspan": 1, - "invisible": false - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": true - } - ], - [ - { - "colspan": 1, - "rowspan": 2, - "invisible": false - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": false - } - ], - [ - { - "colspan": 1, - "rowspan": 1, - "invisible": true - }, - { - "colspan": 1, - "rowspan": 1, - "invisible": false - } - ], - [ - { - "colspan": 1, - "rowspan": 1, - "invisible": false - }, - { - "colspan": 1, - "rowspan": 1, - 
"invisible": false - } - ] - ] + "rotated_angle": 0.0 } } ] }, "metadata": { - "uid": "doc_uid_auto_5dc67f28-0e90-11ee-8789-4549ad8e7206", + "uid": "doc_uid_auto_fa7fdbc0-5d3c-11ee-b518-0242ac120002", "file_name": "example_return_format.docx", + "temporary_file_name": "1695822697_469.docx", "size": 21270, - "modified_time": 1687172370, - "created_time": 1687172370, - "access_time": 1687172370, + "modified_time": 1695822697, + "created_time": 1695822697, + "access_time": 1695822697, "file_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "document_subject": "", "keywords": "", @@ -409,8 +441,8 @@ "comments": "", "author": "", "last_modified_by": "", - "created_date": 1568736411, - "modified_date": 1686923436, + "created_date": 1568725611, + "modified_date": 1686912636, "last_printed_date": null, "other_fields": { "document_subject": "", @@ -419,14 +451,14 @@ "comments": "", "author": "", "last_modified_by": "", - "created_date": 1568736411, - "modified_date": 1686923436, + "created_date": 1568725611, + "modified_date": 1686912636, "last_printed_date": null } }, "attachments": [ { - "version": "2023.05.26", + "version": "0.11.2", "warnings": [], "content": { "structure": { @@ -446,15 +478,57 @@ "annotations": [ { "start": 0, - "end": 27, + "end": 3, + "name": "confidence", + "value": "0.96" + }, + { + "start": 10, + "end": 17, + "name": "confidence", + "value": "0.96" + }, + { + "start": 0, + "end": 3, + "name": "bounding box", + "value": "{\"x_top_left\": 0.05417276720351391, \"y_top_left\": 0.27358490566037735, \"width\": 0.0527086383601757, \"height\": 0.12264150943396226, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 4, + "end": 9, + "name": "confidence", + "value": "0.95" + }, + { + "start": 4, + "end": 9, + "name": "bounding box", + "value": "{\"x_top_left\": 0.11566617862371889, \"y_top_left\": 0.27358490566037735, \"width\": 0.09077598828696926, \"height\": 0.12264150943396226, \"page_width\": 683, 
\"page_height\": 106}" + }, + { + "start": 10, + "end": 17, + "name": "bounding box", + "value": "{\"x_top_left\": 0.212298682284041, \"y_top_left\": 0.27358490566037735, \"width\": 0.11859443631039532, \"height\": 0.12264150943396226, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 18, + "end": 26, + "name": "confidence", + "value": "0.77" + }, + { + "start": 18, + "end": 26, "name": "bounding box", - "value": "{\"x_top_left\": 37, \"y_top_left\": 29, \"width\": 304, \"height\": 13}" + "value": "{\"x_top_left\": 0.3396778916544656, \"y_top_left\": 0.27358490566037735, \"width\": 0.1595900439238653, \"height\": 0.12264150943396226, \"page_width\": 683, \"page_height\": 106}" }, { "start": 0, "end": 27, - "name": "bold", - "value": "True" + "name": "bounding box", + "value": "{\"x_top_left\": 0.05417276720351391, \"y_top_left\": 0.27358490566037735, \"width\": 0.445095168374817, \"height\": 0.12264150943396226, \"page_width\": 683, \"page_height\": 106}" }, { "start": 0, @@ -498,11 +572,95 @@ "node_id": "0.1.0", "text": "1) Fisrst item with some english text\n", "annotations": [ + { + "start": 0, + "end": 2, + "name": "confidence", + "value": "0.93" + }, + { + "start": 0, + "end": 2, + "name": "bounding box", + "value": "{\"x_top_left\": 0.05710102489019034, \"y_top_left\": 0.4811320754716981, \"width\": 0.020497803806734993, \"height\": 0.16037735849056603, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 3, + "end": 9, + "name": "confidence", + "value": "0.81" + }, + { + "start": 3, + "end": 9, + "name": "bounding box", + "value": "{\"x_top_left\": 0.08345534407027819, \"y_top_left\": 0.4811320754716981, \"width\": 0.0629575402635432, \"height\": 0.12264150943396226, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 10, + "end": 14, + "name": "confidence", + "value": "0.96" + }, + { + "start": 15, + "end": 19, + "name": "confidence", + "value": "0.96" + }, + { + "start": 20, + "end": 24, + "name": "confidence", + 
"value": "0.96" + }, + { + "start": 25, + "end": 32, + "name": "confidence", + "value": "0.96" + }, + { + "start": 33, + "end": 37, + "name": "confidence", + "value": "0.96" + }, + { + "start": 10, + "end": 14, + "name": "bounding box", + "value": "{\"x_top_left\": 0.15373352855051245, \"y_top_left\": 0.4811320754716981, \"width\": 0.04978038067349927, \"height\": 0.12264150943396226, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 15, + "end": 19, + "name": "bounding box", + "value": "{\"x_top_left\": 0.2108345534407028, \"y_top_left\": 0.4811320754716981, \"width\": 0.048316251830161056, \"height\": 0.12264150943396226, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 20, + "end": 24, + "name": "bounding box", + "value": "{\"x_top_left\": 0.2679355783308931, \"y_top_left\": 0.5188679245283019, \"width\": 0.05856515373352855, \"height\": 0.08490566037735849, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 25, + "end": 32, + "name": "bounding box", + "value": "{\"x_top_left\": 0.33382137628111275, \"y_top_left\": 0.4811320754716981, \"width\": 0.07906295754026355, \"height\": 0.16037735849056603, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 33, + "end": 37, + "name": "bounding box", + "value": "{\"x_top_left\": 0.4216691068814056, \"y_top_left\": 0.5, \"width\": 0.0424597364568082, \"height\": 0.10377358490566038, \"page_width\": 683, \"page_height\": 106}" + }, { "start": 0, "end": 38, "name": "bounding box", - "value": "{\"x_top_left\": 39, \"y_top_left\": 51, \"width\": 278, \"height\": 17}" + "value": "{\"x_top_left\": 0.05710102489019034, \"y_top_left\": 0.4811320754716981, \"width\": 0.40702781844802344, \"height\": 0.16037735849056603, \"page_width\": 683, \"page_height\": 106}" }, { "start": 0, @@ -535,11 +693,191 @@ "node_id": "0.1.1", "text": "2) Second item with some even more english text. 
Let me speak from my heart\n", "annotations": [ + { + "start": 0, + "end": 2, + "name": "confidence", + "value": "0.94" + }, + { + "start": 0, + "end": 2, + "name": "bounding box", + "value": "{\"x_top_left\": 0.05417276720351391, \"y_top_left\": 0.6981132075471698, \"width\": 0.02342606149341142, \"height\": 0.16037735849056603, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 3, + "end": 9, + "name": "confidence", + "value": "0.96" + }, + { + "start": 10, + "end": 14, + "name": "confidence", + "value": "0.96" + }, + { + "start": 15, + "end": 19, + "name": "confidence", + "value": "0.96" + }, + { + "start": 20, + "end": 24, + "name": "confidence", + "value": "0.96" + }, + { + "start": 25, + "end": 29, + "name": "confidence", + "value": "0.96" + }, + { + "start": 30, + "end": 34, + "name": "confidence", + "value": "0.96" + }, + { + "start": 35, + "end": 42, + "name": "confidence", + "value": "0.96" + }, + { + "start": 43, + "end": 48, + "name": "confidence", + "value": "0.96" + }, + { + "start": 53, + "end": 55, + "name": "confidence", + "value": "0.96" + }, + { + "start": 56, + "end": 61, + "name": "confidence", + "value": "0.96" + }, + { + "start": 62, + "end": 66, + "name": "confidence", + "value": "0.96" + }, + { + "start": 67, + "end": 69, + "name": "confidence", + "value": "0.96" + }, + { + "start": 70, + "end": 75, + "name": "confidence", + "value": "0.96" + }, + { + "start": 3, + "end": 9, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0849194729136164, \"y_top_left\": 0.6981132075471698, \"width\": 0.08052708638360176, \"height\": 0.12264150943396226, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 10, + "end": 14, + "name": "bounding box", + "value": "{\"x_top_left\": 0.17423133235724744, \"y_top_left\": 0.6981132075471698, \"width\": 0.04978038067349927, \"height\": 0.12264150943396226, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 15, + "end": 19, + "name": "bounding box", + "value": 
"{\"x_top_left\": 0.23133235724743778, \"y_top_left\": 0.6981132075471698, \"width\": 0.048316251830161056, \"height\": 0.12264150943396226, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 20, + "end": 24, + "name": "bounding box", + "value": "{\"x_top_left\": 0.2884333821376281, \"y_top_left\": 0.7358490566037735, \"width\": 0.05856515373352855, \"height\": 0.08490566037735849, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 25, + "end": 29, + "name": "bounding box", + "value": "{\"x_top_left\": 0.35431918008784774, \"y_top_left\": 0.7358490566037735, \"width\": 0.05124450951683748, \"height\": 0.08490566037735849, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 30, + "end": 34, + "name": "bounding box", + "value": "{\"x_top_left\": 0.4143484626647145, \"y_top_left\": 0.7358490566037735, \"width\": 0.05710102489019034, \"height\": 0.08490566037735849, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 35, + "end": 42, + "name": "bounding box", + "value": "{\"x_top_left\": 0.4787701317715959, \"y_top_left\": 0.6981132075471698, \"width\": 0.08052708638360176, \"height\": 0.16037735849056603, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 43, + "end": 48, + "name": "bounding box", + "value": "{\"x_top_left\": 0.5666178623718887, \"y_top_left\": 0.7169811320754716, \"width\": 0.048316251830161056, \"height\": 0.10377358490566038, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 49, + "end": 52, + "name": "confidence", + "value": "0.97" + }, + { + "start": 49, + "end": 52, + "name": "bounding box", + "value": "{\"x_top_left\": 0.623718887262079, \"y_top_left\": 0.6981132075471698, \"width\": 0.036603221083455345, \"height\": 0.12264150943396226, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 53, + "end": 55, + "name": "bounding box", + "value": "{\"x_top_left\": 0.6676427525622255, \"y_top_left\": 0.7358490566037735, \"width\": 0.03367496339677892, \"height\": 
0.08490566037735849, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 56, + "end": 61, + "name": "bounding box", + "value": "{\"x_top_left\": 0.7086383601756955, \"y_top_left\": 0.6981132075471698, \"width\": 0.06442166910688141, \"height\": 0.16037735849056603, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 62, + "end": 66, + "name": "bounding box", + "value": "{\"x_top_left\": 0.780380673499268, \"y_top_left\": 0.6886792452830188, \"width\": 0.05417276720351391, \"height\": 0.1320754716981132, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 67, + "end": 69, + "name": "bounding box", + "value": "{\"x_top_left\": 0.8418740849194729, \"y_top_left\": 0.7358490566037735, \"width\": 0.03513909224011713, \"height\": 0.12264150943396226, \"page_width\": 683, \"page_height\": 106}" + }, + { + "start": 70, + "end": 75, + "name": "bounding box", + "value": "{\"x_top_left\": 0.8843338213762811, \"y_top_left\": 0.6981132075471698, \"width\": 0.055636896046852125, \"height\": 0.12264150943396226, \"page_width\": 683, \"page_height\": 106}" + }, { "start": 0, "end": 76, "name": "bounding box", - "value": "{\"x_top_left\": 37, \"y_top_left\": 73, \"width\": 605, \"height\": 18}" + "value": "{\"x_top_left\": 0.05417276720351391, \"y_top_left\": 0.6886792452830188, \"width\": 0.8857979502196194, \"height\": 0.16981132075471697, \"page_width\": 683, \"page_height\": 106}" }, { "start": 0, @@ -575,14 +913,22 @@ "tables": [] }, "metadata": { - "uid": "attach_5dc5cb0a-0e90-11ee-8789-4549ad8e7206", + "uid": "attach_fa5c54ac-5d3c-11ee-b518-0242ac120002", "file_name": "image1.png", + "temporary_file_name": "1695822697_301.png", "size": 14874, - "modified_time": 1687172370, - "created_time": 1687172370, - "access_time": 1687172370, + "modified_time": 1695822697, + "created_time": 1695822697, + "access_time": 1695822697, "file_type": "image/png", - "other_fields": {} + "rotated_page_angles": [ + 0 + ], + "other_fields": { + 
"rotated_page_angles": [ + 0 + ] + } }, "attachments": [] } diff --git a/docs/source/dedoc_api_usage/return_format.rst b/docs/source/dedoc_api_usage/return_format.rst index 90c87789..a75aa65f 100644 --- a/docs/source/dedoc_api_usage/return_format.rst +++ b/docs/source/dedoc_api_usage/return_format.rst @@ -58,19 +58,19 @@ The beginning of the document's tables: .. literalinclude:: ../_static/json_format_examples/basic_example.json :language: json - :lines: 320-350 + :lines: 320-346 The beginning of the document's metadata: .. literalinclude:: ../_static/json_format_examples/basic_example.json :language: json - :lines: 398-405 + :lines: 429-437 The document's attachments: .. literalinclude:: ../_static/json_format_examples/basic_example.json :language: json - :lines: 427 + :lines: 459 As we see, the `attachments` field is empty because the option `with_attachments` is set to `"false"` by default (see :ref:`table_parameters`). @@ -118,7 +118,7 @@ Unlike the previous examples, in this case we have `attachments` field filled: .. literalinclude:: ../_static/json_format_examples/with_attachments.json :language: json - :lines: 427-458 + :lines: 459-491 Example with base64 attachments ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -136,7 +136,7 @@ The only difference is in the attachment's metadata: attachment's content is enc .. literalinclude:: ../_static/json_format_examples/with_base64_attachments.json :language: json - :lines: 427-461 + :lines: 459-494 Example with parsed attachments ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -156,7 +156,7 @@ The beginning of the document's attachments: .. 
literalinclude:: ../_static/json_format_examples/with_parsed_attachments.json :language: json - :lines: 427-452 + :lines: 459-484 Example with inserted table diff --git a/docs/source/getting_started/usage.rst b/docs/source/getting_started/usage.rst index 88a80f26..57329ce9 100644 --- a/docs/source/getting_started/usage.rst +++ b/docs/source/getting_started/usage.rst @@ -122,20 +122,20 @@ Document tables The attribute `tables` in the :class:`dedoc.data_structures.UnstructuredDocument` is a list of :class:`dedoc.data_structures.Table`. -Each table is represented as a list of table rows, each row is a list of strings with cells text. +Each table is represented as a list of table rows, each row is a list of cells with additional metadata :class:`dedoc.data_structures.CellWithMeta`. .. literalinclude:: ../_static/code_examples/dedoc_usage_tutorial.py :language: python - :lines: 51-53 + :lines: 51-54 -It also has metadata, containing table's unique identifier, cells properties (information about rowspan and colspan). +It also has metadata, containing table's unique identifier, rotation angle (if table has been rotated - for images) and so on. .. literalinclude:: ../_static/code_examples/dedoc_usage_tutorial.py :language: python - :lines: 54-57 + :lines: 55-57 All tables have rectangular form, so if the cells are merged, in the intermediate representation they aren't and have the same contents. -Use cells properties for getting information about merged cells. +Use cells metadata for getting information about merged cells. .. 
literalinclude:: ../_static/code_examples/dedoc_usage_tutorial.py :language: python diff --git a/examples/README.md b/examples/README.md index 39938547..09b8ca55 100644 --- a/examples/README.md +++ b/examples/README.md @@ -7,25 +7,24 @@ This is the easiest way, since this class automatically determines the format of As shown in corresponding examples, you can create this manager with following lines: ``` -from dedoc.config import get_config from dedoc import DedocManager -manager = DedocManager(config=get_config()) +manager = DedocManager() ``` And after that you can get parsed document with one simple line, just replace `"your_file_name"` with the path to your chosen file: ``` parsed_document = manager.parse(file_path="your_file_name") ``` -To get more information, look at Dedoc usage tutorial. +To get more information, look at [Dedoc usage tutorial](https://dedoc.readthedocs.io/en/latest/getting_started/usage.html). -If you want to call a specific parser, you can look at some examples in this directory. File `example_doc_parser.py` shows how you can use `DocxReader`, -`example_pdf_parser.py` shows examples with PDF file parsing. In order to parse img-like file you can call `PdfImageReader` like it's shown in +If you want to call a specific parser, you can look at some examples in this directory. File `example_doc_parser.py` shows how to use `DocxReader`, +`example_pdf_parser.py` shows examples with PDF file parsing. In order to parse image-like file you can call `PdfImageReader` like it's shown in `example_img_parser.py`. -Also you can look at the example of using a post-request to parse documents while Dedoc container is working. This example is written in `example_post.py`. +Also, you can look at the example of using a post-request to parse documents while Dedoc container is working. This example is written in `example_post.py`. 
You can check an example like this: ```bash cd examples -python create_structured_document.py +python3 create_structured_document.py ``` \ No newline at end of file diff --git a/examples/create_structured_document.py b/examples/create_structured_document.py index 0a4d468d..907df01a 100644 --- a/examples/create_structured_document.py +++ b/examples/create_structured_document.py @@ -5,7 +5,6 @@ # to create structured document you can use TreeConstructor and apply it to unstructured document # in this example we'll use unstructured_document from create_unstructured_document.py structure_constructor = TreeConstructor() -parsed_document = structure_constructor.structure_document(document=unstructured_document, - structure_type="tree") +parsed_document = structure_constructor.structure_document(document=unstructured_document, structure_type="tree") print(parsed_document.to_dict()) diff --git a/examples/create_unstructured_document.py b/examples/create_unstructured_document.py index eb75f81a..0da38dc9 100644 --- a/examples/create_unstructured_document.py +++ b/examples/create_unstructured_document.py @@ -1,10 +1,10 @@ # noqa -# in this example we create UnstructuredDocument, lets construct document corresponding to example.docx +# in this example we create UnstructuredDocument, let's construct document corresponding to example.docx from dedoc.data_structures import LineMetadata, Table, UnstructuredDocument from dedoc.data_structures import TableMetadata from dedoc.data_structures import LineWithMeta -# First of all lets create some table, table consist of cells (list of rows, and row is a list of strings +# First of all let's create some table, table consists of cells (list of rows, and row is a list of cells with metadata) from dedoc.data_structures import HierarchyLevel from dedoc.data_structures.cell_with_meta import CellWithMeta from dedoc.metadata_extractors import BaseMetadataExtractor @@ -14,56 +14,42 @@ ["N", "Second name", "Name", "Organization", "Phone", 
"Notes"], ["1", "Ivanov", "Ivan", "ISP RAS", "8-800"], ] -cell_with_meta = [[CellWithMeta(lines=[LineWithMeta(line=cell_text, - metadata=LineMetadata(page_id=0, - line_id=None), - annotations=[])]) for cell_text in row] for row in table_cells] -# table also has some metadata, lets assume that our table is on first page +cells_with_meta = [[CellWithMeta(lines=[LineWithMeta(line=cell_text, + metadata=LineMetadata(page_id=0, line_id=None), + annotations=[])]) for cell_text in row] for row in table_cells] +# table also has some metadata, let's assume that our table is on the first page table_metadata = TableMetadata(page_id=0, uid="table 1") # let's build table -table = Table(cells=cell_with_meta, metadata=table_metadata) +table = Table(cells=cells_with_meta, metadata=table_metadata) # Documents also contain some text. -# Logical structure of document may be represented by tree (see example_tree.png) -# but unstructured document consist of flat list of lines with text and metadata +# Logical structure of document may be represented by tree (see example_tree.png) +# but unstructured document consists of flat list of lines with text and metadata # hierarchy structure hidden in HierarchyLevel attribute of LineWithMeta -# lets build firs line, it is document tree root: +# let's build first line, it is document tree root: -# hierarchy level define position of this line in document tree. +# hierarchy level defines position of this line in a document tree. 
hierarchy_level = HierarchyLevel( # most important parameters of HierarchyLevel is level_1 and level_2 # hierarchy level compares by tuple (level_1, level_2) lesser -> closer to the root of the tree level_1=0, level_2=0, - # can_be_multiline and paragraph_type - some parts of the document (for example title) may take more - # than one line - # if can_be_multiline is true than several lines in a row with same level_1, level_2 and paragraph_type - # will be merged in one tree node + # can_be_multiline and line_type - some parts of the document (for example title) may take more than one line + # if can_be_multiline is true then several lines in a row with same level_1, level_2 and line_type will be merged in one tree node can_be_multiline=True, line_type="header" ) text = "DOCUMENT TITLE" -metadata = LineMetadata(page_id=0, - line_id=1, - tag_hierarchy_level=None, - hierarchy_level=hierarchy_level, - other_fields=None) +metadata = LineMetadata(page_id=0, line_id=1, tag_hierarchy_level=None, hierarchy_level=hierarchy_level, other_fields=None) -# Annotations: one may specify some information about some part of the text, for example that some word -# written in italic font. +# Annotations: one may specify some information about some part of the text, for example that some word is written in italic font. annotations = [] -line1 = LineWithMeta( - line=text, - metadata=metadata, - annotations=annotations -) +line1 = LineWithMeta(line=text, metadata=metadata, annotations=annotations) -unstructured_document = UnstructuredDocument(tables=[table], - lines=[line1], - attachments=[]) +unstructured_document = UnstructuredDocument(tables=[table], lines=[line1], attachments=[]) # I hope you understand some concepts of the LineWithMeta, but you may ask why it need level_1 and level_2 # parameters. Why is only level_1 not enough.
Imagine that we have lists like these: diff --git a/examples/example_doc_parser.py b/examples/example_doc_parser.py index a30960eb..bb1cc9f8 100644 --- a/examples/example_doc_parser.py +++ b/examples/example_doc_parser.py @@ -10,25 +10,25 @@ # we get unstructured file with lines and tables unstructured_document = docx_reader.read(path=file_name, document_type="example") -# let's look at content of unstructured_file, it consists of tables and lines +# let's look at the content of unstructured_file, it consists of tables and lines print(unstructured_document.tables, unstructured_document.lines) -# first of all lets look at the table +# first of all let's look at the table table = unstructured_document.tables[0] # table consists of cells (we assume that table is rectangle) -# so cells is list of rows and row is list of strings +# so cells is a list of rows and row is a list of cells with metadata for row in table.cells: for cell in row: print(cell.get_text().replace("\n", "\t") + " ", end="") print("\n") -# there is also some metadata in table +# there is also some metadata in the table print(table.metadata) -# and now lets look at lines. lines it is list of object of class LineWithMeta +# and now let's look at lines.
lines is a list of objects of class LineWithMeta lines = unstructured_document.lines -# let's look at first line +# let's look at the first line line = lines[0] print(line) -# line consist of line (text), metadata, hierarchy level +# line consists of line (text), metadata, hierarchy level print(line.line, line.metadata, line.metadata.hierarchy_level) diff --git a/examples/example_img_parser.py b/examples/example_img_parser.py index 589d651c..3f136cf1 100644 --- a/examples/example_img_parser.py +++ b/examples/example_img_parser.py @@ -11,21 +11,21 @@ # we get unstructured file with lines and tables unstructured_document = img_reader.read(path=file_name, document_type="example") -# let's look at content of unstructured_file, it consists of tables and lines +# let's look at the content of unstructured_file, it consists of tables and lines print(unstructured_document.tables, unstructured_document.lines) -# first of all lets look at the table +# first of all let's look at the table table = unstructured_document.tables[0] # table consists of cells (we assume that table is rectangle) -# so cells is list of rows and row is list of strings +# so cells is a list of rows and row is a list of cells with metadata print(table.cells) -# there is also some metadata in table +# there is also some metadata in the table print(table.metadata) -# and now lets look at lines. lines it is list of object of class LineWithMeta +# and now let's look at lines. 
lines is a list of objects of class LineWithMeta lines = unstructured_document.lines -# let's look at first line +# let's look at the first line line = lines[0] print(line) -# line consist of line (text), metadata, hierarchy level +# line consists of line (text), metadata, hierarchy level print(line.line, line.metadata, line.metadata.hierarchy_level) diff --git a/examples/example_manager_input.py b/examples/example_manager_input.py index bd5a1b41..afb9bfff 100644 --- a/examples/example_manager_input.py +++ b/examples/example_manager_input.py @@ -2,9 +2,8 @@ import json from dedoc import DedocManager -from dedoc.config import get_config -manager = DedocManager(config=get_config()) +manager = DedocManager() filename_docx = "example.docx" parsed_docx_document = manager.parse(file_path=filename_docx, parameters={}) diff --git a/examples/example_pdf_parser.py b/examples/example_pdf_parser.py index 5d7507b8..36568546 100644 --- a/examples/example_pdf_parser.py +++ b/examples/example_pdf_parser.py @@ -11,23 +11,23 @@ # we get unstructured file with lines and tables unstructured_document = pdf_txt_layer_reader.read(path=file_name, document_type="example") -# let's look at content of unstructured_file, it consists of tables and lines +# let's look at the content of unstructured_file, it consists of tables and lines print(unstructured_document.tables, unstructured_document.lines) -# first of all lets look at the table +# first of all let's look at the table table = unstructured_document.tables[0] # table consists of cells (we assume that table is rectangle) -# so cells is list of rows and row is list of strings +# so cells is a list of rows and row is a list of cells with metadata print(table.cells) -# there is also some metadata in table +# there is also some metadata in the table print(table.metadata) -# and now lets look at lines. lines it is list of object of class LineWithMeta +# and now let's look at lines. 
lines is a list of objects of class LineWithMeta lines = unstructured_document.lines # let's look at first line line = lines[0] print(line) -# line consist of line (text), metadata, hierarchy level +# line consists of line (text), metadata, hierarchy level print(line.line, line.metadata, line.metadata.hierarchy_level) @@ -40,21 +40,21 @@ # we get unstructured file with lines and tables unstructured_document = pdf_image_reader.read(path=file_name, document_type="example") -# let's look at content of unstructured_file, it consists of tables and lines +# let's look at the content of unstructured_file, it consists of tables and lines print(unstructured_document.tables, unstructured_document.lines) # first of all lets look at the table table = unstructured_document.tables[0] # table consists of cells (we assume that table is rectangle) -# so cells is list of rows and row is list of strings +# so cells is a list of rows and row is a list of cells with metadata print(table.cells) -# there is also some metadata in table +# there is also some metadata in the table print(table.metadata) -# and now lets look at lines. lines it is list of object of class LineWithMeta +# and now let's look at lines. lines is a list of objects of class LineWithMeta lines = unstructured_document.lines # let's look at first line line = lines[0] print(line) -# line consist of line (text), metadata, hierarchy level +# line consists of line (text), metadata, hierarchy level print(line.line, line.metadata, line.metadata.hierarchy_level) diff --git a/examples/example_post.py b/examples/example_post.py index 97317c81..1eaf3c61 100644 --- a/examples/example_post.py +++ b/examples/example_post.py @@ -14,9 +14,8 @@ # file we want to parse files = {'file': (file_name, file)} # dict with additional parameters - # to parse pdf with text layer add parameter "pdf_with_text_layer":"true" data = {"document_type": ""} - # and now we send post request with attached file and paremeters.
+ # and now we send post request with attached file and parameters. r = requests.post("http://localhost:1231/upload", files=files, data=data) # wait for response, parse json result and print it result = json.loads(r.content.decode())