Skip to content

Commit

Permalink
TLDR-475 fix table documentation (#338)
Browse files Browse the repository at this point in the history
* TLDR-475 fix table documentation

* Small fixes
  • Loading branch information
NastyBoget committed Sep 28, 2023
1 parent f0be0db commit d752ad2
Show file tree
Hide file tree
Showing 20 changed files with 1,018 additions and 526 deletions.
13 changes: 8 additions & 5 deletions dedoc/data_structures/cell_with_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,23 @@

class CellWithMeta:
"""
This class holds the information about the cell: text of the cell, text annotations and cell properties (rowspan, colspan, invisible).
This class holds the information about the cell: list of lines and cell properties (rowspan, colspan, invisible).
"""
def __init__(self, lines: List[LineWithMeta], colspan: int = 1, rowspan: int = 1, invisible: bool = False) -> None:
"""
:param lines: text lines (LineWithMeta) of the cell
:param colspan: The value of the colspan attribute represents the number of columns to span. Like HTML format.
:param rowspan: The value of the rowspan attribute represents the number of rows to span. Like HTML format.
:param invisible: Display or hide cell values
:param lines: textual lines of the cell
:param colspan: number of columns to span, like in HTML format
:param rowspan: number of rows to span, like in HTML format
:param invisible: indicator for displaying or hiding cell text
"""
self.lines = lines
self.colspan = colspan
self.rowspan = rowspan
self.invisible = invisible

def __repr__(self) -> str:
return f"CellWithMeta({self.get_text()[:65]})"

def get_text(self) -> str:
return "\n".join([line.line for line in self.lines])

Expand Down
4 changes: 2 additions & 2 deletions dedoc/data_structures/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ class Table(Serializable):
"""
def __init__(self, cells: List[List[CellWithMeta]], metadata: TableMetadata) -> None:
"""
:param cells: a list of lists of cells (cell has text, colspan and rowspan attributes).
:param metadata: some table metadata, such as location, size and so on.
:param cells: a list of lists of cells (cell has text, colspan and rowspan attributes)
:param metadata: some table metadata, such as location, size and so on
"""
self.metadata = metadata
self.cells = cells
Expand Down
5 changes: 2 additions & 3 deletions dedoc/data_structures/table_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,14 @@

class TableMetadata(Serializable):
"""
This class holds the information about the table location in the document and information about cell properties.
This class holds the information about the table: its unique identifier, rotation angle (if the table has been rotated - for images) and so on.
"""
def __init__(self, page_id: Optional[int], uid: Optional[str] = None, is_inserted: bool = False, rotated_angle: float = 0.0) -> None:
"""
:param page_id: number of the page where table starts
:param uid: unique identifier of the table
:param is_inserted: indicator if table was already inserted into paragraphs list
:param rotated_angle: the value of the rotation angle by which the table was rotated during recognition. Extracted boxes from a table will need to
be rotated by this angle.
:param rotated_angle: value of the rotation angle by which the table was rotated during recognition
"""
self.page_id = page_id
self.uid = str(uuid.uuid1()) if not uid else uid
Expand Down
12 changes: 6 additions & 6 deletions docs/source/_static/code_examples/dedoc_usage_tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,13 @@
document.lines[3].annotations[7] # Italic(6:12, True)
document.lines[3].annotations[8] # Size(14:19, 10.0)

document.tables[0].cells[0][0].get_text() # N
document.tables[0].cells[1][3].get_text() # Cell3
document.tables[1].cells[3][0].get_text() # 'Text 3'
cell = document.tables[0].cells[0][0]
cell # CellWithMeta(N)
cell.get_text() # N
cell.rowspan, cell.colspan, cell.invisible # (1, 1, False)
document.tables[0].metadata.uid # f2f08354fc2dbcb5ded8885479f498a6
document.tables[0].cells[0][0].colspan # 1
document.tables[0].cells[0][0].rowspan # 1
document.tables[0].cells[0][0].invisible # False
document.tables[0].metadata.page_id # None
document.tables[0].metadata.rotated_angle # 0.0
document.tables[1].cells[0][0].invisible # False
document.tables[1].cells[0][1].invisible # True
document.tables[1].cells[0][0].colspan # 2
Expand Down
168 changes: 100 additions & 68 deletions docs/source/_static/json_format_examples/basic_example.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "2023.05.26",
"version": "0.11.2",
"warnings": [],
"content": {
"structure": {
Expand Down Expand Up @@ -298,7 +298,7 @@
"start": 0,
"end": 14,
"name": "attachment",
"value": "image1.png"
"value": "attach_fa1143ae-5d3c-11ee-b518-0242ac120002"
}
],
"metadata": {
Expand All @@ -321,96 +321,128 @@
{
"cells": [
[
"Table header",
"Table header"
{
"lines": [
{
"text": "Table header",
"annotations": []
}
],
"colspan": 2,
"rowspan": 1,
"invisible": false
},
{
"lines": [
{
"text": "Table header",
"annotations": []
}
],
"colspan": 1,
"rowspan": 1,
"invisible": true
}
],
[
"Vertically merged cells",
"Text 1"
{
"lines": [
{
"text": "Vertically merged cells",
"annotations": []
}
],
"colspan": 1,
"rowspan": 2,
"invisible": false
},
{
"lines": [
{
"text": "Text 1",
"annotations": []
}
],
"colspan": 1,
"rowspan": 1,
"invisible": false
}
],
[
"Vertically merged cells",
"Text 2"
{
"lines": [
{
"text": "Vertically merged cells",
"annotations": []
}
],
"colspan": 1,
"rowspan": 1,
"invisible": true
},
{
"lines": [
{
"text": "Text 2",
"annotations": []
}
],
"colspan": 1,
"rowspan": 1,
"invisible": false
}
],
[
"Text 3",
"Text 4"
{
"lines": [
{
"text": "Text 3",
"annotations": []
}
],
"colspan": 1,
"rowspan": 1,
"invisible": false
},
{
"lines": [
{
"text": "Text 4",
"annotations": []
}
],
"colspan": 1,
"rowspan": 1,
"invisible": false
}
]
],
"metadata": {
"uid": "3a327789721e09b3fa6fd9560f3ee263",
"page_id": null,
"is_inserted": false,
"cell_properties": [
[
{
"colspan": 2,
"rowspan": 1,
"invisible": false
},
{
"colspan": 1,
"rowspan": 1,
"invisible": true
}
],
[
{
"colspan": 1,
"rowspan": 2,
"invisible": false
},
{
"colspan": 1,
"rowspan": 1,
"invisible": false
}
],
[
{
"colspan": 1,
"rowspan": 1,
"invisible": true
},
{
"colspan": 1,
"rowspan": 1,
"invisible": false
}
],
[
{
"colspan": 1,
"rowspan": 1,
"invisible": false
},
{
"colspan": 1,
"rowspan": 1,
"invisible": false
}
]
]
"rotated_angle": 0.0
}
}
]
},
"metadata": {
"uid": "doc_uid_auto_5cbfdc00-0e90-11ee-8789-4549ad8e7206",
"uid": "doc_uid_auto_fa1f6786-5d3c-11ee-b518-0242ac120002",
"file_name": "example_return_format.docx",
"temporary_file_name": "1695822696_268.docx",
"size": 21270,
"modified_time": 1687172368,
"created_time": 1687172368,
"access_time": 1687172368,
"modified_time": 1695822696,
"created_time": 1695822696,
"access_time": 1695822696,
"file_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"document_subject": "",
"keywords": "",
"category": "",
"comments": "",
"author": "",
"last_modified_by": "",
"created_date": 1568736411,
"modified_date": 1686923436,
"created_date": 1568725611,
"modified_date": 1686912636,
"last_printed_date": null,
"other_fields": {
"document_subject": "",
Expand All @@ -419,8 +451,8 @@
"comments": "",
"author": "",
"last_modified_by": "",
"created_date": 1568736411,
"modified_date": 1686923436,
"created_date": 1568725611,
"modified_date": 1686912636,
"last_printed_date": null
}
},
Expand Down
Loading

0 comments on commit d752ad2

Please sign in to comment.