Skip to content

Commit

Permalink
parse existing tables from markdown
Browse files Browse the repository at this point in the history
  • Loading branch information
Krande committed Jan 18, 2024
1 parent 5d35ad6 commit 3bf0027
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 13 deletions.
14 changes: 14 additions & 0 deletions files/doc_regular_table/00-main/table.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# A basic table

Some text before the table

| | cat A [unit] | cat 2 [unitB] | num ex [-] |
|:----------|-------------:|--------------:|-----------:|
| example1 | 4000 | 1.13 | 6 |
| example4 | 4000 | 2.15 | 6 |
| example9 | 4000 | 4.04 | 6 |
| example10 | 4500 | 2 | 6 |

Table: A basic table {#tbl:a-basic-table}

And some text after
5 changes: 5 additions & 0 deletions files/doc_regular_table/metadata.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
lang: en-GB
linkReferences: true
nameInLink: true
figPrefix: "Figure"
tblPrefix: "Table"
2 changes: 1 addition & 1 deletion src/paradoc/cli_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def main(
source_dir: str,
report_name: str,
auto_open: bool = False,
work_dir: str = None,
work_dir: str = "temp",
export_format: ExportFormats = ExportFormats.DOCX,
):
one = OneDoc(source_dir, work_dir=work_dir)
Expand Down
38 changes: 37 additions & 1 deletion src/paradoc/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class Table:
add_link: bool = True
md_instances: List[MarkDownFile] = field(default_factory=list)
docx_instances: List[object] = field(default_factory=list)
link_name_override: str = None

def __post_init__(self):
if self.df is None:
Expand All @@ -61,9 +62,40 @@ def to_markdown(self, include_name_in_cell=False, flags=None):
return tbl_str
tbl_str += f"\n\nTable: {self.caption}"
if self.add_link:
tbl_str += f" {{#tbl:{self.name}}}"
if self.link_name_override is None:
link_name = self.name
else:
link_name = self.link_name_override

tbl_str += f" {{#tbl:{link_name}}}"
return tbl_str

@staticmethod
def from_markdown_str(table_str: str) -> "Table":
    """Parse a captioned markdown pipe table and return a Table instance.

    The expected layout is a header row, a separator row, zero or more data
    rows and finally a ``Table: <caption> {#tbl:<link>}`` line, where the
    ``{#tbl:...}`` attribute is optional. Parsing stops at the caption line.

    Args:
        table_str: Markdown text that starts at the header row of the table.

    Returns:
        A Table whose DataFrame holds the stripped cell texts (as strings)
        under the stripped header texts. The table name is the first cell of
        the first data row; if the table has no data, the link id (or,
        failing that, the caption) is used instead. ``link_name_override`` is
        the id found in ``{#tbl:...}`` on the caption line, or None.

    Raises:
        ValueError: If ``table_str`` is blank or has no ``Table:`` caption line.
    """
    lines = table_str.strip().splitlines()
    if not lines:
        raise ValueError("Cannot parse a markdown table from an empty string")

    # The outermost fragments produced by splitting on "|" lie outside the
    # leading/trailing pipes and are therefore dropped with [1:-1].
    header = [cell.strip() for cell in lines[0].strip().split("|")[1:-1]]

    rows = []
    caption_line = None
    # lines[1] is the alignment/separator row, so the data starts at index 2.
    for raw_line in lines[2:]:
        line = raw_line.strip()
        if not line:
            # Skip blank *and* whitespace-only lines; the latter would
            # otherwise end up as an empty row in the DataFrame.
            continue
        if line.startswith("Table:"):
            caption_line = line
            break
        rows.append([cell.strip() for cell in line.split("|")[1:-1]])

    if caption_line is None:
        raise ValueError("Markdown table is missing its 'Table: <caption>' line")

    # maxsplit=1 keeps captions that themselves contain the text "Table:".
    caption_text = caption_line.split("Table:", 1)[1].strip()
    caption = caption_text.split("{")[0].strip()

    # Only look for the link id on the caption line, so that a cell containing
    # "{#tbl:...}" can never be mistaken for the table's own reference.
    tbl_ref = re.search(r"{#tbl:(.*?)}", caption_line)
    link_override = tbl_ref.group(1) if tbl_ref is not None else None

    df = pd.DataFrame(rows, columns=header)
    if not df.empty:
        name = str(df.values[0][0])
    else:
        # No data cell to take the name from; fall back to something unique-ish.
        name = link_override or caption

    return Table(name=name, df=df, caption=caption, link_name_override=link_override)


@dataclass
class Figure:
Expand Down Expand Up @@ -110,6 +142,10 @@ def get_figures(self):
regx = re.compile(r'<img src="(?P<file_path>.*?)" alt="(?P<caption>.*?)"\s*(?:width="(?P<width>.*?)"|)\/>')
yield from regx.finditer(self.read_original_file())

def get_tables(self):
    """Yield one regex match per captioned markdown table in the original file.

    Group 1 of each match runs from a pipe character up to the end of the
    next line that begins with ``Table:``.
    """
    table_pattern = r'(\|.*?\nTable:.*?$)'
    search_flags = re.MULTILINE | re.DOTALL
    for table_match in re.finditer(table_pattern, self.read_original_file(), search_flags):
        yield table_match


class ExportFormats(str, Enum):
DOCX = "docx"
Expand Down
26 changes: 15 additions & 11 deletions src/paradoc/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,16 +61,16 @@ class OneDoc:
FORMATS = ExportFormats

def __init__(
self,
source_dir=None,
main_prefix="00-main",
app_prefix="01-app",
clean_build_dir=True,
create_dirs=False,
output_dir=None,
work_dir="temp",
use_default_html_style=True,
**kwargs,
self,
source_dir=None,
main_prefix="00-main",
app_prefix="01-app",
clean_build_dir=True,
create_dirs=False,
output_dir=None,
work_dir="temp",
use_default_html_style=True,
**kwargs,
):
self.source_dir = pathlib.Path().resolve().absolute() if source_dir is None else pathlib.Path(source_dir)
self.work_dir = pathlib.Path(work_dir).resolve().absolute()
Expand Down Expand Up @@ -138,7 +138,7 @@ def _setup(self, create_dirs, clean_build_dir):
# Check if the figure is commented out
# Get first newline right before regex search found start and till the end (capture entire line)
start = fig.string[: fig.start()].rfind("\n") + 1
end = fig.string[fig.start() :].find("\n") + fig.start()
end = fig.string[fig.start():].find("\n") + fig.start()
line = fig.string[start:end]
if line.startswith("[//]: #"):
continue
Expand All @@ -155,6 +155,10 @@ def _setup(self, create_dirs, clean_build_dir):
)
self.figures[caption] = Figure(name, caption, ref, file_path, md_instance=md_file)

for re_table in md_file.get_tables():
table = Table.from_markdown_str(re_table.group(1))
self.tables[table.name] = table

if clean_build_dir is True:
shutil.rmtree(self.build_dir, ignore_errors=True)

Expand Down
7 changes: 7 additions & 0 deletions tests/tables/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,10 @@ def test_table(files_dir, test_dir):
one.add_table("my_table_5", df, "No Space 3")

one.compile("TableDoc")


def test_regular_table(files_dir, test_dir):
    """Compile the ``doc_regular_table`` fixture document to docx."""
    fixture_name = "doc_regular_table"
    source_dir = files_dir / fixture_name
    work_dir = test_dir / fixture_name

    document = OneDoc(source_dir, work_dir=work_dir)
    document.compile("TableDoc", export_format="docx")

0 comments on commit 3bf0027

Please sign in to comment.