From 83bd5df6e6f5204168aa72af970fb703d39ed02e Mon Sep 17 00:00:00 2001 From: Karel Vaculik Date: Wed, 30 Aug 2023 19:36:57 +0200 Subject: [PATCH] Heading functions can now use a Reference object --- CHANGELOG.md | 8 ++- docs/examples/references.py | 51 +++++++++++++++ docs/references.md | 6 +- examples/longer_report.py | 4 +- pyreball/__init__.py | 2 +- pyreball/__main__.py | 63 ++++++++++++++----- pyreball/html.py | 121 +++++++++++++++++++++--------------- tests/test_html.py | 91 ++++++++++++++++----------- tests/test_main.py | 23 ++++++- 9 files changed, 257 insertions(+), 112 deletions(-) create mode 100644 docs/examples/references.py diff --git a/CHANGELOG.md b/CHANGELOG.md index ceb3044..46984b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,10 +3,12 @@ ## 0.2.0 (2023-08-18) - Updated to newer versions of optional dependencies. -- Added new text elements `div` and `span`. Added new parameters to HTML elements - in particular `cl` and `attrs`. -- Deprecated `print_html` function. New `print` should be used instead. -- Deprecated `print_code` function. New `print_source_code` should be used instead. +- Added documentation. +- Added new text elements `div()` and `span()`. Added new parameters to HTML elements - in particular `cl` and `attrs`. +- Deprecated `print_html()` function. New `print()` should be used instead. +- Deprecated `print_code()` function. New `print_source_code()` should be used instead. - Replaced code-prettify with highlight.js for code blocks. +- `print_h1()`, ..., `print_h6()` functions can now take a `Reference` object. 
## 0.1.1 (2021-09-14) diff --git a/docs/examples/references.py b/docs/examples/references.py new file mode 100644 index 0000000..a0df7ca --- /dev/null +++ b/docs/examples/references.py @@ -0,0 +1,51 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +import pyreball as pb +import seaborn as sns + +pb.set_title("References to Plots and Tables") + +ref_ch_1 = pb.Reference() + +pb.print_h1("First Table", reference=ref_ch_1) + +N = 10 +np.random.seed(1) +df = pd.DataFrame({"x": np.arange(1, N + 1), "y": np.random.random(N) * 4 + 3}) +pb.print_table(df, caption="A data table.") + +img_reference = pb.Reference() +table_ref = pb.Reference() +pb.print_div( + f"It is also possible to create references to tables and figures. " + f"For example Table {table_ref} shows sortable columns and " + f"Fig. {img_reference} displays a scatterplot. " + f"Each reference has a default text to be displayed, " + f"but this text can be overriden by using {pb.code('__call__()')} " + f"method on the reference when pasting it into the text. " + f"For example, here is a link to {img_reference('Scatterplot')}." +) +pb.print_table( + df, caption="A sortable table with a reference", reference=table_ref, sortable=True +) + +pb.print_table( + df, caption="A table sorted by y column", sorting_definition=("y", "asc") +) + +pb.print_h1("Charts") + +fig, ax = plt.subplots() +sns.scatterplot(x="x", y="y", ax=ax, data=df) +ax.set(xlabel="x", ylabel="y") +pb.plot_graph(fig, caption="A plot with a reference.", reference=img_reference) + +pb.print_div( + f"Note that you can use the references in your text multiple times, " + f"see again the reference to Table {table_ref} and Fig. {img_reference}. " + f"Of course, we cannot assign a single reference to multiple tables or figures. " + f"Last, but not least, one can use reference to Chapter {ref_ch_1}. " + f"Again, we can override the text and create a link to {ref_ch_1('First Chapter')}." 
+) diff --git a/docs/references.md b/docs/references.md index 6b07b3c..6320081 100644 --- a/docs/references.md +++ b/docs/references.md @@ -1,3 +1,7 @@ # References -TBD +To create anchors to various items on the HTML page, it is possible to use the [`Reference`](../api/pyreball_html/#pyreball.html.Reference) class. + +{{ inline_source("docs/examples/references.py") }} + + diff --git a/examples/longer_report.py b/examples/longer_report.py index d10cdf8..e1b71d4 100644 --- a/examples/longer_report.py +++ b/examples/longer_report.py @@ -97,8 +97,8 @@ def factorial(n): pb.print_h2("References to Plots and Tables") # Creating a reference to a graph and a table: -img_reference = pb.create_reference() -table_ref = pb.create_reference() +img_reference = pb.Reference() +table_ref = pb.Reference() pb.print_div( f"It is also possible to create references to tables and figures. " f"For example Table {table_ref} shows sortable columns and Fig. {img_reference} displays a scatterplot." diff --git a/pyreball/__init__.py b/pyreball/__init__.py index 42daf3c..ab8de35 100644 --- a/pyreball/__init__.py +++ b/pyreball/__init__.py @@ -1,7 +1,7 @@ __version__ = "0.2.0-alpha.1" from pyreball.html import ( - create_reference, + Reference, plot_graph, plot_multi_graph, print, diff --git a/pyreball/__main__.py b/pyreball/__main__.py index 3d740ff..53e13dc 100644 --- a/pyreball/__main__.py +++ b/pyreball/__main__.py @@ -112,10 +112,16 @@ """ -def replace_ids(filename: Path) -> None: - # collect all ids in form of table-N-M +def _replace_ids(html_path: Path) -> None: + """Replace IDs of HTML elements to create working anchors based on references. + + Args: + html_path: Path to the HTML file. + """ + # collect all ids in form of "table-N-M", "img-N-M" all_table_and_img_ids = set() - with open(filename, "r") as f: + chapter_text_replacemenets = [] + with open(html_path, "r") as f: for line in f: # note that we don't need to replace only "table" ids by also "img" etc. 
results = re.findall(r"table-id[\d]+-[\d]+", line) @@ -124,10 +130,24 @@ def replace_ids(filename: Path) -> None: results = re.findall(r"img-id[\d]+-[\d]+", line) if results: all_table_and_img_ids.update(results) + # now collect heading references: + results = re.findall(r"ch_id[\d]+_[^\"]+", line) + if results: + all_table_and_img_ids.update(results) + # obtain also the heading text + search_result_text = re.search(results[0] + r"\">([^<]+)<", line) + link_text = search_result_text.group(1) if search_result_text else "" + search_result_id = re.search(r"_(id[\d]+)_", results[0]) + link_id = search_result_id.group(1) if search_result_id else "" + if link_id and link_text: + chapter_text_replacemenets.append( + (f">{link_id}<", f">{link_text}<") + ) + # Prepare all replacement definitions for a substitutor below replacements = [] for element_id in all_table_and_img_ids: + # Tables and images: re_results = re.search(r"(.+)-(id\d+)-(\d+)", element_id) - if re_results: # this must be first replacements.append( @@ -144,14 +164,29 @@ def replace_ids(filename: Path) -> None: ) ) + # Headings + re_results = re.search(r"ch_(id\d+)_(.+)", element_id) + if re_results: + # this must be first + replacements.append( + ( + "ref-" + re_results.group(1), + "ch_" + re_results.group(2), + ) + ) + # this must be second (because it would catch the first case as well) + replacements.append((element_id, f"ch_{re_results.group(2)}")) + # add also replacements for links to chapters + replacements += chapter_text_replacemenets + # replace all table-N-M with table-M and Table N with Table M substitutor = Substitutor(replacements=replacements) modified_lines = [] - with open(filename, "r") as f: + with open(html_path, "r") as f: for line in f: modified_lines.append(substitutor.sub(line)) - with open(filename, "w") as f: + with open(html_path, "w") as f: f.writelines(modified_lines) @@ -365,8 +400,8 @@ def get_config_directory() -> Path: # the config was generated, let's find out its directory 
config_directory = Path(Path(PATH_TO_CONFIG_LOCATION).read_text()) if ( - not (config_directory / CONFIG_INI_FILENAME).exists() - or not (config_directory / STYLES_TEMPLATE_FILENAME).exists() + not (config_directory / CONFIG_INI_FILENAME).exists() + or not (config_directory / STYLES_TEMPLATE_FILENAME).exists() ): logger.warning( f"{CONFIG_INI_FILENAME} or {STYLES_TEMPLATE_FILENAME} was not found in {config_directory}. " @@ -380,7 +415,7 @@ def get_config_directory() -> Path: def _get_output_dir_and_file_stem( - input_path: Path, output_path_str: Optional[str] + input_path: Path, output_path_str: Optional[str] ) -> Tuple[Path, str]: if not input_path.is_file(): raise ValueError(f"File {input_path} does not exist.") @@ -441,10 +476,10 @@ def main() -> None: carefully_remove_directory_if_exists(directory=Path(path_str)) script_definitions = ( - JAVASCRIPT_CHANGE_EXPAND - + JAVASCRIPT_ON_LOAD - + JAVASCRIPT_SORTABLE_TABLE - + JAVASCRIPT_ROLLING_PLOTS + JAVASCRIPT_CHANGE_EXPAND + + JAVASCRIPT_ON_LOAD + + JAVASCRIPT_SORTABLE_TABLE + + JAVASCRIPT_ROLLING_PLOTS ) css_definitions = get_css( @@ -476,7 +511,7 @@ def main() -> None: with open(html_path, "a") as f: f.write(html_end) - replace_ids(html_path) + _replace_ids(html_path) insert_heading_title_and_toc( filename=html_path, include_toc=parameters["toc"] == "yes" ) diff --git a/pyreball/html.py b/pyreball/html.py index 6203ab3..fd7cffa 100644 --- a/pyreball/html.py +++ b/pyreball/html.py @@ -68,21 +68,28 @@ class Reference: + """ + Class for creating references, i.e. anchors in HTML. + """ + def __init__(self, default_text: Optional[str] = None) -> None: - self.id = "id" + str(random.getrandbits(64)) + """Create a new reference. 
+ + Args: + default_text: + """ + self.id = f"id{random.getrandbits(64)}" self.text = default_text def __str__(self) -> str: - return f'{self.id if self.text is None else self.text}' + return ( + f'{self.id if self.text is None else self.text}' + ) def __call__(self, text: str): return f'{text}' -def create_reference(default_text: Optional[str] = None) -> Reference: - return Reference(default_text) - - def _check_and_mark_reference(reference: Reference) -> None: """Check and save a reference. @@ -172,110 +179,124 @@ def _get_heading_number(level: int, l_heading_counting: List[int]) -> str: return ".".join(map(str, l_heading_counting[:level])) -def _print_heading(string: str, level: int = 1) -> None: +def _print_heading(string: str, level: int = 1, reference: Optional[Reference] = None) -> None: if level > 6: raise ValueError("Heading level cannot be greater than 6.") if level < 1: raise ValueError("Heading level cannot be less than 1.") - if not get_parameter_value("html_file_path") or get_parameter_value("keep_stdout"): - builtins.print("#" * level + " " + str(string)) + if "heading_index" not in _heading_memory: + _heading_memory["heading_index"] = 1 - if get_parameter_value("html_file_path"): - if "heading_index" not in _heading_memory: - _heading_memory["heading_index"] = 1 + heading_index = _heading_memory["heading_index"] - heading_index = _heading_memory["heading_index"] + if get_parameter_value("numbered_headings"): + if "heading_counting" not in _heading_memory: + # what is the index of current h1, h2, h3, h4, h5, h6? + _heading_memory["heading_counting"] = [0, 0, 0, 0, 0, 0] - if get_parameter_value("numbered_headings"): - if "heading_counting" not in _heading_memory: - # what is the index of current h1, h2, h3, h4, h5, h6? 
- _heading_memory["heading_counting"] = [0, 0, 0, 0, 0, 0] + # increase the number in the level + _heading_memory["heading_counting"][level - 1] = ( + _heading_memory["heading_counting"][level - 1] + 1 + ) + # reset all sub-levels + _heading_memory["heading_counting"][level:] = [0] * (6 - level) + # get the string of the numbered section and append non-breakable space + non_breakable_spaces = "\u00A0\u00A0" + heading_number_str = ( + _get_heading_number(level, _heading_memory["heading_counting"]) + + non_breakable_spaces + ) + else: + heading_number_str = "" - # increase the number in the level - _heading_memory["heading_counting"][level - 1] = ( - _heading_memory["heading_counting"][level - 1] + 1 - ) - # reset all sub-levels - _heading_memory["heading_counting"][level:] = [0] * (6 - level) - # get the string of the numbered section and append non-breakable space - non_breakable_spaces = "\u00A0\u00A0" - heading_number_str = ( - _get_heading_number(level, _heading_memory["heading_counting"]) - + non_breakable_spaces - ) - else: - heading_number_str = "" + string = heading_number_str + _reduce_whitespaces(string) + # use heading_index in the id of the heading so there are no collisions in the case of same texts + if reference: + _check_and_mark_reference(reference) + tidy_string = f"ch_{reference.id}_{_tidy_title(string)}_{heading_index}" + else: + tidy_string = f"ch_{_tidy_title(string)}_{heading_index}" - string = heading_number_str + _reduce_whitespaces(string) - # use heading_index in the id of the heading so there are no collisions in the case of same texts - tidy_string = _tidy_title(string) + "_" + str(heading_index) + if not get_parameter_value("html_file_path") or get_parameter_value("keep_stdout"): + builtins.print(string.replace("\u00A0\u00A0", " ")) + + if get_parameter_value("html_file_path"): pilcrow_sign = "\u00B6" - string = ( + header_contents = ( string + f'{pilcrow_sign}' ) - _write_to_html(f'{string}') + # For correct functioning of references, 
it is expected that single line contains at most one heading, + # and the heading is whole there with all links. + _write_to_html(f'{header_contents}') _heading_memory["heading_index"] += 1 -def print_h1(string: str) -> None: + +def print_h1(string: str, reference: Optional[Reference] = None) -> None: """ Print h1 heading. Args: string: Content of the heading. + reference: Reference object. """ - _print_heading(string, level=1) + _print_heading(string, level=1, reference=reference) -def print_h2(string: str) -> None: +def print_h2(string: str, reference: Optional[Reference] = None) -> None: """ Print h2 heading. Args: string: Content of the heading. + reference: Reference object. """ - _print_heading(string, level=2) + _print_heading(string, level=2, reference=reference) -def print_h3(string: str) -> None: +def print_h3(string: str, reference: Optional[Reference] = None) -> None: """ Print h3 heading. Args: string: Content of the heading. + reference: Reference object. """ - _print_heading(string, level=3) + _print_heading(string, level=3, reference=reference) -def print_h4(string: str) -> None: +def print_h4(string: str, reference: Optional[Reference] = None) -> None: """ Print h4 heading. Args: string: Content of the heading. + reference: Reference object. """ - _print_heading(string, level=4) + _print_heading(string, level=4, reference=reference) -def print_h5(string: str) -> None: +def print_h5(string: str, reference: Optional[Reference] = None) -> None: """ Print h5 heading. Args: string: Content of the heading. + reference: Reference object. """ - _print_heading(string, level=5) + _print_heading(string, level=5, reference=reference) -def print_h6(string: str) -> None: +def print_h6(string: str, reference: Optional[Reference] = None) -> None: """ Print h6 heading. Args: string: Content of the heading. + reference: Reference object. 
""" - _print_heading(string, level=6) + _print_heading(string, level=6, reference=reference) def print_div( @@ -404,9 +425,9 @@ def _prepare_table_html( table_html = df.to_html(classes=table_classes, **kwargs) if reference: _check_and_mark_reference(reference) - anchor_link = "table-" + reference.id + "-" + str(tab_index) + anchor_link = f"table-{reference.id}-{tab_index}" else: - anchor_link = "table-" + str(tab_index) + anchor_link = f"table-{tab_index}" caption_element = _prepare_caption_element( prefix="Table", diff --git a/tests/test_html.py b/tests/test_html.py index a93c3ed..b70bbe3 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -34,7 +34,6 @@ _tidy_title, _wrap_plot_element_by_outer_divs, _write_to_html, - create_reference, plot_graph, plot_multi_graph, print as print_html, @@ -138,15 +137,6 @@ def test_reference__with_default_text_and_text_override(): assert regex_match is not None -def test_create_reference(): - ref = create_reference() - assert isinstance(ref, Reference) - - ref_string = str(create_reference("whatever")) - regex_match = re.match(r'^whatever$', ref_string) - assert regex_match is not None - - def test__check_and_mark_reference(pre_test_check_and_mark_reference_cleanup): ref1 = Reference() ref2 = Reference() @@ -267,7 +257,7 @@ def test__print_heading__unsupported_level(level, pre_test_print_heading_cleanup def test__print_heading__stdout(capsys, pre_test_print_heading_cleanup): def fake_get_parameter_value(key): - return key == "keep_stdout" + return key in ["keep_stdout", "numbered_headings"] def fake_get_parameter_value_different(key): if key == "keep_stdout": @@ -281,7 +271,7 @@ def fake_get_parameter_value_different(key): ): _print_heading("simple heading", level=3) captured = capsys.readouterr() - assert "### simple heading" in captured.out + assert "0.0.1 simple heading" in captured.out # when keep_stdout is set off, but we don't have html file either with mock.patch( @@ -290,12 +280,18 @@ def 
fake_get_parameter_value_different(key): ): _print_heading("another heading", level=5) captured = capsys.readouterr() - assert "##### another heading" in captured.out + assert "another heading" in captured.out @pytest.mark.parametrize("keep_stdout", [False, True]) +@pytest.mark.parametrize("use_reference", [False, True]) def test_print_h1_h6__file_output__no_numbers( - keep_stdout, capsys, simple_html_file, pre_test_print_heading_cleanup + keep_stdout, + use_reference, + capsys, + simple_html_file, + pre_test_print_heading_cleanup, + pre_test_check_and_mark_reference_cleanup, ): def fake_get_parameter_value(key): if key == "html_file_path": @@ -308,7 +304,15 @@ def fake_get_parameter_value(key): with mock.patch( "pyreball.html.get_parameter_value", side_effect=fake_get_parameter_value ): - print_h1("heading 1") + if use_reference: + ref = Reference() + ref.id = "id123" + exp_id = "id123_" + else: + ref = None + exp_id = "" + + print_h1("heading 1", reference=ref) print_h3("heading 3") print_h6("heading 6") print_h4("heading 4") @@ -317,12 +321,12 @@ def fake_get_parameter_value(key): expected_result = ( "\n" - '

heading 1\u00B6

\n' - '

heading 3\u00B6

\n' - '
heading 6\u00B6
\n' - '

heading 4\u00B6

\n' - '

heading 2\u00B6

\n' - '
heading 5\u00B6
\n' + f'

heading 1\u00B6

\n' + '

heading 3\u00B6

\n' + '
heading 6\u00B6
\n' + '

heading 4\u00B6

\n' + '

heading 2\u00B6

\n' + '
heading 5\u00B6
\n' ) with open(simple_html_file, "r") as f: @@ -331,10 +335,7 @@ def fake_get_parameter_value(key): captured = capsys.readouterr() expected_stdout = ( - ( - "# heading 1\n### heading 3\n###### heading 6\n" - "#### heading 4\n## heading 2\n##### heading 5" - ) + ("heading 1\nheading 3\nheading 6\n" "heading 4\nheading 2\nheading 5") if keep_stdout else "" ) @@ -342,8 +343,14 @@ def fake_get_parameter_value(key): @pytest.mark.parametrize("keep_stdout", [False, True]) +@pytest.mark.parametrize("use_reference", [False, True]) def test_print_h1_h6__file_output__with_numbers( - keep_stdout, capsys, simple_html_file, pre_test_print_heading_cleanup + keep_stdout, + use_reference, + capsys, + simple_html_file, + pre_test_print_heading_cleanup, + pre_test_check_and_mark_reference_cleanup, ): def fake_get_parameter_value(key): if key == "html_file_path": @@ -356,7 +363,15 @@ def fake_get_parameter_value(key): with mock.patch( "pyreball.html.get_parameter_value", side_effect=fake_get_parameter_value ): - print_h1("he 1") + if use_reference: + ref = Reference() + ref.id = "id123" + exp_id = "id123_" + else: + ref = None + exp_id = "" + + print_h1("he 1", reference=ref) print_h2("he 2") print_h3("he 3") print_h3("he 3") @@ -368,15 +383,15 @@ def fake_get_parameter_value(key): expected_result = ( "\n" - '

1\u00A0\u00A0he 1\u00B6

\n' - '

1.1\u00A0\u00A0he 2\u00B6

\n' - '

1.1.1\u00A0\u00A0he 3\u00B6

\n' - '

1.1.2\u00A0\u00A0he 3\u00B6

\n' - '

1.2\u00A0\u00A0he 2\u00B6

\n' - '

2\u00A0\u00A0he 1\u00B6

\n' - '

2.1\u00A0\u00A0he 2\u00B6

\n' - '

2.2\u00A0\u00A0he 2\u00B6

\n' - '

2.2.1\u00A0\u00A0he 3\u00B6

\n' + f'

1\u00A0\u00A0he 1\u00B6

\n' + '

1.1\u00A0\u00A0he 2\u00B6

\n' + '

1.1.1\u00A0\u00A0he 3\u00B6

\n' + '

1.1.2\u00A0\u00A0he 3\u00B6

\n' + '

1.2\u00A0\u00A0he 2\u00B6

\n' + '

2\u00A0\u00A0he 1\u00B6

\n' + '

2.1\u00A0\u00A0he 2\u00B6

\n' + '

2.2\u00A0\u00A0he 2\u00B6

\n' + '

2.2.1\u00A0\u00A0he 3\u00B6

\n' ) with open(simple_html_file, "r") as f: @@ -386,8 +401,8 @@ def fake_get_parameter_value(key): captured = capsys.readouterr() expected_stdout = ( ( - "# he 1\n## he 2\n### he 3\n### he 3\n## he 2\n" - "# he 1\n## he 2\n## he 2\n### he 3" + "1 he 1\n1.1 he 2\n1.1.1 he 3\n1.1.2 he 3\n1.2 he 2\n" + "2 he 1\n2.1 he 2\n2.2 he 2\n2.2.1 he 3" ) if keep_stdout else "" diff --git a/tests/test_main.py b/tests/test_main.py index 6eb5552..865053d 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -5,8 +5,8 @@ from pyreball.__main__ import ( _get_output_dir_and_file_stem, _parse_heading_info, + _replace_ids, insert_heading_title_and_toc, - replace_ids, ) @@ -47,9 +47,26 @@ "", ], ), + ( + [ + "", + 'Reference to chapter id123', + '

My Chapter' + '\u00B6

', + 'Reference to chapter id123 again', + "", + ], + [ + "", + 'Reference to chapter My Chapter', + '

My Chapter\u00B6

', + 'Reference to chapter My Chapter again', + "", + ], + ), ], ) -def test_replace_ids(report_before, report_after, tmpdir): +def test__replace_ids(report_before, report_after, tmpdir): report_dir = Path(tmpdir) report_dir.mkdir(parents=True, exist_ok=True) report_path = report_dir / "report.py" @@ -57,7 +74,7 @@ def test_replace_ids(report_before, report_after, tmpdir): with open(report_path, "w") as f: f.write("\n".join(report_before)) - replace_ids(report_path) + _replace_ids(report_path) with open(report_path) as f: result = f.read().split("\n")