Skip to content

Commit

Permalink
Propagate non-block markup from figcaption
Browse files: browse the repository at this point in the history
  • Loading branch information
apasel422 committed Jun 28, 2024
1 parent 80df8ae commit 0a9b3cd
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 4 deletions.
21 changes: 18 additions & 3 deletions se/se_epub.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""

import base64
from copy import deepcopy
import datetime
import os
from pathlib import Path
Expand All @@ -16,6 +17,7 @@
import regex

import se
import se.css
import se.easy_xml
import se.formatting
import se.images
Expand Down Expand Up @@ -932,19 +934,32 @@ def generate_loi(self, loi_dom: se.easy_xml.EasyXmlTree) -> None:
for figure in dom.xpath("/html/body//figure[@id and img]"):
figure_id = figure.get_attr("id")

text = (figure.xpath("./img")[0].get_attr("alt") or "").strip()
entry = (figure.xpath("./img")[0].get_attr("alt") or "").strip()

figcaption = figure.xpath("./figcaption")
if figcaption:
figcaption_text = figcaption[0].inner_text()
# The alt text is probably more useful to the reader in this case.
if figcaption_text and not regex.search(r"^[Ff]igure\s+\d+$", figcaption_text):
text = figcaption_text
has_block = False
for tag in se.css.CSS_BLOCK_ELEMENTS:
if figcaption[0].xpath(f"./{tag}"):
has_block = True
break

# Try to retain semantic phrasing structure.
if not has_block:
entry = deepcopy(figcaption[0])

a = se.easy_xml.EasyXmlElement("<a/>")
a.set_text(text or f"Unable to auto-generate LoI text for #{figure_id}.")
a.set_attr("href", f"{file_path.name}#{figure_id}")

if isinstance(entry, str):
a.set_text(entry or f"Unable to auto-generate LoI text for #{figure_id}.")
else:
a.append(entry)
entry.unwrap()

p = se.easy_xml.EasyXmlElement("<p/>")
p.append(a)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@
<img alt="Alt is preferred over Figure N figcaption."/>
<figcaption> <b> Figure 8 </b> </figcaption>
</figure>

<figure id="f-9">
<img alt="Alt is preferred over figcaption with block element."/>
<figcaption><h1>Figure 9</h1></figcaption>
</figure>
</section>
</body>
</html>
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
</li>
<li>
<p>
<a href="chapter-1.xhtml#f-6">Text comes from figcaption inner_text.</a>
<a href="chapter-1.xhtml#f-6">Text comes from <b>figcaption</b> inner_text.</a>
</p>
</li>
<li>
Expand All @@ -45,6 +45,11 @@
<a href="chapter-1.xhtml#f-8">Alt is preferred over Figure N figcaption.</a>
</p>
</li>
<li>
<p>
<a href="chapter-1.xhtml#f-9">Alt is preferred over figcaption with block element.</a>
</p>
</li>
<li>
<p>
<a href="chapter-2.xhtml#f-0">Unable to auto-generate LoI text for #f-0.</a>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@
<img alt="Alt is preferred over Figure N figcaption."/>
<figcaption> <b> Figure 8 </b> </figcaption>
</figure>

<figure id="f-9">
<img alt="Alt is preferred over figcaption with block element."/>
<figcaption><h1>Figure 9</h1></figcaption>
</figure>
</section>
</body>
</html>

0 comments on commit 0a9b3cd

Please sign in to comment.