diff --git a/se/se_epub_lint.py b/se/se_epub_lint.py
index d380de70..867d403c 100644
--- a/se/se_epub_lint.py
+++ b/se/se_epub_lint.py
@@ -1293,7 +1293,7 @@ def _lint_svg_checks(filename: str, file_contents: str, svg_dom: se.easy_xml.Eas
return messages
-def _lint_special_file_checks(filename: str, dom: se.easy_xml.EasyXmlTree, file_contents: str, ebook_info: dict, special_file: str, self) -> list:
+def _lint_special_file_checks(filename: str, dom: se.easy_xml.EasyXmlTree, file_contents: str, ebook_flags: dict, special_file: str, self) -> list:
"""
Process error checks in “special” .xhtml files
@@ -1301,7 +1301,7 @@ def _lint_special_file_checks(filename: str, dom: se.easy_xml.EasyXmlTree, file_
filename: The name of the file being checked
dom: The dom of the file being checked
file_contents: The contents of the file being checked
- ebook_info: A dictionary containing ebook information
+ ebook_flags: A dictionary of boolean flags describing the ebook, such as whether it has multiple transcriptions, multiple page scans, or other sources
special_file: A string identifying the type of special file being checked
self
@@ -1354,14 +1354,14 @@ def _lint_special_file_checks(filename: str, dom: se.easy_xml.EasyXmlTree, file_
if self.metadata_dom.xpath("/package/metadata/meta[@property='role' and text()='trl']") and "translated from" not in file_contents:
messages.append(LintMessage("m-025", "Translator found in metadata, but no [text]translated from LANG[/] block in colophon.", se.MESSAGE_TYPE_ERROR, filename))
- if ebook_info["has_multiple_transcriptions"] and not dom.xpath("/html/body//a[contains(@href, '#transcriptions')]"):
+ if ebook_flags["has_multiple_transcriptions"] and not dom.xpath("/html/body//a[contains(@href, '#transcriptions')]"):
messages.append(LintMessage("m-074", "Multiple transcriptions found in metadata, but no link to [text]EBOOK_URL#transcriptions[/].", se.MESSAGE_TYPE_ERROR, filename))
- if ebook_info["has_multiple_page_scans"] and not dom.xpath("/html/body//a[contains(@href, '#page-scans')]"):
+ if ebook_flags["has_multiple_page_scans"] and not dom.xpath("/html/body//a[contains(@href, '#page-scans')]"):
messages.append(LintMessage("m-075", "Multiple page scans found in metadata, but no link to [text]EBOOK_URL#page-scans[/].", se.MESSAGE_TYPE_ERROR, filename))
# Check that the formula changed from the default if we added 'various sources'
- if ebook_info["has_multiple_transcriptions"] or ebook_info["has_multiple_page_scans"]:
+ if ebook_flags["has_multiple_transcriptions"] or ebook_flags["has_multiple_page_scans"]:
nodes = dom.xpath("/html/body//a[text() = 'various sources' and not(re:test(preceding-sibling::br[1]/preceding-sibling::node()[1], '(digital scans|transcriptions) from\\s*$'))]")
if nodes:
messages.append(LintMessage("t-072", "[text]various sources[/] link not preceded by [text]from[/].", se.MESSAGE_TYPE_ERROR, filename))
@@ -1407,12 +1407,12 @@ def _lint_special_file_checks(filename: str, dom: se.easy_xml.EasyXmlTree, file_
# Are the sources represented correctly?
# We don't have a standard yet for more than two sources (transcription and scan) so just ignore that case for now.
# We can't merge this with the imprint check because the imprint doesn't have a `<br/>` after `the`
- if not ebook_info["has_multiple_transcriptions"] and not ebook_info["has_other_sources"]:
+ if not ebook_flags["has_multiple_transcriptions"] and not ebook_flags["has_other_sources"]:
for link in source_links:
if "gutenberg.org" in link and f"Project Gutenberg" not in file_contents:
messages.append(LintMessage("m-037", f"Transcription/page scan source link not found. Expected: [xhtml]Project Gutenberg[/].", se.MESSAGE_TYPE_ERROR, filename))
- if not ebook_info["has_multiple_page_scans"] and not ebook_info["has_other_sources"]:
+ if not ebook_flags["has_multiple_page_scans"] and not ebook_flags["has_other_sources"]:
for link in source_links:
if "hathitrust.org" in link and f"the
\n\t\t\tHathiTrust Digital Library" not in file_contents:
messages.append(LintMessage("m-037", f"Transcription/page scan source link not found. Expected: [xhtml]the
HathiTrust Digital Library[/].", se.MESSAGE_TYPE_ERROR, filename))
@@ -1436,25 +1436,25 @@ def _lint_special_file_checks(filename: str, dom: se.easy_xml.EasyXmlTree, file_
if missing_imprint_vars:
messages.append(LintMessage("m-036", "Variable not replaced with value.", se.MESSAGE_TYPE_ERROR, filename, missing_imprint_vars))
- if ebook_info["has_multiple_transcriptions"] and not dom.xpath("/html/body//a[contains(@href, '#transcriptions')]"):
+ if ebook_flags["has_multiple_transcriptions"] and not dom.xpath("/html/body//a[contains(@href, '#transcriptions')]"):
messages.append(LintMessage("m-074", "Multiple transcriptions found in metadata, but no link to [text]EBOOK_URL#transcriptions[/].", se.MESSAGE_TYPE_ERROR, filename))
- if ebook_info["has_multiple_page_scans"] and not dom.xpath("/html/body//a[contains(@href, '#page-scans')]"):
+ if ebook_flags["has_multiple_page_scans"] and not dom.xpath("/html/body//a[contains(@href, '#page-scans')]"):
messages.append(LintMessage("m-075", "Multiple page scans found in metadata, but no link to [text]EBOOK_URL#page-scans[/].", se.MESSAGE_TYPE_ERROR, filename))
# Check that the formula changed from the default if we added 'various sources'
- if ebook_info["has_multiple_transcriptions"] or ebook_info["has_multiple_page_scans"]:
+ if ebook_flags["has_multiple_transcriptions"] or ebook_flags["has_multiple_page_scans"]:
nodes = dom.xpath("/html/body//a[text() = 'various sources' and not(re:test(preceding-sibling::node()[1], '(digital scans|transcriptions) from\\s*$'))]")
if nodes:
messages.append(LintMessage("t-072", "[text]various sources[/] link not preceded by [text]from[/].", se.MESSAGE_TYPE_ERROR, filename))
# Check for correctly named links. We can't merge this with the colophon check because the colophon breaks the line after `the` with `<br/>`
- if not ebook_info["has_multiple_transcriptions"] and not ebook_info["has_other_sources"]:
+ if not ebook_flags["has_multiple_transcriptions"] and not ebook_flags["has_other_sources"]:
for link in source_links:
if "gutenberg.org" in link and f"Project Gutenberg" not in file_contents:
messages.append(LintMessage("m-037", f"Transcription/page scan source link not found. Expected: [xhtml]Project Gutenberg[/].", se.MESSAGE_TYPE_ERROR, filename))
- if not ebook_info["has_multiple_page_scans"] and not ebook_info["has_other_sources"]:
+ if not ebook_flags["has_multiple_page_scans"] and not ebook_flags["has_other_sources"]:
for link in source_links:
if "hathitrust.org" in link and f"the HathiTrust Digital Library" not in file_contents:
messages.append(LintMessage("m-037", f"Transcription/page scan source link not found. Expected: the [xhtml]HathiTrust Digital Library[/].", se.MESSAGE_TYPE_ERROR, filename))
@@ -1667,7 +1667,7 @@ def _lint_xhtml_metadata_checks(filename: str, dom: se.easy_xml.EasyXmlTree) ->
return messages
-def _lint_xhtml_syntax_checks(filename: str, dom: se.easy_xml.EasyXmlTree, self, file_contents: str, ebook_info: dict, language: str) -> list:
+def _lint_xhtml_syntax_checks(filename: str, dom: se.easy_xml.EasyXmlTree, self, file_contents: str, ebook_flags: dict, language: str) -> list:
"""
Helper function used in self.lint()
Process syntax checks on an .xhtml file
@@ -1676,7 +1676,7 @@ def _lint_xhtml_syntax_checks(filename: str, dom: se.easy_xml.EasyXmlTree, self,
filename: The name of the file being checked
dom: A dom tree to check
file_contents: The contents of the file being checked
- ebook_info: A dictionary containing several pieces of information about an ebook
+ ebook_flags: A dictionary of boolean flags describing the ebook, such as whether it has a subtitle
OUTPUTS
A list of LintMessages representing syntax errors found in the file
@@ -2181,7 +2181,7 @@ def _lint_xhtml_syntax_checks(filename: str, dom: se.easy_xml.EasyXmlTree, self,
messages.append(LintMessage("s-086", "[text]Op. Cit.[/] or [text]Loc. Cit.[/] in endnote. Hint: [text]Op. Cit.[/] and [text]Loc. Cit.[/] mean [text]the previous reference[/], which usually doesn’t make sense in a popup endnote. Such references should be expanded.", se.MESSAGE_TYPE_WARNING, filename, [node.to_tag_string() for node in nodes]))
# Check for half title pages missing subtitles
- if ebook_info["has_subtitle"]:
+ if ebook_flags["has_subtitle"]:
# Make sure we exclude `<a>` elements because those appear in the ToC landmarks
nodes = dom.xpath("/html/body//*[name()!='a' and contains(@epub:type, 'halftitlepage') and not(.//*[contains(@epub:type, 'subtitle')])]")
if nodes:
@@ -2252,7 +2252,7 @@ def _lint_xhtml_syntax_checks(filename: str, dom: se.easy_xml.EasyXmlTree, self,
return messages
-def _lint_xhtml_typography_checks(filename: str, dom: se.easy_xml.EasyXmlTree, file_contents: str, special_file: str) -> list:
+def _lint_xhtml_typography_checks(filename: str, dom: se.easy_xml.EasyXmlTree, file_contents: str, special_file: str, ebook_flags: dict) -> list:
"""
Helper function used in self.lint()
Process typography checks on an .xhtml file
@@ -2267,7 +2267,6 @@ def _lint_xhtml_typography_checks(filename: str, dom: se.easy_xml.EasyXmlTree, f
A list of LintMessages representing typography errors found in the file
"""
- has_images = False;
messages = [];
# Check for punctuation outside quotes. We don't check single quotes because contractions are too common.
@@ -2447,7 +2446,7 @@ def _lint_xhtml_typography_checks(filename: str, dom: se.easy_xml.EasyXmlTree, f
img_alt_lacking_punctuation = []
for node in nodes:
if "titlepage.svg" not in node.get_attr("src"):
- has_images = True # Save for a later check
+ ebook_flags["has_images"] = True # Save for a later check
alt = node.get_attr("alt")
@@ -2735,7 +2734,7 @@ def _lint_xhtml_typography_checks(filename: str, dom: se.easy_xml.EasyXmlTree, f
if node_text != expected_text:
messages.append(LintMessage("t-073", f"Possible transcription error in Greek. Found: [text]{node_text}[/], but expected [text]{expected_text}[/text]. Hint: Use [bash]se unicode-names[/] to see differences in Unicode characters.", se.MESSAGE_TYPE_WARNING, filename))
- return (messages, has_images)
+ return (messages)
def _lint_xhtml_xhtml_checks(filename: str, dom: se.easy_xml.EasyXmlTree, file_contents: str) -> list:
"""
@@ -3151,7 +3150,8 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
Check this ebook for some common SE style errors.
INPUTS
- None
+ self
+ skip_lint_ignore: Flag indicating whether the lint ignore file should be skipped
OUTPUTS
A list of LintMessage objects.
@@ -3159,10 +3159,6 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
local_css_path = self.content_path / "css/local.css"
messages: List[LintMessage] = []
- is_titlepage = False
- has_halftitle = False
- has_frontmatter = False
- has_cover_source = False
cover_svg_title = ""
titlepage_svg_title = ""
xhtml_css_classes: Dict[str, int] = {}
@@ -3171,9 +3167,32 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
unused_selectors: List[str] = []
id_attrs: List[str] = []
abbr_elements_requiring_css: List[se.easy_xml.EasyXmlElement] = []
- has_glossary_search_key_map = False
glossary_usage = []
- has_images = False
+ short_story_count = 0
+ missing_styles: List[str] = []
+ directories_not_url_safe = []
+ files_not_url_safe = []
+ id_values = {}
+ duplicate_id_values = []
+ local_css = {
+ "has_poem_stye": False,
+ "has_verse_style": False,
+ "has_song_style": False,
+ "has_hymn_style": False,
+ "has_lyrics_style": False,
+ "has_elision_style": False
+ }
+ ebook_flags = {
+ "has_cover_source": False,
+ "has_frontmatter": False,
+ "has_glossary_search_key_map": False,
+ "has_halftitle": False,
+ "has_subtitle": bool(self.metadata_dom.xpath("/package/metadata/meta[@property='title-type' and text()='subtitle']")),
+ "has_images": False,
+ "has_multiple_transcriptions": False,
+ "has_multiple_page_scans": False,
+ "has_other_sources": False
+ }
# Cache the browser default stylesheet for later use
with importlib_resources.open_text("se.data", "browser.css", encoding="utf-8") as css:
@@ -3213,27 +3232,6 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
local_css_selectors = [regex.sub(r"::[\p{Lowercase_Letter}\-]+", "", selector) for selector in local_css_rules]
unused_selectors = local_css_selectors.copy()
- local_css = {
- "has_poem_stye": False,
- "has_verse_style": False,
- "has_song_style": False,
- "has_hymn_style": False,
- "has_lyrics_style": False,
- "has_elision_style": False
- }
- ebook_info = {
- "has_subtitle": bool(self.metadata_dom.xpath("/package/metadata/meta[@property='title-type' and text()='subtitle']")),
- "has_multiple_transcriptions": False,
- "has_multiple_page_scans": False,
- "has_other_sources": False
- }
- short_story_count = 0
- missing_styles: List[str] = []
- directories_not_url_safe = []
- files_not_url_safe = []
- id_values = {}
- duplicate_id_values = []
-
(css_messages, local_css) = _lint_css_checks(self, local_css, local_css_path, local_css_rules)
if css_messages:
messages = messages + css_messages
@@ -3278,9 +3276,9 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
else:
other_source_count = other_source_count + 1
- ebook_info["has_multiple_transcriptions"] = transcription_source_count >= 2
- ebook_info["has_multiple_page_scans"] = page_scan_source_count >= 2
- ebook_info["has_other_sources"] = other_source_count > 0
+ ebook_flags["has_multiple_transcriptions"] = transcription_source_count >= 2
+ ebook_flags["has_multiple_page_scans"] = page_scan_source_count >= 2
+ ebook_flags["has_other_sources"] = other_source_count > 0
messages = messages + _lint_metadata_checks(self)
@@ -3343,7 +3341,7 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
if filename.stem != "LICENSE":
if filename.stem == "cover.source":
- has_cover_source = True
+ ebook_flags["has_cover_source"] = True
else:
url_safe_filename = se.formatting.make_url_safe(filename.stem) + filename.suffix
if filename.name != url_safe_filename:
@@ -3394,7 +3392,7 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
# Make sure that everything in glossaries are in the rest of the text
# We’ll check the files later, and log any errors at the end
if filename.name == "glossary-search-key-map.xml":
- has_glossary_search_key_map = True
+ ebook_flags["has_glossary_search_key_map"] = True
# Map the glossary to tuples of the values and whether they’re used (initially false)
glossary_usage = list(map(lambda node: (node.get_attr("value"), False), xml_dom.xpath(".//*[@value]")))
@@ -3439,12 +3437,12 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
# Check if this is a frontmatter file, but exclude the titlepage, imprint, and toc
if dom.xpath("/html//*[contains(@epub:type, 'frontmatter') and not(descendant-or-self::*[re:test(@epub:type, '\\b(titlepage|imprint|toc)\\b')])]"):
- has_frontmatter = True
+ ebook_flags["has_frontmatter"] = True
# Do we have a half title?
# Sometimes the half title might not be a section, like in Cane by Jean Toomer
if dom.xpath("/html/body//*[contains(@epub:type, 'halftitlepage')]"):
- has_halftitle = True
+ ebook_flags["has_halftitle"] = True
# Add new CSS classes to global list
if filename.name not in IGNORED_FILENAMES:
@@ -3484,7 +3482,7 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
abbr_elements_requiring_css += dom.xpath("/html/body//abbr[re:test(@epub:type, '\\b(se:temperature|se:era|z3998:acronym)\\b')]")
# Check and log missing glossary keys
- if has_glossary_search_key_map and filename.name not in IGNORED_FILENAMES:
+ if ebook_flags["has_glossary_search_key_map"] and filename.name not in IGNORED_FILENAMES:
source_text = dom.xpath("/html/body")[0].inner_text()
if dom.xpath("/html/body//section[contains(@epub:type, 'glossary')]"):
nodes = dom.xpath("/html/body//dd[contains(@epub:type, 'glossdef')]")
@@ -3513,7 +3511,7 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
special_file = None
if special_file in SPECIAL_FILES:
- messages = messages + _lint_special_file_checks(filename, dom, file_contents, ebook_info, special_file, self)
+ messages = messages + _lint_special_file_checks(filename, dom, file_contents, ebook_flags, special_file, self)
missing_styles = missing_styles + _update_missing_styles(filename, dom, local_css)
@@ -3521,9 +3519,9 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
messages = messages + _lint_xhtml_metadata_checks(filename, dom)
- messages = messages + _lint_xhtml_syntax_checks(filename, dom, self, file_contents, ebook_info, language)
+ messages = messages + _lint_xhtml_syntax_checks(filename, dom, self, file_contents, ebook_flags, language)
- (typography_messages, has_images) = _lint_xhtml_typography_checks(filename, dom, file_contents, special_file)
+ (typography_messages) = _lint_xhtml_typography_checks(filename, dom, file_contents, special_file, ebook_flags)
if typography_messages:
messages = messages + typography_messages
@@ -3534,10 +3532,10 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
if self.cover_path and cover_svg_title != titlepage_svg_title:
messages.append(LintMessage("s-028", f"[path][link=file://{self.cover_path}]{self.cover_path.name}[/][/] and [path][link=file://{self.path / 'images/titlepage.svg'}]titlepage.svg[/][/] [xhtml][/] elements don’t match.", se.MESSAGE_TYPE_ERROR, self.cover_path))
- if has_frontmatter and not has_halftitle:
+ if ebook_flags["has_frontmatter"] and not ebook_flags["has_halftitle"]:
messages.append(LintMessage("s-020", "Frontmatter found, but no half title page. Half title page is required when frontmatter is present.", se.MESSAGE_TYPE_ERROR, self.metadata_file_path))
- if self.is_se_ebook and not has_cover_source:
+ if self.is_se_ebook and not ebook_flags["has_cover_source"]:
missing_files.append("images/cover.source.jpg")
missing_selectors = []
@@ -3659,7 +3657,7 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
if f"[epub|type~=\"{value}\"]" not in self.local_css:
missing_styles.append(element.to_tag_string())
- messages = messages + _lint_image_metadata_checks(self, has_images)
+ messages = messages + _lint_image_metadata_checks(self, ebook_flags["has_images"])
if missing_styles:
messages.append(LintMessage("c-006", f"Semantic found, but missing corresponding style in [path][link=file://{local_css_path}]local.css[/][/].", se.MESSAGE_TYPE_ERROR, local_css_path, set(missing_styles)))
@@ -3676,7 +3674,7 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
if short_story_count and not self.metadata_dom.xpath("//meta[@property='se:subject' and text() = 'Shorts']"):
messages.append(LintMessage("m-027", "[val]se:short-story[/] semantic inflection found, but no [val]se:subject[/] with the value of [text]Shorts[/].", se.MESSAGE_TYPE_ERROR, self.metadata_file_path))
- if has_glossary_search_key_map:
+ if ebook_flags["has_glossary_search_key_map"]:
entries = []
for glossary_value in glossary_usage:
if glossary_value[1] is False: