Rename ebook_info to ebook_flags, consolidate several other flags into it
standardebooks · Sep 5, 2023 · c0091af · c0091af
1 parent 4896285
commit c0091af
Showing 1 changed file with 60 additions and 62 deletions.
diff --git a/se/se_epub_lint.py b/se/se_epub_lint.py
@@ -1293,15 +1293,15 @@ def _lint_svg_checks(filename: str, file_contents: str, svg_dom: se.easy_xml.Eas
 
 	return messages
 
-def _lint_special_file_checks(filename: str, dom: se.easy_xml.EasyXmlTree, file_contents: str, ebook_info: dict, special_file: str, self) -> list:
+def _lint_special_file_checks(filename: str, dom: se.easy_xml.EasyXmlTree, file_contents: str, ebook_flags: dict, special_file: str, self) -> list:
 	"""
 	Process error checks in “special” .xhtml files
 
 	INPUTS
 	filename: The name of the file being checked
 	dom: The dom of the file being checked
 	file_contents: The contents of the file being checked
-	ebook_info: A dictionary containing ebook information
+	ebook_flags: A dictionary containing ebook information
 	special_file: A string identifying the type of special file being checked
 	self
 
@@ -1354,14 +1354,14 @@ def _lint_special_file_checks(filename: str, dom: se.easy_xml.EasyXmlTree, file_
 		if self.metadata_dom.xpath("/package/metadata/meta[@property='role' and text()='trl']") and "translated from" not in file_contents:
 			messages.append(LintMessage("m-025", "Translator found in metadata, but no [text]translated from LANG[/] block in colophon.", se.MESSAGE_TYPE_ERROR, filename))
 
-		if ebook_info["has_multiple_transcriptions"] and not dom.xpath("/html/body//a[contains(@href, '#transcriptions')]"):
+		if ebook_flags["has_multiple_transcriptions"] and not dom.xpath("/html/body//a[contains(@href, '#transcriptions')]"):
 			messages.append(LintMessage("m-074", "Multiple transcriptions found in metadata, but no link to [text]EBOOK_URL#transcriptions[/].", se.MESSAGE_TYPE_ERROR, filename))
 
-		if ebook_info["has_multiple_page_scans"] and not dom.xpath("/html/body//a[contains(@href, '#page-scans')]"):
+		if ebook_flags["has_multiple_page_scans"] and not dom.xpath("/html/body//a[contains(@href, '#page-scans')]"):
 			messages.append(LintMessage("m-075", "Multiple page scans found in metadata, but no link to [text]EBOOK_URL#page-scans[/].", se.MESSAGE_TYPE_ERROR, filename))
 
 		# Check that the formula changed from the default if we added 'various sources'
-		if ebook_info["has_multiple_transcriptions"] or ebook_info["has_multiple_page_scans"]:
+		if ebook_flags["has_multiple_transcriptions"] or ebook_flags["has_multiple_page_scans"]:
 			nodes = dom.xpath("/html/body//a[text() = 'various sources' and not(re:test(preceding-sibling::br[1]/preceding-sibling::node()[1], '(digital scans|transcriptions) from\\s*$'))]")
 			if nodes:
 				messages.append(LintMessage("t-072", "[text]various sources[/] link not preceded by [text]from[/].", se.MESSAGE_TYPE_ERROR, filename))
@@ -1407,12 +1407,12 @@ def _lint_special_file_checks(filename: str, dom: se.easy_xml.EasyXmlTree, file_
 		# Are the sources represented correctly?
 		# We don't have a standard yet for more than two sources (transcription and scan) so just ignore that case for now.
 		# We can't merge this with the imprint check because imprint doesn't have `<br/>` between `the`
-		if not ebook_info["has_multiple_transcriptions"] and not ebook_info["has_other_sources"]:
+		if not ebook_flags["has_multiple_transcriptions"] and not ebook_flags["has_other_sources"]:
 			for link in source_links:
 				if "gutenberg.org" in link and f"<a href=\"{link}\">Project Gutenberg</a>" not in file_contents:
 					messages.append(LintMessage("m-037", f"Transcription/page scan source link not found. Expected: [xhtml]<a href=\"{link}\">Project Gutenberg</a>[/].", se.MESSAGE_TYPE_ERROR, filename))
 
-		if not ebook_info["has_multiple_page_scans"] and not ebook_info["has_other_sources"]:
+		if not ebook_flags["has_multiple_page_scans"] and not ebook_flags["has_other_sources"]:
 			for link in source_links:
 				if "hathitrust.org" in link and f"the<br/>\n\t\t\t<a href=\"{link}\">HathiTrust Digital Library</a>" not in file_contents:
 					messages.append(LintMessage("m-037", f"Transcription/page scan source link not found. Expected: [xhtml]the<br/> <a href=\"{link}\">HathiTrust Digital Library</a>[/].", se.MESSAGE_TYPE_ERROR, filename))
@@ -1436,25 +1436,25 @@ def _lint_special_file_checks(filename: str, dom: se.easy_xml.EasyXmlTree, file_
 		if missing_imprint_vars:
 			messages.append(LintMessage("m-036", "Variable not replaced with value.", se.MESSAGE_TYPE_ERROR, filename, missing_imprint_vars))
 
-		if ebook_info["has_multiple_transcriptions"] and not dom.xpath("/html/body//a[contains(@href, '#transcriptions')]"):
+		if ebook_flags["has_multiple_transcriptions"] and not dom.xpath("/html/body//a[contains(@href, '#transcriptions')]"):
 			messages.append(LintMessage("m-074", "Multiple transcriptions found in metadata, but no link to [text]EBOOK_URL#transcriptions[/].", se.MESSAGE_TYPE_ERROR, filename))
 
-		if ebook_info["has_multiple_page_scans"] and not dom.xpath("/html/body//a[contains(@href, '#page-scans')]"):
+		if ebook_flags["has_multiple_page_scans"] and not dom.xpath("/html/body//a[contains(@href, '#page-scans')]"):
 			messages.append(LintMessage("m-075", "Multiple page scans found in metadata, but no link to [text]EBOOK_URL#page-scans[/].", se.MESSAGE_TYPE_ERROR, filename))
 
 		# Check that the formula changed from the default if we added 'various sources'
-		if ebook_info["has_multiple_transcriptions"] or ebook_info["has_multiple_page_scans"]:
+		if ebook_flags["has_multiple_transcriptions"] or ebook_flags["has_multiple_page_scans"]:
 			nodes = dom.xpath("/html/body//a[text() = 'various sources' and not(re:test(preceding-sibling::node()[1], '(digital scans|transcriptions) from\\s*$'))]")
 			if nodes:
 				messages.append(LintMessage("t-072", "[text]various sources[/] link not preceded by [text]from[/].", se.MESSAGE_TYPE_ERROR, filename))
 
 		# Check for correctly named links. We can't merge this with the colophon check because the colophon breaks `the` with `<br/>`
-		if not ebook_info["has_multiple_transcriptions"] and not ebook_info["has_other_sources"]:
+		if not ebook_flags["has_multiple_transcriptions"] and not ebook_flags["has_other_sources"]:
 			for link in source_links:
 				if "gutenberg.org" in link and f"<a href=\"{link}\">Project Gutenberg</a>" not in file_contents:
 					messages.append(LintMessage("m-037", f"Transcription/page scan source link not found. Expected: [xhtml]<a href=\"{link}\">Project Gutenberg</a>[/].", se.MESSAGE_TYPE_ERROR, filename))
 
-		if not ebook_info["has_multiple_page_scans"] and not ebook_info["has_other_sources"]:
+		if not ebook_flags["has_multiple_page_scans"] and not ebook_flags["has_other_sources"]:
 			for link in source_links:
 				if "hathitrust.org" in link and f"the <a href=\"{link}\">HathiTrust Digital Library</a>" not in file_contents:
 					messages.append(LintMessage("m-037", f"Transcription/page scan source link not found. Expected: the [xhtml]<a href=\"{link}\">HathiTrust Digital Library</a>[/].", se.MESSAGE_TYPE_ERROR, filename))
@@ -1667,7 +1667,7 @@ def _lint_xhtml_metadata_checks(filename: str, dom: se.easy_xml.EasyXmlTree) ->
 
 	return messages
 
-def _lint_xhtml_syntax_checks(filename: str, dom: se.easy_xml.EasyXmlTree, self, file_contents: str, ebook_info: dict, language: str) -> list:
+def _lint_xhtml_syntax_checks(filename: str, dom: se.easy_xml.EasyXmlTree, self, file_contents: str, ebook_flags: dict, language: str) -> list:
 	"""
 	Helper function used in self.lint()
 	Process syntax checks on an .xhtml file
@@ -1676,7 +1676,7 @@ def _lint_xhtml_syntax_checks(filename: str, dom: se.easy_xml.EasyXmlTree, self,
 	filename: The name of the file being checked
 	dom: A dom tree to check
 	file_contents: The contents of the file being checked
-	ebook_info: A dictionary containing several pieces of information about an ebook
+	ebook_flags: A dictionary containing several pieces of information about an ebook
 
 	OUTPUTS
 	A list of LintMessages representing syntax errors found in the file
@@ -2181,7 +2181,7 @@ def _lint_xhtml_syntax_checks(filename: str, dom: se.easy_xml.EasyXmlTree, self,
 		messages.append(LintMessage("s-086", "[text]Op. Cit.[/] or [text]Loc. Cit.[/] in endnote. Hint: [text]Op. Cit.[/] and [text]Loc. Cit.[/] mean [text]the previous reference[/], which usually doesn’t make sense in a popup endnote. Such references should be expanded.", se.MESSAGE_TYPE_WARNING, filename, [node.to_tag_string() for node in nodes]))
 
 	# Check for half title pages missing subtitles
-	if ebook_info["has_subtitle"]:
+	if ebook_flags["has_subtitle"]:
 		# Make sure we exclude <a> because that appears in the ToC landmarks
 		nodes = dom.xpath("/html/body//*[name()!='a' and contains(@epub:type, 'halftitlepage') and not(.//*[contains(@epub:type, 'subtitle')])]")
 		if nodes:
@@ -2252,7 +2252,7 @@ def _lint_xhtml_syntax_checks(filename: str, dom: se.easy_xml.EasyXmlTree, self,
 
 	return messages
 
-def _lint_xhtml_typography_checks(filename: str, dom: se.easy_xml.EasyXmlTree, file_contents: str, special_file: str) -> list:
+def _lint_xhtml_typography_checks(filename: str, dom: se.easy_xml.EasyXmlTree, file_contents: str, special_file: str, ebook_flags: dict) -> list:
 	"""
 	Helper function used in self.lint()
 	Process typography checks on an .xhtml file
@@ -2267,7 +2267,6 @@ def _lint_xhtml_typography_checks(filename: str, dom: se.easy_xml.EasyXmlTree, f
 	A list of LintMessages representing typography errors found in the file
 	"""
 
-	has_images = False;
 	messages = [];
 
 	# Check for punctuation outside quotes. We don't check single quotes because contractions are too common.
@@ -2447,7 +2446,7 @@ def _lint_xhtml_typography_checks(filename: str, dom: se.easy_xml.EasyXmlTree, f
 	img_alt_lacking_punctuation = []
 	for node in nodes:
 		if "titlepage.svg" not in node.get_attr("src"):
-			has_images = True # Save for a later check
+			ebook_flags["has_images"] = True # Save for a later check
 
 		alt = node.get_attr("alt")
 
@@ -2735,7 +2734,7 @@ def _lint_xhtml_typography_checks(filename: str, dom: se.easy_xml.EasyXmlTree, f
 		if node_text != expected_text:
 			messages.append(LintMessage("t-073", f"Possible transcription error in Greek. Found: [text]{node_text}[/], but expected [text]{expected_text}[/text]. Hint: Use [bash]se unicode-names[/] to see differences in Unicode characters.", se.MESSAGE_TYPE_WARNING, filename))
 
-	return (messages, has_images)
+	return (messages)
 
 def _lint_xhtml_xhtml_checks(filename: str, dom: se.easy_xml.EasyXmlTree, file_contents: str) -> list:
 	"""
@@ -3151,18 +3150,15 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
 	Check this ebook for some common SE style errors.
 
 	INPUTS
-	None
+	self
+	skip_lint_ignore: Flag indicating whether ignore file should be used
 
 	OUTPUTS
 	A list of LintMessage objects.
 	"""
 
 	local_css_path = self.content_path / "css/local.css"
 	messages: List[LintMessage] = []
-	is_titlepage = False
-	has_halftitle = False
-	has_frontmatter = False
-	has_cover_source = False
 	cover_svg_title = ""
 	titlepage_svg_title = ""
 	xhtml_css_classes: Dict[str, int] = {}
@@ -3171,9 +3167,32 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
 	unused_selectors: List[str] = []
 	id_attrs: List[str] = []
 	abbr_elements_requiring_css: List[se.easy_xml.EasyXmlElement] = []
-	has_glossary_search_key_map = False
 	glossary_usage = []
-	has_images = False
+	short_story_count = 0
+	missing_styles: List[str] = []
+	directories_not_url_safe = []
+	files_not_url_safe = []
+	id_values = {}
+	duplicate_id_values = []
+	local_css = {
+		"has_poem_stye": False,
+		"has_verse_style": False,
+		"has_song_style": False,
+		"has_hymn_style": False,
+		"has_lyrics_style": False,
+		"has_elision_style": False
+	}
+	ebook_flags = {
+		"has_cover_source": False,
+		"has_frontmatter": False,
+		"has_glossary_search_key_map": False,
+		"has_halftitle": False,
+		"has_subtitle": bool(self.metadata_dom.xpath("/package/metadata/meta[@property='title-type' and text()='subtitle']")),
+		"has_images": False,
+		"has_multiple_transcriptions": False,
+		"has_multiple_page_scans": False,
+		"has_other_sources": False
+	}
 
 	# Cache the browser default stylesheet for later use
 	with importlib_resources.open_text("se.data", "browser.css", encoding="utf-8") as css:
@@ -3213,27 +3232,6 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
 	local_css_selectors = [regex.sub(r"::[\p{Lowercase_Letter}\-]+", "", selector) for selector in local_css_rules]
 	unused_selectors = local_css_selectors.copy()
 
-	local_css = {
-		"has_poem_stye": False,
-		"has_verse_style": False,
-		"has_song_style": False,
-		"has_hymn_style": False,
-		"has_lyrics_style": False,
-		"has_elision_style": False
-	}
-	ebook_info = {
-		"has_subtitle": bool(self.metadata_dom.xpath("/package/metadata/meta[@property='title-type' and text()='subtitle']")),
-		"has_multiple_transcriptions": False,
-		"has_multiple_page_scans": False,
-		"has_other_sources": False
-	}
-	short_story_count = 0
-	missing_styles: List[str] = []
-	directories_not_url_safe = []
-	files_not_url_safe = []
-	id_values = {}
-	duplicate_id_values = []
-
 	(css_messages, local_css) = _lint_css_checks(self, local_css, local_css_path, local_css_rules)
 	if css_messages:
 		messages = messages + css_messages
@@ -3278,9 +3276,9 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
 		else:
 			other_source_count = other_source_count + 1
 
-	ebook_info["has_multiple_transcriptions"] = transcription_source_count >= 2
-	ebook_info["has_multiple_page_scans"] = page_scan_source_count >= 2
-	ebook_info["has_other_sources"] = other_source_count > 0
+	ebook_flags["has_multiple_transcriptions"] = transcription_source_count >= 2
+	ebook_flags["has_multiple_page_scans"] = page_scan_source_count >= 2
+	ebook_flags["has_other_sources"] = other_source_count > 0
 
 	messages = messages + _lint_metadata_checks(self)
 
@@ -3343,7 +3341,7 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
 
 			if filename.stem != "LICENSE":
 				if filename.stem == "cover.source":
-					has_cover_source = True
+					ebook_flags["has_cover_source"] = True
 				else:
 					url_safe_filename = se.formatting.make_url_safe(filename.stem) + filename.suffix
 					if filename.name != url_safe_filename:
@@ -3394,7 +3392,7 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
 				# Make sure that everything in glossaries are in the rest of the text
 				# We’ll check the files later, and log any errors at the end
 				if filename.name == "glossary-search-key-map.xml":
-					has_glossary_search_key_map = True
+					ebook_flags["has_glossary_search_key_map"] = True
 					# Map the glossary to tuples of the values and whether they’re used (initially false)
 					glossary_usage = list(map(lambda node: (node.get_attr("value"), False), xml_dom.xpath(".//*[@value]")))
 
@@ -3439,12 +3437,12 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
 
 				# Check if this is a frontmatter file, but exclude the titlepage, imprint, and toc
 				if dom.xpath("/html//*[contains(@epub:type, 'frontmatter') and not(descendant-or-self::*[re:test(@epub:type, '\\b(titlepage|imprint|toc)\\b')])]"):
-					has_frontmatter = True
+					ebook_flags["has_frontmatter"] = True
 
 				# Do we have a half title?
 				# Sometimes the half title might not be a section, like in Cane by Jean Toomer
 				if dom.xpath("/html/body//*[contains(@epub:type, 'halftitlepage')]"):
-					has_halftitle = True
+					ebook_flags["has_halftitle"] = True
 
 				# Add new CSS classes to global list
 				if filename.name not in IGNORED_FILENAMES:
@@ -3484,7 +3482,7 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
 					abbr_elements_requiring_css += dom.xpath("/html/body//abbr[re:test(@epub:type, '\\b(se:temperature|se:era|z3998:acronym)\\b')]")
 
 				# Check and log missing glossary keys
-				if has_glossary_search_key_map and filename.name not in IGNORED_FILENAMES:
+				if ebook_flags["has_glossary_search_key_map"] and filename.name not in IGNORED_FILENAMES:
 					source_text = dom.xpath("/html/body")[0].inner_text()
 					if dom.xpath("/html/body//section[contains(@epub:type, 'glossary')]"):
 						nodes = dom.xpath("/html/body//dd[contains(@epub:type, 'glossdef')]")
@@ -3513,17 +3511,17 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
 					special_file = None
 
 				if special_file in SPECIAL_FILES:
-					messages = messages + _lint_special_file_checks(filename, dom, file_contents, ebook_info, special_file, self)
+					messages = messages + _lint_special_file_checks(filename, dom, file_contents, ebook_flags, special_file, self)
 
 				missing_styles = missing_styles + _update_missing_styles(filename, dom, local_css)
 
 				messages = messages + _lint_xhtml_css_checks(filename, dom, local_css_path)
 
 				messages = messages + _lint_xhtml_metadata_checks(filename, dom)
 
-				messages = messages + _lint_xhtml_syntax_checks(filename, dom, self, file_contents, ebook_info, language)
+				messages = messages + _lint_xhtml_syntax_checks(filename, dom, self, file_contents, ebook_flags, language)
 
-				(typography_messages, has_images) = _lint_xhtml_typography_checks(filename, dom, file_contents, special_file)
+				(typography_messages) = _lint_xhtml_typography_checks(filename, dom, file_contents, special_file, ebook_flags)
 				if typography_messages:
 					messages = messages + typography_messages
 
@@ -3534,10 +3532,10 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
 	if self.cover_path and cover_svg_title != titlepage_svg_title:
 		messages.append(LintMessage("s-028", f"[path][link=file://{self.cover_path}]{self.cover_path.name}[/][/] and [path][link=file://{self.path / 'images/titlepage.svg'}]titlepage.svg[/][/] [xhtml]<title>[/] elements don’t match.", se.MESSAGE_TYPE_ERROR, self.cover_path))
 
-	if has_frontmatter and not has_halftitle:
+	if ebook_flags["has_frontmatter"] and not ebook_flags["has_halftitle"]:
 		messages.append(LintMessage("s-020", "Frontmatter found, but no half title page. Half title page is required when frontmatter is present.", se.MESSAGE_TYPE_ERROR, self.metadata_file_path))
 
-	if self.is_se_ebook and not has_cover_source:
+	if self.is_se_ebook and not ebook_flags["has_cover_source"]:
 		missing_files.append("images/cover.source.jpg")
 
 	missing_selectors = []
@@ -3659,7 +3657,7 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
 			if f"[epub|type~=\"{value}\"]" not in self.local_css:
 				missing_styles.append(element.to_tag_string())
 
-	messages = messages + _lint_image_metadata_checks(self, has_images)
+	messages = messages + _lint_image_metadata_checks(self, ebook_flags["has_images"])
 
 	if missing_styles:
 		messages.append(LintMessage("c-006", f"Semantic found, but missing corresponding style in [path][link=file://{local_css_path}]local.css[/][/].", se.MESSAGE_TYPE_ERROR, local_css_path, set(missing_styles)))
@@ -3676,7 +3674,7 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
 	if short_story_count and not self.metadata_dom.xpath("//meta[@property='se:subject' and text() = 'Shorts']"):
 		messages.append(LintMessage("m-027", "[val]se:short-story[/] semantic inflection found, but no [val]se:subject[/] with the value of [text]Shorts[/].", se.MESSAGE_TYPE_ERROR, self.metadata_file_path))
 
-	if has_glossary_search_key_map:
+	if ebook_flags["has_glossary_search_key_map"]:
 		entries = []
 		for glossary_value in glossary_usage:
 			if glossary_value[1] is False: