word-count: Don't count no-break-hyphens as a word boundary

standardebooks · May 11, 2024 · f33a86c · f33a86c
1 parent 26bc937
commit f33a86c
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/se/formatting.py b/se/formatting.py
@@ -363,7 +363,7 @@ def get_word_count(xhtml: str) -> int:
 	xhtml = regex.sub(r"[…–—― ‘’“”\{\}\(\)]", " ", xhtml, flags=regex.IGNORECASE | regex.DOTALL)
 
 	# Remove word-connecting dashes, apostrophes, commas, and slashes (and/or), they count as a word boundary but they shouldn't
-	xhtml = regex.sub(r"[\p{Letter}0-9][\-\'\,\.\/][\p{Letter}0-9]", "aa", xhtml, flags=regex.IGNORECASE | regex.DOTALL)
+	xhtml = regex.sub(fr"[\p{{Letter}}0-9][\-\'\,\.\/{se.NO_BREAK_HYPHEN}{se.SHY_HYPHEN}][\p{{Letter}}0-9]", "aa", xhtml, flags=regex.IGNORECASE | regex.DOTALL)
 
 	# Replace sequential spaces with one space
 	xhtml = regex.sub(r"\s+", " ", xhtml, flags=regex.IGNORECASE | regex.DOTALL)