diff --git a/se/formatting.py b/se/formatting.py
index 50186ed5..966b839d 100644
--- a/se/formatting.py
+++ b/se/formatting.py
@@ -50,31 +50,34 @@ def semanticate(xhtml: str) -> str:
"""
# Some common abbreviations
- xhtml = regex.sub(r"(?]*?\>))Mr\.", r"""Mr.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Mrs\.", r"""Mrs.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Ms\.", r"""Ms.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Dr\.", r"""Dr.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Drs\.", r"""Drs.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Prof\.", r"""Prof.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Rev\.", r"""Rev.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Hon\.", r"""Hon.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Lieut\.", r"""Lieut.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Fr\.", r"""Fr.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Lt\.", r"""Lt.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Capt\.", r"""Capt.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Pvt\.", r"""Pvt.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Esq\.", r"""Esq.""", xhtml)
+ xhtml = regex.sub(r"(?]*?\>))(\L\.)", r"""\1""", xhtml, titles=[
+ "Capt",
+ "Col",
+ "Dr",
+ "Drs",
+ "Esq",
+ "Fr",
+ "Hon",
+ "Lieut",
+ "Lt",
+ "MM",
+ "Mdlle",
+ "Messers",
+ "Messrs",
+ "Mlle",
+ "Mlles",
+ "Mme",
+ "Mmes",
+ "Mon",
+ "Mr",
+ "Mrs",
+ "Ms",
+ "Prof",
+ "Pvt",
+ "Rev",
+ ])
xhtml = regex.sub(r"(?]*?\>))Bros\.", r"Bros.", xhtml)
xhtml = regex.sub(r"(?]*?\>))Mt\.", r"Mt.", xhtml)
- xhtml = regex.sub(r"(?]*?\>))MM\.", r"""MM.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Mme\.", r"""Mme.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Mmes\.", r"""Mmes.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Mon\.", r"""Mon.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Mlle\.", r"""Mlle.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Mdlle\.", r"""Mdlle.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Mlles\.", r"""Mlles.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Messrs\.", r"""Messrs.""", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Messers\.", r"""Messers.""", xhtml)
xhtml = regex.sub(r"(?]*?\>))([Vv])ol(s?)\.", r"\1ol\2.", xhtml)
xhtml = regex.sub(r"(?]*?\>))([Cc])hap\. ([0-9])", r"\1hap. \2", xhtml) # The number allows us to avoid phrases like `Hello, old chap.`
xhtml = regex.sub(r"(?]*?\>)|\.)(P\.(?:P\.)?S\.(?:S\.)?\B)", r"""\1""", xhtml)
@@ -83,7 +86,6 @@ def semanticate(xhtml: str) -> str:
xhtml = regex.sub(r"(?]*?\>))Ltd\.", r"Ltd.", xhtml)
xhtml = regex.sub(r"(?]*?\>))St\.", r"St.", xhtml)
xhtml = regex.sub(r"(?]*?\>))([Gg])ov\.", r"\1ov.", xhtml)
- xhtml = regex.sub(r"(?]*?\>))Col\.", r"""Col.""", xhtml)
xhtml = regex.sub(r"(?]*?\>))MS(S?)\.", r"""MS\1.""", xhtml)
xhtml = regex.sub(r"(?]*?\>))([Vv])iz\.", r"\1iz.", xhtml)
xhtml = regex.sub(r"(?]*?\>))etc\.", r"etc.", xhtml)