From 68da4462495b3768c7ecad6825201cbae9357976 Mon Sep 17 00:00:00 2001
From: Andrew Paseltiner <apaseltiner@gmail.com>
Date: Mon, 17 Jun 2024 12:20:51 -0400
Subject: [PATCH] Use named list for regex substitution of namespace prefixes

I profiled `se lint` because it seemed surprisingly slow for a fairly
small repository. The profile showed that half of the execution time
was spent in the _replace_shorthand_namespaces method.

For the same repository, changing that method to use a named list (and
therefore only a single call to regex.sub) reduced the time spent in
that method from ~15 seconds to ~5 seconds. This optimization is
correct, and arguably easier to understand, because the pattern is
anchored at the start of the value and a single attribute can only have
one prefix, so at most one namespace can ever match.

Using a named list also avoids the need to manually escape the prefixes
before including them in the regex pattern, a step the previous code
incorrectly skipped.

Finally, this change corrects a mistake in that method's documentation:
the `:` separator after the prefix is not retained in the output.
---
 se/easy_xml.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/se/easy_xml.py b/se/easy_xml.py
index 74730a72..b2d8680e 100644
--- a/se/easy_xml.py
+++ b/se/easy_xml.py
@@ -231,16 +231,13 @@ def _replace_shorthand_namespaces(self, value:str) -> str:
 		shorthand namespaces.
 
 		Example:
-		epub:type -> {http://www.idpf.org/2007/ops}:type
+		epub:type -> {http://www.idpf.org/2007/ops}type
 		"""
 
-		output = value
-
 		if self.namespaces:
-			for name, identifier in self.namespaces.items():
-				output = regex.sub(fr"^{name}:", f"{{{identifier}}}", output)
+			value = regex.sub(r"^(\L<ns>):", lambda m: f"{{{self.namespaces[m[1]]}}}", value, ns=self.namespaces.keys())
 
-		return output
+		return value
 
 	def to_tag_string(self) -> str:
 		"""