From 68da4462495b3768c7ecad6825201cbae9357976 Mon Sep 17 00:00:00 2001 From: Andrew Paseltiner Date: Mon, 17 Jun 2024 12:20:51 -0400 Subject: [PATCH] Use named list for regex substitution of namespace prefixes I profiled se lint because it seemed to be surprisingly slow for a fairly small repository. This revealed that half of the execution time was spent in the _replace_shorthand_namespaces method. For the same repository, changing that method to use a named list (and therefore, only a single call to regex.sub) reduced the time spent in that method from ~15 seconds to ~5 seconds. This optimization is correct, and arguably easier to understand, because a single attribute can only have one prefix. Using a named list also avoids the need to manually escape the prefixes before including them in the regex pattern, which this code was incorrectly skipping. Finally, this change corrects a mistake in that method's documentation, as the : suffix is not retained in the output. --- se/easy_xml.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/se/easy_xml.py b/se/easy_xml.py index 74730a72..b2d8680e 100644 --- a/se/easy_xml.py +++ b/se/easy_xml.py @@ -231,16 +231,13 @@ def _replace_shorthand_namespaces(self, value:str) -> str: shorthand namespaces. Example: - epub:type -> {http://www.idpf.org/2007/ops}:type + epub:type -> {http://www.idpf.org/2007/ops}type """ - output = value - if self.namespaces: - for name, identifier in self.namespaces.items(): - output = regex.sub(fr"^{name}:", f"{{{identifier}}}", output) + value = regex.sub(r"^(\L<ns>):", lambda m: f"{{{self.namespaces[m[1]]}}}", value, ns=self.namespaces.keys()) - return output + return value def to_tag_string(self) -> str: """