From d56c612ad6b18f66c8d24697a11a893fabacf8eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leon=20M=C3=B6ller?= Date: Thu, 5 Oct 2023 18:56:56 +0200 Subject: [PATCH] model._string_constraints: escape unicode characters in errors Escape unicode characters in regular expression patterns and string values for clean error messages. --- basyx/aas/model/_string_constraints.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/basyx/aas/model/_string_constraints.py b/basyx/aas/model/_string_constraints.py index 4973388e8..5282cd5b6 100644 --- a/basyx/aas/model/_string_constraints.py +++ b/basyx/aas/model/_string_constraints.py @@ -29,6 +29,13 @@ AASD130_RE = re.compile("[\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]*") +def _unicode_escape(value: str) -> str: + """ + Escapes unicode characters such as \uD7FF, that may be used in regular expressions, for better error messages. + """ + return value.encode("unicode_escape").decode("utf-8") + + # Functions to verify the constraints for a given value. def check(value: str, type_name: str, min_length: int = 0, max_length: Optional[int] = None, pattern: Optional[re.Pattern] = None) -> None: @@ -37,14 +44,16 @@ def check(value: str, type_name: str, min_length: int = 0, max_length: Optional[ if max_length is not None and len(value) > max_length: raise ValueError(f"{type_name} has a maximum length of {max_length}! (length: {len(value)})") if pattern is not None and not pattern.fullmatch(value): - raise ValueError(f"{type_name} must match the pattern '{pattern.pattern}'! (value: '{value}')") + raise ValueError(f"{type_name} must match the pattern '{_unicode_escape(pattern.pattern)}'! " + f"(value: '{_unicode_escape(value)}')") # Constraint AASd-130 if not AASD130_RE.fullmatch(value): # It's easier to implement this as a ValueError, because otherwise AASConstraintViolation would need to be # imported from `base` and the ConstrainedLangStringSet would need to except AASConstraintViolation errors # as well, while only re-raising ValueErrors. Thus, even if an AASConstraintViolation would be raised here, # in case of a ConstrainedLangStringSet it would be re-raised as a ValueError anyway. - raise ValueError(f"Every string must match the pattern '{AASD130_RE.pattern}'! (value: '{value}')") + raise ValueError(f"Every string must match the pattern '{_unicode_escape(AASD130_RE.pattern)}'! " + f"(value: '{_unicode_escape(value)}')") def check_content_type(value: str, type_name: str = "ContentType") -> None: