From 79a86352bb52b1528a9bf9c860331915b4db7556 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leon=20M=C3=B6ller?= Date: Tue, 19 Dec 2023 13:17:49 +0100 Subject: [PATCH 1/2] adapter.xml: verify declared namespaces before deserializing Previously, if the elements of an XML document are part of an unknown namespace, this would lead to cryptic error messages such as: Unexpected top-level list aas:assetAdministrationShells on line 3 where the correct expected element is indeed aas:assetAdministrationShells, leaving the user wondering about what could possibly be wrong. The only difference is the namespace, which isn't part of the error message, because it gets replaced by the prefix. To improve the error messages in this case, add a check that compares the namespaces declared on the document against the ones required by the deserialization, and errors if a required namespace isn't declared. Partially fix https://github.com/eclipse-basyx/basyx-python-sdk/issues/190 --- basyx/aas/adapter/xml/xml_deserialization.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/basyx/aas/adapter/xml/xml_deserialization.py b/basyx/aas/adapter/xml/xml_deserialization.py index e42a9a6f6..73f91ced5 100644 --- a/basyx/aas/adapter/xml/xml_deserialization.py +++ b/basyx/aas/adapter/xml/xml_deserialization.py @@ -49,11 +49,12 @@ import enum from typing import Any, Callable, Dict, IO, Iterable, Optional, Set, Tuple, Type, TypeVar -from .._generic import XML_NS_AAS, MODELLING_KIND_INVERSE, ASSET_KIND_INVERSE, KEY_TYPES_INVERSE, \ +from .._generic import XML_NS_MAP, XML_NS_AAS, MODELLING_KIND_INVERSE, ASSET_KIND_INVERSE, KEY_TYPES_INVERSE, \ ENTITY_TYPES_INVERSE, IEC61360_DATA_TYPES_INVERSE, IEC61360_LEVEL_TYPES_INVERSE, KEY_TYPES_CLASSES_INVERSE, \ REFERENCE_TYPES_INVERSE, DIRECTION_INVERSE, STATE_OF_EVENT_INVERSE, QUALIFIER_KIND_INVERSE NS_AAS = XML_NS_AAS +REQUIRED_NAMESPACES: Set[str] = {XML_NS_MAP["aas"]} logger = logging.getLogger(__name__) @@ -1196,13 +1197,22 @@ def 
_parse_xml_document(file: IO, failsafe: bool = True, **parser_kwargs: Any) - parser = etree.XMLParser(remove_blank_text=True, remove_comments=True, **parser_kwargs) try: - return etree.parse(file, parser).getroot() + root = etree.parse(file, parser).getroot() except etree.XMLSyntaxError as e: if failsafe: logger.error(e) return None raise e + missing_namespaces: Set[str] = REQUIRED_NAMESPACES - set(root.nsmap.values()) + if missing_namespaces: + error_message = f"The following required namespaces are not declared: {' | '.join(missing_namespaces)}" \ + + " - Is the input document of an older version?" + if not failsafe: + raise KeyError(error_message) + logger.error(error_message) + return root + def _select_decoder(failsafe: bool, stripped: bool, decoder: Optional[Type[AASFromXmlDecoder]]) \ -> Type[AASFromXmlDecoder]: From 10c7522c06fa06fa5c7bdffe08d0703945238b37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leon=20M=C3=B6ller?= Date: Tue, 19 Dec 2023 13:37:39 +0100 Subject: [PATCH 2/2] adapter.xml: improve element formatting in error messages Previously, the namespace of an element would always be replaced by its prefix if the prefix is known. However, this turned out to mask errors in case the namespace is different from the one used by our SDK. Thus, the function `_element_pretty_identifier()` is adjusted such that it only replaces the namespace if it matches one of the namespaces known to our SDK. 
Partially fix https://github.com/eclipse-basyx/basyx-python-sdk/issues/190 See also: 79a86352bb52b1528a9bf9c860331915b4db7556 --- basyx/aas/adapter/xml/xml_deserialization.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/basyx/aas/adapter/xml/xml_deserialization.py b/basyx/aas/adapter/xml/xml_deserialization.py index 73f91ced5..f8136a64b 100644 --- a/basyx/aas/adapter/xml/xml_deserialization.py +++ b/basyx/aas/adapter/xml/xml_deserialization.py @@ -98,7 +98,7 @@ def _element_pretty_identifier(element: etree.Element) -> str: Returns a pretty element identifier for a given XML element. If the prefix is known, the namespace in the element tag is replaced by the prefix. - If additionally also the sourceline is known, is is added as a suffix to name. + If additionally also the sourceline is known, it is added as a suffix to name. For example, instead of "{https://admin-shell.io/aas/3/0}assetAdministrationShell" this function would return "aas:assetAdministrationShell on line $line", if both, prefix and sourceline, are known. @@ -107,7 +107,11 @@ def _element_pretty_identifier(element: etree.Element) -> str: """ identifier = element.tag if element.prefix is not None: - identifier = element.prefix + ":" + element.tag.split("}")[1] + # Only replace the namespace by the prefix if it matches our known namespaces, + # so the replacement by the prefix doesn't mask errors such as incorrect namespaces. + namespace, tag = element.tag.split("}", 1) + if namespace[1:] in XML_NS_MAP.values(): + identifier = element.prefix + ":" + tag if element.sourceline is not None: identifier += f" on line {element.sourceline}" return identifier