Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: XML string formats for normalizedString and token #119

Merged
merged 7 commits into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions docs/customising-structure.rst
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,26 @@ For *Example 3*, you would add the following to your class:

Further examples are available in our :ref:`unit tests <unit-tests>`.

Serializing special XML string types
----------------------------------------------------

In XML, there are special string types, each with a defined set of allowed characters and whitespace handling.
We can handle this by adding the decorator :obj:`serializable.xml_string()` to the appropriate property in your class.

.. code-block:: python

@property
@serializable.xml_string(serializable.XmlStringSerializationType.TOKEN)
def author(self) -> str:
return self._author

Further examples are available in our :ref:`unit tests <unit-tests>`.

.. note::

The actual transformation is done by :func:`serializable.xml.xs_normalizedString()`
and :func:`serializable.xml.xs_token()`

Serialization Views
----------------------------------------------------

Expand Down
112 changes: 100 additions & 12 deletions serializable/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@

from .formatters import BaseNameFormatter, CurrentFormatter
from .helpers import BaseHelper
from .xml import xs_normalizedString, xs_token

# `Intersection` is still not implemented, so it is interim replaced by Union for any support
# see section "Intersection" in https://peps.python.org/pep-0483/
Expand Down Expand Up @@ -128,6 +129,47 @@ class XmlArraySerializationType(Enum):
NESTED = 2


@unique
class XmlStringSerializationType(Enum):
    """
    Enum to differentiate how string-type properties are serialized.

    A member of this enum is what the ``xml_string`` decorator expects as its ``string_type`` argument.
    ``NORMALIZED_STRING`` and ``TOKEN`` each have a transformation function registered in ``__XS_STRING_MODS``;
    ``STRING`` has none, so such values are passed through unmodified.
    """
    # No transformation is registered for this type.
    STRING = 1
    """
    as raw string.
    see https://www.w3.org/TR/xmlschema-2/#string
    """
    # Transformed by ``xs_normalizedString``.
    NORMALIZED_STRING = 2
    """
    as `normalizedString`.
    see http://www.w3.org/TR/xmlschema-2/#normalizedString"""
    # Transformed by ``xs_token``.
    TOKEN = 3
    """
    as `token`.
    see http://www.w3.org/TR/xmlschema-2/#token"""

# unimplemented cases
# - https://www.w3.org/TR/xmlschema-2/#language
# - https://www.w3.org/TR/xmlschema-2/#NMTOKEN
# - https://www.w3.org/TR/xmlschema-2/#Name


# region _xs_string_mod_apply

# Transformation functions for the XML string types that require rewriting a value.
# String types without an entry here are passed through unmodified.
__XS_STRING_MODS: Dict[XmlStringSerializationType, Callable[[str], str]] = {
    XmlStringSerializationType.NORMALIZED_STRING: xs_normalizedString,
    XmlStringSerializationType.TOKEN: xs_token,
}


def _xs_string_mod_apply(v: str, t: Optional[XmlStringSerializationType]) -> str:
    """Apply the transformation registered for XML string type ``t`` to ``v``.

    :param v: the string value to transform
    :param t: the XML string type configured for the property; may be ``None``
    :return: the transformed value, or ``v`` itself when no transformation is registered for ``t``
    """
    transform = __XS_STRING_MODS.get(t)  # type: ignore[arg-type]
    if not transform:
        # nothing registered for this type (or no type given): keep the value as-is
        return v
    return transform(v)


# endregion _xs_string_mod_apply


def _allow_property_for_view(prop_info: 'ObjectMetadataLibrary.SerializableProperty', value_: Any,
view_: Optional[Type[ViewType]]) -> bool:
# First check Property is part of the View is given
Expand Down Expand Up @@ -394,7 +436,8 @@ def as_xml(self: Any, view_: Optional[Type[ViewType]] = None,
elif prop_info.is_enum:
v = v.value

this_e_attributes[_namespace_element_name(new_key, xmlns)] = str(v)
this_e_attributes[_namespace_element_name(new_key, xmlns)] = \
_xs_string_mod_apply(str(v), prop_info.xml_string_config)

element_name = _namespace_element_name(
element_name if element_name else CurrentFormatter.formatter.encode(self.__class__.__name__),
Expand Down Expand Up @@ -426,7 +469,8 @@ def as_xml(self: Any, view_: Optional[Type[ViewType]] = None,
continue

if new_key == '.':
this_e.text = str(v)
this_e.text = _xs_string_mod_apply(str(v),
prop_info.xml_string_config)
continue

if CurrentFormatter.formatter:
Expand All @@ -445,14 +489,16 @@ def as_xml(self: Any, view_: Optional[Type[ViewType]] = None,
nested_e.append(
j.as_xml(view_=view_, as_string=False, element_name=nested_key, xmlns=xmlns))
elif prop_info.is_enum:
SubElement(nested_e, nested_key).text = str(j.value)
SubElement(nested_e, nested_key).text = _xs_string_mod_apply(str(j.value),
prop_info.xml_string_config)
elif prop_info.concrete_type in (float, int):
SubElement(nested_e, nested_key).text = str(j)
elif prop_info.concrete_type is bool:
SubElement(nested_e, nested_key).text = str(j).lower()
else:
# Assume type is str
SubElement(nested_e, nested_key).text = str(j)
SubElement(nested_e, nested_key).text = _xs_string_mod_apply(str(j),
prop_info.xml_string_config)
elif prop_info.custom_type:
if prop_info.is_helper_type():
v_ser = prop_info.custom_type.xml_normalize(
Expand All @@ -462,11 +508,14 @@ def as_xml(self: Any, view_: Optional[Type[ViewType]] = None,
elif isinstance(v_ser, Element):
this_e.append(v_ser)
else:
SubElement(this_e, new_key).text = str(v_ser)
SubElement(this_e, new_key).text = _xs_string_mod_apply(str(v_ser),
prop_info.xml_string_config)
else:
SubElement(this_e, new_key).text = str(prop_info.custom_type(v))
SubElement(this_e, new_key).text = _xs_string_mod_apply(str(prop_info.custom_type(v)),
prop_info.xml_string_config)
elif prop_info.is_enum:
SubElement(this_e, new_key).text = str(v.value)
SubElement(this_e, new_key).text = _xs_string_mod_apply(str(v.value),
prop_info.xml_string_config)
elif not prop_info.is_primitive_type():
global_klass_name = f'{prop_info.concrete_type.__module__}.{prop_info.concrete_type.__name__}'
if global_klass_name in ObjectMetadataLibrary.klass_mappings:
Expand All @@ -475,16 +524,19 @@ def as_xml(self: Any, view_: Optional[Type[ViewType]] = None,
else:
# Handle properties that have a type that is not a Python Primitive (e.g. int, float, str)
if prop_info.string_format:
SubElement(this_e, new_key).text = f'{v:{prop_info.string_format}}'
SubElement(this_e, new_key).text = _xs_string_mod_apply(f'{v:{prop_info.string_format}}',
prop_info.xml_string_config)
else:
SubElement(this_e, new_key).text = str(v)
SubElement(this_e, new_key).text = _xs_string_mod_apply(str(v),
prop_info.xml_string_config)
elif prop_info.concrete_type in (float, int):
SubElement(this_e, new_key).text = str(v)
elif prop_info.concrete_type is bool:
SubElement(this_e, new_key).text = str(v).lower()
else:
# Assume type is str
SubElement(this_e, new_key).text = str(v)
SubElement(this_e, new_key).text = _xs_string_mod_apply(str(v),
prop_info.xml_string_config)

if as_string:
return cast(Element, SafeElementTree.tostring(this_e, 'unicode'))
Expand Down Expand Up @@ -542,6 +594,9 @@ def strip_default_namespace(s: str) -> str:
raise ValueError(f'Non-primitive types not supported from XML Attributes - see {decoded_k} for '
f'{cls.__module__}.{cls.__qualname__} which has Prop Metadata: {prop_info}')

if prop_info.xml_string_config:
v = _xs_string_mod_apply(v, prop_info.xml_string_config)

if prop_info.custom_type and prop_info.is_helper_type():
_data[decoded_k] = prop_info.custom_type.xml_deserialize(v)
elif prop_info.is_enum:
Expand All @@ -555,7 +610,7 @@ def strip_default_namespace(s: str) -> str:
if data.text:
for p, pi in klass_properties.items():
if pi.custom_names.get(SerializationType.XML) == '.':
_data[p] = data.text.strip()
_data[p] = _xs_string_mod_apply(data.text.strip(), pi.xml_string_config)

# Handle Sub-Elements
for child_e in data:
Expand Down Expand Up @@ -594,6 +649,9 @@ def strip_default_namespace(s: str) -> str:
try:
_logger.debug('Handling %s', prop_info)

if child_e.text:
child_e.text = _xs_string_mod_apply(child_e.text, prop_info.xml_string_config)

if prop_info.is_array and prop_info.xml_array_config:
array_type, nested_name = prop_info.xml_array_config

Expand All @@ -602,6 +660,9 @@ def strip_default_namespace(s: str) -> str:

if array_type == XmlArraySerializationType.NESTED:
for sub_child_e in child_e:
if sub_child_e.text:
sub_child_e.text = _xs_string_mod_apply(sub_child_e.text,
prop_info.xml_string_config)
if not prop_info.is_primitive_type() and not prop_info.is_enum:
_data[decoded_k].append(prop_info.concrete_type.from_xml(
data=sub_child_e, default_namespace=default_namespace)
Expand Down Expand Up @@ -675,6 +736,7 @@ class ObjectMetadataLibrary:
_deferred_property_type_parsing: Dict[str, Set['ObjectMetadataLibrary.SerializableProperty']] = {}
_klass_views: Dict[str, Type[ViewType]] = {}
_klass_property_array_config: Dict[str, Tuple[XmlArraySerializationType, str]] = {}
_klass_property_string_config: Dict[str, Optional[XmlStringSerializationType]] = {}
_klass_property_attributes: Set[str] = set()
_klass_property_include_none: Dict[str, Set[Tuple[Type[ViewType], Any]]] = {}
_klass_property_names: Dict[str, Dict[SerializationType, str]] = {}
Expand Down Expand Up @@ -738,12 +800,14 @@ class SerializableProperty:

_DEFAULT_XML_SEQUENCE = 100

def __init__(self, *, prop_name: str, prop_type: Any, custom_names: Dict[SerializationType, str],
def __init__(self, *,
prop_name: str, prop_type: Any, custom_names: Dict[SerializationType, str],
custom_type: Optional[Any] = None,
include_none_config: Optional[Set[Tuple[Type[ViewType], Any]]] = None,
is_xml_attribute: bool = False, string_format_: Optional[str] = None,
views: Optional[Iterable[Type[ViewType]]] = None,
xml_array_config: Optional[Tuple[XmlArraySerializationType, str]] = None,
xml_string_config: Optional[XmlStringSerializationType] = None,
xml_sequence_: Optional[int] = None) -> None:

self._name = prop_name
Expand All @@ -764,6 +828,7 @@ def __init__(self, *, prop_name: str, prop_type: Any, custom_names: Dict[Seriali
self._string_format = string_format_
self._views = set(views or ())
self._xml_array_config = xml_array_config
self._xml_string_config = xml_string_config
self._xml_sequence = xml_sequence_ or self._DEFAULT_XML_SEQUENCE

self._deferred_type_parsing = False
Expand Down Expand Up @@ -834,6 +899,10 @@ def xml_array_config(self) -> Optional[Tuple[XmlArraySerializationType, str]]:
def is_array(self) -> bool:
return self._is_array

@property
def xml_string_config(self) -> Optional[XmlStringSerializationType]:
    """The XML string type given to this property at construction, or ``None`` if none was given."""
    configured_type = self._xml_string_config
    return configured_type

@property
def is_enum(self) -> bool:
return self._is_enum
Expand Down Expand Up @@ -1050,6 +1119,7 @@ def register_klass(cls, klass: Type[_T], custom_name: Optional[str],
string_format_=ObjectMetadataLibrary._klass_property_string_formats.get(qualified_property_name),
views=ObjectMetadataLibrary._klass_property_views.get(qualified_property_name),
xml_array_config=ObjectMetadataLibrary._klass_property_array_config.get(qualified_property_name),
xml_string_config=ObjectMetadataLibrary._klass_property_string_config.get(qualified_property_name),
xml_sequence_=ObjectMetadataLibrary._klass_property_xml_sequence.get(
qualified_property_name,
ObjectMetadataLibrary.SerializableProperty._DEFAULT_XML_SEQUENCE)
Expand Down Expand Up @@ -1117,6 +1187,11 @@ def register_xml_property_array_config(cls, qual_name: str,
array_type: XmlArraySerializationType, child_name: str) -> None:
cls._klass_property_array_config[qual_name] = (array_type, child_name)

@classmethod
def register_xml_property_string_config(cls, qual_name: str,
                                        string_type: Optional[XmlStringSerializationType]) -> None:
    """Store the XML string type for the property identified by ``qual_name``.

    An entry already stored under the same ``qual_name`` is overwritten.

    :param qual_name: key under which the configuration is stored
    :param string_type: the XML string type to store; may be ``None``
    """
    registry = cls._klass_property_string_config
    registry[qual_name] = string_type

@classmethod
def register_xml_property_attribute(cls, qual_name: str) -> None:
cls._klass_property_attributes.add(qual_name)
Expand Down Expand Up @@ -1305,6 +1380,19 @@ def decorate(f: _F) -> _F:
return decorate


def xml_string(string_type: XmlStringSerializationType) -> Callable[[_F], _F]:
    """Decorator: register the XML string type of the decorated callable.

    The given ``string_type`` is stored via :meth:`ObjectMetadataLibrary.register_xml_property_string_config`
    under the key ``<module>.<qualname>`` of the decorated callable. The callable itself is returned unchanged.

    :param string_type: the XML string type to register
    :return: the actual decorator
    """

    def _register_string_type(f: _F) -> _F:
        qualified_name = f'{f.__module__}.{f.__qualname__}'
        _logger.debug('Registering %s.%s as XML StringType: %s', f.__module__, f.__qualname__, string_type)
        ObjectMetadataLibrary.register_xml_property_string_config(
            qual_name=qualified_name, string_type=string_type
        )
        return f

    return _register_string_type


def xml_name(name: str) -> Callable[[_F], _F]:
"""Decorator"""

Expand Down
22 changes: 22 additions & 0 deletions serializable/json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# encoding: utf-8

# This file is part of py-serializable
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) Paul Horton. All Rights Reserved.

"""
JSON-specific functionality.
"""
82 changes: 82 additions & 0 deletions serializable/xml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# encoding: utf-8

# This file is part of py-serializable
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) Paul Horton. All Rights Reserved.

"""
XML-specific functionality.
"""

__all__ = ['xs_normalizedString', 'xs_token']

from re import compile as re_compile

# region normalizedString

# Characters that must not occur in a ``normalizedString``; a CR+LF pair is matched as one unit.
__NORMALIZED_STRING_FORBIDDEN_SEARCH = re_compile(r'\r\n|\t|\n|\r')
__NORMALIZED_STRING_FORBIDDEN_REPLACE = ' '


def xs_normalizedString(s: str) -> str:
    """Convert a string to a ``normalizedString``, adhering XML spec.

    According to the `XML schema spec <http://www.w3.org/TR/xmlschema-2/#normalizedString>`_,
    a *normalizedString* is a string that does not contain the carriage return (#xD), line feed (#xA)
    nor tab (#x9) characters.

    Each tab, line feed or carriage return in ``s`` is replaced by one space (#x20).
    A carriage return directly followed by a line feed is replaced by one single space.
    All other characters are kept unchanged.

    :param s: the string to normalize
    :return: the normalized string
    """
    substitute = __NORMALIZED_STRING_FORBIDDEN_SEARCH.sub
    return substitute(__NORMALIZED_STRING_FORBIDDEN_REPLACE, s)


# endregion

# region token


# Internal runs of two or more spaces are collapsed to one single space.
__TOKEN_MULTISTRING_SEARCH = re_compile(r' {2,}')
__TOKEN_MULTISTRING_REPLACE = ' '


def xs_token(s: str) -> str:
    """Make a ``token``, adhering XML spec.

    .. epigraph::
       *token* represents tokenized strings.
       The `·value space· <https://www.w3.org/TR/xmlschema-2/#dt-value-space>`_ of token is the set of strings that do
       not contain the carriage return (#xD), line feed (#xA) nor tab (#x9) characters, that have no leading or
       trailing spaces (#x20) and that have no internal sequences of two or more spaces.
       The `·lexical space· <https://www.w3.org/TR/xmlschema-2/#dt-lexical-space>`_ of token is the set of strings that
       do not contain the carriage return (#xD), line feed (#xA) nor tab (#x9) characters, that have no leading or
       trailing spaces (#x20) and that have no internal sequences of two or more spaces.
       The `·base type· <https://www.w3.org/TR/xmlschema-2/#dt-basetype>`_ of token is
       `normalizedString <https://www.w3.org/TR/xmlschema-2/#normalizedString>`_.

       -- the `XML schema spec <http://www.w3.org/TR/xmlschema-2/#token>`_

    The value is first turned into a ``normalizedString``, then leading and trailing spaces (#x20) are removed,
    and finally internal sequences of two or more spaces are collapsed to one space.

    :param s: the string to convert
    :return: the tokenized string
    """
    # Strip the space character (#x20) only: the tab, CR and LF characters were already turned into spaces
    # by the normalization. A bare ``strip()`` would also remove other Unicode whitespace (e.g. U+00A0),
    # which is regular content as far as the XML schema spec is concerned and must be preserved.
    normalized = xs_normalizedString(s).strip(' ')
    return __TOKEN_MULTISTRING_SEARCH.sub(__TOKEN_MULTISTRING_REPLACE, normalized)

# endregion
Loading
Loading