diff --git a/src/metldata/builtin_transformations/count_content_values/assumptions.py b/src/metldata/builtin_transformations/count_content_values/assumptions.py new file mode 100644 index 0000000..f5f785e --- /dev/null +++ b/src/metldata/builtin_transformations/count_content_values/assumptions.py @@ -0,0 +1,171 @@ +# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def _is_active(path_element) -> bool:
    """Tell whether a relation path element is of the active type.

    The enum *values* are compared instead of the enum members. The
    RelationPath class of this package builds its elements with the parser
    imported from the `infer_relations` package, so the elements may carry that
    package's RelationPathElementType instead of the one imported in this module.
    Members of different Enum classes never compare equal, even if their values
    match, so a member comparison would classify every element as passive.
    """
    return path_element.type_.value == RelationPathElementType.ACTIVE.value


def check_model_assumptions(
    schema: SchemaPack,
    instructions: list[CountContentValuesInstruction],
) -> None:
    """Check the model assumptions for the count content values transformation.

    For every instruction it is verified that the class to modify exists, that
    the target object path exists and does not yet contain the target property,
    and that all classes and relation properties of the source relation path
    exist in the model.

    NOTE(review): `assert_relational_multiplicity` is not invoked here - confirm
    whether it is meant to be part of these checks once it is finished.

    Args:
        schema: The model to check.
        instructions: The instructions of the transformation.

    Raises:
        ModelAssumptionError:
            if the model does not fulfill the assumptions.
    """
    for instruction in instructions:
        class_name = instruction.class_name
        class_def = schema.classes.get(class_name)

        # Check if the class exists in the model
        if not class_def:
            raise ModelAssumptionError(
                f"Class {class_name} does not exist in the model."
            )

        assert_target_path_assumptions(
            class_def=class_def, class_name=class_name, instruction=instruction
        )
        assert_relation_path_assumptions(
            model=schema, path=instruction.source.relation_path
        )


def assert_target_path_assumptions(
    *,
    class_def: ClassDefinition,
    class_name: str,
    instruction: CountContentValuesInstruction,
) -> None:
    """Assert that the target object path exists in the content schema of the class
    and that the target property does not already exist at that path.

    Args:
        class_def: The definition of the class that shall be modified.
        class_name: The name of that class (only used for error messages).
        instruction: The instruction providing the target content location.

    Raises:
        ModelAssumptionError:
            if the object path cannot be resolved or the property already exists.
    """
    object_path = instruction.target_content.object_path
    property_name = instruction.target_content.property_name

    # check if content schema exists for given object path
    try:
        target_schema = resolve_schema_object_path(
            json_schema=class_def.content.json_schema_dict,
            path=object_path,
        )
    except KeyError as error:
        # Replacement fields are kept on one line: multi-line fields inside a
        # single-quoted f-string are a syntax error before Python 3.12.
        raise ModelAssumptionError(
            f"Target object path {object_path} does not exist in class {class_name}."
        ) from error

    if property_name in target_schema.get("properties", {}):
        raise ModelAssumptionError(
            f"Property {property_name} already exists for object path {object_path}"
            f" in class {class_name}."
        )


def assert_relation_path_assumptions(model: SchemaPack, path: RelationPath):
    """Make sure that all classes and relations defined in the provided path exist in
    the provided model.

    Every element of the path is checked. For active elements the relation
    property must be defined on the source class, for passive elements on the
    target class.

    Raises:
        ModelAssumptionError:
            if the model does not fulfill the assumptions.
    """
    for path_element in path.elements:
        for class_name in (path_element.source, path_element.target):
            if class_name not in model.classes:
                raise ModelAssumptionError(f"Class {class_name} not found in model.")

        # The class that owns the relation property depends on the direction.
        # Not returning after a valid active element ensures that the remaining
        # elements of the path are validated as well.
        owner = path_element.source if _is_active(path_element) else path_element.target

        if path_element.property not in model.classes[owner].relations:
            raise ModelAssumptionError(
                f"Relation property {path_element.property} not found in class"
                f" {owner}."
            )


def _assert_multiplicity(
    *,
    model: SchemaPack,
    class_name: str,
    property_name: str,
    expect_multiple_target: bool,
) -> None:
    """Assert that a relation is multiple at the origin and that its target
    multiplicity matches the expectation.
    """
    multiple = model.classes[class_name].relations[property_name].multiple

    if not multiple.origin or bool(multiple.target) != expect_multiple_target:
        raise ModelAssumptionError(
            f"Relation property {property_name} of class {class_name} has an"
            " unexpected multiplicity: expected origin=True and"
            f" target={expect_multiple_target}, got origin={multiple.origin} and"
            f" target={multiple.target}."
        )


def assert_relational_multiplicity(
    model: SchemaPack, relation_path: RelationPath, content_path: str
) -> None:
    """Check the multiplicity of all relations along the provided path.

    For an active element the relation (defined on the source class) must be
    multiple at both origin and target, and the content schema of the element's
    target class must contain an integer property at `content_path`.
    For a passive element the relation (defined on the target class) must be
    multiple at the origin but not at the target.

    NOTE(review): the content check runs for the target of every active element,
    not only for the final class of the path - confirm that this is intended.
    Classes and relation properties are assumed to exist, i.e.
    `assert_relation_path_assumptions` is expected to have passed before.

    Raises:
        ModelAssumptionError:
            if the model does not fulfill the assumptions.
    """
    for path_element in relation_path.elements:
        if not _is_active(path_element):
            _assert_multiplicity(
                model=model,
                class_name=path_element.target,
                property_name=path_element.property,
                expect_multiple_target=False,
            )
            continue

        _assert_multiplicity(
            model=model,
            class_name=path_element.source,
            property_name=path_element.property,
            expect_multiple_target=True,
        )

        target_content_schema = model.classes[
            path_element.target
        ].content.json_schema_dict

        try:
            target_schema = resolve_schema_object_path(
                json_schema=target_content_schema,
                path=content_path,
            )
        except KeyError as error:
            raise ModelAssumptionError(
                f"{path_element.target} does not contain the property {content_path}"
            ) from error

        # A schema without a "type" keyword is reported as a failed assumption
        # instead of leaking a raw KeyError.
        if target_schema.get("type") != "integer":
            raise ModelAssumptionError(
                f"{content_path} of class {path_element.target} is not an integer"
                " property."
            )
class CountContentValuesConfig(BaseSettings):
    """Config for the count content values transformation.

    It consists of a list of instructions, each being a
    CountContentValuesInstruction.
    """

    # Settings keys that are not declared on this class are rejected.
    model_config = SettingsConfigDict(extra="forbid")

    count_content_values: list[CountContentValuesInstruction] = Field(
        ..., description="A list of instructions for counting content properties."
    )
def add_properties(
    *,
    data: DataPack,
    instructions_by_class: dict[str, list[AddContentPropertyInstruction]],
) -> DataPack:
    """Given a data pack and a dictionary of instructions by class, add the specified
    content properties to the data.

    NOTE(review): this function still operates on AddContentPropertyInstruction
    objects from the `add_content_properties` package and copies a fixed value.
    It looks like a placeholder for the actual counting logic - confirm.

    Args:
        data:
            The datapack to add the content properties to.
        instructions_by_class:
            A dictionary mapping class names to lists of instructions.

    Returns:
        A deep copy of the data with the specified content properties being added.

    Raises:
        EvitableTransformationError:
            if a class has no resources, if the object path of an instruction does
            not resolve to a dict, or if the property already exists in that dict.
    """
    modified_data = data.model_copy(deep=True)

    for class_name, instructions in instructions_by_class.items():
        resources = modified_data.resources.get(class_name)

        if not resources:
            raise EvitableTransformationError()

        for resource in resources.values():
            for instruction in instructions:
                # Named `target_object` to avoid shadowing the builtin `object`.
                target_object = resolve_data_object_path(
                    data=resource.content,
                    path=instruction.target_content.object_path,
                )
                property_name = instruction.target_content.property_name

                if not isinstance(target_object, dict) or property_name in target_object:
                    raise EvitableTransformationError()

                # Copy the value so that resources never share a mutable object.
                target_object[property_name] = deepcopy(instruction.value)

    return modified_data
# NOTE(review): this schema is not referenced by any code of this module -
# presumably a leftover from the transformation this file was derived from.
ADDED_PROPERTY_SCHEMA: Final[dict[str, Any]] = {
    "type": "object",
    "additionalProperties": True,
}


class SourcePaths(BaseSettings):
    """The location of the source values used by a count content values
    instruction.
    """

    # Path of relations between classes of the model, see RelationPath.
    relation_path: RelationPath
    # Path to a property within a content schema; the model assumption checks of
    # this transformation resolve it against the content schema of a class
    # referenced in the relation path.
    content_path: str


class CountContentValuesInstruction(BaseSettings):
    """A single instruction of the count content values transformation.

    It names the class to modify, the location of the new content property
    within that class and the source paths to derive the property from.
    """

    class_name: str = Field(..., description="The name of the class to modify.")
    # Object path and property name of the content property to create.
    target_content: NewContentSchemaPath
    # Relation path and content path the new property is derived from.
    source: SourcePaths
class AddContentPropertiesTransformer(DataTransformer[AddContentPropertiesConfig]):
    """Data transformer that adds content properties as described by its config.

    NOTE(review): although located in the `count_content_values` package, this
    class and all other definitions of this module delegate to the
    `add_content_properties` implementation - confirm that this is a placeholder.
    """

    def transform(self, data: DataPack) -> DataPack:
        """Apply the configured instructions to the provided data.

        Args:
            data: The data as DataPack to be transformed.

        Returns:
            The transformed DataPack as produced by `add_properties`.
        """
        instructions = self._config.instructions_by_class()
        return add_properties(data=data, instructions_by_class=instructions)


def check_model_assumptions_wrapper(
    model: SchemaPack,
    config: AddContentPropertiesConfig,
) -> None:
    """Check that the model is compatible with the configured instructions.

    Raises:
        ModelAssumptionError:
            if the model does not fulfill the assumptions.
    """
    instructions = config.instructions_by_class()
    check_model_assumptions(schema=model, instructions_by_class=instructions)


def transform_model(
    model: SchemaPack, config: AddContentPropertiesConfig
) -> SchemaPack:
    """Return the model transformed according to the configured instructions."""
    instructions = config.instructions_by_class()
    return add_content_properties(model=model, instructions_by_class=instructions)


# Bundles the config class, the assumption check, the model transformation and
# the data transformer into one transformation definition.
ADD_CONTENT_PROPERTIES_TRANSFORMATION = TransformationDefinition[
    AddContentPropertiesConfig
](
    config_cls=AddContentPropertiesConfig,
    check_model_assumptions=check_model_assumptions_wrapper,
    transform_model=transform_model,
    data_transformer_factory=AddContentPropertiesTransformer,
)
def add_counted_values(
    *,
    model: SchemaPack,
    instructions: list[Any],
) -> SchemaPack:
    """Adds a new content property to the provided model.

    NOTE(review): not implemented yet. The function body consists of this
    docstring only, so calling it returns None despite the SchemaPack return
    annotation. Neither `model` nor `instructions` is used.

    Args:
        model: The model to transform.
        instructions: The instructions to apply (element type not yet narrowed).
    """
class RelationPath:
    """A model describing the path of a relation between classes of a metadata model.

    The relation path has two available representations. A string-based ("path_str"
    attribute) and an element-based ("elements" attribute) one.

    In the string-based representation ("path_str" attribute), the first and the last
    word correspond to the name of the source and target class respectively. ">" and "<"
    means active (left class refers to the right one) and passive (the left
    class is referenced by the right one). Parentheses attached to these angles thereby
    indicate the property name of the referencing class. E.g. "class_a(class_b)>class_b"
    means that the source class "class_a" has a property "class_b" that references the
    target class "class_b". Or "class_a<(class_a)class_b" means that the source
    class "class_a" is referenced by the target class "class_b" via its properties
    "class_a". Reference paths can also involve additional classes. E.g. a string of
    "class_a<(class_a)class_b(class_c)>class_c" means that
    a relation from the source class "class_a" to the target class "class_c" can be
    established via an additional class "class_b". Any inserted spaces or newlines will
    be ignored. So the following paths are equivalent:
        - "class_a (class_b)> class_b"
        - "class_a
            (class_b)>
            class_b"

    A relation path consists of one or more elements. An element is a relationship
    between two classes. Reference paths that establish a direct relationship between
    source and target classes without the use of additional classes have only one
    element (e.g. in string representations "class_a(class_b)>class_b" or
    "class_a<(class_a)class_b"). More complex paths consist of multiple elements.
    E.g. the path "class_a<(class_a)class_b(class_c)>class_c" can be decomposed
    into the elements: "class_a<(class_a)class_b" and
    "class_b(class_c)>class_c".

    The elements of a RelationPath are stored in the "elements" attribute as a list
    of RelationPathElement objects that are optimized for programmatic use.

    The "source" attribute provides the source class of the path while the
    "target" attribute provides the target class of the path.
    """

    def __init__(self, *, path_str: str):
        """Construct relation path from a string-based representation.

        The string is cleaned first and then parsed into elements. Errors raised
        by the parser are not handled here and propagate to the caller.
        """
        self.path_str = clean_path_str(path_str=path_str)
        self.elements = path_str_to_object_elements(path_str=self.path_str)
        self.source = self.elements[0].source
        self.target = self.elements[-1].target

    @classmethod
    def validate(cls, value, info: ValidationInfo) -> "RelationPath":
        """A validator for pydantic.

        Instances of this class are passed through unchanged, strings are parsed.

        Raises:
            ValueError: if the value is not a string or not a valid path string.
        """
        if isinstance(value, cls):
            return value

        if not isinstance(value, str):
            raise ValueError("A string is required.")

        try:
            return cls(path_str=value)
        except ValidationError as error:
            # Chain from the caught instance (not from the exception class) so
            # that the original message and traceback are kept as the cause.
            raise ValueError(str(error)) from error

    @classmethod
    def __get_validators__(cls):
        """To get validators for pydantic"""
        yield cls.validate

    @classmethod
    def __get_pydantic_json_schema__(
        cls, field_schema: dict, handler: GetJsonSchemaHandler
    ):
        """Modify the field schema for pydantic.

        NOTE(review): the provided dict is updated in place, `handler` is not
        used and nothing is returned - confirm that this matches the hook
        contract of the pydantic version in use.
        """
        field_schema.update(type="string", pattern=PATH_PATTERN)

    def __hash__(self):
        """Calculate a hash from the cleaned path string."""
        return hash(self.path_str)

    def __eq__(self, other: object):
        """Two relation paths are equal if their cleaned path strings are equal."""
        if not isinstance(other, RelationPath):
            return NotImplemented

        return self.path_str == other.path_str

    def __repr__(self):
        """For representation."""
        return f"RelationPath(path_str='{self.path_str}')"

    def __str__(self):
        """For string representation."""
        return self.path_str
class RelationPathElementType(Enum):
    """The type of RelationPathElements.

    Can be active, meaning the source class is referencing the target class using the
    specified property.
    Or passive, meaning that the source class is referenced by the target class and the
    property is part of the target class.
    """

    ACTIVE = "active"
    PASSIVE = "passive"


class RelationPathElement(BaseModel):
    """A model describing an element of a relation path between classes of a
    metadata model as further explained by the RelationPath.

    An element connects exactly two classes (`source` and `target`) via one
    relation property. `type_` tells which of the two classes holds that property.
    """

    type_: RelationPathElementType = Field(
        ...,
        description=(
            "The type of relation. Active or passive as explained in the"
            + " RelationPathElementType enum."
        ),
    )
    source: str = Field(
        ..., description="The name of the source class that is referencing."
    )
    target: str = Field(
        ..., description="The name of the target class that is referenced."
    )
    property: str = Field(
        ...,
        description=(
            "The name of the property that holds the relation."
            + " In case of a active type, the property is part of the source class."
            + " In case of a passive type, the property is part of the target class."
        ),
    )
# A class or property name: word characters only, not starting with a digit.
NAME_PATTERN = r"(?!\d)\w+"
# "(property)>": the class on the left references the class on the right.
ACTIVE_ARROW_PATTERN = rf"\({NAME_PATTERN}\)>"
# "<(property)": the class on the left is referenced by the class on the right.
PASSIVE_ARROW_PATTERN = rf"<\({NAME_PATTERN}\)"
ARROW_PATTERN = rf"(({ACTIVE_ARROW_PATTERN})|({PASSIVE_ARROW_PATTERN}))"
# One element: source class, arrow, target class.
ELEMENT_PATTERN = rf"{NAME_PATTERN}{ARROW_PATTERN}{NAME_PATTERN}"
# All characters that may occur in a cleaned path string.
PATH_RAW_CHAR_PATTERN = r"^[\w><\(\)]+$"
# A full path: one or more elements chained via shared classes.
PATH_PATTERN = (
    rf"^{NAME_PATTERN}{ARROW_PATTERN}({NAME_PATTERN}{ARROW_PATTERN})*{NAME_PATTERN}$"
)


class ValidationError(RuntimeError):
    """Raised when a path string was invalid"""


def clean_path_str(path_str: str) -> str:
    """Remove all whitespace (spaces, tabs, newlines, etc.) from the path string."""
    return "".join(path_str.split())


def validate_path_str_characters(path_str: str) -> None:
    """Validates that the path str consists of allowed characters only. The path_str
    is assumed to be cleaned.

    Raises:
        ValidationError: if invalid.
    """
    # fullmatch is used because "$" in combination with re.match would also
    # accept a string that ends with a newline.
    if not re.fullmatch(PATH_RAW_CHAR_PATTERN, path_str):
        raise ValidationError(
            f"The following path string contains invalid characters: {path_str}"
        )


def validate_path_str_format(path_str: str) -> None:
    """Validates the format of the path str. The path_str is assumed to be cleaned.

    Raises:
        ValidationError: if invalid.
    """
    if not re.fullmatch(PATH_PATTERN, path_str):
        raise ValidationError(
            f"The following path string has an invalid format: {path_str}"
        )


def validate_string_element(string_element: str) -> None:
    """Validates the format of a string-based path element. The string_element is
    assumed to be cleaned.

    Raises:
        ValidationError: if invalid.
    """
    if not re.fullmatch(ELEMENT_PATTERN, string_element):
        raise ValidationError(
            "The following string-based path element has an invalid format: "
            + string_element
        )


def extract_first_element(*, path_str: str) -> str:
    """Extract the first element of a path_str. The path_str is assumed to be cleaned.

    Raises:
        ValidationError: if no element can be extracted.
    """
    match = re.match(rf"^({ELEMENT_PATTERN}).*$", path_str)

    if not match:
        raise ValidationError(f"Cannot find element in path string: {path_str}")

    return match.group(1)


def get_target_class(*, path_str: str) -> str:
    """Get the target class of a path str. The path_str is assumed to be cleaned.

    Raises:
        ValidationError: if the path string does not end with a class name.
    """
    match = re.match(rf"^.*?({NAME_PATTERN})$", path_str)

    if not match:
        raise ValidationError(f"Cannot find target class of path string: {path_str}")

    return match.group(1)


def split_first_element(*, path_str: str) -> tuple[str, str | None]:
    """Return a tuple of the first element and the remaining path string.
    Thereby, the target class of the first element is set as the source class of the
    remaining path.
    The second item of the tuple is None if the provided path only contained one
    element.
    The path_str is assumed to be cleaned.

    Raises:
        ValidationError: if no element can be extracted from the path string.
    """
    first_element = extract_first_element(path_str=path_str)

    if first_element == path_str:
        return first_element, None

    # The target class of the first element is the source class of the next one,
    # hence it is prepended to the rest of the path.
    first_element_target_class = get_target_class(path_str=first_element)
    remaining_path = path_str[len(first_element) :]

    return first_element, first_element_target_class + remaining_path


def get_string_elements(*, path_str: str) -> list[str]:
    """Decomposes a path string into elements in string representation. The path_str
    is assumed to be cleaned.
    """
    elements: list[str] = []
    remaining_path: str | None = path_str

    # extract one element at a time until nothing is left:
    while remaining_path:
        element, remaining_path = split_first_element(path_str=remaining_path)
        elements.append(element)

    return elements


def get_element_type(*, string_element: str) -> RelationPathElementType:
    """Infers the type of the provided string-based element.

    Raises:
        ValidationError: if the element has an invalid format.
    """
    validate_string_element(string_element)

    return (
        RelationPathElementType.ACTIVE
        if ">" in string_element
        else RelationPathElementType.PASSIVE
    )


def get_element_components(*, string_element: str) -> tuple[str, str, str]:
    """Returns a tuple of the source, the property, and the target of the string-based
    path element. The element is assumed to be valid.
    """
    # remove the angle:
    string_element_cleaned = string_element.replace(">", "").replace("<", "")

    # extract the source:
    source, property_and_target = string_element_cleaned.split("(")

    # extract property and target:
    property_name, target = property_and_target.split(")")

    return source, property_name, target


def string_element_to_object(string_element: str) -> RelationPathElement:
    """Translates a string-based path element into an object-based representation.

    Raises:
        ValidationError: if the element has an invalid format.
    """
    validate_string_element(string_element)
    type_ = get_element_type(string_element=string_element)
    source, property_name, target = get_element_components(
        string_element=string_element
    )

    return RelationPathElement(
        type_=type_, source=source, property=property_name, target=target
    )


def path_str_to_object_elements(path_str: str) -> list[RelationPathElement]:
    """Translates a path string into a list of object-based elements. The path_str is
    assumed to be cleaned.

    Raises:
        ValidationError: if the path string is invalid.
    """
    validate_path_str_characters(path_str=path_str)
    validate_path_str_format(path_str=path_str)

    return [
        string_element_to_object(string_element)
        for string_element in get_string_elements(path_str=path_str)
    ]