generated from ghga-de/microservice-repository-template
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement "Add content property" transformation (GSI-729) (#68)
* Add config schema * Add boilerplate and basic test case * Add schema transformation * Add data transformation * Fix shallow copy bug * Add test data annotations * Update src/metldata/builtin_transformations/add_content_properties/assumptions.py Co-authored-by: Christoph Zwerschke <c.zwerschke@dkfz-heidelberg.de> * Update src/metldata/builtin_transformations/add_content_properties/model_transform.py Co-authored-by: Christoph Zwerschke <c.zwerschke@dkfz-heidelberg.de> * Remove false information from docstring Co-authored-by: Christoph Zwerschke <c.zwerschke@dkfz-heidelberg.de> * Clarify default for 'required', add explicit example --------- Co-authored-by: Christoph Zwerschke <c.zwerschke@dkfz-heidelberg.de>
- Loading branch information
Showing
11 changed files
with
648 additions
and
0 deletions.
There are no files selected for viewing
57 changes: 57 additions & 0 deletions
57
src/metldata/builtin_transformations/add_content_properties/assumptions.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln | ||
# for the German Human Genome-Phenome Archive (GHGA) | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
"""Check model assumptions for the add content properties transformation.""" | ||
|
||
from schemapack.spec.schemapack import SchemaPack | ||
|
||
from metldata.builtin_transformations.add_content_properties.instruction import ( | ||
AddContentPropertyInstruction, | ||
) | ||
from metldata.builtin_transformations.add_content_properties.path import ( | ||
resolve_schema_object_path, | ||
) | ||
from metldata.transform.base import ModelAssumptionError | ||
|
||
|
||
def check_model_assumptions(
    schema: SchemaPack,
    instructions_by_class: dict[str, list[AddContentPropertyInstruction]],
) -> None:
    """Verify that the model is compatible with the requested property additions.

    Args:
        schema:
            The schemapack whose class definitions are inspected.
        instructions_by_class:
            A dictionary mapping class names to lists of instructions.

    Raises:
        ModelAssumptionError:
            If a class referenced by the instructions is not present in the model,
            or if the property to be added already exists in the schema of the
            targeted object.
    """
    for class_name, instructions in instructions_by_class.items():
        class_def = schema.classes.get(class_name)

        # Every class referenced by an instruction has to be part of the model
        if not class_def:
            raise ModelAssumptionError(
                f"Class {class_name} does not exist in the model."
            )

        for instruction in instructions:
            target = instruction.target_content

            try:
                object_schema = resolve_schema_object_path(
                    json_schema=class_def.content.json_schema_dict,
                    path=target.object_path,
                )
            except KeyError:
                # The lookup raised a KeyError (presumably the object path is not
                # present in the content schema - confirm against the path
                # helper); such instructions are skipped here without an error.
                continue

            existing_properties = object_schema.get("properties", {})
            if target.property_name not in existing_properties:
                continue

            # The property must not be defined yet in the targeted object schema
            raise ModelAssumptionError(
                f"Property {target.property_name} already exists"
                + f" in class {class_name}."
            )
50 changes: 50 additions & 0 deletions
50
src/metldata/builtin_transformations/add_content_properties/config.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln | ||
# for the German Human Genome-Phenome Archive (GHGA) | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
|
||
"""Models used to describe content properties that shall be added.""" | ||
|
||
from pydantic import Field | ||
from pydantic_settings import BaseSettings, SettingsConfigDict | ||
|
||
from metldata.builtin_transformations.add_content_properties.instruction import ( | ||
AddContentPropertyInstruction, | ||
) | ||
|
||
|
||
class AddContentPropertiesConfig(BaseSettings):
    """A Config for a transformation that adds a new property to an object within a
    content schema
    """

    # Unknown configuration keys are rejected instead of being silently ignored.
    model_config = SettingsConfigDict(extra="forbid")

    add_content_properties: list[AddContentPropertyInstruction] = Field(
        default=...,
        description=(
            "A list of instructions to add content properties to the model and data."
        ),
    )

    def instructions_by_class(
        self,
    ) -> dict[str, list[AddContentPropertyInstruction]]:
        """Group the configured instructions by the name of the class they target.

        Returns:
            A dictionary mapping each class name to the list of instructions
            referring to it. Classes appear in the order of their first occurrence
            and the instructions per class keep their configured order.
        """
        grouped: dict[str, list[AddContentPropertyInstruction]] = {}
        for instruction in self.add_content_properties:
            target_class = instruction.class_name
            if target_class not in grouped:
                grouped[target_class] = []
            grouped[target_class].append(instruction)
        return grouped
73 changes: 73 additions & 0 deletions
73
src/metldata/builtin_transformations/add_content_properties/data_transform.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln | ||
# for the German Human Genome-Phenome Archive (GHGA) | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
"""Data transformation logic for the add content properties transformation.""" | ||
|
||
from copy import deepcopy | ||
|
||
from schemapack.spec.datapack import DataPack | ||
|
||
from metldata.builtin_transformations.add_content_properties.instruction import ( | ||
AddContentPropertyInstruction, | ||
) | ||
from metldata.builtin_transformations.add_content_properties.path import ( | ||
resolve_data_object_path, | ||
) | ||
from metldata.transform.base import EvitableTransformationError | ||
|
||
|
||
def add_properties(
    *,
    data: DataPack,
    instructions_by_class: dict[str, list[AddContentPropertyInstruction]],
) -> DataPack:
    """Given a data pack and a dictionary of instructions by class, add the specified
    content properties to the data.

    Args:
        data:
            The datapack to add the content properties to. It is not modified;
            all changes are applied to a deep copy.
        instructions_by_class:
            A dictionary mapping class names to lists of instructions.

    Returns:
        A deep copy of the data with the specified content properties added.

    Raises:
        EvitableTransformationError:
            If the data holds no resources for a class referenced by the
            instructions (the class is missing or its resource mapping is empty),
            if the object resolved from the object path is not a dict, or if the
            property to be added is already present in that object.
    """
    modified_data = data.model_copy(deep=True)

    for class_name, instructions in instructions_by_class.items():
        resources = modified_data.resources.get(class_name)

        # Both a missing class and an empty resource mapping are treated as errors
        if not resources:
            raise EvitableTransformationError()

        for resource in resources.values():
            for instruction in instructions:
                property_name = instruction.target_content.property_name
                target_object = resolve_data_object_path(
                    data=resource.content,
                    path=instruction.target_content.object_path,
                )

                if (
                    not isinstance(target_object, dict)
                    or property_name in target_object
                ):
                    raise EvitableTransformationError()

                # Copy the value so that resources never share one mutable
                # object with each other or with the instruction itself.
                target_object[property_name] = deepcopy(instruction.value)

    return modified_data
72 changes: 72 additions & 0 deletions
72
src/metldata/builtin_transformations/add_content_properties/instruction.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln | ||
# for the German Human Genome-Phenome Archive (GHGA) | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
"""Models for instructions used in the 'add content properties' transformation.""" | ||
|
||
from typing import Any, Final | ||
|
||
from pydantic import Field | ||
from pydantic_settings import BaseSettings | ||
|
||
# JSON schema used for a newly added property when an instruction does not specify
# one: an object that does not permit any additional properties.
DEFAULT_CONTENT_SCHEMA: Final[dict[str, Any]] = {
    "type": "object",
    "additionalProperties": False,
}
|
||
|
||
class NewContentSchemaPath(BaseSettings):
    """A model describing the path of an object property within the content schema that
    is yet to be added. The model comprises a path to an already existing object within
    the content schema and the name of a property to be added to that object's schema
    """

    # Location of the already existing object that receives the new property.
    object_path: str = Field(
        default=...,
        description=(
            "The path to the content object to which a property shall be added. The"
            " path must be specified in dot notation, equivalently to JavaScript"
            " property accessors."
        ),
        examples=["some_property.another_nested_property"],
    )

    # Name under which the new property is added to the object at `object_path`.
    property_name: str = Field(
        default=...,
        description="The name of the property to be added.",
    )
|
||
|
||
class AddContentPropertyInstruction(BaseSettings):
    """A model describing an instruction to add a new content property to a class in a
    schemapack, including an associated default value in corresponding data.
    """

    class_name: str = Field(
        default=...,
        description="The name of the class to modify.",
    )

    # Where the new property is placed: an existing object plus the new property name.
    target_content: NewContentSchemaPath

    required: bool = Field(
        default=True,
        description=(
            "Indicates whether the newly added property shall be added to the"
            " 'required' list of the corresponding object. Defaults to 'True'."
        ),
    )

    # Falls back to DEFAULT_CONTENT_SCHEMA when no schema is configured.
    content_schema: dict[str, Any] = Field(
        default=DEFAULT_CONTENT_SCHEMA,
        description="The JSON schema of the newly added property.",
    )

    # Falls back to an empty dict when no value is configured.
    value: Any = Field(
        default={},
        description="A value to assign to the new property in the data.",
    )
85 changes: 85 additions & 0 deletions
85
src/metldata/builtin_transformations/add_content_properties/main.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln | ||
# for the German Human Genome-Phenome Archive (GHGA) | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
|
||
"""A transformation to add content properties.""" | ||
|
||
from schemapack.spec.datapack import DataPack | ||
from schemapack.spec.schemapack import SchemaPack | ||
|
||
from metldata.builtin_transformations.add_content_properties.assumptions import ( | ||
check_model_assumptions, | ||
) | ||
from metldata.builtin_transformations.add_content_properties.config import ( | ||
AddContentPropertiesConfig, | ||
) | ||
from metldata.builtin_transformations.add_content_properties.data_transform import ( | ||
add_properties, | ||
) | ||
from metldata.builtin_transformations.add_content_properties.model_transform import ( | ||
add_content_properties, | ||
) | ||
from metldata.transform.base import ( | ||
DataTransformer, | ||
TransformationDefinition, | ||
) | ||
|
||
|
||
class AddContentPropertiesTransformer(DataTransformer[AddContentPropertiesConfig]):
    """A transformer that adds content properties to data."""

    def transform(self, data: DataPack) -> DataPack:
        """Transforms data.

        Args:
            data: The data as DataPack to be transformed.

        Returns:
            The DataPack produced by `add_properties` for the instructions of the
            transformer's config, grouped by class.
        """
        return add_properties(
            data=data, instructions_by_class=self._config.instructions_by_class()
        )
|
||
|
||
def check_model_assumptions_wrapper(
    model: SchemaPack,
    config: AddContentPropertiesConfig,
) -> None:
    """Check that the model fulfills the assumptions of this transformation.

    Groups the configured instructions by class and delegates the actual checks
    to `check_model_assumptions`.

    Args:
        model: The schemapack to check.
        config: The transformation config providing the instructions.

    Raises:
        ModelAssumptionError:
            if the model does not fulfill the assumptions.
    """
    grouped_instructions = config.instructions_by_class()
    check_model_assumptions(
        schema=model,
        instructions_by_class=grouped_instructions,
    )
|
||
|
||
def transform_model(
    model: SchemaPack, config: AddContentPropertiesConfig
) -> SchemaPack:
    """Transform the data model.

    Args:
        model: The schemapack to transform.
        config: The transformation config providing the instructions.

    Returns:
        The schemapack returned by `add_content_properties` for the configured
        instructions, grouped by class.
    """
    grouped_instructions = config.instructions_by_class()
    transformed_model = add_content_properties(
        model=model,
        instructions_by_class=grouped_instructions,
    )
    return transformed_model
|
||
|
||
# Bundles the config class, the model assumption check, the model transformation,
# and the data transformer of the "add content properties" transformation.
ADD_CONTENT_PROPERTIES_TRANSFORMATION = TransformationDefinition[
    AddContentPropertiesConfig
](
    config_cls=AddContentPropertiesConfig,
    data_transformer_factory=AddContentPropertiesTransformer,
    transform_model=transform_model,
    check_model_assumptions=check_model_assumptions_wrapper,
)
Oops, something went wrong.