Skip to content

Commit

Permalink
Implement "Add content property" transformation (GSI-729) (#68)
Browse files Browse the repository at this point in the history
* Add config schema

* Add boilerplate and basic test case

* Add schema transformation

* Add data transformation

* Fix shallow copy bug

* Add test data annotations

* Update src/metldata/builtin_transformations/add_content_properties/assumptions.py

Co-authored-by: Christoph Zwerschke <c.zwerschke@dkfz-heidelberg.de>

* Update src/metldata/builtin_transformations/add_content_properties/model_transform.py

Co-authored-by: Christoph Zwerschke <c.zwerschke@dkfz-heidelberg.de>

* Remove false information from docstring

Co-authored-by: Christoph Zwerschke <c.zwerschke@dkfz-heidelberg.de>

* Clarify default for 'required', add explicit example

---------

Co-authored-by: Christoph Zwerschke <c.zwerschke@dkfz-heidelberg.de>
  • Loading branch information
lkuchenb and Cito authored May 15, 2024
1 parent 2ed8ca3 commit 7052672
Show file tree
Hide file tree
Showing 11 changed files with 648 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
# for the German Human Genome-Phenome Archive (GHGA)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Check model assumptions for the add content properties transformation."""

from schemapack.spec.schemapack import SchemaPack

from metldata.builtin_transformations.add_content_properties.instruction import (
AddContentPropertyInstruction,
)
from metldata.builtin_transformations.add_content_properties.path import (
resolve_schema_object_path,
)
from metldata.transform.base import ModelAssumptionError


def check_model_assumptions(
    schema: SchemaPack,
    instructions_by_class: dict[str, list[AddContentPropertyInstruction]],
) -> None:
    """Check the model assumptions for the add content properties transformation.

    Args:
        schema:
            The schemapack model the instructions are checked against.
        instructions_by_class:
            A dictionary mapping class names to lists of instructions.

    Raises:
        ModelAssumptionError:
            If a class referenced by the instructions does not exist in the model,
            if the object path of an instruction cannot be resolved within the
            content schema of its class, or if the property to be added already
            exists in the target object schema.
    """
    for class_name, instructions in instructions_by_class.items():
        class_def = schema.classes.get(class_name)

        # Check if the class exists in the model
        if class_def is None:
            raise ModelAssumptionError(
                f"Class {class_name} does not exist in the model."
            )

        for instruction in instructions:
            object_path = instruction.target_content.object_path
            property_name = instruction.target_content.property_name

            # The object that shall receive the new property must already exist.
            # An unresolvable path (signalled by a KeyError) is reported as a
            # violated assumption instead of being skipped silently.
            try:
                target_schema = resolve_schema_object_path(
                    json_schema=class_def.content.json_schema_dict,
                    path=object_path,
                )
            except KeyError as error:
                raise ModelAssumptionError(
                    f"Object path {object_path} does not exist"
                    + f" in class {class_name}."
                ) from error

            # Check if the property already exists in the target schema
            if property_name in target_schema.get("properties", {}):
                raise ModelAssumptionError(
                    f"Property {property_name} already exists"
                    + f" in class {class_name}."
                )
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
# for the German Human Genome-Phenome Archive (GHGA)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Models used to describe content properties that shall be deleted."""

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict

from metldata.builtin_transformations.add_content_properties.instruction import (
AddContentPropertyInstruction,
)


class AddContentPropertiesConfig(BaseSettings):
    """Configuration of a transformation that adds new properties to objects within
    the content schemas of classes.
    """

    # Unknown configuration keys are rejected.
    model_config = SettingsConfigDict(extra="forbid")

    add_content_properties: list[AddContentPropertyInstruction] = Field(
        ...,
        description=(
            "A list of instructions to add content properties to the model and data."
        ),
    )

    def instructions_by_class(
        self,
    ) -> dict[str, list[AddContentPropertyInstruction]]:
        """Group the configured instructions by the name of the class they target.

        Returns:
            A dictionary mapping each class name to the list of instructions for
            that class, in the order in which they appear in the configuration.
        """
        grouped: dict[str, list[AddContentPropertyInstruction]] = {}
        for item in self.add_content_properties:
            bucket = grouped.get(item.class_name)
            if bucket is None:
                # First instruction seen for this class: open a new group.
                bucket = []
                grouped[item.class_name] = bucket
            bucket.append(item)
        return grouped
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
# for the German Human Genome-Phenome Archive (GHGA)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Data transformation logic for the add content properties transformation."""

from copy import deepcopy

from schemapack.spec.datapack import DataPack

from metldata.builtin_transformations.add_content_properties.instruction import (
AddContentPropertyInstruction,
)
from metldata.builtin_transformations.add_content_properties.path import (
resolve_data_object_path,
)
from metldata.transform.base import EvitableTransformationError


def add_properties(
    *,
    data: DataPack,
    instructions_by_class: dict[str, list[AddContentPropertyInstruction]],
) -> DataPack:
    """Given a data pack and a dictionary of instructions by class, add the specified
    content properties to the data.

    Args:
        data:
            The datapack to add the content properties to.
        instructions_by_class:
            A dictionary mapping class names to lists of instructions.

    Returns:
        A deep copy of the data with the specified content properties being added.
        The input data is not modified.

    Raises:
        EvitableTransformationError:
            If the data has no entry for a class referenced by the instructions,
            if the object addressed by an instruction is not a dictionary, or if
            it already contains the property to be added.
    """
    modified_data = data.model_copy(deep=True)

    for class_name, instructions in instructions_by_class.items():
        resources = modified_data.resources.get(class_name)

        # Only a missing class entry is an error. A class that is present but has
        # no resources simply leaves nothing to modify.
        if resources is None:
            raise EvitableTransformationError()

        for resource in resources.values():
            for instruction in instructions:
                property_name = instruction.target_content.property_name
                target_object = resolve_data_object_path(
                    data=resource.content,
                    path=instruction.target_content.object_path,
                )

                if not isinstance(target_object, dict) or property_name in target_object:
                    raise EvitableTransformationError()

                # Copy the value so that resources never share one mutable default.
                target_object[property_name] = deepcopy(instruction.value)

    return modified_data
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
# for the German Human Genome-Phenome Archive (GHGA)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Models for instructions used in the 'add content properties' transformation."""

from typing import Any, Final

from pydantic import Field
from pydantic_settings import BaseSettings

# JSON schema used as the default for a newly added property: an object that
# does not permit any additional properties.
DEFAULT_CONTENT_SCHEMA: Final[dict[str, Any]] = {
    "type": "object",
    "additionalProperties": False,
}


class NewContentSchemaPath(BaseSettings):
    """Location of a property that is yet to be added to a content schema.

    The location consists of the path to an already existing object within the
    content schema and the name of the property to be added to that object's
    schema.
    """

    object_path: str = Field(
        ...,
        description=(
            "The path to the content object to which a property shall be added. The"
            " path must be specified in dot notation, equivalently to JavaScript"
            " property accessors."
        ),
        examples=["some_property.another_nested_property"],
    )

    property_name: str = Field(
        ...,
        description="The name of the property to be added.",
    )


class AddContentPropertyInstruction(BaseSettings):
    """Instruction to add a new content property to a class in a schemapack,
    together with the value to assign to that property in corresponding data.
    """

    class_name: str = Field(..., description="The name of the class to modify.")

    # Where the new property is placed within the content schema of the class.
    target_content: NewContentSchemaPath

    required: bool = Field(
        default=True,
        description=(
            "Indicates whether the newly added property shall be added to the"
            " 'required' list of the corresponding object. Defaults to 'True'."
        ),
    )

    content_schema: dict[str, Any] = Field(
        default=DEFAULT_CONTENT_SCHEMA,
        description="The JSON schema of the newly added property.",
    )

    value: Any = Field(
        default={},
        description="A value to assign to the new property in the data.",
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
# for the German Human Genome-Phenome Archive (GHGA)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""A transformation to add content properties."""

from schemapack.spec.datapack import DataPack
from schemapack.spec.schemapack import SchemaPack

from metldata.builtin_transformations.add_content_properties.assumptions import (
check_model_assumptions,
)
from metldata.builtin_transformations.add_content_properties.config import (
AddContentPropertiesConfig,
)
from metldata.builtin_transformations.add_content_properties.data_transform import (
add_properties,
)
from metldata.builtin_transformations.add_content_properties.model_transform import (
add_content_properties,
)
from metldata.transform.base import (
DataTransformer,
TransformationDefinition,
)


class AddContentPropertiesTransformer(DataTransformer[AddContentPropertiesConfig]):
    """A transformer that adds content properties to data."""

    def transform(self, data: DataPack) -> DataPack:
        """Transforms data by adding the configured content properties.

        Args:
            data: The data as DataPack to be transformed.

        Returns:
            The data as returned by `add_properties` for the instructions of this
            transformer's config.
        """
        return add_properties(
            data=data, instructions_by_class=self._config.instructions_by_class()
        )


def check_model_assumptions_wrapper(
    model: SchemaPack,
    config: AddContentPropertiesConfig,
) -> None:
    """Check that the model fulfills the assumptions of this transformation.

    The instructions of the config are grouped by class and passed on to
    `check_model_assumptions` together with the model.

    Raises:
        ModelAssumptionError:
            if the model does not fulfill the assumptions.
    """
    grouped_instructions = config.instructions_by_class()
    check_model_assumptions(schema=model, instructions_by_class=grouped_instructions)


def transform_model(
    model: SchemaPack, config: AddContentPropertiesConfig
) -> SchemaPack:
    """Transform the data model according to the configured instructions.

    Returns:
        The model as returned by `add_content_properties` for the instructions of
        the config, grouped by class.
    """
    grouped_instructions = config.instructions_by_class()
    transformed_model = add_content_properties(
        model=model, instructions_by_class=grouped_instructions
    )
    return transformed_model


# Definition of the "add content properties" transformation, bundling the config
# class, the model assumption check, the model transformation, and the factory
# for the data transformer defined in this module.
ADD_CONTENT_PROPERTIES_TRANSFORMATION = TransformationDefinition[
    AddContentPropertiesConfig
](
    config_cls=AddContentPropertiesConfig,
    check_model_assumptions=check_model_assumptions_wrapper,
    transform_model=transform_model,
    data_transformer_factory=AddContentPropertiesTransformer,
)
Loading

0 comments on commit 7052672

Please sign in to comment.