generated from ghga-de/microservice-repository-template
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
833 additions
and
0 deletions.
There are no files selected for viewing
171 changes: 171 additions & 0 deletions
171
src/metldata/builtin_transformations/count_content_values/assumptions.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln | ||
# for the German Human Genome-Phenome Archive (GHGA) | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
"""Check model assumptions for the add content properties transformation.""" | ||
|
||
from schemapack.spec.schemapack import ClassDefinition, SchemaPack | ||
|
||
from metldata.builtin_transformations.add_content_properties.path import ( | ||
resolve_schema_object_path, | ||
) | ||
from metldata.builtin_transformations.count_content_values.instruction import ( | ||
CountContentValuesInstruction, | ||
) | ||
from metldata.builtin_transformations.count_content_values.path.path import RelationPath | ||
from metldata.builtin_transformations.count_content_values.path.path_elements import ( | ||
RelationPathElementType, | ||
) | ||
from metldata.transform.base import ModelAssumptionError | ||
|
||
|
||
def check_model_assumptions(
    schema: SchemaPack,
    instructions: list[CountContentValuesInstruction],
) -> None:
    """Check the model assumptions for the count content values transformation.

    For every instruction, the class to modify must exist in the model, the
    target content path must be usable, and the source relation path must be
    resolvable within the model.

    Args:
        schema:
            The model that the instructions are validated against.
        instructions:
            The instructions to validate.

    Raises:
        ModelAssumptionError:
            if the model does not fulfill the assumptions.
    """
    for instruction in instructions:
        target_class_name = instruction.class_name
        target_class = schema.classes.get(target_class_name)

        # The class to be modified has to be part of the model.
        if not target_class:
            raise ModelAssumptionError(
                f"Class {target_class_name} does not exist in the model."
            )

        assert_target_path_assumptions(
            class_def=target_class,
            class_name=target_class_name,
            instruction=instruction,
        )
        assert_relation_path_assumptions(
            model=schema,
            path=instruction.source.relation_path,
        )
|
||
|
||
def assert_target_path_assumptions(
    *,
    class_def: ClassDefinition,
    class_name: str,
    instruction: CountContentValuesInstruction,
) -> None:
    """Assert that the target object path exists and that the property to add does
    not already exist in the target content.

    Args:
        class_def:
            The definition of the class that shall be modified.
        class_name:
            The name of that class (only used for error messages).
        instruction:
            The instruction providing the target object path and property name.

    Raises:
        ModelAssumptionError:
            if the object path cannot be resolved in the content schema of the
            class or if the property is already defined at that path.
    """
    object_path = instruction.target_content.object_path
    property_name = instruction.target_content.property_name

    # Check if a content schema exists for the given object path.
    try:
        target_schema = resolve_schema_object_path(
            json_schema=class_def.content.json_schema_dict,
            path=object_path,
        )
    except KeyError as error:
        # Replacement fields are kept on one line: splitting them across lines
        # is a syntax error on Python versions before 3.12.
        raise ModelAssumptionError(
            f"Target object path {object_path} does not exist in class {class_name}."
        ) from error

    if property_name in target_schema.get("properties", {}):
        raise ModelAssumptionError(
            f"Property {property_name} already exists for object path {object_path}"
            f" in class {class_name}."
        )
|
||
|
||
def assert_relation_path_assumptions(model: SchemaPack, path: RelationPath):
    """Make sure that all classes and relations defined in the provided path exist in
    the provided model.

    Every element of the path is checked; validation does not stop after the first
    active element.

    Args:
        model:
            The model in which the classes and relations are looked up.
        path:
            The relation path whose elements are validated.

    Raises:
        ModelAssumptionError:
            if the model does not fulfill the assumptions.
    """
    for path_element in path.elements:
        # Both ends of the path element must be classes of the model.
        for class_name in (path_element.source, path_element.target):
            if class_name not in model.classes:
                raise ModelAssumptionError(f"Class {class_name} not found in model.")

        # For an active element the relation property is looked up on the source
        # class, for any other element on the target class.
        if path_element.type_ == RelationPathElementType.ACTIVE:
            owning_class = path_element.source
        else:
            owning_class = path_element.target

        if path_element.property not in model.classes[owning_class].relations:
            raise ModelAssumptionError(
                f"Relation property {path_element.property} not found in class"
                f" {owning_class}."
            )
|
||
|
||
def assert_relational_multiplicity(
    model: SchemaPack, relation_path: RelationPath, content_path: str
):
    """Check the multiplicity of the relations along the path and the type of the
    content property addressed by `content_path`.

    For an active path element, the relation is read from the source class and both
    its `origin` and `target` multiplicity flags must be set. In addition, the
    content path must resolve in the content schema of the target class to a
    property of type "integer". For any other path element, the relation is read
    from the target class and its `origin` flag must be set while its `target` flag
    must not be set.

    NOTE(review): the conditions are kept exactly as originally written; confirm
    that they reflect the intended multiplicity requirements.

    Args:
        model:
            The model providing the class, relation, and content definitions.
        relation_path:
            The relation path whose elements are checked.
        content_path:
            The path to the content property within the target class.

    Raises:
        ModelAssumptionError:
            if a relation does not have the expected multiplicity, if the content
            path cannot be resolved, or if the resolved property is not declared
            as an integer.
    """
    for path_element in relation_path.elements:
        if path_element.type_ == RelationPathElementType.ACTIVE:
            multiplicity = (
                model.classes[path_element.source]
                .relations[path_element.property]
                .multiple
            )
            if not multiplicity.origin or not multiplicity.target:
                # The relation exists but has the wrong multiplicity; report that
                # (and the class that actually owns the relation).
                raise ModelAssumptionError(
                    f"Relation property {path_element.property} of class"
                    f" {path_element.source} does not have the expected"
                    " multiplicity: both 'origin' and 'target' must be multiple."
                )

            target_content_schema = model.classes[
                path_element.target
            ].content.json_schema_dict

            try:
                target_schema = resolve_schema_object_path(
                    json_schema=target_content_schema,
                    path=content_path,
                )
            except KeyError as error:
                raise ModelAssumptionError(
                    f"{path_element.target} does not contain the property"
                    f" {content_path}"
                ) from error

            # A schema without a "type" entry is treated like a non-integer
            # property instead of surfacing as an unrelated KeyError.
            if target_schema.get("type") != "integer":
                raise ModelAssumptionError(
                    f"{content_path} of class {path_element.target}"
                    " is not an integer property."
                )

        else:
            multiplicity = (
                model.classes[path_element.target]
                .relations[path_element.property]
                .multiple
            )
            if not multiplicity.origin or multiplicity.target:
                raise ModelAssumptionError(
                    f"Relation property {path_element.property} of class"
                    f" {path_element.target} does not have the expected"
                    " multiplicity: 'origin' must be multiple and 'target' must"
                    " not be multiple."
                )
34 changes: 34 additions & 0 deletions
34
src/metldata/builtin_transformations/count_content_values/config.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln | ||
# for the German Human Genome-Phenome Archive (GHGA) | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
|
||
"""Models used to describe content properties that shall be deleted.""" | ||
|
||
from pydantic import Field | ||
from pydantic_settings import BaseSettings, SettingsConfigDict | ||
|
||
from metldata.builtin_transformations.count_content_values.instruction import ( | ||
CountContentValuesInstruction, | ||
) | ||
|
||
|
||
class CountContentValuesConfig(BaseSettings):
    """Settings holding the instructions for the count content values transformation.

    Settings other than the ones declared here are rejected (`extra="forbid"`).
    """

    model_config = SettingsConfigDict(extra="forbid")

    # Required setting: no default value is provided.
    count_content_values: list[CountContentValuesInstruction] = Field(
        default=...,
        description="A list of instructions for counting content properties.",
    )
73 changes: 73 additions & 0 deletions
73
src/metldata/builtin_transformations/count_content_values/data_transform.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln | ||
# for the German Human Genome-Phenome Archive (GHGA) | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
"""Data transformation logic for the add content properties transformation.""" | ||
|
||
from copy import deepcopy | ||
|
||
from schemapack.spec.datapack import DataPack | ||
|
||
from metldata.builtin_transformations.add_content_properties.instruction import ( | ||
AddContentPropertyInstruction, | ||
) | ||
from metldata.builtin_transformations.add_content_properties.path import ( | ||
resolve_data_object_path, | ||
) | ||
from metldata.transform.base import EvitableTransformationError | ||
|
||
|
||
def add_properties(
    *,
    data: DataPack,
    instructions_by_class: dict[str, list[AddContentPropertyInstruction]],
) -> DataPack:
    """Add the content properties described by the instructions to a copy of the data.

    The provided data pack is not modified; all changes are applied to a deep copy.

    Args:
        data:
            The datapack to add the content properties to.
        instructions_by_class:
            A dictionary mapping class names to lists of instructions.

    Returns:
        A copy of the data with the specified content properties being added.

    Raises:
        EvitableTransformationError:
            if a class has no resources in the data, if an object path does not
            resolve to a dictionary, or if the property to add is already present.
    """
    result = data.model_copy(deep=True)

    for class_name, class_instructions in instructions_by_class.items():
        class_resources = result.resources.get(class_name)
        if not class_resources:
            raise EvitableTransformationError()

        for resource in class_resources.values():
            for instruction in class_instructions:
                target = instruction.target_content
                container = resolve_data_object_path(
                    data=resource.content,
                    path=target.object_path,
                )

                # The property can only be added to a dictionary ...
                if not isinstance(container, dict):
                    raise EvitableTransformationError()
                # ... that does not contain it yet.
                if target.property_name in container:
                    raise EvitableTransformationError()

                # Copy the value so that resources do not share mutable state.
                container[target.property_name] = deepcopy(instruction.value)

    return result
44 changes: 44 additions & 0 deletions
44
src/metldata/builtin_transformations/count_content_values/instruction.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln | ||
# for the German Human Genome-Phenome Archive (GHGA) | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
"""Models for instructions used in the 'add content properties' transformation.""" | ||
|
||
from typing import Any, Final | ||
|
||
from pydantic import Field | ||
from pydantic_settings import BaseSettings | ||
|
||
from metldata.builtin_transformations.common import NewContentSchemaPath | ||
from metldata.builtin_transformations.count_content_values.path.path import RelationPath | ||
|
||
# JSON schema fragment describing an object that accepts arbitrary additional
# properties.
# NOTE(review): not referenced by the models defined in this module - confirm
# whether it is still needed here.
ADDED_PROPERTY_SCHEMA: Final[dict[str, Any]] = {"type": "object", "additionalProperties": True}
|
||
|
||
class SourcePaths(BaseSettings):
    """Paths describing the source of a count content values instruction.

    Combines a relation path with a content path.
    NOTE(review): presumably the relation path leads to the class whose content
    is addressed by the content path - confirm against the transformation logic.
    """

    # Path of relations between classes of the model.
    relation_path: RelationPath

    # Path to a content property, given as a plain string.
    content_path: str
|
||
|
||
class CountContentValuesInstruction(BaseSettings):
    """A single instruction of the 'count content values' transformation.

    Names the class to modify, the location of the new content property within
    that class, and the source paths of the instruction.
    """

    class_name: str = Field(
        default=...,
        description="The name of the class to modify.",
    )

    # Object path and property name of the content property to be added.
    target_content: NewContentSchemaPath

    # Relation path and content path forming the source of the instruction.
    source: SourcePaths
Oops, something went wrong.