generated from ghga-de/microservice-repository-template
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Re-implemented relations inference transformation (GSI-530) (#57)
... using schemapack. Moreover: (1) replaced the term `original` with `input` in the context of a transformation, to better distinguish it from the original model of the untransformed submission; (2) distinguished data validation before and after transformation by using dedicated exceptions.
- Loading branch information
1 parent
5d72617
commit c61bd6f
Showing
44 changed files
with
2,539 additions
and
86 deletions.
There are no files selected for viewing
23 changes: 23 additions & 0 deletions
23
src/metldata/schemapack_/builtin_transformations/infer_relations/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln | ||
# for the German Human Genome-Phenome Archive (GHGA) | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
|
||
"""A transformation to infer relations based on existing ones in the metadata model.""" | ||
|
||
|
||
# shortcuts: | ||
from metldata.schemapack_.builtin_transformations.infer_relations.main import ( # noqa: F401 | ||
RELATION_INFERENCE_TRANSFORMATION, | ||
) |
99 changes: 99 additions & 0 deletions
99
src/metldata/schemapack_/builtin_transformations/infer_relations/assumptions.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln | ||
# for the German Human Genome-Phenome Archive (GHGA) | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
|
||
"""Check model assumptions.""" | ||
|
||
from schemapack.spec.schemapack import SchemaPack | ||
|
||
from metldata.schemapack_.builtin_transformations.infer_relations.path.path import ( | ||
RelationPath, | ||
) | ||
from metldata.schemapack_.builtin_transformations.infer_relations.path.path_elements import ( | ||
RelationPathElementType, | ||
) | ||
from metldata.schemapack_.builtin_transformations.infer_relations.relations import ( | ||
InferenceInstruction, | ||
) | ||
from metldata.schemapack_.transform.base import ModelAssumptionError | ||
|
||
|
||
def assert_path_classes_and_relations(model: SchemaPack, path: RelationPath) -> None:
    """Make sure that all classes and relations defined in the provided path exist in
    the provided model.

    Every element of the path is checked: both its source and its target class must
    be present in the model, and the relation property named by the element must be
    defined on the class that owns it.

    Args:
        model: The model whose classes and relations are inspected.
        path: The relation path whose elements are validated one by one.

    Raises:
        ModelAssumptionError:
            if the model does not fulfill the assumptions.
    """
    for path_element in path.elements:
        for class_name in (path_element.source, path_element.target):
            if class_name not in model.classes:
                raise ModelAssumptionError(f"Class {class_name} not found in model.")

        # For an active element the relation property is looked up on the source
        # class; for every other element type it is looked up on the target class.
        if path_element.type_ == RelationPathElementType.ACTIVE:
            owning_class = path_element.source
        else:
            owning_class = path_element.target

        # Do not leave the function after a successful check: the remaining
        # elements of the path still have to be validated.
        if path_element.property not in model.classes[owning_class].relations:
            raise ModelAssumptionError(
                f"Relation property {path_element.property} not found in class"
                f" {owning_class}."
            )
|
||
|
||
def assert_new_property_not_exists(
    model: SchemaPack, instruction: InferenceInstruction
) -> None:
    """Ensure the property that shall hold the inferred relation is still unused.

    Only the relations of the instruction's source class are inspected. If that
    class is not part of the model, this check passes silently; the existence of
    the source class is not verified here.

    Raises:
        ModelAssumptionError:
            if the source class already has a relation named like the new property.
    """
    host_class = model.classes.get(instruction.source)
    if not host_class:
        # Nothing the new property could collide with.
        return

    if instruction.new_property not in host_class.relations:
        return

    raise ModelAssumptionError(
        f"Property '{instruction.new_property}' of class '{instruction.source}'"
        ", intended to store an inferred relation, does already exist."
    )
|
||
|
||
def assert_instructions_match_model(
    *,
    model: SchemaPack,
    instructions: list[InferenceInstruction],
) -> None:
    """Verify that every inference instruction is applicable to the given model.

    For each instruction, in order, the classes and relations referenced by its path
    are checked first, followed by a check that the new property is not yet taken.

    Raises:
        ModelAssumptionError:
            if the model does not fulfill the assumptions.
    """
    for current in instructions:
        relation_path = current.path
        assert_path_classes_and_relations(model=model, path=relation_path)
        assert_new_property_not_exists(model=model, instruction=current)
79 changes: 79 additions & 0 deletions
79
src/metldata/schemapack_/builtin_transformations/infer_relations/config.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln | ||
# for the German Human Genome-Phenome Archive (GHGA) | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
|
||
"""Models used to describe all inferred relations based on existing relations.""" | ||
|
||
from functools import cached_property | ||
|
||
from pydantic import Field | ||
from pydantic_settings import BaseSettings, SettingsConfigDict | ||
|
||
from metldata.schemapack_.builtin_transformations.infer_relations.relations import ( | ||
InferenceInstruction, | ||
RelationDetails, | ||
) | ||
|
||
|
||
class RelationInferenceConfig(BaseSettings):
    """Config containing instructions to infer relations for all classes of a model."""

    # Unknown config keys are rejected instead of being silently ignored.
    model_config = SettingsConfigDict(extra="forbid")

    # NOTE(review): the examples below use the key "cardinality", whereas
    # `inference_instructions` reads `relation_details.allow_multiple`. The
    # definition of `RelationDetails` is not visible here - confirm that the
    # examples match its actual fields.
    inferred_relations: dict[str, dict[str, RelationDetails]] = Field(
        ...,
        description=(
            "A nested dictionary describing instructions to infer relations based"
            " on existing relations. On the first level keys refer to classes to"
            " which the inferred relations should be added. On the second level, keys"
            " refer to the names of the new property of the host class that hold the"
            " inferred relation. The values refer to the actual relation details."
        ),
        examples=[
            {
                "ClassA": {
                    "class_d": {
                        "path": "ClassA(class_b)>ClassB(class_d)>ClassD",
                        "cardinality": "many_to_many",
                    },
                    "class_c": {
                        "path": "ClassA(class_b)>ClassB<(class_c)ClassC",
                        "cardinality": "many_to_one",
                    },
                },
                "ClassB": {
                    "class_c": {
                        "path": "ClassB<(class_c)ClassC",
                        "cardinality": "many_to_many",
                    }
                },
            }
        ],
    )

    @cached_property
    def inference_instructions(self) -> list[InferenceInstruction]:
        """The nested `inferred_relations` mapping flattened into a list.

        One instruction is produced per (class, new property) pair, in the
        iteration order of the nested dictionaries. The result is computed once
        per instance and cached afterwards.
        """
        collected: list[InferenceInstruction] = []
        for host_class, new_relations in self.inferred_relations.items():
            for new_property, details in new_relations.items():
                collected.append(
                    InferenceInstruction(
                        source=host_class,
                        target=details.path.target,
                        path=details.path,
                        new_property=new_property,
                        allow_multiple=details.allow_multiple,
                    )
                )
        return collected
Oops, something went wrong.