Skip to content

Commit

Permalink
Re-implemented relations inference transformation (GSI-530) (#57)
Browse files Browse the repository at this point in the history
... using schemapack.

Moreover:
- replaced term `original` by `input` in context of a transformation to
better distinguish from the original model of the untransformed submission
- distinguish data validation before and after transformation using dedicated exceptions
  • Loading branch information
KerstenBreuer authored Dec 13, 2023
1 parent 5d72617 commit c61bd6f
Show file tree
Hide file tree
Showing 44 changed files with 2,539 additions and 86 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
# for the German Human Genome-Phenome Archive (GHGA)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""A transformation to infer references based on existing ones in the metadata model."""


# shortcuts:
from metldata.schemapack_.builtin_transformations.infer_relations.main import ( # noqa: F401
RELATION_INFERENCE_TRANSFORMATION,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
# for the German Human Genome-Phenome Archive (GHGA)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Check model assumptions."""

from schemapack.spec.schemapack import SchemaPack

from metldata.schemapack_.builtin_transformations.infer_relations.path.path import (
RelationPath,
)
from metldata.schemapack_.builtin_transformations.infer_relations.path.path_elements import (
RelationPathElementType,
)
from metldata.schemapack_.builtin_transformations.infer_relations.relations import (
InferenceInstruction,
)
from metldata.schemapack_.transform.base import ModelAssumptionError


def assert_path_classes_and_relations(model: SchemaPack, path: RelationPath) -> None:
    """Make sure that all classes and relations defined in the provided path exist in
    the provided model.

    Every element of the path is checked: its source and its target class must both
    be present in the model, and its relation property must be defined on the class
    expected to hold it (the source class for elements of type ACTIVE, the target
    class for all other elements).

    Args:
        model: The model to check the path against.
        path: The relation path whose elements are validated.

    Raises:
        ModelAssumptionError:
            if the model does not fulfill the assumptions.
    """
    for path_element in path.elements:
        if path_element.source not in model.classes:
            raise ModelAssumptionError(
                f"Class {path_element.source} not found in model."
            )

        if path_element.target not in model.classes:
            raise ModelAssumptionError(
                f"Class {path_element.target} not found in model."
            )

        # Pick the class that has to define the relation property. A previous
        # version returned from the function right after checking the first ACTIVE
        # element, which silently skipped all remaining path elements.
        if path_element.type_ == RelationPathElementType.ACTIVE:
            holding_class = path_element.source
        else:
            holding_class = path_element.target

        if path_element.property not in model.classes[holding_class].relations:
            raise ModelAssumptionError(
                f"Relation property {path_element.property} not found in class"
                f" {holding_class}."
            )


def assert_new_property_not_exists(
    model: SchemaPack, instruction: InferenceInstruction
) -> None:
    """Ensure that the property meant to hold the inferred relation is still unused.

    Only the source class named in the instruction is inspected. If the model has
    no (truthy) entry for that class, nothing is checked and the function returns
    without raising.

    Raises:
        ModelAssumptionError:
            if the source class already has a relation named like the new property.
    """
    host_class = model.classes.get(instruction.source)

    # A missing source class is deliberately not treated as an error here.
    if not host_class:
        return

    if instruction.new_property not in host_class.relations:
        return

    raise ModelAssumptionError(
        f"Property '{instruction.new_property}' of class '{instruction.source}'"
        ", intended to store an inferred relation, does already exist."
    )


def assert_instructions_match_model(
    *,
    model: SchemaPack,
    instructions: list[InferenceInstruction],
) -> None:
    """Verify that each inference instruction is applicable to the given model.

    For every instruction, in order, the path is validated against the model first
    and the new property is checked for a name clash afterwards. The first failing
    check aborts the validation.

    Raises:
        ModelAssumptionError:
            if the model does not fulfill the assumptions.
    """
    for current in instructions:
        assert_path_classes_and_relations(model=model, path=current.path)
        assert_new_property_not_exists(model=model, instruction=current)
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
# for the German Human Genome-Phenome Archive (GHGA)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Models used to describe all inferred relations based on existing relations."""

from functools import cached_property

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict

from metldata.schemapack_.builtin_transformations.infer_relations.relations import (
InferenceInstruction,
RelationDetails,
)


class RelationInferenceConfig(BaseSettings):
    """Config containing instructions to infer relations for all classes of a model."""

    # Unknown settings keys are rejected instead of being ignored.
    model_config = SettingsConfigDict(extra="forbid")

    # NOTE(review): the examples below use a "cardinality" key, whereas
    # `inference_instructions` reads `relation_details.allow_multiple` - confirm
    # that the examples match the RelationDetails schema.
    inferred_relations: dict[str, dict[str, RelationDetails]] = Field(
        ...,
        description=(
            "A nested dictionary describing instructions to infer relations based"
            " on existing relations. On the first level keys refer to classes to"
            " which the inferred relations should be added. On the second level, keys"
            " refer to the names of the new property of the host class that hold the"
            " inferred relation. The values refer to the actual relation details."
        ),
        examples=[
            {
                "ClassA": {
                    "class_d": {
                        "path": "ClassA(class_b)>ClassB(class_d)>ClassD",
                        "cardinality": "many_to_many",
                    },
                    "class_c": {
                        "path": "ClassA(class_b)>ClassB<(class_c)ClassC",
                        "cardinality": "many_to_one",
                    },
                },
                "ClassB": {
                    "class_c": {
                        "path": "ClassB<(class_c)ClassC",
                        "cardinality": "many_to_many",
                    }
                },
            }
        ],
    )

    @cached_property
    def inference_instructions(self) -> list[InferenceInstruction]:
        """Flatten `inferred_relations` into one instruction per new property.

        The instructions follow the iteration order of the nested dictionary: host
        classes first, then the properties configured for each host class. The
        result is computed once per instance and cached.
        """
        instructions: list[InferenceInstruction] = []

        for host_class, relations_by_property in self.inferred_relations.items():
            for property_name, details in relations_by_property.items():
                instructions.append(
                    InferenceInstruction(
                        source=host_class,
                        target=details.path.target,
                        path=details.path,
                        new_property=property_name,
                        allow_multiple=details.allow_multiple,
                    )
                )

        return instructions
Loading

0 comments on commit c61bd6f

Please sign in to comment.