Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
KerstenBreuer committed Dec 7, 2023
1 parent 3e9918e commit 84dda98
Show file tree
Hide file tree
Showing 12 changed files with 1,189 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
# for the German Human Genome-Phenome Archive (GHGA)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""A transformation to infer references based on existing ones in the metadata model."""


# shortcuts:
# pylint: disable=unused-import
from metldata.builtin_transformations.infer_references.main import ( # noqa: F401
REFERENCE_INFERENCE_TRANSFORMATION,
ReferenceInferenceConfig,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
# for the German Human Genome-Phenome Archive (GHGA)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Models used to describe all inferred relations based on existing relations."""

from functools import cached_property

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict

from metldata.schemapack_.builtin_transformations.infer_relations.relations import (
InferredRelations,
RelationDetails,
)


class RelationInferenceConfig(BaseSettings):
    """Settings that describe which relations to infer for the classes of a model.

    Unknown settings are rejected (``extra="forbid"``).
    """

    model_config = SettingsConfigDict(extra="forbid")

    # Maps host class name -> new property name -> details of the relation to infer.
    inferred_relations: dict[str, dict[str, RelationDetails]] = Field(
        ...,
        description=(
            "A nested dictionary describing instructions to infer relations based"
            " on existing relations. On the first level keys refer to classes to"
            " which the inferred relations should be added. On the second level, keys"
            " refer to the names of the new property of the host class that hold the"
            " inferred relation. The values refer to the actual relation details."
        ),
        examples=[
            {
                "ClassA": {
                    "class_d": {
                        "path": "ClassA(class_b)>ClassB(class_d)>ClassD",
                        "cardinality": "many_to_many",
                    },
                    "class_c": {
                        "path": "ClassA(class_b)>ClassB<(class_c)ClassC",
                        "cardinality": "many_to_one",
                    },
                },
                "ClassB": {
                    "class_c": {
                        "path": "ClassB<(class_c)ClassC",
                        "cardinality": "many_to_many",
                    }
                },
            }
        ],
    )

    @cached_property
    def inferred_relation_list(self) -> list[InferredRelations]:
        """Flatten the nested `inferred_relations` mapping into a list.

        One `InferredRelations` entry is produced per (host class, new property)
        pair, in the iteration order of the nested dictionaries. The target class
        of each entry is taken from the `target` attribute of the relation path.
        """
        # NOTE(review): the examples above use a "cardinality" key, whereas this
        # property reads `expect_multiple` from the relation details - confirm
        # against the definition of `RelationDetails`.
        return [
            InferredRelations(
                source=host_class,
                target=details.path.target,
                path=details.path,
                new_property=new_property,
                expect_multiple=details.expect_multiple,
            )
            for host_class, details_by_property in self.inferred_relations.items()
            for new_property, details in details_by_property.items()
        ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
# for the German Human Genome-Phenome Archive (GHGA)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""A transformation to infer references based on existing ones in the metadata model."""

from metldata.builtin_transformations.infer_references.config import (
ReferenceInferenceConfig,
)
from metldata.builtin_transformations.infer_references.metadata_transform import (
add_references_to_metadata,
)
from metldata.builtin_transformations.infer_references.model_transform import (
add_references_to_model,
)
from metldata.event_handling.models import SubmissionAnnotation
from metldata.model_utils.anchors import get_anchors_points_by_target
from metldata.model_utils.assumptions import check_basic_model_assumption
from metldata.model_utils.essentials import MetadataModel
from metldata.transform.base import Json, MetadataTransformer, TransformationDefinition


class ReferenceInferenceMetadataTransformer(
    MetadataTransformer[ReferenceInferenceConfig]
):
    """Metadata transformer that adds inferred references to metadata.

    The references to infer are taken from the transformation config.
    """

    def __init__(
        self,
        config: ReferenceInferenceConfig,
        original_model: MetadataModel,
        transformed_model: MetadataModel,
    ):
        """Set up the transformer.

        Args:
            config: The config of the reference inference transformation.
            original_model: The metadata model before the transformation.
            transformed_model: The metadata model after the transformation.
        """
        super().__init__(
            config=config,
            original_model=original_model,
            transformed_model=transformed_model,
        )

        # The anchor points are derived from the original model once here and
        # reused for every call to `transform`.
        anchor_points = get_anchors_points_by_target(model=self._original_model)
        self._anchor_points_by_target = anchor_points

    def transform(self, *, metadata: Json, annotation: SubmissionAnnotation) -> Json:
        """Add all configured inferred references to the provided metadata.

        Args:
            metadata: The metadata to be transformed.
            annotation: The annotation on the metadata (not used here).

        Returns:
            The metadata as returned by `add_references_to_metadata`.

        Raises:
            MetadataTransformationError:
                if the transformation fails.
        """
        inferred_references = self._config.inferred_references

        return add_references_to_metadata(
            metadata=metadata,
            references=inferred_references,
            anchor_points_by_target=self._anchor_points_by_target,
        )


def check_model_assumptions(
    model: MetadataModel,
    config: ReferenceInferenceConfig,  # pylint: disable=unused-argument
) -> None:
    """Verify that the model fulfills the assumptions of this transformation.

    Only the basic model assumption check is performed; the config is accepted
    but not consulted.

    Args:
        model: The metadata model to check.
        config: The transformation config (unused).

    Raises:
        MetadataModelAssumptionError:
            if the model does not fulfill the assumptions.
    """
    check_basic_model_assumption(model=model)


def transform_model(
    model: MetadataModel, config: ReferenceInferenceConfig
) -> MetadataModel:
    """Add the inferred references defined in the config to the metadata model.

    Args:
        model: The metadata model to transform.
        config: The config providing the references to infer.

    Returns:
        The model as returned by `add_references_to_model`.

    Raises:
        MetadataModelTransformationError:
            if the transformation fails.
    """
    inferred_references = config.inferred_references
    return add_references_to_model(model=model, references=inferred_references)


# Bundles the config class, the model assumption check, the model transformation
# and the metadata transformer of the reference inference transformation.
REFERENCE_INFERENCE_TRANSFORMATION = TransformationDefinition[ReferenceInferenceConfig](
    metadata_transformer_factory=ReferenceInferenceMetadataTransformer,
    transform_model=transform_model,
    check_model_assumptions=check_model_assumptions,
    config_cls=ReferenceInferenceConfig,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
# for the German Human Genome-Phenome Archive (GHGA)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Logic for transforming metadata."""


from metldata.builtin_transformations.infer_references.path.resolve import (
resolve_reference_for_metadata_resource,
)
from metldata.builtin_transformations.infer_references.reference import (
InferredReference,
)
from metldata.metadata_utils import (
SelfIdLookUpError,
get_resources_of_class,
lookup_self_id,
upsert_resources_in_metadata,
)
from metldata.model_utils.anchors import (
AnchorPoint,
AnchorPointNotFoundError,
lookup_anchor_point,
)
from metldata.transform.base import (
Json,
MetadataModelTransformationError,
MetadataTransformationError,
)


def add_reference_to_metadata_resource(
    resource: Json,
    global_metadata: Json,
    reference: InferredReference,
    anchor_points_by_target: dict[str, AnchorPoint],
) -> Json:
    """Return a copy of a metadata resource extended by an inferred reference.

    The reference path is resolved starting from the given resource. The sorted,
    de-duplicated identifiers of the resources found at the end of the path are
    stored in the slot `reference.new_slot` of a shallow copy of the resource.
    The passed resource itself is not modified.

    Args:
        resource: The metadata resource to extend.
        global_metadata: The global metadata context to look up references in.
        reference: The inferred reference.
        anchor_points_by_target: The anchor points of the metadata model by target
            class.

    Raises:
        MetadataModelTransformationError:
            if no anchor point exists for the target class of the reference or if
            the resource already contains the slot to be added.
        MetadataTransformationError:
            if a resolved target resource has no identifier in the identifier slot
            of the target anchor point.
    """
    try:
        target_anchor_point = lookup_anchor_point(
            class_name=reference.target, anchor_points_by_target=anchor_points_by_target
        )
    except AnchorPointNotFoundError as error:
        raise MetadataModelTransformationError(
            f"Cannot add reference '{reference}' to metadata resource '{resource}'"
            " because the target anchor point could not be found."
        ) from error

    # refuse to overwrite a slot that is already present in the resource:
    if reference.new_slot in resource:
        raise MetadataModelTransformationError(
            f"Cannot add reference '{reference}' to metadata resource '{resource}'"
            f" because the target slot '{reference.new_slot}' already exists."
        )

    def _identifier_of(target_resource: Json) -> str:
        """Look up the identifier of a resolved target resource."""
        try:
            return lookup_self_id(
                resource=target_resource,
                identifier_slot=target_anchor_point.identifier_slot,
            )
        except SelfIdLookUpError as error:
            raise MetadataTransformationError(
                f"Cannot add reference '{reference}' to metadata resource '{resource}'"
                f" because the target resource '{target_resource}' does not have"
                f" an identifier in slot '{target_anchor_point.identifier_slot}'."
            ) from error

    resolved_resources = resolve_reference_for_metadata_resource(
        resource=resource,
        global_metadata=global_metadata,
        reference_path=reference.path,
        anchor_points_by_target=anchor_points_by_target,
    )

    # a set is used so that each target is referenced only once:
    target_ids: set[str] = {
        _identifier_of(resolved_resource) for resolved_resource in resolved_resources
    }

    # attach the IDs to a copy to leave the caller's resource untouched:
    extended_resource = resource.copy()
    extended_resource[reference.new_slot] = sorted(target_ids)
    return extended_resource


def add_reference_to_metadata(
    *,
    metadata: Json,
    reference: InferredReference,
    anchor_points_by_target: dict[str, AnchorPoint],
) -> Json:
    """Add one inferred reference to all resources of its source class.

    Args:
        metadata: The global metadata to transform.
        reference: The inferred reference to add.
        anchor_points_by_target: The anchor points of the metadata model by target
            class.

    Returns:
        The metadata as returned by `upsert_resources_in_metadata` after the
        resources of the source class were extended by the reference.

    Raises:
        MetadataTransformationError:
            if the transformation of the metadata fails.
    """
    source_class = reference.source

    source_resources = get_resources_of_class(
        global_metadata=metadata,
        class_name=source_class,
        anchor_points_by_target=anchor_points_by_target,
    )

    # extend every resource of the source class by the new reference:
    modified_resources = []
    for source_resource in source_resources:
        modified_resources.append(
            add_reference_to_metadata_resource(
                resource=source_resource,
                global_metadata=metadata,
                reference=reference,
                anchor_points_by_target=anchor_points_by_target,
            )
        )

    return upsert_resources_in_metadata(
        resources=modified_resources,
        class_name=source_class,
        global_metadata=metadata,
        anchor_points_by_target=anchor_points_by_target,
    )


def add_references_to_metadata(
    *,
    metadata: Json,
    references: list[InferredReference],
    anchor_points_by_target: dict[str, AnchorPoint],
) -> Json:
    """Apply all inferred references to the metadata, one after another.

    Each reference is added to the result of the previous step, in the order
    given. If `references` is empty, the passed metadata is returned as is.

    Args:
        metadata: The global metadata to transform.
        references: The inferred references to add.
        anchor_points_by_target: The anchor points of the metadata model by target
            class.

    Raises:
        MetadataTransformationError:
            if the transformation of the metadata fails.
    """
    transformed_metadata = metadata

    for inferred_reference in references:
        transformed_metadata = add_reference_to_metadata(
            metadata=transformed_metadata,
            reference=inferred_reference,
            anchor_points_by_target=anchor_points_by_target,
        )

    return transformed_metadata
Loading

0 comments on commit 84dda98

Please sign in to comment.