Skip to content

Commit

Permalink
infer_relations, delete_properties and add_content
Browse files Browse the repository at this point in the history
_properties migrated
  • Loading branch information
sbilge committed Nov 15, 2024
1 parent ab90937 commit f8bf192
Show file tree
Hide file tree
Showing 17 changed files with 170 additions and 52 deletions.
2 changes: 1 addition & 1 deletion .pyproject_generation/pyproject_custom.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ name = "metldata"
version = "1.0.0"
description = "metldata - A framework for handling metadata based on ETL, CQRS, and event sourcing."
dependencies = [
"schemapack == 2.0.0-alpha.3"
"schemapack == 2.0.0-alpha.4"
]

[project.urls]
Expand Down
22 changes: 14 additions & 8 deletions lock/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ anyio==4.3.0 \
--hash=sha256:048e05d0f6caeed70d731f3db756d35dcc1f35747c8c403364a8332c630441b8 \
--hash=sha256:f75253795a87df48568485fd18cdd2a3fa5c4f7c5be8e5e36637733fce06fed6
# via httpx
arcticfreeze==0.1.1 \
--hash=sha256:977a1d7c74498d00201d65519fa59f2b59e5a2b303668f7ecaec58a270ce5eda \
--hash=sha256:c9811b3c787bfdb0e2ea721b0a99bb0e700c91945fb1c9ce0822cb859b409916
# via schemapack
attrs==23.2.0 \
--hash=sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30 \
--hash=sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1
Expand Down Expand Up @@ -222,10 +226,10 @@ idna==3.7 \
# anyio
# httpx
# requests
immutabledict==3.0.0 \
--hash=sha256:034bacc6c6872707c4ec0ea9515de6bbe0dcf0fcabd97ae19fd4e4c338f05798 \
--hash=sha256:5a23cd369a6187f76a8c29d7d687980b092538eb9800e58964603f1b973c56fe
# via schemapack
immutabledict==4.2.0 \
--hash=sha256:d728b2c2410d698d95e6200237feb50a695584d20289ad3379a439aa3d90baba \
--hash=sha256:e003fd81aad2377a5a758bf7e1086cf3b70b63e9a5cc2f46bce8d0a2b4727c5f
# via arcticfreeze
iniconfig==2.0.0 \
--hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \
--hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374
Expand Down Expand Up @@ -507,7 +511,9 @@ requests==2.31.0 \
rich==13.7.1 \
--hash=sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222 \
--hash=sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432
# via typer
# via
# schemapack
# typer
rpds-py==0.18.0 \
--hash=sha256:01e36a39af54a30f28b73096dd39b6802eddd04c90dbe161c1b8dbe22353189f \
--hash=sha256:044a3e61a7c2dafacae99d1e722cc2d4c05280790ec5a05031b3876809d89a5c \
Expand Down Expand Up @@ -686,9 +692,9 @@ ruff==0.4.2 \
--hash=sha256:cbd1e87c71bca14792948c4ccb51ee61c3296e164019d2d484f3eaa2d360dfaf \
--hash=sha256:ec4ba9436a51527fb6931a8839af4c36a5481f8c19e8f5e42c2f7ad3a49f5069
# via -r lock/requirements-dev-template.in
schemapack==2.0.0a3 \
--hash=sha256:897c2659beb358a4256b95db94b9dfa1095b46698f46e4e3f2c50989e6c0210c \
--hash=sha256:fc6c182cf0f27e362a3a6163bf1c070f79c758362e53420980fbce7f1927c1bf
schemapack==2.0.0a4 \
--hash=sha256:5e5a703f16cdb01067ac51a34362658db83e2adc48e61f4517c7d755154fbf68 \
--hash=sha256:a550df6bf0a4c5dc8293479a190600197a4a62b3fe3850c33b0ebe06ff11d729
# via metldata (pyproject.toml)
setuptools==69.5.1 \
--hash=sha256:6c1fccdac05a97e598fb0ae3bbed5904ccb317337a51139dcd51453611bbb987 \
Expand Down
44 changes: 37 additions & 7 deletions lock/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,25 @@ annotated-types==0.6.0 \
# via
# -c lock/requirements-dev.txt
# pydantic
arcticfreeze==0.1.1 \
--hash=sha256:977a1d7c74498d00201d65519fa59f2b59e5a2b303668f7ecaec58a270ce5eda \
--hash=sha256:c9811b3c787bfdb0e2ea721b0a99bb0e700c91945fb1c9ce0822cb859b409916
# via
# -c lock/requirements-dev.txt
# schemapack
attrs==23.2.0 \
--hash=sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30 \
--hash=sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1
# via
# -c lock/requirements-dev.txt
# jsonschema
# referencing
immutabledict==3.0.0 \
--hash=sha256:034bacc6c6872707c4ec0ea9515de6bbe0dcf0fcabd97ae19fd4e4c338f05798 \
--hash=sha256:5a23cd369a6187f76a8c29d7d687980b092538eb9800e58964603f1b973c56fe
immutabledict==4.2.0 \
--hash=sha256:d728b2c2410d698d95e6200237feb50a695584d20289ad3379a439aa3d90baba \
--hash=sha256:e003fd81aad2377a5a758bf7e1086cf3b70b63e9a5cc2f46bce8d0a2b4727c5f
# via
# -c lock/requirements-dev.txt
# schemapack
# arcticfreeze
jsonschema==4.21.1 \
--hash=sha256:7996507afae316306f9e2290407761157c6f78002dcf7419acb99822143d1c6f \
--hash=sha256:85727c00279f5fa6bedbe6238d2aa6403bedd8b4864ab11207d07df3cc1b2ee5
Expand All @@ -29,6 +35,18 @@ jsonschema-specifications==2023.12.1 \
# via
# -c lock/requirements-dev.txt
# jsonschema
markdown-it-py==3.0.0 \
--hash=sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1 \
--hash=sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb
# via
# -c lock/requirements-dev.txt
# rich
mdurl==0.1.2 \
--hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \
--hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba
# via
# -c lock/requirements-dev.txt
# markdown-it-py
pydantic==2.7.1 \
--hash=sha256:e029badca45266732a9a79898a15ae2e8b14840b1eabbb25844be28f0b33f3d5 \
--hash=sha256:e9dbb5eada8abe4d9ae5f46b9939aead650cd2b68f249bb3a8139dbe125803cc
Expand Down Expand Up @@ -125,6 +143,12 @@ pydantic-settings==2.2.1 \
# via
# -c lock/requirements-dev.txt
# schemapack
pygments==2.17.2 \
--hash=sha256:b27c2826c47d0f3219f29554824c30c5e8945175d888647acd804ddd04af846c \
--hash=sha256:da46cec9fd2de5be3a8a784f434e4c4ab670b4ff54d605c4c2717e9d49c4c367
# via
# -c lock/requirements-dev.txt
# rich
python-dotenv==1.0.1 \
--hash=sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca \
--hash=sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a
Expand All @@ -138,6 +162,12 @@ referencing==0.35.0 \
# -c lock/requirements-dev.txt
# jsonschema
# jsonschema-specifications
rich==13.7.1 \
--hash=sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222 \
--hash=sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432
# via
# -c lock/requirements-dev.txt
# schemapack
rpds-py==0.18.0 \
--hash=sha256:01e36a39af54a30f28b73096dd39b6802eddd04c90dbe161c1b8dbe22353189f \
--hash=sha256:044a3e61a7c2dafacae99d1e722cc2d4c05280790ec5a05031b3876809d89a5c \
Expand Down Expand Up @@ -302,9 +332,9 @@ ruamel-yaml-clib==0.2.8 \
# via
# -c lock/requirements-dev.txt
# ruamel-yaml
schemapack==2.0.0a3 \
--hash=sha256:897c2659beb358a4256b95db94b9dfa1095b46698f46e4e3f2c50989e6c0210c \
--hash=sha256:fc6c182cf0f27e362a3a6163bf1c070f79c758362e53420980fbce7f1927c1bf
schemapack==2.0.0a4 \
--hash=sha256:5e5a703f16cdb01067ac51a34362658db83e2adc48e61f4517c7d755154fbf68 \
--hash=sha256:a550df6bf0a4c5dc8293479a190600197a4a62b3fe3850c33b0ebe06ff11d729
# via
# -c lock/requirements-dev.txt
# metldata (pyproject.toml)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ name = "metldata"
version = "1.0.0"
description = "metldata - A framework for handling metadata based on ETL, CQRS, and event sourcing."
dependencies = [
"schemapack == 2.0.0-alpha.3",
"schemapack == 2.0.0-alpha.4",
]

[project.license]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def check_model_assumptions(
# Check if the property already exists in the target schema
try:
target_schema = resolve_schema_object_path(
json_schema=class_def.content.json_schema_dict,
json_schema=class_def.content,
path=instruction.target_content.object_path,
)
except KeyError:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from metldata.builtin_transformations.add_content_properties.path import (
resolve_data_object_path,
)
from metldata.builtin_transformations.common.utils import thaw_frozen_dict
from metldata.transform.exceptions import EvitableTransformationError


Expand All @@ -44,19 +45,23 @@ def add_properties(
Returns:
The data with the specified content properties being added.
"""
modified_data = data.model_copy(deep=True)
updated_classes: dict = {}

for class_name, instructions in instructions_by_class.items():
resources = modified_data.resources.get(class_name)
class_resources = data.resources.get(class_name)

if not resources:
if not class_resources:
raise EvitableTransformationError()

for resource in resources.values():
# convert to a mutable dict to modify it
# note that, it does not apply mutability to inner layer Resource objects
mutable_class_resources = thaw_frozen_dict(class_resources)

for resource_id, resource in class_resources.items():
resource_content = thaw_frozen_dict(resource.content)
for instruction in instructions:
content = resource.content
object = resolve_data_object_path(
data=content,
data=resource_content,
path=instruction.target_content.object_path,
)

Expand All @@ -69,5 +74,12 @@ def add_properties(
object[instruction.target_content.property_name] = deepcopy(
instruction.value
)

# resource_content changed
mutable_class_resources[resource_id] = resource.model_copy(
update={"content": resource_content})
#class resources changed
updated_classes[class_name]= class_resources.update(mutable_class_resources)
# resources changed
updated_resources = data.resources.update(updated_classes)
modified_data = data.model_copy(update={"resources": updated_resources})
return modified_data
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@
resolve_schema_object_path,
)
from metldata.builtin_transformations.common.model_transform import update_model
from metldata.builtin_transformations.common.utils import (
model_to_dict,
thaw_frozen_dict,
)
from metldata.transform.exceptions import EvitableTransformationError


Expand All @@ -40,12 +44,10 @@ def add_content_properties(
updated_class_defs: dict[str, ClassDefinition] = {}
for class_name, cls_instructions in instructions_by_class.items():
class_def = model.classes.get(class_name)

if not class_def:
raise EvitableTransformationError()

content_schema = class_def.content.json_schema_dict

content_schema = thaw_frozen_dict(class_def.content)
for cls_instruction in cls_instructions:
object_path = cls_instruction.target_content.object_path
property_name = cls_instruction.target_content.property_name
Expand Down
2 changes: 1 addition & 1 deletion src/metldata/builtin_transformations/common/assumptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def assert_object_path_exists(
# Check if the object_path already exists in the model
try:
target_schema = resolve_schema_object_path(
json_schema=class_def.content.json_schema_dict,
json_schema=class_def.content,
path=instruction.target_content.object_path,
)
except KeyError as exc:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@
resolve_schema_object_path,
)
from metldata.builtin_transformations.common.instruction import AggregateInstruction
from metldata.builtin_transformations.common.utils import model_to_dict
from metldata.transform.exceptions import EvitableTransformationError


def update_model(
*, model: SchemaPack, updated_class_defs: dict[str, ClassDefinition]
) -> SchemaPack:
"""Updates class definitions of a model that are subjected to model transformation"""
model_dict = model.model_dump()
model_dict = model_to_dict(model)
model_dict["classes"].update(updated_class_defs)
return SchemaPack.model_validate(model_dict)

Expand All @@ -51,7 +52,7 @@ def add_properties(
if not class_def:
raise EvitableTransformationError()

content_schema = class_def.content.json_schema_dict
content_schema = class_def.content

for cls_instruction in cls_instructions:
object_path = cls_instruction.target_content.object_path
Expand Down
46 changes: 46 additions & 0 deletions src/metldata/builtin_transformations/common/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
# for the German Human Genome-Phenome Archive (GHGA)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import json
from collections.abc import Mapping
from copy import deepcopy
from typing import Any

from schemapack import dumps_schemapack
from schemapack.spec.schemapack import SchemaPack


def model_to_dict(
    model: SchemaPack,
) -> dict[str, Any]:
    """Convert the provided SchemaPack model to a JSON-serializable dictionary.

    The model is deep-copied, serialized to a JSON string via
    `dumps_schemapack` (with `yaml_format=False`) and parsed back with
    `json.loads`.

    Args:
        model: The SchemaPack model to convert.

    Returns:
        A dictionary representation of the provided model.
    """
    # serialize a copy rather than the model instance that was passed in
    model_copy = deepcopy(model)
    serialized_model = dumps_schemapack(model_copy, yaml_format=False)
    return json.loads(serialized_model)


def _thaw_value(value: Any) -> Any:
    """Return a mutable equivalent of `value`, recursing into nested containers."""
    if isinstance(value, Mapping):
        return {key: _thaw_value(item) for key, item in value.items()}
    if isinstance(value, tuple):
        return [_thaw_value(item) for item in value]
    # everything else (scalars, frozensets, model instances, ...) is passed through
    return value


def thaw_frozen_dict(frozen_dict: Mapping) -> dict:
    """Recursively convert a nested frozen mapping into mutable built-in types.

    Every mapping (e.g. a FrozenDict) is converted to a plain `dict` and every
    tuple to a `list`, at all nesting levels. All other values are returned
    unchanged; in particular, frozensets are NOT converted, and non-mapping
    objects stored as values (e.g. model instances) stay as they are.

    This will be removed after we implement a FrozenDict validation to Schemapack lib.

    Args:
        frozen_dict: The (possibly nested) frozen mapping to convert.

    Returns:
        A new mutable dict with the same keys, whose nested mappings and tuples
        have been replaced by dicts and lists.
    """
    return _thaw_value(frozen_dict)

Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def assert_source_content_path_exists(
f"Class {referenced_class} does not exist in the model."
)

content_slot = class_def.content.json_schema_dict["properties"].get(content_path)
content_slot = class_def.content["properties"].get(content_path)

if not content_slot:
raise ModelAssumptionError(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def assert_classes_and_properties_exist(
)

for property in properties:
if property not in model.classes[class_name].content.json_schema_dict.get(
if property not in model.classes[class_name].content.get(
"properties", {}
):
raise ModelAssumptionError(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

from schemapack.spec.datapack import DataPack

from metldata.builtin_transformations.common.utils import thaw_frozen_dict
from metldata.transform.exceptions import EvitableTransformationError


Expand All @@ -35,16 +36,27 @@ def delete_properties(
Returns:
The data with the specified content properties being deleted.
"""
modified_data = data.model_copy(deep=True)
updated_classes: dict = {}

for class_name, properties in properties_by_class.items():
resources = modified_data.resources.get(class_name)
class_resources = data.resources.get(class_name)

if not resources:
if not class_resources:
raise EvitableTransformationError()

for resource in resources.values():

# convert to a mutable dict to modify it
# note that, it does not apply mutability to inner layer Resource objects
mutable_class_resources = thaw_frozen_dict(class_resources)

for resource_id, resource in class_resources.items():
# convert to a mutable dict to modify it in place
resource_content = thaw_frozen_dict(resource.content)
for property in properties:
resource.content.pop(property, None)

resource_content.pop(property, None)
mutable_class_resources[resource_id] = resource.model_copy(
update={"content": resource_content}
)
updated_classes[class_name]= class_resources.update(mutable_class_resources)
updated_resources = data.resources.update(updated_classes)
modified_data = data.model_copy(update={"resources": updated_resources})
return modified_data
Loading

0 comments on commit f8bf192

Please sign in to comment.