Skip to content

Commit

Permalink
code migration to schemapack 0.4 (#84)
Browse files Browse the repository at this point in the history
* migration to schemapack 0.4

* new assumption for required properties

* template update

---------

Co-authored-by: Thomas Zajac <thomas-jakob.zajac@uni-tuebingen.de>
  • Loading branch information
sbilge and mephenor authored Dec 17, 2024
1 parent ab90937 commit bbdcb07
Show file tree
Hide file tree
Showing 28 changed files with 262 additions and 110 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ repos:
- id: no-commit-to-branch
args: [--branch, dev, --branch, int, --branch, main]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.2
rev: v0.8.0
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
Expand Down
2 changes: 1 addition & 1 deletion .pyproject_generation/pyproject_custom.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ name = "metldata"
version = "1.0.0"
description = "metldata - A framework for handling metadata based on ETL, CQRS, and event sourcing."
dependencies = [
"schemapack == 2.0.0-alpha.3"
"schemapack == 2.0.0-alpha.5"
]

[project.urls]
Expand Down
2 changes: 2 additions & 0 deletions .pyproject_generation/pyproject_template.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ fixable = [
"UP", # e.g. List -> list
"I", # sort imports
"D", # pydocstyle
"RUF022", # sort items in __all__
]
ignore = [
"E111", # indentation with invalid multiple (for formatter)
Expand All @@ -59,6 +60,7 @@ ignore = [
"D206", # indent-with-spaces (for formatter)
"D300", # triple-single-quotes (for formatter)
"UP040", # type statement (not yet supported by mypy)
"PLC0206", # Extracting value from dictionary without calling `.items()`
]
select = [
"C90", # McCabe Complexity
Expand Down
59 changes: 33 additions & 26 deletions lock/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ anyio==4.3.0 \
--hash=sha256:048e05d0f6caeed70d731f3db756d35dcc1f35747c8c403364a8332c630441b8 \
--hash=sha256:f75253795a87df48568485fd18cdd2a3fa5c4f7c5be8e5e36637733fce06fed6
# via httpx
arcticfreeze==0.1.1 \
--hash=sha256:977a1d7c74498d00201d65519fa59f2b59e5a2b303668f7ecaec58a270ce5eda \
--hash=sha256:c9811b3c787bfdb0e2ea721b0a99bb0e700c91945fb1c9ce0822cb859b409916
# via schemapack
attrs==23.2.0 \
--hash=sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30 \
--hash=sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1
Expand Down Expand Up @@ -222,10 +226,10 @@ idna==3.7 \
# anyio
# httpx
# requests
immutabledict==3.0.0 \
--hash=sha256:034bacc6c6872707c4ec0ea9515de6bbe0dcf0fcabd97ae19fd4e4c338f05798 \
--hash=sha256:5a23cd369a6187f76a8c29d7d687980b092538eb9800e58964603f1b973c56fe
# via schemapack
immutabledict==4.2.0 \
--hash=sha256:d728b2c2410d698d95e6200237feb50a695584d20289ad3379a439aa3d90baba \
--hash=sha256:e003fd81aad2377a5a758bf7e1086cf3b70b63e9a5cc2f46bce8d0a2b4727c5f
# via arcticfreeze
iniconfig==2.0.0 \
--hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \
--hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374
Expand Down Expand Up @@ -507,7 +511,9 @@ requests==2.31.0 \
rich==13.7.1 \
--hash=sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222 \
--hash=sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432
# via typer
# via
# schemapack
# typer
rpds-py==0.18.0 \
--hash=sha256:01e36a39af54a30f28b73096dd39b6802eddd04c90dbe161c1b8dbe22353189f \
--hash=sha256:044a3e61a7c2dafacae99d1e722cc2d4c05280790ec5a05031b3876809d89a5c \
Expand Down Expand Up @@ -667,28 +673,29 @@ ruamel-yaml-clib==0.2.8 \
--hash=sha256:f481f16baec5290e45aebdc2a5168ebc6d35189ae6fea7a58787613a25f6e875 \
--hash=sha256:fff3573c2db359f091e1589c3d7c5fc2f86f5bdb6f24252c2d8e539d4e45f412
# via ruamel-yaml
ruff==0.4.2 \
--hash=sha256:0e2e06459042ac841ed510196c350ba35a9b24a643e23db60d79b2db92af0c2b \
--hash=sha256:1f32cadf44c2020e75e0c56c3408ed1d32c024766bd41aedef92aa3ca28eef68 \
--hash=sha256:22e306bf15e09af45ca812bc42fa59b628646fa7c26072555f278994890bc7ac \
--hash=sha256:24016ed18db3dc9786af103ff49c03bdf408ea253f3cb9e3638f39ac9cf2d483 \
--hash=sha256:33bcc160aee2520664bc0859cfeaebc84bb7323becff3f303b8f1f2d81cb4edc \
--hash=sha256:3afabaf7ba8e9c485a14ad8f4122feff6b2b93cc53cd4dad2fd24ae35112d5c5 \
--hash=sha256:5ec481661fb2fd88a5d6cf1f83403d388ec90f9daaa36e40e2c003de66751798 \
--hash=sha256:652e4ba553e421a6dc2a6d4868bc3b3881311702633eb3672f9f244ded8908cd \
--hash=sha256:6a2243f8f434e487c2a010c7252150b1fdf019035130f41b77626f5655c9ca22 \
--hash=sha256:6ab165ef5d72392b4ebb85a8b0fbd321f69832a632e07a74794c0e598e7a8376 \
--hash=sha256:7891ee376770ac094da3ad40c116258a381b86c7352552788377c6eb16d784fe \
--hash=sha256:799eb468ea6bc54b95527143a4ceaf970d5aa3613050c6cff54c85fda3fde480 \
--hash=sha256:82986bb77ad83a1719c90b9528a9dd663c9206f7c0ab69282af8223566a0c34e \
--hash=sha256:8772130a063f3eebdf7095da00c0b9898bd1774c43b336272c3e98667d4fb8fa \
--hash=sha256:8d14dc8953f8af7e003a485ef560bbefa5f8cc1ad994eebb5b12136049bbccc5 \
--hash=sha256:cbd1e87c71bca14792948c4ccb51ee61c3296e164019d2d484f3eaa2d360dfaf \
--hash=sha256:ec4ba9436a51527fb6931a8839af4c36a5481f8c19e8f5e42c2f7ad3a49f5069
ruff==0.8.0 \
--hash=sha256:0e55cce9aa93c5d0d4e3937e47b169035c7e91c8655b0974e61bb79cf398d49c \
--hash=sha256:295bb4c02d58ff2ef4378a1870c20af30723013f441c9d1637a008baaf928c8b \
--hash=sha256:2dabfd05b96b7b8f2da00d53c514eea842bff83e41e1cceb08ae1966254a51df \
--hash=sha256:3f4cd64916d8e732ce6b87f3f5296a8942d285bbbc161acee7fe561134af64f9 \
--hash=sha256:582891c57b96228d146725975fbb942e1f30a0c4ba19722e692ca3eb25cc9b4f \
--hash=sha256:5fdb6efecc3eb60bba5819679466471fd7d13c53487df7248d6e27146e985468 \
--hash=sha256:780d5d8523c04202184405e60c98d7595bdb498c3c6abba3b6d4cdf2ca2af426 \
--hash=sha256:7b1f1c76b47c18fa92ee78b60d2d20d7e866c55ee603e7d19c1e991fad933a9a \
--hash=sha256:812e2052121634cf13cd6fddf0c1871d0ead1aad40a1a258753c04c18bb71bbd \
--hash=sha256:83a55679c4cb449fa527b8497cadf54f076603cc36779b2170b24f704171ce70 \
--hash=sha256:85e654f0ded7befe2d61eeaf3d3b1e4ef3894469cd664ffa85006c7720f1e4a2 \
--hash=sha256:87a8e86bae0dbd749c815211ca11e3a7bd559b9710746c559ed63106d382bd9c \
--hash=sha256:a7ccfe6331bf8c8dad715753e157457faf7351c2b69f62f32c165c2dbcbacd44 \
--hash=sha256:ba93e6294e9a737cd726b74b09a6972e36bb511f9a102f1d9a7e1ce94dd206a6 \
--hash=sha256:c5c1466be2a2ebdf7c5450dd5d980cc87c8ba6976fb82582fea18823da6fa362 \
--hash=sha256:eb0d4f250a7711b67ad513fde67e8870109e5ce590a801c3722580fe98c33a99 \
--hash=sha256:facebdfe5a5af6b1588a1d26d170635ead6892d0e314477e80256ef4a8470cf3 \
--hash=sha256:fcb1bf2cc6706adae9d79c8d86478677e3bbd4ced796ccad106fd4776d395fea
# via -r lock/requirements-dev-template.in
schemapack==2.0.0a3 \
--hash=sha256:897c2659beb358a4256b95db94b9dfa1095b46698f46e4e3f2c50989e6c0210c \
--hash=sha256:fc6c182cf0f27e362a3a6163bf1c070f79c758362e53420980fbce7f1927c1bf
schemapack==2.0.0a5 \
--hash=sha256:00418c00dda7fe64fbb108289d2733e9230357348c9afb2c0104b0e1bef7a611 \
--hash=sha256:66fd591497b867b44870a2c3d7c52947e859e31ccaeeb3c59c0f6acfd3929316
# via metldata (pyproject.toml)
setuptools==69.5.1 \
--hash=sha256:6c1fccdac05a97e598fb0ae3bbed5904ccb317337a51139dcd51453611bbb987 \
Expand Down
44 changes: 37 additions & 7 deletions lock/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,25 @@ annotated-types==0.6.0 \
# via
# -c lock/requirements-dev.txt
# pydantic
arcticfreeze==0.1.1 \
--hash=sha256:977a1d7c74498d00201d65519fa59f2b59e5a2b303668f7ecaec58a270ce5eda \
--hash=sha256:c9811b3c787bfdb0e2ea721b0a99bb0e700c91945fb1c9ce0822cb859b409916
# via
# -c lock/requirements-dev.txt
# schemapack
attrs==23.2.0 \
--hash=sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30 \
--hash=sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1
# via
# -c lock/requirements-dev.txt
# jsonschema
# referencing
immutabledict==3.0.0 \
--hash=sha256:034bacc6c6872707c4ec0ea9515de6bbe0dcf0fcabd97ae19fd4e4c338f05798 \
--hash=sha256:5a23cd369a6187f76a8c29d7d687980b092538eb9800e58964603f1b973c56fe
immutabledict==4.2.0 \
--hash=sha256:d728b2c2410d698d95e6200237feb50a695584d20289ad3379a439aa3d90baba \
--hash=sha256:e003fd81aad2377a5a758bf7e1086cf3b70b63e9a5cc2f46bce8d0a2b4727c5f
# via
# -c lock/requirements-dev.txt
# schemapack
# arcticfreeze
jsonschema==4.21.1 \
--hash=sha256:7996507afae316306f9e2290407761157c6f78002dcf7419acb99822143d1c6f \
--hash=sha256:85727c00279f5fa6bedbe6238d2aa6403bedd8b4864ab11207d07df3cc1b2ee5
Expand All @@ -29,6 +35,18 @@ jsonschema-specifications==2023.12.1 \
# via
# -c lock/requirements-dev.txt
# jsonschema
markdown-it-py==3.0.0 \
--hash=sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1 \
--hash=sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb
# via
# -c lock/requirements-dev.txt
# rich
mdurl==0.1.2 \
--hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \
--hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba
# via
# -c lock/requirements-dev.txt
# markdown-it-py
pydantic==2.7.1 \
--hash=sha256:e029badca45266732a9a79898a15ae2e8b14840b1eabbb25844be28f0b33f3d5 \
--hash=sha256:e9dbb5eada8abe4d9ae5f46b9939aead650cd2b68f249bb3a8139dbe125803cc
Expand Down Expand Up @@ -125,6 +143,12 @@ pydantic-settings==2.2.1 \
# via
# -c lock/requirements-dev.txt
# schemapack
pygments==2.17.2 \
--hash=sha256:b27c2826c47d0f3219f29554824c30c5e8945175d888647acd804ddd04af846c \
--hash=sha256:da46cec9fd2de5be3a8a784f434e4c4ab670b4ff54d605c4c2717e9d49c4c367
# via
# -c lock/requirements-dev.txt
# rich
python-dotenv==1.0.1 \
--hash=sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca \
--hash=sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a
Expand All @@ -138,6 +162,12 @@ referencing==0.35.0 \
# -c lock/requirements-dev.txt
# jsonschema
# jsonschema-specifications
rich==13.7.1 \
--hash=sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222 \
--hash=sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432
# via
# -c lock/requirements-dev.txt
# schemapack
rpds-py==0.18.0 \
--hash=sha256:01e36a39af54a30f28b73096dd39b6802eddd04c90dbe161c1b8dbe22353189f \
--hash=sha256:044a3e61a7c2dafacae99d1e722cc2d4c05280790ec5a05031b3876809d89a5c \
Expand Down Expand Up @@ -302,9 +332,9 @@ ruamel-yaml-clib==0.2.8 \
# via
# -c lock/requirements-dev.txt
# ruamel-yaml
schemapack==2.0.0a3 \
--hash=sha256:897c2659beb358a4256b95db94b9dfa1095b46698f46e4e3f2c50989e6c0210c \
--hash=sha256:fc6c182cf0f27e362a3a6163bf1c070f79c758362e53420980fbce7f1927c1bf
schemapack==2.0.0a5 \
--hash=sha256:00418c00dda7fe64fbb108289d2733e9230357348c9afb2c0104b0e1bef7a611 \
--hash=sha256:66fd591497b867b44870a2c3d7c52947e859e31ccaeeb3c59c0f6acfd3929316
# via
# -c lock/requirements-dev.txt
# metldata (pyproject.toml)
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ name = "metldata"
version = "1.0.0"
description = "metldata - A framework for handling metadata based on ETL, CQRS, and event sourcing."
dependencies = [
"schemapack == 2.0.0-alpha.3",
"schemapack == 2.0.0-alpha.5",
]

[project.license]
Expand Down Expand Up @@ -60,6 +60,7 @@ fixable = [
"UP",
"I",
"D",
"RUF022",
]
ignore = [
"E111",
Expand All @@ -79,6 +80,7 @@ ignore = [
"D206",
"D300",
"UP040",
"PLC0206",
]
select = [
"C90",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def check_model_assumptions(
# Check if the property already exists in the target schema
try:
target_schema = resolve_schema_object_path(
json_schema=class_def.content.json_schema_dict,
json_schema=class_def.content,
path=instruction.target_content.object_path,
)
except KeyError:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from metldata.builtin_transformations.add_content_properties.path import (
resolve_data_object_path,
)
from metldata.builtin_transformations.common.utils import data_to_dict
from metldata.transform.exceptions import EvitableTransformationError


Expand All @@ -44,19 +45,17 @@ def add_properties(
Returns:
The data with the specified content properties being added.
"""
modified_data = data.model_copy(deep=True)

modified_data = data_to_dict(data)
for class_name, instructions in instructions_by_class.items():
resources = modified_data.resources.get(class_name)
class_resources = modified_data["resources"].get(class_name)

if not resources:
if not class_resources:
raise EvitableTransformationError()

for resource in resources.values():
for resource in class_resources.values():
for instruction in instructions:
content = resource.content
object = resolve_data_object_path(
data=content,
data=resource["content"],
path=instruction.target_content.object_path,
)

Expand All @@ -69,5 +68,4 @@ def add_properties(
object[instruction.target_content.property_name] = deepcopy(
instruction.value
)

return modified_data
return DataPack.model_validate(modified_data)
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
resolve_schema_object_path,
)
from metldata.builtin_transformations.common.model_transform import update_model
from metldata.builtin_transformations.common.utils import content_to_dict
from metldata.transform.exceptions import EvitableTransformationError


Expand All @@ -40,12 +41,10 @@ def add_content_properties(
updated_class_defs: dict[str, ClassDefinition] = {}
for class_name, cls_instructions in instructions_by_class.items():
class_def = model.classes.get(class_name)

if not class_def:
raise EvitableTransformationError()

content_schema = class_def.content.json_schema_dict

content_schema = content_to_dict(class_def)
for cls_instruction in cls_instructions:
object_path = cls_instruction.target_content.object_path
property_name = cls_instruction.target_content.property_name
Expand Down
31 changes: 27 additions & 4 deletions src/metldata/builtin_transformations/common/assumptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@

"Assumptions utilized in more than one transformation"

from collections.abc import Mapping
from typing import Any

from schemapack.spec.schemapack import SchemaPack

from metldata.builtin_transformations.add_content_properties.path import (
Expand Down Expand Up @@ -107,14 +110,30 @@ class in the provided `path_element`, depending on the type of the relation path
)


def assert_object_path_required(json_schema: Mapping[str, Any], path: str) -> None:
    """Ensure every element of an object path is marked as required in a JSON schema.

    This validates that any transformation relying on that path can depend on its
    presence in a datapack. The path is walked element by element: each key must be
    listed in the `required` array of the schema level it belongs to, and the walk
    then descends into that key's entry under `properties`.

    Args:
        json_schema: The JSON schema in which the walk starts.
        path: Dot-separated object path (e.g. "a.b"). If the path is an empty
            string, no validation is required.

    Raises:
        ModelAssumptionError: If a key of the path is not marked as required, or if
            it is marked as required but not declared under `properties` of the
            same schema level.
    """
    if not path:
        return

    # Walk with a local name instead of rebinding the caller-facing parameter.
    current_schema = json_schema
    for key in path.split("."):
        if key not in current_schema.get("required", []):
            raise ModelAssumptionError(f"'{key}' is not marked as required.")
        try:
            current_schema = current_schema["properties"][key]
        except KeyError as exc:
            # A key may be listed in `required` without being declared under
            # `properties`; report this as a failed assumption rather than
            # leaking a bare KeyError to the caller.
            raise ModelAssumptionError(
                f"'{key}' is marked as required but is not defined as a property."
            ) from exc


def assert_object_path_exists(
*,
model: SchemaPack,
instruction: InstructionProtocol,
) -> None:
"""Make sure that the source class (the class being modified) and the object_path
exist in the model. Assumption fails if the content path is present in the model before
the transformation.
exist in the model, and object_path properties are marked as required.
Assumption fails if the content path is present in the model before the transformation.
"""
class_name = instruction.class_name
class_def = model.classes.get(class_name)
Expand All @@ -126,15 +145,19 @@ def assert_object_path_exists(
# Check if the object_path already exists in the model
try:
target_schema = resolve_schema_object_path(
json_schema=class_def.content.json_schema_dict,
json_schema=class_def.content,
path=instruction.target_content.object_path,
)
except KeyError as exc:
raise ModelAssumptionError(
f"Object path {instruction.target_content.object_path} does not exist"
+ f" in class {class_name}."
) from exc

else:
assert_object_path_required(
json_schema=class_def.content,
path=instruction.target_content.object_path,
)
# Check if the property_name already exists in the model and raise an error if so
if instruction.target_content.property_name in target_schema.get("properties", {}):
raise ModelAssumptionError(
Expand Down
25 changes: 25 additions & 0 deletions src/metldata/builtin_transformations/common/custom_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
# for the German Human Genome-Phenome Archive (GHGA)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""A collection of custom types used for builtin transformations."""

from typing import Any, TypeAlias

# Identifier of a single resource; used as the key type of MutableClassResources.
ResourceId: TypeAlias = str
# Datapack held as a plain dict with string keys.
# NOTE(review): presumably the mutable counterpart of a schemapack DataPack — confirm.
MutableDatapack: TypeAlias = dict[str, Any]
# A single resource as a dict whose values are themselves dicts.
MutableResource: TypeAlias = dict[str, dict]
# Resources of one class: maps each ResourceId to its MutableResource.
MutableClassResources: TypeAlias = dict[ResourceId, MutableResource]
# Content of a resource held as a plain dict with string keys.
MutableResourceContent: TypeAlias = dict[str, Any]
Loading

0 comments on commit bbdcb07

Please sign in to comment.