From 01db23a1790531e496e4d1918ae20a99d540a50d Mon Sep 17 00:00:00 2001 From: sbilge Date: Tue, 16 Jul 2024 07:52:09 +0000 Subject: [PATCH] template files for count references transformation --- .../path/__init__.py | 0 .../{infer_relations => common}/path/path.py | 2 +- .../path/path_elements.py | 0 .../path/path_str.py | 2 +- .../count_references/__init__.py | 20 ++++ .../count_references/assumptions.py | 63 ++++++++++++ .../count_references/config.py | 32 ++++++ .../count_references/data_transform.py | 37 +++++++ .../count_references/main.py | 77 +++++++++++++++ .../count_references/model_transform.py | 38 +++++++ .../infer_relations/assumptions.py | 4 +- .../infer_relations/relations.py | 2 +- .../count_references/multiple/config.yaml | 31 ++++++ .../multiple/transformed.datapack.yaml | 58 +++++++++++ .../multiple/transformed.schemapack.yaml | 98 +++++++++++++++++++ tests/fixtures/transformations.py | 4 + 16 files changed, 463 insertions(+), 5 deletions(-) rename src/metldata/builtin_transformations/{infer_relations => common}/path/__init__.py (100%) rename src/metldata/builtin_transformations/{infer_relations => common}/path/path.py (98%) rename src/metldata/builtin_transformations/{infer_relations => common}/path/path_elements.py (100%) rename src/metldata/builtin_transformations/{infer_relations => common}/path/path_str.py (98%) create mode 100644 src/metldata/builtin_transformations/count_references/__init__.py create mode 100644 src/metldata/builtin_transformations/count_references/assumptions.py create mode 100644 src/metldata/builtin_transformations/count_references/config.py create mode 100644 src/metldata/builtin_transformations/count_references/data_transform.py create mode 100644 src/metldata/builtin_transformations/count_references/main.py create mode 100644 src/metldata/builtin_transformations/count_references/model_transform.py create mode 100644 tests/fixtures/example_transformations/count_references/multiple/config.yaml create mode 100644 
tests/fixtures/example_transformations/count_references/multiple/transformed.datapack.yaml create mode 100644 tests/fixtures/example_transformations/count_references/multiple/transformed.schemapack.yaml diff --git a/src/metldata/builtin_transformations/infer_relations/path/__init__.py b/src/metldata/builtin_transformations/common/path/__init__.py similarity index 100% rename from src/metldata/builtin_transformations/infer_relations/path/__init__.py rename to src/metldata/builtin_transformations/common/path/__init__.py diff --git a/src/metldata/builtin_transformations/infer_relations/path/path.py b/src/metldata/builtin_transformations/common/path/path.py similarity index 98% rename from src/metldata/builtin_transformations/infer_relations/path/path.py rename to src/metldata/builtin_transformations/common/path/path.py index f6ac3cf..fefba31 100644 --- a/src/metldata/builtin_transformations/infer_relations/path/path.py +++ b/src/metldata/builtin_transformations/common/path/path.py @@ -18,7 +18,7 @@ from pydantic import GetJsonSchemaHandler, ValidationInfo -from metldata.builtin_transformations.infer_relations.path.path_str import ( +from metldata.builtin_transformations.common.path.path_str import ( PATH_PATTERN, ValidationError, clean_path_str, diff --git a/src/metldata/builtin_transformations/infer_relations/path/path_elements.py b/src/metldata/builtin_transformations/common/path/path_elements.py similarity index 100% rename from src/metldata/builtin_transformations/infer_relations/path/path_elements.py rename to src/metldata/builtin_transformations/common/path/path_elements.py diff --git a/src/metldata/builtin_transformations/infer_relations/path/path_str.py b/src/metldata/builtin_transformations/common/path/path_str.py similarity index 98% rename from src/metldata/builtin_transformations/infer_relations/path/path_str.py rename to src/metldata/builtin_transformations/common/path/path_str.py index 6d3e548..8cf26ec 100644 --- 
a/src/metldata/builtin_transformations/infer_relations/path/path_str.py +++ b/src/metldata/builtin_transformations/common/path/path_str.py @@ -18,7 +18,7 @@ import re -from metldata.builtin_transformations.infer_relations.path.path_elements import ( +from metldata.builtin_transformations.common.path.path_elements import ( RelationPathElement, RelationPathElementType, ) diff --git a/src/metldata/builtin_transformations/count_references/__init__.py b/src/metldata/builtin_transformations/count_references/__init__.py new file mode 100644 index 0000000..ab70fc1 --- /dev/null +++ b/src/metldata/builtin_transformations/count_references/__init__.py @@ -0,0 +1,20 @@ +# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""A transformation to count the references.""" + +# shortcuts: +from metldata.builtin_transformations.count_references.main import ( # noqa: F401 + COUNT_REFERENCES_TRANSFORMATION, +) diff --git a/src/metldata/builtin_transformations/count_references/assumptions.py b/src/metldata/builtin_transformations/count_references/assumptions.py new file mode 100644 index 0000000..e5eb974 --- /dev/null +++ b/src/metldata/builtin_transformations/count_references/assumptions.py @@ -0,0 +1,63 @@ +# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"Assumptions for count references transformation" + +from typing import Any + +from schemapack.spec.schemapack import SchemaPack + +from metldata.builtin_transformations.common.path.path import RelationPath +from metldata.builtin_transformations.common.path.path_elements import ( + RelationPathElementType, +) +from metldata.transform.base import ModelAssumptionError + + +def assert_path_classes_and_relations_exist(model: SchemaPack, path: RelationPath): + """Make sure that all classes and relations defined in the provided path exist in + the provided model. + + Raises: + ModelAssumptionError: + if the model does not fulfill the assumptions. 
+ """ + for path_element in path.elements: + if path_element.source not in model.classes: + raise ModelAssumptionError( + f"Class {path_element.source} not found in model." + ) + + if path_element.target not in model.classes: + raise ModelAssumptionError( + f"Class {path_element.target} not found in model." + ) + + if path_element.type_ == RelationPathElementType.ACTIVE: + if ( + path_element.property + not in model.classes[path_element.source].relations + ): + raise ModelAssumptionError( + f"Relation property {path_element.property} not found in class" + f" {path_element.source}." + ) + + return + + +def check_model_assumptions(schema: SchemaPack, instructions_by_class: Any) -> None: + """Check the model assumptions for the count references transformation.""" + return None diff --git a/src/metldata/builtin_transformations/count_references/config.py b/src/metldata/builtin_transformations/count_references/config.py new file mode 100644 index 0000000..4aec28e --- /dev/null +++ b/src/metldata/builtin_transformations/count_references/config.py @@ -0,0 +1,32 @@ +# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Models used to describe count content properties that shall be calculated and added.""" + +from typing import Any + +from pydantic import Field +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class CountReferencesConfig(BaseSettings): + """Config describing the count properties to be added to models and data.""" + + model_config = SettingsConfigDict(extra="forbid") + + count_references: Any = Field( + ..., + description=("description"), + examples=[], + ) diff --git a/src/metldata/builtin_transformations/count_references/data_transform.py b/src/metldata/builtin_transformations/count_references/data_transform.py new file mode 100644 index 0000000..106dfe4 --- /dev/null +++ b/src/metldata/builtin_transformations/count_references/data_transform.py @@ -0,0 +1,37 @@ +# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +"""Logic for transforming data.""" + +from schemapack.spec.datapack import DataPack + +# from metldata.transform.base import EvitableTransformationError + + +def count_references(*, data: DataPack) -> DataPack: + """Count references (placeholder: the counting logic is not yet implemented). + + Args: + data: The data as DataPack to be transformed. + + + Returns: + A deep copy of the data, currently without any counts added. + """ + modified_data = data.model_copy(deep=True) + + # TODO modifications + + return modified_data diff --git a/src/metldata/builtin_transformations/count_references/main.py b/src/metldata/builtin_transformations/count_references/main.py new file mode 100644 index 0000000..4445a40 --- /dev/null +++ b/src/metldata/builtin_transformations/count_references/main.py @@ -0,0 +1,77 @@ +# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +"""A transformation to count references.""" + +from schemapack.spec.datapack import DataPack +from schemapack.spec.schemapack import SchemaPack + +from metldata.builtin_transformations.count_references.assumptions import ( + check_model_assumptions, +) +from metldata.builtin_transformations.count_references.config import ( + CountReferencesConfig, +) +from metldata.builtin_transformations.count_references.data_transform import ( + count_references, +) +from metldata.builtin_transformations.count_references.model_transform import ( + add_count_references, +) +from metldata.transform.base import DataTransformer, TransformationDefinition + + +class CountReferencesTransformer(DataTransformer[CountReferencesConfig]): + """A transformer that counts the references and adds them to content properties.""" + + def transform(self, data: DataPack) -> DataPack: + """Transforms data. + + Args: + data: The data as DataPack to be transformed. + """ + return count_references(data=data) + + +def check_model_assumptions_wrapper( + model: SchemaPack, config: CountReferencesConfig +) -> None: + """Check the assumptions of the model. + + Raises: + ModelAssumptionError: + if the model does not fulfill the assumptions. + """ + check_model_assumptions(schema=model, instructions_by_class=config.count_references) + + +def transform_model(model: SchemaPack, config: CountReferencesConfig) -> SchemaPack: + """Transform the data model. + + Raises: + DataModelTransformationError: + if the transformation fails. 
+ """ + return add_count_references( + model=model, instructions_by_class=config.count_references + ) + + +COUNT_REFERENCES_TRANSFORMATION = TransformationDefinition[CountReferencesConfig]( + config_cls=CountReferencesConfig, + check_model_assumptions=check_model_assumptions_wrapper, + transform_model=transform_model, + data_transformer_factory=CountReferencesTransformer, +) diff --git a/src/metldata/builtin_transformations/count_references/model_transform.py b/src/metldata/builtin_transformations/count_references/model_transform.py new file mode 100644 index 0000000..e0db6be --- /dev/null +++ b/src/metldata/builtin_transformations/count_references/model_transform.py @@ -0,0 +1,38 @@ +# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Model transformation logic for the 'count references' transformation""" + +from schemapack.spec.schemapack import ( + # ClassDefinition, + SchemaPack, +) + +# from metldata.transform.base import EvitableTransformationError + + +def add_count_references( + *, model: SchemaPack, instructions_by_class: dict[str, list[str]] +) -> SchemaPack: + """Add count properties to a model (placeholder: not yet implemented).
+ + Args: + model: + The model based on SchemaPack to be transformed. + Returns: + The model, currently returned unmodified. + """ + # TODO model transform logic for count references + + return model diff --git a/src/metldata/builtin_transformations/infer_relations/assumptions.py b/src/metldata/builtin_transformations/infer_relations/assumptions.py index fa4836e..a933cbd 100644 --- a/src/metldata/builtin_transformations/infer_relations/assumptions.py +++ b/src/metldata/builtin_transformations/infer_relations/assumptions.py @@ -18,10 +18,10 @@ from schemapack.spec.schemapack import SchemaPack -from metldata.builtin_transformations.infer_relations.path.path import ( +from metldata.builtin_transformations.common.path.path import ( RelationPath, ) -from metldata.builtin_transformations.infer_relations.path.path_elements import ( +from metldata.builtin_transformations.common.path.path_elements import ( RelationPathElementType, ) from metldata.builtin_transformations.infer_relations.relations import ( diff --git a/src/metldata/builtin_transformations/infer_relations/relations.py b/src/metldata/builtin_transformations/infer_relations/relations.py index 962089b..abff442 100644 --- a/src/metldata/builtin_transformations/infer_relations/relations.py +++ b/src/metldata/builtin_transformations/infer_relations/relations.py @@ -18,7 +18,7 @@ from pydantic import BaseModel, ConfigDict, Field, model_validator -from metldata.builtin_transformations.infer_relations.path.path import ( +from metldata.builtin_transformations.common.path.path import ( RelationPath, ) diff --git a/tests/fixtures/example_transformations/count_references/multiple/config.yaml b/tests/fixtures/example_transformations/count_references/multiple/config.yaml new file mode 100644 index 0000000..bfdf28f --- /dev/null +++ b/tests/fixtures/example_transformations/count_references/multiple/config.yaml @@ -0,0 +1,31 @@ +# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +#
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +count_references: +- class_name: Dataset + target_content: + object_path: "file_summary" + property_name: "count" + source_relation_path: "Dataset(files)>File" +- class_name: Sample + target_content: + object_path: "file_summary" + property_name: "count" + source_relation_path: "Sample(files)>File" +- class_name: Experiment + target_content: + object_path: "sample_summary" + property_name: "count" + source_relation_path: "Experiment(samples)>Sample" diff --git a/tests/fixtures/example_transformations/count_references/multiple/transformed.datapack.yaml b/tests/fixtures/example_transformations/count_references/multiple/transformed.datapack.yaml new file mode 100644 index 0000000..09aead8 --- /dev/null +++ b/tests/fixtures/example_transformations/count_references/multiple/transformed.datapack.yaml @@ -0,0 +1,58 @@ +datapack: 0.3.0 +resources: + File: + file_a: + content: + filename: file_a.fastq + format: FASTQ + checksum: 1a5ac10ab42911dc0224172c118a326d9a4c03969112a2f3eb1ad971e96e92b8 + size: 12321 + file_b: + content: + filename: file_b.fastq + format: FASTQ + checksum: 2b5ac10ab42911dc0224172c118a326d9a4c03969112a2f3eb1ad971e96e92c9 + size: 12314 + file_c: + content: + filename: file_c.fastq + format: FASTQ + checksum: a9c24870071da03f78515e6197048f3a2172e90e597e9250cd01a0cb8f0986ed + size: 12123 + Dataset: + dataset_1: + content: + dac_contact: dac@example.org + file_summary: # <- + count: 3 + relations: + files: + - 
file_a + - file_b + - file_c + Sample: + sample_x: + content: + description: Some sample. + file_summary: # <- + count: 2 + relations: + files: + - file_a + - file_b + sample_y: + content: + file_summary: # <- + count: 1 + relations: + files: + - file_c + Experiment: + experiment_i: + content: + sample_summary: # <- + count: 2 + relations: + samples: + - sample_x + - sample_y diff --git a/tests/fixtures/example_transformations/count_references/multiple/transformed.schemapack.yaml b/tests/fixtures/example_transformations/count_references/multiple/transformed.schemapack.yaml new file mode 100644 index 0000000..9aa8c86 --- /dev/null +++ b/tests/fixtures/example_transformations/count_references/multiple/transformed.schemapack.yaml @@ -0,0 +1,97 @@ +# a simple schemapack: +schemapack: 0.3.0 +classes: + File: + id: + propertyName: alias + content: ../example_content_schemas/File.schema.json + Dataset: + id: + propertyName: alias + content: + { + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": false, + "description": "A dataset that is a collection of files.", + "properties": + { + "dac_contact": { "type": "string" }, + "file_summary": + { + "type": "object", + "additionalProperties": false, + "properties": { "count": { "type": "integer" } }, + "required": ["count"], + }, + }, + "type": "object", + } + relations: + files: + targetClass: File + multiple: + origin: true + target: true + mandatory: + origin: false + target: true + Sample: + id: + propertyName: alias + content: + { + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": false, + "description": "A sample used to generate files in the context of an experiment.", + "properties": + { + "description": { "type": "string" }, + "file_summary": + { + "type": "object", + "additionalProperties": false, + "properties": { "count": { "type": "integer" } }, + "required": ["count"], + }, + }, + "type": "object", + } + relations: + files: + targetClass: File +
multiple: + origin: false + target: true + mandatory: + origin: false + target: true + Experiment: + id: + propertyName: alias + content: + { + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": false, + "description": "An experiment containing one or multiple samples.", + "properties": + { + "description": { "type": "string" }, + "sample_summary": + { + "type": "object", + "additionalProperties": false, + "properties": { "count": { "type": "integer" } }, + "required": ["count"], + }, + }, + "type": "object", + } + relations: + samples: + targetClass: Sample + multiple: + origin: false + target: true + mandatory: + origin: true + target: true diff --git a/tests/fixtures/transformations.py b/tests/fixtures/transformations.py index c5a0eb8..51c7dc2 100644 --- a/tests/fixtures/transformations.py +++ b/tests/fixtures/transformations.py @@ -26,6 +26,9 @@ from metldata.builtin_transformations.add_content_properties import ( ADD_CONTENT_PROPERTIES_TRANSFORMATION, ) +from metldata.builtin_transformations.count_references import ( + COUNT_REFERENCES_TRANSFORMATION, +) from metldata.builtin_transformations.delete_properties import ( PROPERTY_DELETION_TRANSFORMATION, ) @@ -43,6 +46,7 @@ "infer_relations": RELATION_INFERENCE_TRANSFORMATION, "delete_properties": PROPERTY_DELETION_TRANSFORMATION, "add_content_properties": ADD_CONTENT_PROPERTIES_TRANSFORMATION, + "count_references": COUNT_REFERENCES_TRANSFORMATION, }