From 9446da6577d34be0c8cc2c3b6d2ceddfc1d2447a Mon Sep 17 00:00:00 2001 From: KerstenBreuer Date: Thu, 7 Dec 2023 12:47:25 +0000 Subject: [PATCH] migrated transformation handling --- .../builtin_transformations/null/main.py | 6 +- src/metldata/schemapack_/transform/base.py | 9 +- .../schemapack_/transform/handling.py | 79 ++++---- src/metldata/schemapack_/transform/main.py | 95 --------- tests/schemapack_/fixtures/__init__.py | 16 ++ tests/schemapack_/fixtures/data.py | 34 ++++ .../invalid_minimal.datapack.yaml | 19 ++ .../example_data/valid_minimal.datapack.yaml | 40 ++++ .../example_models/minimal.schemapack.yaml | 49 +++++ tests/schemapack_/fixtures/models.py | 35 ++++ tests/schemapack_/fixtures/utils.py | 30 +++ tests/schemapack_/transform/test_handling.py | 187 +++++++++++++----- tests/schemapack_/transform/test_main.py | 94 --------- 13 files changed, 403 insertions(+), 290 deletions(-) delete mode 100644 src/metldata/schemapack_/transform/main.py create mode 100644 tests/schemapack_/fixtures/__init__.py create mode 100644 tests/schemapack_/fixtures/data.py create mode 100644 tests/schemapack_/fixtures/example_data/invalid_minimal.datapack.yaml create mode 100644 tests/schemapack_/fixtures/example_data/valid_minimal.datapack.yaml create mode 100644 tests/schemapack_/fixtures/example_models/minimal.schemapack.yaml create mode 100644 tests/schemapack_/fixtures/models.py create mode 100644 tests/schemapack_/fixtures/utils.py delete mode 100644 tests/schemapack_/transform/test_main.py diff --git a/src/metldata/schemapack_/builtin_transformations/null/main.py b/src/metldata/schemapack_/builtin_transformations/null/main.py index db824b2f..d932b5ef 100644 --- a/src/metldata/schemapack_/builtin_transformations/null/main.py +++ b/src/metldata/schemapack_/builtin_transformations/null/main.py @@ -19,7 +19,6 @@ from schemapack.spec.datapack import DataPack from schemapack.spec.schemapack import SchemaPack -from metldata.event_handling.models import SubmissionAnnotation from metldata.schemapack_.builtin_transformations.null.config import NullConfig from metldata.schemapack_.transform.base import ( DataTransformer, @@ -41,14 +40,11 @@ class NullTransformer(DataTransformer[NullConfig]): """A Null transformer that returns the input model and data unchanged. Useful e.g. for testing.""" - def transform( - self, *, data: DataPack, annotation: SubmissionAnnotation - ) -> DataPack: + def transform(self, data: DataPack) -> DataPack: """Transforms data. Args: data: The data as DataPack to be transformed. - annotation: The annotation on the data. Raises: DataTransformationError: diff --git a/src/metldata/schemapack_/transform/base.py b/src/metldata/schemapack_/transform/base.py index bfb4878a..742c7dd0 100644 --- a/src/metldata/schemapack_/transform/base.py +++ b/src/metldata/schemapack_/transform/base.py @@ -33,7 +33,9 @@ from schemapack.spec.datapack import DataPack from schemapack.spec.schemapack import SchemaPack -from metldata.event_handling.models import SubmissionAnnotation + +class ModelAssumptionError(RuntimeError): + """Raised when assumptions made by transformation step about a model are not met.""" class ModelTransformationError(RuntimeError): @@ -65,14 +67,11 @@ def __init__( self._transformed_model = transformed_model @abstractmethod - def transform( - self, *, data: DataPack, annotation: SubmissionAnnotation - ) -> DataPack: + def transform(self, data: DataPack) -> DataPack: """Transforms data. Args: data: The data as DataPack to be transformed. - annotation: The annotation on the data. Raises: DataTransformationError: diff --git a/src/metldata/schemapack_/transform/handling.py b/src/metldata/schemapack_/transform/handling.py index 8a542a72..2d1a5b7b 100644 --- a/src/metldata/schemapack_/transform/handling.py +++ b/src/metldata/schemapack_/transform/handling.py @@ -17,12 +17,11 @@ """Logic for handling Transformation.""" from pydantic import BaseModel, ConfigDict +from schemapack.spec.datapack import DataPack +from schemapack.spec.schemapack import SchemaPack +from schemapack.validation import SchemaPackValidator -from metldata.custom_types import Json -from metldata.event_handling.models import SubmissionAnnotation -from metldata.model_utils.essentials import MetadataModel -from metldata.model_utils.metadata_validator import MetadataValidator -from metldata.transform.base import ( +from metldata.schemapack_.transform.base import ( Config, TransformationDefinition, WorkflowConfig, @@ -38,7 +37,7 @@ class WorkflowConfigMismatchError(RuntimeError): """ def __init__( - self, workflow_definition: WorkflowDefinition, workflow_config: Config + self, workflow_definition: WorkflowDefinition, workflow_config: BaseModel ): """Initialize the error with the workflow definition and the config.""" message = ( @@ -56,7 +55,7 @@ def __init__( self, transformation_definition: TransformationDefinition[Config], transformation_config: Config, - original_model: MetadataModel, + original_model: SchemaPack, ): """Initialize the TransformationHandler by checking the assumptions made on the original model and transforming the model as described in the transformation @@ -75,41 +74,39 @@ def __init__( self.transformed_model = self._definition.transform_model( self._original_model, self._config ) - self._metadata_transformer = self._definition.metadata_transformer_factory( + self._data_transformer = self._definition.data_transformer_factory( config=self._config, original_model=self._original_model, transformed_model=self.transformed_model, ) - self._original_metadata_validator = MetadataValidator( - model=self._original_model + self._original_data_validator = SchemaPackValidator( + schemapack=self._original_model ) - self._transformed_metadata_validator = MetadataValidator( - model=self.transformed_model + self._transformed_data_validator = SchemaPackValidator( + schemapack=self.transformed_model ) - def transform_metadata( - self, metadata: Json, *, annotation: SubmissionAnnotation - ) -> Json: - """Transforms metadata using the transformation definition. Validates the - original metadata against the original model and the transformed metadata + def transform_data(self, data: DataPack) -> DataPack: + """Transforms data using the transformation definition. Validates the + original data against the original model and the transformed data against the transformed model. Args: - metadata: The metadata to be transformed. - annotation: The annotation on the metadata. + data: The data to be transformed. Raises: - MetadataTransformationError: + schemapack.exceptions.ValidationError: + If validation of input data or transformed data fails against the + original or transformed model, respectively. + DataTransformationError: if the transformation fails. """ - self._original_metadata_validator.validate(metadata) - transformed_metadata = self._metadata_transformer.transform( - metadata=metadata, annotation=annotation - ) - self._transformed_metadata_validator.validate(transformed_metadata) + self._original_data_validator.validate(datapack=data) + transformed_data = self._data_transformer.transform(data=data) + self._transformed_data_validator.validate(datapack=transformed_data) - return transformed_metadata + return transformed_data class ResolvedWorkflowStep(WorkflowStepBase): @@ -147,7 +144,7 @@ def resolve_workflow_step( step_name: str, workflow_definition: WorkflowDefinition, workflow_config: WorkflowConfig, - original_model: MetadataModel, + original_model: SchemaPack, ) -> ResolvedWorkflowStep: """Translates a workflow step given a workflow definition and a workflow config into a resolved workflow step. @@ -171,7 +168,7 @@ def resolve_workflow_step( def resolve_workflow( workflow_definition: WorkflowDefinition, - original_model: MetadataModel, + original_model: SchemaPack, workflow_config: WorkflowConfig, ) -> ResolvedWorkflow: """Translates a workflow definition given an input model and a workflow config into @@ -225,10 +222,10 @@ def __init__( self, workflow_definition: WorkflowDefinition, workflow_config: WorkflowConfig, - original_model: MetadataModel, + original_model: SchemaPack, ): """Initialize the WorkflowHandler with a workflow deinition, a matching - config, and a metadata model. The workflow definition is translated into a + config, and a model. The workflow definition is translated into a resolved workflow. """ self._resolved_workflow = resolve_workflow( @@ -241,25 +238,17 @@ def __init__( self._resolved_workflow ) - def run( - self, *, metadata: Json, annotation: SubmissionAnnotation - ) -> dict[str, Json]: - """Run the workflow definition on metadata and its annotation to generate - artifacts. - """ - transformed_metadata: dict[str, Json] = {} + def run(self, *, data: DataPack) -> dict[str, DataPack]: + """Run the workflow definition on data to generate artifacts.""" + transformed_data: dict[str, DataPack] = {} for step_name in self._resolved_workflow.step_order: step = self._resolved_workflow.steps[step_name] - input_metadata = ( - metadata if step.input is None else transformed_metadata[step.input] - ) - transformed_metadata[ - step_name - ] = step.transformation_handler.transform_metadata( - input_metadata, annotation=annotation + input_data = data if step.input is None else transformed_data[step.input] + transformed_data[step_name] = step.transformation_handler.transform_data( + input_data ) return { - artifact_name: transformed_metadata[step_name] + artifact_name: transformed_data[step_name] for artifact_name, step_name in self._resolved_workflow.artifacts.items() } diff --git a/src/metldata/schemapack_/transform/main.py b/src/metldata/schemapack_/transform/main.py deleted file mode 100644 index 4e8b4cbc..00000000 --- a/src/metldata/schemapack_/transform/main.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln -# for the German Human Genome-Phenome Archive (GHGA) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Main logic for running a transformation workflow on submissions.""" - - -from collections.abc import Awaitable -from typing import Callable - -from metldata.event_handling.event_handling import ( - FileSystemEventPublisher, - FileSystemEventSubscriber, -) -from metldata.event_handling.models import SubmissionEventPayload -from metldata.model_utils.essentials import MetadataModel -from metldata.transform.artifact_publisher import ArtifactEvent, ArtifactEventPublisher -from metldata.transform.base import WorkflowConfig, WorkflowDefinition -from metldata.transform.config import TransformationEventHandlingConfig -from metldata.transform.handling import WorkflowHandler -from metldata.transform.source_event_subscriber import SourceEventSubscriber - - -async def run_workflow_on_source_event( - source_event: SubmissionEventPayload, - workflow_handler: WorkflowHandler, - publish_artifact_func: Callable[[ArtifactEvent], Awaitable[None]], -) -> None: - """Run a transformation workflow on a source event and publish the artifact using - the provided artifact publisher. - - Args: - source_event: - The source event corresponding to one submission and its content. - workflow_handler: - The workflow handler pre-configured with a workflow definition, a workflow - config, and the original model of the source events. - publish_artifact_func: A function for publishing artifacts. - """ - artifacts = workflow_handler.run( - metadata=source_event.content, annotation=source_event.annotation - ) - - for artifact_type, artifact_content in artifacts.items(): - artifact_event = ArtifactEvent( - artifact_type=artifact_type, - payload=source_event.model_copy(update={"content": artifact_content}), - ) - - await publish_artifact_func(artifact_event) - - -async def run_workflow_on_all_source_events( - *, - event_config: TransformationEventHandlingConfig, - workflow_definition: WorkflowDefinition, - worflow_config: WorkflowConfig, - original_model: MetadataModel, -): - """Run a subscriber to hand source events to a transformation workflow and - run a publisher for publishing artifacts. - """ - workflow_handler = WorkflowHandler( - workflow_definition=workflow_definition, - workflow_config=worflow_config, - original_model=original_model, - ) - event_publisher = FileSystemEventPublisher(config=event_config) - artifact_publisher = ArtifactEventPublisher( - config=event_config, provider=event_publisher - ) - source_event_subscriber = SourceEventSubscriber( - config=event_config, - run_workflow_func=lambda source_event: run_workflow_on_source_event( - workflow_handler=workflow_handler, - source_event=source_event, - publish_artifact_func=artifact_publisher.publish_artifact, - ), - ) - event_subscriber = FileSystemEventSubscriber( - config=event_config, translator=source_event_subscriber - ) - await event_subscriber.run() diff --git a/tests/schemapack_/fixtures/__init__.py b/tests/schemapack_/fixtures/__init__.py new file mode 100644 index 00000000..b8a4adb5 --- /dev/null +++ b/tests/schemapack_/fixtures/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Fixtures that are used in both integration and unit tests""" diff --git a/tests/schemapack_/fixtures/data.py b/tests/schemapack_/fixtures/data.py new file mode 100644 index 00000000..7074529c --- /dev/null +++ b/tests/schemapack_/fixtures/data.py @@ -0,0 +1,34 @@ +# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Valid and invalid metadata examples using the minimal model.""" + +from schemapack.load import load_datapack +from schemapack.spec.datapack import DataPack + +from tests.schemapack_.fixtures.utils import BASE_DIR + +EXAMPLE_DATA_DIR = BASE_DIR / "example_data" + + +def _get_example_data(name: str) -> DataPack: + """Get example metadata.""" + + return load_datapack(EXAMPLE_DATA_DIR / f"{name}.datapack.yaml") + + +VALID_MINIMAL_DATA = _get_example_data("valid_minimal") +INVALID_MINIMAL_DATA = _get_example_data("invalid_minimal") diff --git a/tests/schemapack_/fixtures/example_data/invalid_minimal.datapack.yaml b/tests/schemapack_/fixtures/example_data/invalid_minimal.datapack.yaml new file mode 100644 index 00000000..14e3a2e7 --- /dev/null +++ b/tests/schemapack_/fixtures/example_data/invalid_minimal.datapack.yaml @@ -0,0 +1,19 @@ +# Misses content property defined in the content schema: +datapack: 0.1.0 +resources: + File: + example_file_a: + content: + alias: example_file_a + filename: example_file_a.fastq + format: FASTQ + checksum: 1a5ac10ab42911dc0224172c118a326d9a4c03969112a2f3eb1ad971e96e92b8 + # missing size property + Dataset: + example_dataset: + content: + alias: example_dataset + dac_contact: dac@example.org + relations: + files: + - example_file_a diff --git a/tests/schemapack_/fixtures/example_data/valid_minimal.datapack.yaml b/tests/schemapack_/fixtures/example_data/valid_minimal.datapack.yaml new file mode 100644 index 00000000..21cca892 --- /dev/null +++ b/tests/schemapack_/fixtures/example_data/valid_minimal.datapack.yaml @@ -0,0 +1,40 @@ +datapack: 0.1.0 +resources: + File: + example_file_a: + content: + alias: example_file_a + filename: example_file_a.fastq + format: FASTQ + checksum: 1a5ac10ab42911dc0224172c118a326d9a4c03969112a2f3eb1ad971e96e92b8 + size: 12321 + example_file_b: + content: + alias: example_file_b + filename: example_file_b.fastq + format: FASTQ + checksum: 2b5ac10ab42911dc0224172c118a326d9a4c03969112a2f3eb1ad971e96e92c9 + size: 12314 + example_file_c: + content: + alias: example_file_c + filename: example_file_c.fastq + format: FASTQ + checksum: a9c24870071da03f78515e6197048f3a2172e90e597e9250cd01a0cb8f0986ed + size: 12123 + Dataset: + example_dataset_1: + content: + alias: example_dataset_1 + dac_contact: dac@example.org + relations: + files: + - example_file_a + - example_file_b + example_dataset_2: + content: + alias: example_dataset_2 + dac_contact: dac@example.org + relations: + files: + - example_file_c diff --git a/tests/schemapack_/fixtures/example_models/minimal.schemapack.yaml b/tests/schemapack_/fixtures/example_models/minimal.schemapack.yaml new file mode 100644 index 00000000..d79663d6 --- /dev/null +++ b/tests/schemapack_/fixtures/example_models/minimal.schemapack.yaml @@ -0,0 +1,49 @@ +# a simple schemapack with the content schemas being embedded +schemapack: 0.1.0 +classes: + File: + id: + from_content: alias + content: + "$schema": "http://json-schema.org/draft-07/schema#" + additionalProperties: false + description: A file is an object that contains information generated from a process, + either an Experiment or an Analysis. + properties: + alias: + type: string + checksum: + type: string + filename: + type: string + format: + type: string + size: + type: integer + required: + - alias + - filename + - format + - checksum + - size + type: object + Dataset: + id: + from_content: alias + content: + "$schema": "http://json-schema.org/draft-07/schema#" + additionalProperties: false + description: A dataset that is a collection of files. + properties: + alias: + type: string + dac_contact: + type: string + required: + - alias + type: object + + relations: + files: + to: File + cardinality: many_to_many diff --git a/tests/schemapack_/fixtures/models.py b/tests/schemapack_/fixtures/models.py new file mode 100644 index 00000000..dd9b514d --- /dev/null +++ b/tests/schemapack_/fixtures/models.py @@ -0,0 +1,35 @@ +# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Example models.""" + +from schemapack.load import load_schemapack +from schemapack.spec.schemapack import SchemaPack + +from tests.schemapack_.fixtures.utils import BASE_DIR + +EXAMPLE_MODEL_DIR = BASE_DIR / "example_models" +VALID_MINIMAL_MODEL_EXAMPLE_PATH = EXAMPLE_MODEL_DIR / "minimal_model.yaml" + + +def _get_example_model(name: str) -> SchemaPack: + """Get example model.""" + + return load_schemapack(EXAMPLE_MODEL_DIR / f"{name}.schemapack.yaml") + + +VALID_MINIMAL_MODEL = _get_example_model("minimal") +VALID_MODELS = [VALID_MINIMAL_MODEL] diff --git a/tests/schemapack_/fixtures/utils.py b/tests/schemapack_/fixtures/utils.py new file mode 100644 index 00000000..3bbd4d0f --- /dev/null +++ b/tests/schemapack_/fixtures/utils.py @@ -0,0 +1,30 @@ +# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utils for Fixture handling""" + +from pathlib import Path +from typing import Any + +import yaml + +BASE_DIR = Path(__file__).parent.resolve() + + +def read_yaml(path: Path) -> dict[str, Any]: + """Read a YAML file.""" + + with open(path, encoding="utf-8") as file: + return yaml.safe_load(file) diff --git a/tests/schemapack_/transform/test_handling.py b/tests/schemapack_/transform/test_handling.py index 00a781bc..e4248df2 100644 --- a/tests/schemapack_/transform/test_handling.py +++ b/tests/schemapack_/transform/test_handling.py @@ -18,77 +18,172 @@ with builtin transformations are tested here.""" import pytest +import schemapack.exceptions +from schemapack.spec.datapack import DataPack +from schemapack.spec.schemapack import SchemaPack -from metldata.builtin_transformations.infer_references.main import ( - REFERENCE_INFERENCE_TRANSFORMATION, - ReferenceInferenceConfig, -) -from metldata.model_utils.essentials import MetadataModel -from metldata.transform.base import ( - MetadataModelAssumptionError, - MetadataModelTransformationError, +from metldata.schemapack_.builtin_transformations.null import NULL_TRANSFORMATION +from metldata.schemapack_.builtin_transformations.null.config import NullConfig +from metldata.schemapack_.transform.base import ( + DataTransformer, + ModelAssumptionError, + ModelTransformationError, TransformationDefinition, + WorkflowDefinition, + WorkflowStep, ) -from metldata.transform.handling import TransformationHandler -from tests.fixtures.metadata_models import VALID_ADVANCED_METADATA_MODEL - -VALID_EXAMPLE_CONFIG = ReferenceInferenceConfig( - inferred_ref_map={ - "Experiment": { - "files": { # type: ignore - "path": "Experiment(samples)>Sample(files)>File", - "multivalued": True, - } - } - } +from metldata.schemapack_.transform.handling import ( + TransformationHandler, + WorkflowHandler, ) +from tests.schemapack_.fixtures.data import INVALID_MINIMAL_DATA, VALID_MINIMAL_DATA +from tests.schemapack_.fixtures.models import VALID_MINIMAL_MODEL + + +def test_transformation_handler_happy(): + """Test the happy path of using a TransformationHandler.""" + + transformation_handler = TransformationHandler( + transformation_definition=NULL_TRANSFORMATION, + transformation_config=NullConfig(), + original_model=VALID_MINIMAL_MODEL, + ) + + # Since the null transformation was used, compare with the input: + assert transformation_handler.transformed_model == VALID_MINIMAL_MODEL + + transformed_data = transformation_handler.transform_data(VALID_MINIMAL_DATA) + + # Since the null transformation was used, compare with the input: + assert transformed_data == VALID_MINIMAL_DATA def test_transformation_handler_assumption_error(): """Test using the TransformationHandling when model assumptions are not met.""" # make transformation definition always raise an MetadataModelAssumptionError: - def always_failing_assumptions( - model: MetadataModel, config: ReferenceInferenceConfig - ): + def always_failing_assumptions(model: SchemaPack, config: NullConfig): """A function that always raises a MetadataModelAssumptionError.""" - raise MetadataModelAssumptionError + raise ModelAssumptionError - transformation = TransformationDefinition( - config_cls=REFERENCE_INFERENCE_TRANSFORMATION.config_cls, + transformation = TransformationDefinition[NullConfig]( + config_cls=NULL_TRANSFORMATION.config_cls, check_model_assumptions=always_failing_assumptions, - transform_model=REFERENCE_INFERENCE_TRANSFORMATION.transform_model, - metadata_transformer_factory=REFERENCE_INFERENCE_TRANSFORMATION.metadata_transformer_factory, + transform_model=NULL_TRANSFORMATION.transform_model, + data_transformer_factory=NULL_TRANSFORMATION.data_transformer_factory, ) - with pytest.raises(MetadataModelAssumptionError): + with pytest.raises(ModelAssumptionError): _ = TransformationHandler( transformation_definition=transformation, - transformation_config=VALID_EXAMPLE_CONFIG, - original_model=VALID_ADVANCED_METADATA_MODEL, + transformation_config=NullConfig(), + original_model=VALID_MINIMAL_MODEL, ) def test_transformation_handler_model_transformation_error(): """Test using the TransformationHandling when model transformation fails.""" - # make transformation definition always raise an MetadataModelAssumptionError: - def always_failing_transformation( - original_model: MetadataModel, config: ReferenceInferenceConfig - ): - """A function that always raises a MetadataModelTransformationError.""" - raise MetadataModelTransformationError - - transformation = TransformationDefinition( - config_cls=REFERENCE_INFERENCE_TRANSFORMATION.config_cls, - check_model_assumptions=REFERENCE_INFERENCE_TRANSFORMATION.check_model_assumptions, + # make transformation definition always raise an ModelAssumptionError: + def always_failing_transformation(original_model: SchemaPack, config: NullConfig): + """A function that always raises a ModelTransformationError.""" + raise ModelTransformationError + + transformation = TransformationDefinition[NullConfig]( + config_cls=NULL_TRANSFORMATION.config_cls, + check_model_assumptions=NULL_TRANSFORMATION.check_model_assumptions, transform_model=always_failing_transformation, - metadata_transformer_factory=REFERENCE_INFERENCE_TRANSFORMATION.metadata_transformer_factory, + data_transformer_factory=NULL_TRANSFORMATION.data_transformer_factory, ) - - with pytest.raises(MetadataModelTransformationError): + with pytest.raises(ModelTransformationError): _ = TransformationHandler( transformation_definition=transformation, - transformation_config=VALID_EXAMPLE_CONFIG, - original_model=VALID_ADVANCED_METADATA_MODEL, + transformation_config=NullConfig(), + original_model=VALID_MINIMAL_MODEL, ) + + +def test_transformation_handler_input_data_invalid(): + """Test the TransformationHandler when used with input data that is not valid + against the model.""" + + transformation_handler = TransformationHandler( + transformation_definition=NULL_TRANSFORMATION, + transformation_config=NullConfig(), + original_model=VALID_MINIMAL_MODEL, + ) + + with pytest.raises(schemapack.exceptions.ValidationError): + _ = transformation_handler.transform_data(INVALID_MINIMAL_DATA) + + +def test_transformation_handler_transformed_data_invalid(): + """Test the TransformationHandler when the transformed data fails validation + against the transformed model.""" + + class AlwaysInvalidTransformer(DataTransformer[NullConfig]): + """A transformer that always returns the same invalid data.""" + + def transform(self, data: DataPack) -> DataPack: + """Transforms data. + + Args: + data: The data as DataPack to be transformed. + + Raises: + DataTransformationError: + if the transformation fails. + """ + return INVALID_MINIMAL_DATA + + transformation = TransformationDefinition[NullConfig]( + config_cls=NULL_TRANSFORMATION.config_cls, + check_model_assumptions=NULL_TRANSFORMATION.check_model_assumptions, + transform_model=NULL_TRANSFORMATION.transform_model, + data_transformer_factory=AlwaysInvalidTransformer, + ) + + transformation_handler = TransformationHandler( + transformation_definition=transformation, + transformation_config=NullConfig(), + original_model=VALID_MINIMAL_MODEL, + ) + + with pytest.raises(schemapack.exceptions.ValidationError): + _ = transformation_handler.transform_data(VALID_MINIMAL_DATA) + + +def test_workflow_handler_happy(): + """Test the happy path of using a WorkflowHandler.""" + null_workflow = WorkflowDefinition( + description="A workflow for testing.", + steps={ + "step1": WorkflowStep( + description="A dummy step.", + transformation_definition=NULL_TRANSFORMATION, + input=None, + ), + "step2": WorkflowStep( + description="Another dummy step.", + transformation_definition=NULL_TRANSFORMATION, + input="step1", + ), + }, + artifacts={ + "step1_output": "step1", + "step2_output": "step2", + }, + ) + + workflow_handler = WorkflowHandler( + workflow_definition=null_workflow, + workflow_config=null_workflow.config_cls.model_validate( + {"step1": {}, "step2": {}} + ), + original_model=VALID_MINIMAL_MODEL, + ) + + artifacts = workflow_handler.run(data=VALID_MINIMAL_DATA) + + # Since a null workflow was used, compare to the input: + assert artifacts["step1_output"] == artifacts["step2_output"] == VALID_MINIMAL_DATA diff --git a/tests/schemapack_/transform/test_main.py b/tests/schemapack_/transform/test_main.py deleted file mode 100644 index d04ad5d2..00000000 --- a/tests/schemapack_/transform/test_main.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln -# for the German Human Genome-Phenome Archive (GHGA) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Test the main module.""" - -import json - -import pytest - -from metldata.event_handling.artifact_events import get_artifact_topic -from metldata.event_handling.models import SubmissionEventPayload -from metldata.transform.main import ( - TransformationEventHandlingConfig, - run_workflow_on_all_source_events, -) -from tests.fixtures.event_handling import ( - Event, - FileSystemEventFixture, - file_system_event_fixture, # noqa: F401 -) -from tests.fixtures.workflows import ( - EXAMPLE_WORKFLOW_DEFINITION, - EXAMPLE_WORKFLOW_TEST_CASE, -) - - -@pytest.mark.asyncio -async def test_run_workflow_on_all_source_events( - file_system_event_fixture: FileSystemEventFixture, # noqa: F811 -): - """Test the happy path of using the run_workflow_on_all_source_events function.""" - - event_config = TransformationEventHandlingConfig( - artifact_topic_prefix="artifacts", - source_event_topic="source-events", - source_event_type="source-event", - **file_system_event_fixture.config.model_dump(), - ) - - submission_id = "some-submission-id" - source_event = Event( - topic=event_config.source_event_topic, - type_=event_config.source_event_type, - key=submission_id, - payload=json.loads( - SubmissionEventPayload( - submission_id=submission_id, - content=EXAMPLE_WORKFLOW_TEST_CASE.original_metadata, - annotation=EXAMPLE_WORKFLOW_TEST_CASE.submission_annotation, - ).model_dump_json() - ), - ) - await file_system_event_fixture.publish_events(events=[source_event]) - - expected_events = [ - Event( - topic=get_artifact_topic( - artifact_topic_prefix=event_config.artifact_topic_prefix, - artifact_type=artifact_type, - ), - type_=artifact_type, - key=submission_id, - payload=json.loads( - SubmissionEventPayload( - submission_id=submission_id, - content=artifact, - annotation=EXAMPLE_WORKFLOW_TEST_CASE.submission_annotation, - ).model_dump_json() - ), - ) - for artifact_type, artifact in EXAMPLE_WORKFLOW_TEST_CASE.artifact_metadata.items() - ] - - await run_workflow_on_all_source_events( - event_config=event_config, - workflow_definition=EXAMPLE_WORKFLOW_DEFINITION, - worflow_config=EXAMPLE_WORKFLOW_TEST_CASE.config, - original_model=EXAMPLE_WORKFLOW_TEST_CASE.original_model, - ) - - file_system_event_fixture.expect_events(expected_events=expected_events)