Skip to content

Commit

Permalink
Merge branch 'main' into feat/support-dbt-freshness-check-test
Browse files Browse the repository at this point in the history
  • Loading branch information
mgorsk1 authored Nov 27, 2024
2 parents d386555 + 804e3de commit ae5954c
Show file tree
Hide file tree
Showing 25 changed files with 184 additions and 192 deletions.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
Validators are test classes (e.g. columnValuesToBeBetween, etc.)
"""

from abc import ABC, abstractmethod
from datetime import datetime, timezone
from enum import Enum
from typing import TYPE_CHECKING, Set, Type, Union

from metadata.data_quality.validations.base_test_handler import BaseTestValidator
Expand All @@ -25,48 +25,67 @@
from metadata.generated.schema.tests.testCase import TestCase, TestCaseParameterValue
from metadata.generated.schema.type.basic import Timestamp
from metadata.profiler.processor.runner import QueryRunner
from metadata.utils import importer
from metadata.utils.importer import import_test_case_class

if TYPE_CHECKING:
from pandas import DataFrame


class IValidatorBuilder(ABC):
"""Interface for validator builders"""
class TestCaseImporter:
    """Resolve the validator class that implements a test definition."""

    def import_test_case_validator(
        self,
        test_type: str,
        runner_type: str,
        test_definition: str,
    ) -> Type[BaseTestValidator]:
        """Look up the validator class for a test definition.

        Thin wrapper around ``import_test_case_class``: the three arguments
        are forwarded positionally and unchanged, so subclasses can override
        this method to customise how validators are located.

        Args:
            test_type (str): first lookup key (callers in this module pass the entity type)
            runner_type (str): second lookup key (callers in this module pass a ``SourceType`` value)
            test_definition (str): third lookup key (callers pass the test definition fully qualified name)
        Returns:
            Type[BaseTestValidator]: whatever ``import_test_case_class`` returns
        """
        validator_cls = import_test_case_class(test_type, runner_type, test_definition)
        return validator_cls

@property
def test_case(self):
"""Return the test case object"""
return self._test_case

@property
def validator(self):
"""Return the validator object"""
return self._validator
class SourceType(Enum):
    """Kinds of runner a validator can be built for.

    The member value is the string handed over as ``runner_type`` when the
    validator class is imported.
    """

    # validations executed against pandas dataframes
    PANDAS = "pandas"
    # validations executed through SQLAlchemy
    SQL = "sqlalchemy"


class ValidatorBuilder(TestCaseImporter):
"""Interface for validator builders"""

def __init__(
self,
runner: Union[QueryRunner, "DataFrame"],
test_case: TestCase,
source_type: SourceType,
entity_type: str,
) -> None:
"""Builder object for SQA validators. This builder is used to create a validator object
Args:
runner (Union[QueryRunner, DataFrame]): The runner object (a query runner or a pandas dataframe)
test_case (TestCase): The test case object
source_type (SourceType): The source type
entity_type (str): one of COLUMN or TABLE -- fetched from the test definition
"""
super().__init__()
self._test_case = test_case
self.runner = runner
# TODO this will be removed on https://github.com/open-metadata/OpenMetadata/pull/18716
self.validator_cls: Type[BaseTestValidator] = importer.import_test_case_class(
self.validator_cls: Type[
BaseTestValidator
] = super().import_test_case_validator(
entity_type,
self._get_source_type(),
self.test_case.testDefinition.fullyQualifiedName, # type: ignore
source_type.value,
self.test_case.testDefinition.fullyQualifiedName,
)
self.reset()

@property
def test_case(self):
"""Return the test case object"""
return self._test_case

@property
def validator(self):
"""Return the validator object"""
return self._validator

def set_runtime_params(self, runtime_params_setters: Set[RuntimeParameterSetter]):
"""Set the runtime parameters for the validator object
Expand All @@ -93,8 +112,3 @@ def reset(self):
int(datetime.now(tz=timezone.utc).timestamp() * 1000)
),
)

@abstractmethod
def _get_source_type(self):
"""Get the source type"""
raise NotImplementedError
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
Interfaces with database for all database engine
supporting sqlalchemy abstraction layer
"""
from metadata.data_quality.builders.i_validator_builder import IValidatorBuilder
from metadata.data_quality.builders.pandas_validator_builder import (
PandasValidatorBuilder,

from metadata.data_quality.builders.validator_builder import (
SourceType,
ValidatorBuilder,
)
from metadata.data_quality.interface.test_suite_interface import TestSuiteInterface
from metadata.generated.schema.entity.data.table import Table
Expand Down Expand Up @@ -44,13 +45,14 @@ def __init__(
ometa_client: OpenMetadata,
sampler: SamplerInterface,
table_entity: Table,
**__,
**kwargs,
):
super().__init__(
service_connection_config,
ometa_client,
sampler,
table_entity,
**kwargs,
)

(
Expand All @@ -63,5 +65,10 @@ def __init__(

def _get_validator_builder(
self, test_case: TestCase, entity_type: str
) -> IValidatorBuilder:
return PandasValidatorBuilder(self.dataset, test_case, entity_type)
) -> ValidatorBuilder:
return self.validator_builder_class(
runner=self.dataset,
test_case=test_case,
entity_type=entity_type,
source_type=SourceType.PANDAS,
)
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@
from sqlalchemy.orm import DeclarativeMeta
from sqlalchemy.orm.util import AliasedClass

from metadata.data_quality.builders.i_validator_builder import IValidatorBuilder
from metadata.data_quality.builders.sqa_validator_builder import SQAValidatorBuilder
from metadata.data_quality.builders.validator_builder import (
SourceType,
ValidatorBuilder,
)
from metadata.data_quality.interface.test_suite_interface import TestSuiteInterface
from metadata.generated.schema.entity.data.table import Table
from metadata.generated.schema.entity.services.databaseService import DatabaseConnection
Expand Down Expand Up @@ -51,14 +53,14 @@ def __init__(
ometa_client: OpenMetadata,
sampler: SamplerInterface,
table_entity: Table = None,
**kwargs,
):
super().__init__(
service_connection_config,
ometa_client,
sampler,
table_entity,
service_connection_config, ometa_client, sampler, table_entity, **kwargs
)
self.source_type = SourceType.SQL
self.create_session()

(
self.table_sample_query,
self.table_sample_config,
Expand Down Expand Up @@ -109,5 +111,10 @@ def _create_runner(self) -> QueryRunner:

def _get_validator_builder(
self, test_case: TestCase, entity_type: str
) -> IValidatorBuilder:
return SQAValidatorBuilder(self.runner, test_case, entity_type)
) -> ValidatorBuilder:
return self.validator_builder_class(
runner=self.runner,
test_case=test_case,
entity_type=entity_type,
source_type=self.source_type,
)
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from abc import ABC, abstractmethod
from typing import Optional, Set, Type

from metadata.data_quality.builders.i_validator_builder import IValidatorBuilder
from metadata.data_quality.builders.validator_builder import ValidatorBuilder
from metadata.data_quality.validations.base_test_handler import BaseTestValidator
from metadata.data_quality.validations.runtime_param_setter.param_setter import (
RuntimeParameterSetter,
Expand Down Expand Up @@ -48,12 +48,14 @@ def __init__(
ometa_client: OpenMetadata,
sampler: SamplerInterface,
table_entity: Table,
validator_builder: Type[ValidatorBuilder],
):
"""Required attribute for the interface"""
self.ometa_client = ometa_client
self.service_connection_config = service_connection_config
self.table_entity = table_entity
self.sampler = sampler
self.validator_builder_class = validator_builder

@classmethod
def create(
Expand All @@ -77,15 +79,15 @@ def create(
@abstractmethod
def _get_validator_builder(
self, test_case: TestCase, entity_type: str
) -> IValidatorBuilder:
) -> ValidatorBuilder:
"""get the builder class for the validator. Define this in the implementation class
Args:
test_case (TestCase): test case object
entity_type (str): type of the entity
Returns:
IValidatorBuilder: a validator builder
ValidatorBuilder: a validator builder
"""
raise NotImplementedError

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,8 @@
TestCaseResults,
TestSuiteProcessorConfig,
)
from metadata.data_quality.runner.base_test_suite_source import BaseTestSuiteRunner
from metadata.data_quality.runner.core import DataTestsRunner
from metadata.data_quality.runner.test_suite_source_factory import (
test_suite_source_factory,
)
from metadata.generated.schema.api.tests.createTestCase import CreateTestCaseRequest
from metadata.generated.schema.entity.data.table import Table
from metadata.generated.schema.entity.services.ingestionPipelines.status import (
Expand Down Expand Up @@ -95,12 +93,7 @@ def _run(self, record: TableAndTests) -> Either:
record.table, openmetadata_test_cases
)

test_suite_runner = test_suite_source_factory.create(
record.service_type.lower(),
self.config,
self.metadata,
record.table,
).get_data_quality_runner()
test_suite_runner = self.get_test_suite_runner(record.table)

logger.debug(
f"Found {len(openmetadata_test_cases)} test cases for table {record.table.fullyQualifiedName.root}"
Expand Down Expand Up @@ -357,3 +350,10 @@ def filter_incompatible_test_cases(
else:
result.append(tc)
return result

def get_test_suite_runner(self, table: Table):
    """Build the data quality runner for the given table.

    A ``BaseTestSuiteRunner`` is created from this object's ``config`` and
    ``metadata`` plus the table, and its data quality runner is returned.

    Args:
        table (Table): table entity the runner is created for
    Returns:
        the result of ``BaseTestSuiteRunner.get_data_quality_runner()``
    """
    suite_runner = BaseTestSuiteRunner(self.config, self.metadata, table)
    return suite_runner.get_data_quality_runner()
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from copy import deepcopy
from typing import Optional, cast

from metadata.data_quality.builders.validator_builder import ValidatorBuilder
from metadata.data_quality.interface.test_suite_interface import TestSuiteInterface
from metadata.data_quality.runner.core import DataTestsRunner
from metadata.generated.schema.entity.data.table import Table
Expand Down Expand Up @@ -46,6 +47,7 @@ def __init__(
ometa_client: OpenMetadata,
entity: Table,
):
self.validator_builder_class = ValidatorBuilder
self._interface = None
self._interface_type: str = config.source.type.lower()
self.entity = entity
Expand Down Expand Up @@ -125,10 +127,11 @@ def create_data_quality_interface(self) -> TestSuiteInterface:
)

self.interface: TestSuiteInterface = test_suite_class.create(
self.service_conn_config,
self.ometa_client,
sampler_interface,
self.entity,
service_connection_config=self.service_conn_config,
ometa_client=self.ometa_client,
sampler=sampler_interface,
table_entity=self.entity,
validator_builder=self.validator_builder_class,
)
return self.interface

Expand Down

This file was deleted.

Loading

0 comments on commit ae5954c

Please sign in to comment.