From 3c427e9825415d3e0f5255e309e8ed9caa5436c8 Mon Sep 17 00:00:00 2001 From: kumar saurav Date: Wed, 4 Sep 2024 00:15:44 +0530 Subject: [PATCH] add: timestamp validation add: timestamp validation add: timestamp validation fix: module error --- dcs_core/core/datasource/manager.py | 4 +- dcs_core/core/datasource/sql_datasource.py | 280 ++++++++++++++++++ dcs_core/core/validation/manager.py | 12 + .../core/validation/validity_validation.py | 126 ++++++++ docs/validations/validity.md | 73 ++++- .../configuration/test_configuration_v1.py | 90 ++++++ .../datasource/test_sql_datasource.py | 68 ++++- 7 files changed, 643 insertions(+), 10 deletions(-) diff --git a/dcs_core/core/datasource/manager.py b/dcs_core/core/datasource/manager.py index e6f9014..44d580c 100644 --- a/dcs_core/core/datasource/manager.py +++ b/dcs_core/core/datasource/manager.py @@ -78,7 +78,7 @@ def _create_data_source( data_source_name = data_source_config.name data_source_type = data_source_config.type if data_source_type == "spark_df": - from datachecks.integrations.databases.spark_df import SparkDFDataSource + from dcs_core.integrations.databases.spark_df import SparkDFDataSource return SparkDFDataSource( data_source_name, @@ -86,7 +86,7 @@ def _create_data_source( ) try: module_name = ( - f"datachecks.integrations.databases.{data_source_config.type.value}" + f"dcs_core.integrations.databases.{data_source_config.type.value}" ) module = importlib.import_module(module_name) data_source_class = self.DATA_SOURCE_CLASS_NAME_MAPPER[ diff --git a/dcs_core/core/datasource/sql_datasource.py b/dcs_core/core/datasource/sql_datasource.py index 25e70fa..45c33c5 100644 --- a/dcs_core/core/datasource/sql_datasource.py +++ b/dcs_core/core/datasource/sql_datasource.py @@ -674,3 +674,283 @@ def query_negative_metric( result = self.fetchone(query)[0] return round(result, 2) if operation == "percent" else result + + def query_timestamp_metric( + self, + table: str, + field: str, + operation: str, + predefined_regex: str, + filters: str = None, + ) -> Union[float, int]: + """ + :param table: Table name + :param field: Column name + :param operation: Metric operation ("count" or "percent") + :param predefined_regex: regex pattern + :param filters: filter condition + :return: Tuple containing valid count and total count (or percentage) + """ + + qualified_table_name = self.qualified_table_name(table) + + timestamp_iso_regex = r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])T([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](?:\.\d{1,3})?(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])?$" + + if predefined_regex == "timestamp_iso": + regex_condition = f"{field} ~ '{timestamp_iso_regex}'" + else: + raise ValueError(f"Unknown predefined regex pattern: {predefined_regex}") + + filters_clause = f"WHERE {filters}" if filters else "" + + query = f""" + WITH extracted_timestamps AS ( + SELECT + {field}, + SUBSTRING({field} FROM '^(\d{{4}})') AS year, -- Extract year + SUBSTRING({field} FROM '^\d{{4}}-(\d{{2}})') AS month, -- Extract month + SUBSTRING({field} FROM '^\d{{4}}-\d{{2}}-(\d{{2}})') AS day, -- Extract day + SUBSTRING({field} FROM 'T(\d{{2}})') AS hour, -- Extract hour + SUBSTRING({field} FROM 'T\d{{2}}:(\d{{2}})') AS minute, -- Extract minute + SUBSTRING({field} FROM 'T\d{{2}}:\d{{2}}:(\d{{2}})') AS second, -- Extract second + SUBSTRING({field} FROM '([+-]\d{{2}}:\d{{2}}|Z)$') AS timezone -- Extract timezone + FROM {qualified_table_name} + {filters_clause} + ), + validated_timestamps AS ( + SELECT + {field}, + CASE + WHEN + -- Validate each component with its specific rules + year ~ '^\d{{4}}$' AND + month ~ '^(0[1-9]|1[0-2])$' AND + day ~ '^((0[1-9]|[12][0-9])|(30|31))$' AND + hour ~ '^([01][0-9]|2[0-3])$' AND + minute ~ '^[0-5][0-9]$' AND + second ~ '^[0-5][0-9]$' AND + (timezone IS NULL OR timezone ~ '^(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])$') AND + -- Additional check for days in months (e.g., February) + ( + (month IN ('01', '03', '05', '07', '08', '10', '12') AND day BETWEEN '01' AND '31') OR + (month IN ('04', '06', '09', '11') AND day BETWEEN '01' AND '30') OR + (month = '02' AND day BETWEEN '01' AND + CASE + -- Handle leap years + WHEN (year::int % 400 = 0 OR (year::int % 100 != 0 AND year::int % 4 = 0)) THEN '29' + ELSE '28' + END + ) + ) + THEN 1 + ELSE 0 + END AS is_valid + FROM extracted_timestamps + ) + SELECT COUNT(*) AS valid_count, COUNT(*) AS total_count + FROM validated_timestamps + WHERE is_valid = 1; + """ + + try: + valid_count = self.fetchone(query)[0] + total_count_query = ( + f"SELECT COUNT(*) FROM {qualified_table_name} {filters_clause}" + ) + total_count = self.fetchone(total_count_query)[0] + + if operation == "count": + return valid_count, total_count + elif operation == "percent": + return valid_count, total_count + else: + raise ValueError(f"Unknown operation: {operation}") + + except Exception as e: + print(f"Error occurred: {e}") + return 0, 0 + + def query_timestamp_not_in_future_metric( + self, + table: str, + field: str, + operation: str, + predefined_regex: str, + filters: str = None, + ) -> Union[float, int]: + """ + :param table: Table name + :param field: Column name + :param operation: Metric operation ("count" or "percent") + :param predefined_regex: regex pattern + :param filters: filter condition + :return: Tuple containing count of valid timestamps not in the future and total count + """ + + qualified_table_name = self.qualified_table_name(table) + + timestamp_iso_regex = r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])T([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](?:\.\d{1,3})?(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])?$" + + if predefined_regex == "timestamp_iso": + regex_condition = f"{field} ~ '{timestamp_iso_regex}'" + else: + raise ValueError(f"Unknown predefined regex pattern: {predefined_regex}") + + filters_clause = f"WHERE {filters}" if filters else "" + + query = f""" + WITH extracted_timestamps AS ( + SELECT + {field}, + SUBSTRING({field} FROM '^(\d{{4}})') AS year, -- Extract year + SUBSTRING({field} FROM '^\d{{4}}-(\d{{2}})') AS month, -- Extract month + SUBSTRING({field} FROM '^\d{{4}}-\d{{2}}-(\d{{2}})') AS day, -- Extract day + SUBSTRING({field} FROM 'T(\d{{2}})') AS hour, -- Extract hour + SUBSTRING({field} FROM 'T\d{{2}}:(\d{{2}})') AS minute, -- Extract minute + SUBSTRING({field} FROM 'T\d{{2}}:\d{{2}}:(\d{{2}})') AS second, -- Extract second + SUBSTRING({field} FROM '([+-]\d{{2}}:\d{{2}}|Z)$') AS timezone -- Extract timezone + FROM {qualified_table_name} + {filters_clause} + ), + validated_timestamps AS ( + SELECT + {field}, + CASE + WHEN + year ~ '^\d{{4}}$' AND + month ~ '^(0[1-9]|1[0-2])$' AND + day ~ '^((0[1-9]|[12][0-9])|(30|31))$' AND + hour ~ '^([01][0-9]|2[0-3])$' AND + minute ~ '^[0-5][0-9]$' AND + second ~ '^[0-5][0-9]$' AND + (timezone IS NULL OR timezone ~ '^(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])$') AND + ( + (month IN ('01', '03', '05', '07', '08', '10', '12') AND day BETWEEN '01' AND '31') OR + (month IN ('04', '06', '09', '11') AND day BETWEEN '01' AND '30') OR + (month = '02' AND day BETWEEN '01' AND + CASE + WHEN (year::int % 400 = 0 OR (year::int % 100 != 0 AND year::int % 4 = 0)) THEN '29' + ELSE '28' + END + ) + ) + THEN 1 + ELSE 0 + END AS is_valid + FROM extracted_timestamps + ), + timestamps_not_in_future AS ( + SELECT * + FROM validated_timestamps + WHERE is_valid = 1 AND ({field} ~ '{timestamp_iso_regex}') AND {field}::timestamp <= CURRENT_TIMESTAMP + ) + SELECT COUNT(*) AS valid_count, (SELECT COUNT(*) FROM {qualified_table_name} {filters_clause}) AS total_count + FROM timestamps_not_in_future; + """ + try: + valid_count = self.fetchone(query)[0] + total_count_query = ( + f"SELECT COUNT(*) FROM {qualified_table_name} {filters_clause}" + ) + total_count = self.fetchone(total_count_query)[0] + + if operation == "count": + return valid_count, total_count + elif operation == "percent": + return valid_count, total_count + else: + raise ValueError(f"Unknown operation: {operation}") + + except Exception as e: + print(f"Error occurred: {e}") + return 0, 0 + + def query_timestamp_date_not_in_future_metric( + self, + table: str, + field: str, + operation: str, + predefined_regex: str, + filters: str = None, + ) -> Union[float, int]: + """ + :param table: Table name + :param field: Column name + :param operation: Metric operation ("count" or "percent") + :param predefined_regex: The regex pattern to use (e.g., "timestamp_iso") + :param filters: Optional filter condition + :return: Tuple containing count of valid dates not in the future and total count + """ + + qualified_table_name = self.qualified_table_name(table) + + timestamp_iso_regex = r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])T([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](?:\.\d{1,3})?(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])?$" + + if predefined_regex == "timestamp_iso": + regex_condition = f"{field} ~ '{timestamp_iso_regex}'" + else: + raise ValueError(f"Unknown predefined regex pattern: {predefined_regex}") + + filters_clause = f"WHERE {filters}" if filters else "" + + query = f""" + WITH extracted_timestamps AS ( + SELECT + {field}, + SUBSTRING({field} FROM '^(\d{{4}})') AS year, -- Extract year + SUBSTRING({field} FROM '^\d{{4}}-(\d{{2}})') AS month, -- Extract month + SUBSTRING({field} FROM '^\d{{4}}-\d{{2}}-(\d{{2}})') AS day -- Extract day + FROM {qualified_table_name} + {filters_clause} + ), + validated_dates AS ( + SELECT + {field}, + CASE + WHEN + year ~ '^\d{{4}}$' AND + month ~ '^(0[1-9]|1[0-2])$' AND + day ~ '^((0[1-9]|[12][0-9])|(30|31))$' AND + ( + (month IN ('01', '03', '05', '07', '08', '10', '12') AND day BETWEEN '01' AND '31') OR + (month IN ('04', '06', '09', '11') AND day BETWEEN '01' AND '30') OR + (month = '02' AND day BETWEEN '01' AND + CASE + WHEN (year::int % 400 = 0 OR (year::int % 100 != 0 AND year::int % 4 = 0)) THEN '29' + ELSE '28' + END + ) + ) + THEN 1 + ELSE 0 + END AS is_valid + FROM extracted_timestamps + ), + dates_not_in_future AS ( + SELECT * + FROM validated_dates + WHERE is_valid = 1 + AND ({field} ~ '{timestamp_iso_regex}') + AND ({field})::date <= CURRENT_DATE -- Compare only the date part against the current date + ) + SELECT COUNT(*) AS valid_count, (SELECT COUNT(*) FROM {qualified_table_name} {filters_clause}) AS total_count + FROM dates_not_in_future; + """ + + try: + valid_count = self.fetchone(query)[0] + total_count_query = ( + f"SELECT COUNT(*) FROM {qualified_table_name} {filters_clause}" + ) + total_count = self.fetchone(total_count_query)[0] + + if operation == "count": + return valid_count, total_count + elif operation == "percent": + return valid_count, total_count + else: + raise ValueError(f"Unknown operation: {operation}") + + except Exception as e: + print(f"Error occurred: {e}") + return 0, 0 diff --git a/dcs_core/core/validation/manager.py b/dcs_core/core/validation/manager.py index 2bfe40f..f683d26 100644 --- a/dcs_core/core/validation/manager.py +++ b/dcs_core/core/validation/manager.py @@ -58,6 +58,7 @@ ) from dcs_core.core.validation.validity_validation import ( # noqa F401 this is used in globals CountCUSIPValidation, + CountDateNotInFutureValidation, CountEmailValidation, CountFIGIValidation, CountInvalidRegex, @@ -66,9 +67,11 @@ CountLatitudeValidation, CountLEIValidation, CountLongitudeValidation, + CountNotInFutureValidation, CountPermIDValidation, CountSEDOLValidation, CountSSNValidation, + CountTimeStampValidation, CountUSAPhoneValidation, CountUSAStateCodeValidation, CountUSAZipCodeValidation, @@ -76,6 +79,7 @@ CountValidRegex, CountValidValues, PercentCUSIPValidation, + PercentDateNotInFutureValidation, PercentEmailValidation, PercentFIGIValidation, PercentInvalidRegex, @@ -84,9 +88,11 @@ PercentLatitudeValidation, PercentLEIValidation, PercentLongitudeValidation, + PercentNotInFutureValidation, PercentPermIDValidation, PercentSEDOLValidation, PercentSSNValidation, + PercentTimeStampValidation, PercentUSAPhoneValidation, PercentUSAStateCodeValidation, PercentUSAZipCodeValidation, @@ -165,6 +171,12 @@ class ValidationManager: ValidationFunction.PERCENT_ZERO.value: "PercentZeroValidation", ValidationFunction.COUNT_NEGATIVE.value: "CountNegativeValidation", ValidationFunction.PERCENT_NEGATIVE.value: "PercentNegativeValidation", + ValidationFunction.COUNT_TIMESTAMP_STRING.value: "CountTimeStampValidation", + ValidationFunction.PERCENT_TIMESTAMP_STRING.value: "PercentTimeStampValidation", + ValidationFunction.COUNT_NOT_IN_FUTURE.value: "CountNotInFutureValidation", + ValidationFunction.PERCENT_NOT_IN_FUTURE.value: "PercentNotInFutureValidation", + ValidationFunction.COUNT_DATE_NOT_IN_FUTURE.value: "CountDateNotInFutureValidation", + ValidationFunction.PERCENT_DATE_NOT_IN_FUTURE.value: "PercentDateNotInFutureValidation", } def __init__( diff --git a/dcs_core/core/validation/validity_validation.py b/dcs_core/core/validation/validity_validation.py index da472a7..527160d 100644 --- a/dcs_core/core/validation/validity_validation.py +++ b/dcs_core/core/validation/validity_validation.py @@ -698,3 +698,129 @@ def _generate_metric_value(self, **kwargs) -> Union[float, int]: raise NotImplementedError( "Perm ID validation is only supported for SQL data sources" ) + + +class CountTimeStampValidation(Validation): + def _generate_metric_value(self, **kwargs) -> Union[float, int]: + if isinstance(self.data_source, SQLDataSource): + valid_count, total_row_count = self.data_source.query_timestamp_metric( + table=self.dataset_name, + field=self.field_name, + operation="count", + predefined_regex="timestamp_iso", + filters=self.where_filter if self.where_filter is not None else None, + ) + return valid_count + else: + raise ValueError( + "Unsupported data source type for CountTimeStampValidation" + ) + + +class PercentTimeStampValidation(Validation): + def _generate_metric_value(self, **kwargs) -> Union[float, int]: + if isinstance(self.data_source, SQLDataSource): + valid_count, total_row_count = self.data_source.query_timestamp_metric( + table=self.dataset_name, + field=self.field_name, + operation="percent", + predefined_regex="timestamp_iso", + filters=self.where_filter if self.where_filter is not None else None, + ) + return ( + round((valid_count / total_row_count) * 100, 2) + if total_row_count > 0 + else 0.0 + ) + else: + raise ValueError( + "Unsupported data source type for PercentTimeStampValidation" + ) + + +class CountNotInFutureValidation(Validation): + def _generate_metric_value(self, **kwargs) -> Union[float, int]: + if isinstance(self.data_source, SQLDataSource): + ( + valid_count, + total_row_count, + ) = self.data_source.query_timestamp_not_in_future_metric( + table=self.dataset_name, + field=self.field_name, + operation="count", + predefined_regex="timestamp_iso", + filters=self.where_filter if self.where_filter is not None else None, + ) + return valid_count + else: + raise ValueError( + "Unsupported data source type for CountNotInFutureValidation" + ) + + +class PercentNotInFutureValidation(Validation): + def _generate_metric_value(self, **kwargs) -> Union[float, int]: + if isinstance(self.data_source, SQLDataSource): + ( + valid_count, + total_row_count, + ) = self.data_source.query_timestamp_not_in_future_metric( + table=self.dataset_name, + field=self.field_name, + operation="percent", + predefined_regex="timestamp_iso", + filters=self.where_filter if self.where_filter is not None else None, + ) + return ( + round((valid_count / total_row_count) * 100, 2) + if total_row_count > 0 + else 0.0 + ) + else: + raise ValueError( + "Unsupported data source type for PercentNotInFutureValidation" + ) + + +class CountDateNotInFutureValidation(Validation): + def _generate_metric_value(self, **kwargs) -> Union[float, int]: + if isinstance(self.data_source, SQLDataSource): + ( + valid_count, + total_row_count, + ) = self.data_source.query_timestamp_date_not_in_future_metric( + table=self.dataset_name, + field=self.field_name, + operation="count", + predefined_regex="timestamp_iso", + filters=self.where_filter if self.where_filter is not None else None, + ) + return valid_count + else: + raise ValueError( + "Unsupported data source type for CountDateNotInFutureValidation" + ) + + +class PercentDateNotInFutureValidation(Validation): + def _generate_metric_value(self, **kwargs) -> Union[float, int]: + if isinstance(self.data_source, SQLDataSource): + ( + valid_count, + total_row_count, + ) = self.data_source.query_timestamp_date_not_in_future_metric( + table=self.dataset_name, + field=self.field_name, + operation="percent", + predefined_regex="timestamp_iso", + filters=self.where_filter if self.where_filter is not None else None, + ) + return ( + round((valid_count / total_row_count) * 100, 2) + if total_row_count > 0 + else 0.0 + ) + else: + raise ValueError( + "Unsupported data source type for PercentDateNotInFutureValidation" + ) diff --git a/docs/validations/validity.md b/docs/validations/validity.md index 4ab33da..bbe74aa 100644 --- a/docs/validations/validity.md +++ b/docs/validations/validity.md @@ -277,7 +277,7 @@ validations for location_db.geolocation: on: percent_longitude(longitude_column_name) threshold: "> 80" ``` -``` + ## Count SSN @@ -505,3 +505,74 @@ validations for product_db.products: on: percent_negative(price) threshold: "< 40" ``` +## Count Timestamp String + +The count timestamp string validation checks the number of valid timestamp string in ISO format in a dataset. + +**Example** + +```yaml title="dcs_config.yaml" +validations for product_db.products: + - count_valid_timestamp: + on: count_timestamp_string(timestamp) +``` + +## Percent Timestamp String + +The percent timestamp string validation checks the percentage of valid timestamp string in ISO format in a dataset. + +**Example** + +```yaml title="dcs_config.yaml" +validations for product_db.products: + - percent_valid_timestamp: + on: percent_timestamp_string(timestamp) +``` + +## Count Not In Future + +The count not in future validation checks the number of valid timestamp string that are not in future in a dataset. + +**Example** + +```yaml title="dcs_config.yaml" +validations for product_db.products: + - count_timestamp_not_in_future: + on: count_not_in_future(future_timestamp) +``` + +## Percent Not In Future + +The percent date not in future validation checks the percentage of valid timestamp string that are not in future in a dataset. + +**Example** + +```yaml title="dcs_config.yaml" +validations for product_db.products: + - percent_timestamp_not_in_future: + on: percent_not_in_future(future_timestamp) +``` + +## Count Date Not In Future + +The count date not in future validation checks the number of valid timestamp string with date that are not in future in a dataset. + +**Example** + +```yaml title="dcs_config.yaml" +validations for product_db.products: + - count_date_not_in_future: + on: count_date_not_in_future(future_timestamp) +``` + +## Percent Date Not In Future + +The percent date not in future validation checks the percentage of valid timestamp string with date that are not in future in a dataset. + +**Example** + +```yaml title="dcs_config.yaml" +validations for product_db.products: + - percent_date_not_in_future: + on: percent_date_not_in_future(future_timestamp) +``` diff --git a/tests/core/configuration/test_configuration_v1.py b/tests/core/configuration/test_configuration_v1.py index aa756b3..771bc31 100644 --- a/tests/core/configuration/test_configuration_v1.py +++ b/tests/core/configuration/test_configuration_v1.py @@ -1025,3 +1025,93 @@ def test_should_parse_percent_negative_validation(): .get_validation_function == ValidationFunction.PERCENT_NEGATIVE ) + + +def test_should_parse_count_timestamp_string(): + yaml_string = """ + validations for source.table: + - test: + on: count_timestamp_string(timestamp) + """ + configuration = load_configuration_from_yaml_str(yaml_string) + assert ( + configuration.validations["source.table"] + .validations["test"] + .get_validation_function + == ValidationFunction.COUNT_TIMESTAMP_STRING + ) + + +def test_should_parse_percent_timestamp_string(): + yaml_string = """ + validations for source.table: + - test: + on: percent_timestamp_string(timestamp) + """ + configuration = load_configuration_from_yaml_str(yaml_string) + assert ( + configuration.validations["source.table"] + .validations["test"] + .get_validation_function + == ValidationFunction.PERCENT_TIMESTAMP_STRING + ) + + +def test_should_parse_count_not_in_future(): + yaml_string = """ + validations for source.table: + - test: + on: count_not_in_future(future_timestamp) + """ + configuration = load_configuration_from_yaml_str(yaml_string) + assert ( + configuration.validations["source.table"] + .validations["test"] + .get_validation_function + == ValidationFunction.COUNT_NOT_IN_FUTURE + ) + + +def test_should_parse_percent_not_in_future(): + yaml_string = """ + validations for source.table: + - test: + on: percent_not_in_future(future_timestamp) + """ + configuration = load_configuration_from_yaml_str(yaml_string) + assert ( + configuration.validations["source.table"] + .validations["test"] + .get_validation_function + == ValidationFunction.PERCENT_NOT_IN_FUTURE + ) + + +def test_should_parse_count_date_not_in_future(): + yaml_string = """ + validations for source.table: + - test: + on: count_date_not_in_future(future_timestamp) + """ + configuration = load_configuration_from_yaml_str(yaml_string) + assert ( + configuration.validations["source.table"] + .validations["test"] + .get_validation_function + == ValidationFunction.COUNT_DATE_NOT_IN_FUTURE + ) + + +def test_should_parse_percent_date_not_in_future(): + yaml_string = """ + validations for source.table: + - test: + on: percent_date_not_in_future(future_timestamp) + """ + configuration = load_configuration_from_yaml_str(yaml_string) + assert ( + configuration.validations["source.table"] + .validations["test"] + .get_validation_function + == ValidationFunction.PERCENT_DATE_NOT_IN_FUTURE + ) diff --git a/tests/integration/datasource/test_sql_datasource.py b/tests/integration/datasource/test_sql_datasource.py index d5f414e..8a0e168 100644 --- a/tests/integration/datasource/test_sql_datasource.py +++ b/tests/integration/datasource/test_sql_datasource.py @@ -132,7 +132,10 @@ def setup_tables( isin VARCHAR(12), perm_id VARCHAR(50), salary INTEGER, - price FLOAT + price FLOAT, + timestamp VARCHAR(50), + not_in_future VARCHAR(50), + date_not_in_future VARCHAR(50) ) """ ) @@ -144,27 +147,33 @@ def setup_tables( ('thor', '{(utc_now - datetime.timedelta(days=10)).strftime("%Y-%m-%d")}', 1500, NULL, 'thor hammer', 'e7194aaa-5516-4362-a5ff-6ff971976bec', '123-456-7890', 'jane.doe@domain', 'C2', 'ABCDE', 40.0678, -7555555554.0060,'856-45-6789','0067340', - 'JRIK0092LOAUCXTR6042','03783310','BBG000B9XRY4','US0378331005', '1234--5678-9012--3456-789', 0, 100.0), -- invalid email -- invalid usa_state_code -- invalid usa_zip_code -- invalid cusip -- invalid perm_id + 'JRIK0092LOAUCXTR6042','03783310','BBG000B9XRY4','US0378331005', '1234--5678-9012--3456-789', 0, 100.0,'2024-01-15T12:30:45Z', + '2024-09-06T01:15:00Z','2023-12-31T23:59:59+01:00'), -- invalid email -- invalid usa_state_code -- invalid usa_zip_code -- invalid cusip -- invalid perm_id ('captain america', '{(utc_now - datetime.timedelta(days=3)).strftime("%Y-%m-%d")}', 90, 80, 'shield', 'e7194aaa-5516-4362-a5ff-6ff971976b', '(123) 456-7890', 'john.doe@.com ', 'NY', '12-345', 34.0522, -118.2437,'000-12-3456', 'B01HL06', - 'CDR300OS7OJENVEDDW89','037833100','BBG000BL2H25','US5949181045', '1234567890123456789', 1000, -50.0), -- invalid weapon_id --invalid email -- invalid usa_zip_code -- invalid ssn + 'CDR300OS7OJENVEDDW89','037833100','BBG000BL2H25','US5949181045', '1234567890123456789', 1000, -50.0,'2021-06-15T08:22:33.123Z', + '2024-08-25T09:15:00Z','2024-08-25T09:15:00Z'), -- invalid weapon_id --invalid email -- invalid usa_zip_code -- invalid ssn ('iron man', '{(utc_now - datetime.timedelta(days=4)).strftime("%Y-%m-%d")}', 50, 70, 'suit', '1739c676-6108-4dd2-8984-2459df744936', '123 456 7890', 'contact@company..org', 'XY', '85001', 37.7749, -122.4194,'859-99-9999','4155586', - 'VXQ400F1OBWAVPBJP86','594918104','BBG000B3YB97','US38259P5088', '123456789012345678', 0, -150.0), -- invalid email -- invalid usa_state_code -- invalid lei -- invalid perm_id + 'VXQ400F1OBWAVPBJP86','594918104','BBG000B3YB97','US38259P5088', '123456789012345678', 0, -150.0,'2024-04-31T12:30:45Z', + '2024-10-07T12:00:00Z','2024-10-10T12:00:00Z'), -- invalid email -- invalid usa_state_code -- invalid lei -- invalid perm_id ('hawk eye', '{(utc_now - datetime.timedelta(days=5)).strftime("%Y-%m-%d")}', 40, 60, 'bow', '1739c676-6108-4dd2-8984-2459df746', '+1 123-456-7890', 'user@@example.com', 'TX', '30301', 51.1657, 10.4515,'123-45-67890','12345', - 'FKRD00GCEYWDCNYLNF60','38259P508','BBG000B57Y12','US83165F1026', '5647382910564738291', 90, 50.0), -- invalid weapon_id --invalid email -- invalid ssn -- invalid sedol + 'FKRD00GCEYWDCNYLNF60','38259P508','BBG000B57Y12','US83165F1026', '5647382910564738291', 90, 50.0,'2023-13-01T00:00:00Z', + '2025-01-01T00:00:00+01:00','2024-09-06T01:15:00Z'), -- invalid weapon_id --invalid email -- invalid ssn -- invalid sedol ('clark kent', '{(utc_now - datetime.timedelta(days=6)).strftime("%Y-%m-%d")}', 35, 50, '', '7be61b2c-45dc-4889-97e3-9202e8', '09123.456.7890', 'contact@company.org', 'ZZ', '123456', 51.5074, -0.1278,'666-45-6789','34A56B7', - '6R5J00FMIANQQH6JMN56','83165F102','BBG000B9XRY','US0231351067', '1234-5678-9012-3456-78X', 0, -25.0), -- invalid weapon_id -- invalid phone -- invalid usa_state_code -- invalid usa_zip_code -- invalid ssn -- invalid sedol -- invalid figi -- invalid perm_id + '6R5J00FMIANQQH6JMN56','83165F102','BBG000B9XRY','US0231351067', '1234-5678-9012-3456-78X', 0, -25.0,'2023-03-08T16:45:00+02:00', + '2024-12-31T23:59:59Z','2024-09-06T01:15:00Z'), -- invalid weapon_id -- invalid phone -- invalid usa_state_code -- invalid usa_zip_code -- invalid ssn -- invalid sedol -- invalid figi -- invalid perm_id ('black widow', '{(utc_now - datetime.timedelta(days=6)).strftime("%Y-%m-%d")}', 35, 50, '', '7be61b2c-45dc-4889-97e3-9202e8032c73', '+1 (123) 456-7890', 'jane_smith123@domain.co.uk', 'FL', '90210', 483.8566, 2.3522,'001-01-0001','456VGHY', - '0FPB00BBRHUYOE7DSK19','023135106','BBG000B6R530','US037833100', '2345-6789-0123-4567-890', 70, 30.0) -- invalid isin -- invalid sedol + '0FPB00BBRHUYOE7DSK19','023135106','BBG000B6R530','US037833100', '2345-6789-0123-4567-890', 70, 30.0,'2021-06-15T08:22:33.123Z', + '2024-10-10T12:00:00Z','2024-09-05T23:59:59Z') -- invalid isin -- invalid sedol """ postgresql_connection.execute(text(insert_query)) @@ -641,3 +650,48 @@ def test_should_return_percent_negative( table=self.TABLE_NAME, field="price", operation="percent" ) assert round(percent_negative, 2) == 50.0 + + def test_should_return_row_count_for_valid_timestamp_string( + self, postgres_datasource: PostgresDataSource + ): + ( + valid_count, + total_row_count, + ) = postgres_datasource.query_timestamp_metric( + table=self.TABLE_NAME, + field="timestamp", + operation="count", + predefined_regex="timestamp_iso", + ) + assert valid_count == 4 + assert total_row_count == 6 + + def test_should_return_row_count_for_valid_timestamp_not_in_future( + self, postgres_datasource: PostgresDataSource + ): + ( + valid_count, + total_row_count, + ) = postgres_datasource.query_timestamp_not_in_future_metric( + table=self.TABLE_NAME, + field="not_in_future", + operation="count", + predefined_regex="timestamp_iso", + ) + assert valid_count == 2 + assert total_row_count == 6 + + def test_should_return_row_count_for_valid_timestamp_date_not_in_future( + self, postgres_datasource: PostgresDataSource + ): + ( + valid_count, + total_row_count, + ) = postgres_datasource.query_timestamp_date_not_in_future_metric( + table=self.TABLE_NAME, + field="date_not_in_future", + operation="count", + predefined_regex="timestamp_iso", + ) + assert valid_count == 5 + assert total_row_count == 6