From 3c427e9825415d3e0f5255e309e8ed9caa5436c8 Mon Sep 17 00:00:00 2001
From: kumar saurav <saurav@waterdip.ai>
Date: Wed, 4 Sep 2024 00:15:44 +0530
Subject: [PATCH] add: timestamp validation

add: timestamp validation

add: timestamp validation

fix: module error
---
 dcs_core/core/datasource/manager.py           |   4 +-
 dcs_core/core/datasource/sql_datasource.py    | 280 ++++++++++++++++++
 dcs_core/core/validation/manager.py           |  12 +
 .../core/validation/validity_validation.py    | 126 ++++++++
 docs/validations/validity.md                  |  73 ++++-
 .../configuration/test_configuration_v1.py    |  90 ++++++
 .../datasource/test_sql_datasource.py         |  68 ++++-
 7 files changed, 643 insertions(+), 10 deletions(-)

diff --git a/dcs_core/core/datasource/manager.py b/dcs_core/core/datasource/manager.py
index e6f9014..44d580c 100644
--- a/dcs_core/core/datasource/manager.py
+++ b/dcs_core/core/datasource/manager.py
@@ -78,7 +78,7 @@ def _create_data_source(
         data_source_name = data_source_config.name
         data_source_type = data_source_config.type
         if data_source_type == "spark_df":
-            from datachecks.integrations.databases.spark_df import SparkDFDataSource
+            from dcs_core.integrations.databases.spark_df import SparkDFDataSource
 
             return SparkDFDataSource(
                 data_source_name,
@@ -86,7 +86,7 @@ def _create_data_source(
             )
         try:
             module_name = (
-                f"datachecks.integrations.databases.{data_source_config.type.value}"
+                f"dcs_core.integrations.databases.{data_source_config.type.value}"
             )
             module = importlib.import_module(module_name)
             data_source_class = self.DATA_SOURCE_CLASS_NAME_MAPPER[
diff --git a/dcs_core/core/datasource/sql_datasource.py b/dcs_core/core/datasource/sql_datasource.py
index 25e70fa..45c33c5 100644
--- a/dcs_core/core/datasource/sql_datasource.py
+++ b/dcs_core/core/datasource/sql_datasource.py
@@ -674,3 +674,283 @@ def query_negative_metric(
 
         result = self.fetchone(query)[0]
         return round(result, 2) if operation == "percent" else result
+
+    def query_timestamp_metric(
+        self,
+        table: str,
+        field: str,
+        operation: str,
+        predefined_regex: str,
+        filters: str = None,
+    ) -> Union[float, int]:
+        """
+        Count the rows whose ``field`` holds a valid ISO timestamp string.
+
+        NOTE: despite the annotation, a ``(valid_count, total_count)`` tuple is
+        returned for both operations; callers derive the percentage themselves.
+
+        :param table: Table name
+        :param field: Column name
+        :param operation: Metric operation ("count" or "percent")
+        :param predefined_regex: Name of the predefined pattern ("timestamp_iso")
+        :param filters: Optional filter condition (without the WHERE keyword)
+        :return: Tuple of valid count and total count; (0, 0) if a query fails
+        :raises ValueError: Unknown ``predefined_regex`` or ``operation``
+        """
+        if predefined_regex != "timestamp_iso":
+            raise ValueError(f"Unknown predefined regex pattern: {predefined_regex}")
+        # Checked up front so the best-effort handler below cannot swallow it.
+        if operation not in ("count", "percent"):
+            raise ValueError(f"Unknown operation: {operation}")
+
+        qualified_table_name = self.qualified_table_name(table)
+        timestamp_iso_regex = r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])T([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](?:\.\d{1,3})?(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])?$"
+        filters_clause = f"WHERE {filters}" if filters else ""
+
+        # Raw f-string so regex escapes such as \d are passed through verbatim.
+        query = rf"""
+         WITH extracted_timestamps AS (
+         SELECT
+            {field},
+            SUBSTRING({field} FROM '^(\d{{4}})') AS year,        -- Extract year
+            SUBSTRING({field} FROM '^\d{{4}}-(\d{{2}})') AS month, -- Extract month
+            SUBSTRING({field} FROM '^\d{{4}}-\d{{2}}-(\d{{2}})') AS day, -- Extract day
+            SUBSTRING({field} FROM 'T(\d{{2}})') AS hour,       -- Extract hour
+            SUBSTRING({field} FROM 'T\d{{2}}:(\d{{2}})') AS minute, -- Extract minute
+            SUBSTRING({field} FROM 'T\d{{2}}:\d{{2}}:(\d{{2}})') AS second, -- Extract second
+            SUBSTRING({field} FROM '([+-]\d{{2}}:\d{{2}}|Z)$') AS timezone -- Extract timezone
+         FROM {qualified_table_name}
+         {filters_clause}
+        ),
+         validated_timestamps AS (
+         SELECT
+            {field},
+            CASE
+                WHEN
+                    -- Whole value must match the ISO pattern, then each component is checked
+                    {field} ~ '{timestamp_iso_regex}' AND
+                    year ~ '^\d{{4}}$' AND
+                    month ~ '^(0[1-9]|1[0-2])$' AND
+                    day ~ '^((0[1-9]|[12][0-9])|(30|31))$' AND
+                    hour ~ '^([01][0-9]|2[0-3])$' AND
+                    minute ~ '^[0-5][0-9]$' AND
+                    second ~ '^[0-5][0-9]$' AND
+                    (timezone IS NULL OR timezone ~ '^(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])$') AND
+                    -- Additional check for days in months (e.g., February)
+                    (
+                        (month IN ('01', '03', '05', '07', '08', '10', '12') AND day BETWEEN '01' AND '31') OR
+                        (month IN ('04', '06', '09', '11') AND day BETWEEN '01' AND '30') OR
+                        (month = '02' AND day BETWEEN '01' AND
+                            CASE
+                                -- Handle leap years
+                                WHEN (year::int % 400 = 0 OR (year::int % 100 != 0 AND year::int % 4 = 0)) THEN '29'
+                                ELSE '28'
+                            END
+                        )
+                    )
+                THEN 1
+                ELSE 0
+            END AS is_valid
+         FROM extracted_timestamps
+        )
+         SELECT COUNT(*) AS valid_count
+         FROM validated_timestamps
+         WHERE is_valid = 1;
+        """
+
+        try:
+            valid_count = self.fetchone(query)[0]
+            total_count = self.fetchone(
+                f"SELECT COUNT(*) FROM {qualified_table_name} {filters_clause}"
+            )[0]
+        except Exception as e:
+            # Best-effort: report the failure and fall back to zero counts.
+            print(f"Error occurred: {e}")
+            return 0, 0
+
+        return valid_count, total_count
+
+    def query_timestamp_not_in_future_metric(
+        self,
+        table: str,
+        field: str,
+        operation: str,
+        predefined_regex: str,
+        filters: str = None,
+    ) -> Union[float, int]:
+        """
+        Count valid ISO timestamp strings in ``field`` that are not in the future.
+
+        :param table: Table name
+        :param field: Column name
+        :param operation: Metric operation ("count" or "percent")
+        :param predefined_regex: Name of the predefined pattern ("timestamp_iso")
+        :param filters: Optional filter condition (without the WHERE keyword)
+        :return: Tuple of not-in-future valid count and total count; (0, 0) on error
+        :raises ValueError: Unknown ``predefined_regex`` or ``operation``
+        """
+        if predefined_regex != "timestamp_iso":
+            raise ValueError(f"Unknown predefined regex pattern: {predefined_regex}")
+        # Checked up front so the best-effort handler below cannot swallow it.
+        if operation not in ("count", "percent"):
+            raise ValueError(f"Unknown operation: {operation}")
+
+        qualified_table_name = self.qualified_table_name(table)
+        timestamp_iso_regex = r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])T([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](?:\.\d{1,3})?(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])?$"
+        filters_clause = f"WHERE {filters}" if filters else ""
+
+        # Raw f-string so regex escapes such as \d are passed through verbatim.
+        query = rf"""
+         WITH extracted_timestamps AS (
+         SELECT
+            {field},
+            SUBSTRING({field} FROM '^(\d{{4}})') AS year,        -- Extract year
+            SUBSTRING({field} FROM '^\d{{4}}-(\d{{2}})') AS month, -- Extract month
+            SUBSTRING({field} FROM '^\d{{4}}-\d{{2}}-(\d{{2}})') AS day, -- Extract day
+            SUBSTRING({field} FROM 'T(\d{{2}})') AS hour,       -- Extract hour
+            SUBSTRING({field} FROM 'T\d{{2}}:(\d{{2}})') AS minute, -- Extract minute
+            SUBSTRING({field} FROM 'T\d{{2}}:\d{{2}}:(\d{{2}})') AS second, -- Extract second
+            SUBSTRING({field} FROM '([+-]\d{{2}}:\d{{2}}|Z)$') AS timezone -- Extract timezone
+         FROM {qualified_table_name}
+         {filters_clause}
+        ),
+         validated_timestamps AS (
+         SELECT
+            {field},
+            CASE
+                WHEN
+                    year ~ '^\d{{4}}$' AND
+                    month ~ '^(0[1-9]|1[0-2])$' AND
+                    day ~ '^((0[1-9]|[12][0-9])|(30|31))$' AND
+                    hour ~ '^([01][0-9]|2[0-3])$' AND
+                    minute ~ '^[0-5][0-9]$' AND
+                    second ~ '^[0-5][0-9]$' AND
+                    (timezone IS NULL OR timezone ~ '^(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])$') AND
+                    (
+                        (month IN ('01', '03', '05', '07', '08', '10', '12') AND day BETWEEN '01' AND '31') OR
+                        (month IN ('04', '06', '09', '11') AND day BETWEEN '01' AND '30') OR
+                        (month = '02' AND day BETWEEN '01' AND
+                            CASE
+                                WHEN (year::int % 400 = 0 OR (year::int % 100 != 0 AND year::int % 4 = 0)) THEN '29'
+                                ELSE '28'
+                            END
+                        )
+                    )
+                THEN 1
+                ELSE 0
+            END AS is_valid
+         FROM extracted_timestamps
+        ),
+         timestamps_not_in_future AS (
+         SELECT *
+         FROM validated_timestamps
+         WHERE CASE
+                -- CASE guards the cast and timestamptz keeps the Z or UTC offset
+                WHEN is_valid = 1 AND ({field} ~ '{timestamp_iso_regex}')
+                THEN {field}::timestamptz <= CURRENT_TIMESTAMP
+                ELSE FALSE
+               END
+        )
+         SELECT COUNT(*) AS valid_count
+         FROM timestamps_not_in_future;
+         """
+        try:
+            valid_count = self.fetchone(query)[0]
+            total_count = self.fetchone(
+                f"SELECT COUNT(*) FROM {qualified_table_name} {filters_clause}"
+            )[0]
+        except Exception as e:
+            # Best-effort: report the failure and fall back to zero counts.
+            print(f"Error occurred: {e}")
+            return 0, 0
+        return valid_count, total_count
+
+    def query_timestamp_date_not_in_future_metric(
+        self,
+        table: str,
+        field: str,
+        operation: str,
+        predefined_regex: str,
+        filters: str = None,
+    ) -> Union[float, int]:
+        """
+        Count valid ISO timestamp strings in ``field`` whose date is not in the future.
+
+        Only the date part of the value is compared against ``CURRENT_DATE``.
+
+        :param table: Table name
+        :param field: Column name
+        :param operation: Metric operation ("count" or "percent")
+        :param predefined_regex: The regex pattern to use (e.g., "timestamp_iso")
+        :param filters: Optional filter condition (without the WHERE keyword)
+        :return: Tuple of not-in-future valid count and total count; (0, 0) on error
+        :raises ValueError: Unknown ``predefined_regex`` or ``operation``
+        """
+        if predefined_regex != "timestamp_iso":
+            raise ValueError(f"Unknown predefined regex pattern: {predefined_regex}")
+        # Checked up front so the best-effort handler below cannot swallow it.
+        if operation not in ("count", "percent"):
+            raise ValueError(f"Unknown operation: {operation}")
+
+        qualified_table_name = self.qualified_table_name(table)
+        timestamp_iso_regex = r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])T([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](?:\.\d{1,3})?(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])?$"
+        filters_clause = f"WHERE {filters}" if filters else ""
+
+        # Raw f-string so regex escapes such as \d are passed through verbatim.
+        query = rf"""
+         WITH extracted_timestamps AS (
+         SELECT
+            {field},
+            SUBSTRING({field} FROM '^(\d{{4}})') AS year,        -- Extract year
+            SUBSTRING({field} FROM '^\d{{4}}-(\d{{2}})') AS month, -- Extract month
+            SUBSTRING({field} FROM '^\d{{4}}-\d{{2}}-(\d{{2}})') AS day -- Extract day
+         FROM {qualified_table_name}
+         {filters_clause}
+        ),
+         validated_dates AS (
+         SELECT
+            {field},
+            CASE
+                WHEN
+                    year ~ '^\d{{4}}$' AND
+                    month ~ '^(0[1-9]|1[0-2])$' AND
+                    day ~ '^((0[1-9]|[12][0-9])|(30|31))$' AND
+                    (
+                        (month IN ('01', '03', '05', '07', '08', '10', '12') AND day BETWEEN '01' AND '31') OR
+                        (month IN ('04', '06', '09', '11') AND day BETWEEN '01' AND '30') OR
+                        (month = '02' AND day BETWEEN '01' AND
+                            CASE
+                                WHEN (year::int % 400 = 0 OR (year::int % 100 != 0 AND year::int % 4 = 0)) THEN '29'
+                                ELSE '28'
+                            END
+                        )
+                    )
+                THEN 1
+                ELSE 0
+            END AS is_valid
+         FROM extracted_timestamps
+        ),
+         dates_not_in_future AS (
+         SELECT *
+         FROM validated_dates
+         WHERE CASE
+                -- CASE guards the cast so malformed values are never converted
+                WHEN is_valid = 1 AND ({field} ~ '{timestamp_iso_regex}')
+                THEN ({field})::date <= CURRENT_DATE  -- Compare only the date part against the current date
+                ELSE FALSE
+               END
+        )
+         SELECT COUNT(*) AS valid_count
+         FROM dates_not_in_future;
+        """
+
+        try:
+            valid_count = self.fetchone(query)[0]
+            total_count = self.fetchone(
+                f"SELECT COUNT(*) FROM {qualified_table_name} {filters_clause}"
+            )[0]
+        except Exception as e:
+            # Best-effort: report the failure and fall back to zero counts.
+            print(f"Error occurred: {e}")
+            return 0, 0
+        return valid_count, total_count
diff --git a/dcs_core/core/validation/manager.py b/dcs_core/core/validation/manager.py
index 2bfe40f..f683d26 100644
--- a/dcs_core/core/validation/manager.py
+++ b/dcs_core/core/validation/manager.py
@@ -58,6 +58,7 @@
 )
 from dcs_core.core.validation.validity_validation import (  # noqa F401 this is used in globals
     CountCUSIPValidation,
+    CountDateNotInFutureValidation,
     CountEmailValidation,
     CountFIGIValidation,
     CountInvalidRegex,
@@ -66,9 +67,11 @@
     CountLatitudeValidation,
     CountLEIValidation,
     CountLongitudeValidation,
+    CountNotInFutureValidation,
     CountPermIDValidation,
     CountSEDOLValidation,
     CountSSNValidation,
+    CountTimeStampValidation,
     CountUSAPhoneValidation,
     CountUSAStateCodeValidation,
     CountUSAZipCodeValidation,
@@ -76,6 +79,7 @@
     CountValidRegex,
     CountValidValues,
     PercentCUSIPValidation,
+    PercentDateNotInFutureValidation,
     PercentEmailValidation,
     PercentFIGIValidation,
     PercentInvalidRegex,
@@ -84,9 +88,11 @@
     PercentLatitudeValidation,
     PercentLEIValidation,
     PercentLongitudeValidation,
+    PercentNotInFutureValidation,
     PercentPermIDValidation,
     PercentSEDOLValidation,
     PercentSSNValidation,
+    PercentTimeStampValidation,
     PercentUSAPhoneValidation,
     PercentUSAStateCodeValidation,
     PercentUSAZipCodeValidation,
@@ -165,6 +171,12 @@ class ValidationManager:
         ValidationFunction.PERCENT_ZERO.value: "PercentZeroValidation",
         ValidationFunction.COUNT_NEGATIVE.value: "CountNegativeValidation",
         ValidationFunction.PERCENT_NEGATIVE.value: "PercentNegativeValidation",
+        ValidationFunction.COUNT_TIMESTAMP_STRING.value: "CountTimeStampValidation",
+        ValidationFunction.PERCENT_TIMESTAMP_STRING.value: "PercentTimeStampValidation",
+        ValidationFunction.COUNT_NOT_IN_FUTURE.value: "CountNotInFutureValidation",
+        ValidationFunction.PERCENT_NOT_IN_FUTURE.value: "PercentNotInFutureValidation",
+        ValidationFunction.COUNT_DATE_NOT_IN_FUTURE.value: "CountDateNotInFutureValidation",
+        ValidationFunction.PERCENT_DATE_NOT_IN_FUTURE.value: "PercentDateNotInFutureValidation",
     }
 
     def __init__(
diff --git a/dcs_core/core/validation/validity_validation.py b/dcs_core/core/validation/validity_validation.py
index da472a7..527160d 100644
--- a/dcs_core/core/validation/validity_validation.py
+++ b/dcs_core/core/validation/validity_validation.py
@@ -698,3 +698,129 @@ def _generate_metric_value(self, **kwargs) -> Union[float, int]:
             raise NotImplementedError(
                 "Perm ID validation is only supported for SQL data sources"
             )
+
+
+class CountTimeStampValidation(Validation):
+    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
+        """Return the valid-timestamp count reported by the SQL data source."""
+        if not isinstance(self.data_source, SQLDataSource):
+            raise ValueError(
+                "Unsupported data source type for CountTimeStampValidation"
+            )
+        valid_count, _ = self.data_source.query_timestamp_metric(
+            table=self.dataset_name,
+            field=self.field_name,
+            operation="count",
+            predefined_regex="timestamp_iso",
+            filters=self.where_filter,
+        )
+        return valid_count
+
+
+class PercentTimeStampValidation(Validation):
+    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
+        """
+        Return the share of valid timestamp strings as a percentage (2 decimals).
+        """
+        if not isinstance(self.data_source, SQLDataSource):
+            raise ValueError(
+                "Unsupported data source type for PercentTimeStampValidation"
+            )
+        valid_count, total_row_count = self.data_source.query_timestamp_metric(
+            table=self.dataset_name,
+            field=self.field_name,
+            operation="percent",
+            predefined_regex="timestamp_iso",
+            filters=self.where_filter,
+        )
+        if total_row_count > 0:
+            return round((valid_count / total_row_count) * 100, 2)
+        return 0.0
+
+
+class CountNotInFutureValidation(Validation):
+    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
+        """
+        Return the number of valid timestamp strings that are not in the
+        future, as reported by the SQL data source.
+        """
+        if not isinstance(self.data_source, SQLDataSource):
+            raise ValueError(
+                "Unsupported data source type for CountNotInFutureValidation"
+            )
+        valid_count, _ = self.data_source.query_timestamp_not_in_future_metric(
+            table=self.dataset_name,
+            field=self.field_name,
+            operation="count",
+            predefined_regex="timestamp_iso",
+            filters=self.where_filter,
+        )
+        return valid_count
+
+
+class PercentNotInFutureValidation(Validation):
+    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
+        """
+        Return the percentage (2 decimals) of rows whose timestamp string is
+        valid and not in the future; 0.0 when the data source reports no
+        rows.
+        """
+        if not isinstance(self.data_source, SQLDataSource):
+            raise ValueError(
+                "Unsupported data source type for PercentNotInFutureValidation"
+            )
+        counts = self.data_source.query_timestamp_not_in_future_metric(
+            table=self.dataset_name,
+            field=self.field_name,
+            operation="percent",
+            predefined_regex="timestamp_iso",
+            filters=self.where_filter,
+        )
+        valid_count, total_row_count = counts
+        if total_row_count > 0:
+            return round((valid_count / total_row_count) * 100, 2)
+        return 0.0
+
+
+class CountDateNotInFutureValidation(Validation):
+    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
+        """
+        Return the number of valid timestamp strings whose date is not in
+        the future, as reported by the SQL data source.
+        """
+        if not isinstance(self.data_source, SQLDataSource):
+            raise ValueError(
+                "Unsupported data source type for CountDateNotInFutureValidation"
+            )
+        valid_count, _ = self.data_source.query_timestamp_date_not_in_future_metric(
+            table=self.dataset_name,
+            field=self.field_name,
+            operation="count",
+            predefined_regex="timestamp_iso",
+            filters=self.where_filter,
+        )
+        return valid_count
+
+
+class PercentDateNotInFutureValidation(Validation):
+    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
+        """
+        Return the percentage (2 decimals) of rows whose timestamp string is
+        valid and whose date is not in the future; 0.0 when the data source
+        reports no rows.
+        """
+        if not isinstance(self.data_source, SQLDataSource):
+            raise ValueError(
+                "Unsupported data source type for PercentDateNotInFutureValidation"
+            )
+        counts = self.data_source.query_timestamp_date_not_in_future_metric(
+            table=self.dataset_name,
+            field=self.field_name,
+            operation="percent",
+            predefined_regex="timestamp_iso",
+            filters=self.where_filter,
+        )
+        valid_count, total_row_count = counts
+        if total_row_count > 0:
+            return round((valid_count / total_row_count) * 100, 2)
+        return 0.0
diff --git a/docs/validations/validity.md b/docs/validations/validity.md
index 4ab33da..bbe74aa 100644
--- a/docs/validations/validity.md
+++ b/docs/validations/validity.md
@@ -277,7 +277,7 @@ validations for location_db.geolocation:
       on: percent_longitude(longitude_column_name)
       threshold: "> 80"
 ```
-```
+
 
 ## Count SSN
 
@@ -505,3 +505,74 @@ validations for product_db.products:
       on: percent_negative(price)
       threshold: "< 40"
 ```
+## Count Timestamp String
+
+The count timestamp string validation counts the values in a column that are valid ISO-format timestamp strings.
+
+**Example**
+
+```yaml title="dcs_config.yaml"
+validations for product_db.products:
+  - count_valid_timestamp:
+     on: count_timestamp_string(timestamp)
+```
+
+## Percent Timestamp String
+
+The percent timestamp string validation reports the percentage of values in a column that are valid ISO-format timestamp strings.
+
+**Example**
+
+```yaml title="dcs_config.yaml"
+validations for product_db.products:
+  - percent_valid_timestamp:
+     on: percent_timestamp_string(timestamp)
+```
+
+## Count Not In Future
+
+The count not in future validation counts the valid timestamp strings in a column that are not in the future.
+
+**Example**
+
+```yaml title="dcs_config.yaml"
+validations for product_db.products:
+  - count_timestamp_not_in_future:
+     on: count_not_in_future(future_timestamp)
+```
+
+## Percent Not In Future
+
+The percent not in future validation reports the percentage of valid timestamp strings in a column that are not in the future.
+
+**Example**
+
+```yaml title="dcs_config.yaml"
+validations for product_db.products:
+  - percent_timestamp_not_in_future:
+     on: percent_not_in_future(future_timestamp)
+```
+
+## Count Date Not In Future
+
+The count date not in future validation counts the valid timestamp strings in a column whose date part is not in the future.
+
+**Example**
+
+```yaml title="dcs_config.yaml"
+validations for product_db.products:
+  - count_date_not_in_future:
+     on: count_date_not_in_future(future_timestamp)
+```
+
+## Percent Date Not In Future
+
+The percent date not in future validation reports the percentage of valid timestamp strings in a column whose date part is not in the future.
+
+**Example**
+
+```yaml title="dcs_config.yaml"
+validations for product_db.products:
+  - percent_date_not_in_future:
+     on: percent_date_not_in_future(future_timestamp)
+```
diff --git a/tests/core/configuration/test_configuration_v1.py b/tests/core/configuration/test_configuration_v1.py
index aa756b3..771bc31 100644
--- a/tests/core/configuration/test_configuration_v1.py
+++ b/tests/core/configuration/test_configuration_v1.py
@@ -1025,3 +1025,93 @@ def test_should_parse_percent_negative_validation():
         .get_validation_function
         == ValidationFunction.PERCENT_NEGATIVE
     )
+
+
+def test_should_parse_count_timestamp_string():
+    """count_timestamp_string(...) must parse to COUNT_TIMESTAMP_STRING."""
+    yaml_string = """
+    validations for source.table:
+      - test:
+          on: count_timestamp_string(timestamp)
+    """
+    configuration = load_configuration_from_yaml_str(yaml_string)
+    validation = configuration.validations["source.table"].validations["test"]
+    assert (
+        validation.get_validation_function
+        == ValidationFunction.COUNT_TIMESTAMP_STRING
+    )
+
+
+def test_should_parse_percent_timestamp_string():
+    """percent_timestamp_string(...) must parse to PERCENT_TIMESTAMP_STRING."""
+    yaml_string = """
+    validations for source.table:
+      - test:
+          on: percent_timestamp_string(timestamp)
+    """
+    configuration = load_configuration_from_yaml_str(yaml_string)
+    validation = configuration.validations["source.table"].validations["test"]
+    assert (
+        validation.get_validation_function
+        == ValidationFunction.PERCENT_TIMESTAMP_STRING
+    )
+
+
+def test_should_parse_count_not_in_future():
+    """count_not_in_future(...) must parse to COUNT_NOT_IN_FUTURE."""
+    yaml_string = """
+    validations for source.table:
+      - test:
+           on: count_not_in_future(future_timestamp)
+    """
+    configuration = load_configuration_from_yaml_str(yaml_string)
+    validation = configuration.validations["source.table"].validations["test"]
+    assert (
+        validation.get_validation_function
+        == ValidationFunction.COUNT_NOT_IN_FUTURE
+    )
+
+
+def test_should_parse_percent_not_in_future():
+    """percent_not_in_future(...) must parse to PERCENT_NOT_IN_FUTURE."""
+    yaml_string = """
+    validations for source.table:
+      - test:
+           on: percent_not_in_future(future_timestamp)
+    """
+    configuration = load_configuration_from_yaml_str(yaml_string)
+    validation = configuration.validations["source.table"].validations["test"]
+    assert (
+        validation.get_validation_function
+        == ValidationFunction.PERCENT_NOT_IN_FUTURE
+    )
+
+
+def test_should_parse_count_date_not_in_future():
+    """count_date_not_in_future(...) must parse to COUNT_DATE_NOT_IN_FUTURE."""
+    yaml_string = """
+    validations for source.table:
+      - test:
+          on: count_date_not_in_future(future_timestamp)
+    """
+    configuration = load_configuration_from_yaml_str(yaml_string)
+    validation = configuration.validations["source.table"].validations["test"]
+    assert (
+        validation.get_validation_function
+        == ValidationFunction.COUNT_DATE_NOT_IN_FUTURE
+    )
+
+
+def test_should_parse_percent_date_not_in_future():
+    """percent_date_not_in_future(...) must parse to PERCENT_DATE_NOT_IN_FUTURE."""
+    yaml_string = """
+    validations for source.table:
+      - test:
+          on: percent_date_not_in_future(future_timestamp)
+    """
+    configuration = load_configuration_from_yaml_str(yaml_string)
+    validation = configuration.validations["source.table"].validations["test"]
+    assert (
+        validation.get_validation_function
+        == ValidationFunction.PERCENT_DATE_NOT_IN_FUTURE
+    )
diff --git a/tests/integration/datasource/test_sql_datasource.py b/tests/integration/datasource/test_sql_datasource.py
index d5f414e..8a0e168 100644
--- a/tests/integration/datasource/test_sql_datasource.py
+++ b/tests/integration/datasource/test_sql_datasource.py
@@ -132,7 +132,10 @@ def setup_tables(
                             isin VARCHAR(12),
                             perm_id VARCHAR(50),
                             salary INTEGER,
-                            price FLOAT
+                            price FLOAT,
+                            timestamp VARCHAR(50),
+                            not_in_future VARCHAR(50),
+                            date_not_in_future VARCHAR(50)
                         )
                     """
                 )
@@ -144,27 +147,33 @@ def setup_tables(
                 ('thor', '{(utc_now - datetime.timedelta(days=10)).strftime("%Y-%m-%d")}',
                     1500, NULL, 'thor hammer', 'e7194aaa-5516-4362-a5ff-6ff971976bec',
                     '123-456-7890', 'jane.doe@domain', 'C2', 'ABCDE', 40.0678, -7555555554.0060,'856-45-6789','0067340',
-                    'JRIK0092LOAUCXTR6042','03783310','BBG000B9XRY4','US0378331005', '1234--5678-9012--3456-789', 0, 100.0), -- invalid email -- invalid usa_state_code  -- invalid usa_zip_code -- invalid cusip -- invalid perm_id
+                    'JRIK0092LOAUCXTR6042','03783310','BBG000B9XRY4','US0378331005', '1234--5678-9012--3456-789', 0, 100.0,'2024-01-15T12:30:45Z',
+                    '2024-09-06T01:15:00Z','2023-12-31T23:59:59+01:00'), -- invalid email -- invalid usa_state_code  -- invalid usa_zip_code -- invalid cusip -- invalid perm_id
                 ('captain america', '{(utc_now - datetime.timedelta(days=3)).strftime("%Y-%m-%d")}',
                     90, 80, 'shield', 'e7194aaa-5516-4362-a5ff-6ff971976b', '(123) 456-7890',
                     'john.doe@.com ', 'NY', '12-345', 34.0522, -118.2437,'000-12-3456', 'B01HL06',
-                    'CDR300OS7OJENVEDDW89','037833100','BBG000BL2H25','US5949181045', '1234567890123456789', 1000, -50.0), -- invalid weapon_id --invalid email -- invalid usa_zip_code -- invalid ssn
+                    'CDR300OS7OJENVEDDW89','037833100','BBG000BL2H25','US5949181045', '1234567890123456789', 1000, -50.0,'2021-06-15T08:22:33.123Z',
+                    '2024-08-25T09:15:00Z','2024-08-25T09:15:00Z'), -- invalid weapon_id --invalid email -- invalid usa_zip_code -- invalid ssn
                 ('iron man', '{(utc_now - datetime.timedelta(days=4)).strftime("%Y-%m-%d")}',
                     50, 70, 'suit', '1739c676-6108-4dd2-8984-2459df744936', '123 456 7890',
                     'contact@company..org', 'XY', '85001', 37.7749, -122.4194,'859-99-9999','4155586',
-                    'VXQ400F1OBWAVPBJP86','594918104','BBG000B3YB97','US38259P5088', '123456789012345678', 0, -150.0), -- invalid email -- invalid usa_state_code -- invalid lei -- invalid perm_id
+                    'VXQ400F1OBWAVPBJP86','594918104','BBG000B3YB97','US38259P5088', '123456789012345678', 0, -150.0,'2024-04-31T12:30:45Z',
+                    '2024-10-07T12:00:00Z','2024-10-10T12:00:00Z'), -- invalid email -- invalid usa_state_code -- invalid lei -- invalid perm_id
                 ('hawk eye', '{(utc_now - datetime.timedelta(days=5)).strftime("%Y-%m-%d")}',
                     40, 60, 'bow', '1739c676-6108-4dd2-8984-2459df746', '+1 123-456-7890',
                     'user@@example.com', 'TX', '30301', 51.1657, 10.4515,'123-45-67890','12345',
-                    'FKRD00GCEYWDCNYLNF60','38259P508','BBG000B57Y12','US83165F1026', '5647382910564738291', 90, 50.0), -- invalid weapon_id --invalid email -- invalid ssn -- invalid sedol
+                    'FKRD00GCEYWDCNYLNF60','38259P508','BBG000B57Y12','US83165F1026', '5647382910564738291', 90, 50.0,'2023-13-01T00:00:00Z',
+                    '2025-01-01T00:00:00+01:00','2024-09-06T01:15:00Z'), -- invalid weapon_id --invalid email -- invalid ssn -- invalid sedol
                 ('clark kent', '{(utc_now - datetime.timedelta(days=6)).strftime("%Y-%m-%d")}',
                     35, 50, '', '7be61b2c-45dc-4889-97e3-9202e8', '09123.456.7890',
                     'contact@company.org', 'ZZ', '123456', 51.5074, -0.1278,'666-45-6789','34A56B7',
-                    '6R5J00FMIANQQH6JMN56','83165F102','BBG000B9XRY','US0231351067', '1234-5678-9012-3456-78X', 0, -25.0), -- invalid weapon_id -- invalid phone -- invalid usa_state_code -- invalid usa_zip_code -- invalid ssn -- invalid sedol -- invalid figi -- invalid perm_id
+                    '6R5J00FMIANQQH6JMN56','83165F102','BBG000B9XRY','US0231351067', '1234-5678-9012-3456-78X', 0, -25.0,'2023-03-08T16:45:00+02:00',
+                    '2024-12-31T23:59:59Z','2024-09-06T01:15:00Z'), -- invalid weapon_id -- invalid phone -- invalid usa_state_code -- invalid usa_zip_code -- invalid ssn -- invalid sedol -- invalid figi -- invalid perm_id
                 ('black widow', '{(utc_now - datetime.timedelta(days=6)).strftime("%Y-%m-%d")}',
                     35, 50, '', '7be61b2c-45dc-4889-97e3-9202e8032c73', '+1 (123) 456-7890',
                     'jane_smith123@domain.co.uk', 'FL', '90210', 483.8566, 2.3522,'001-01-0001','456VGHY',
-                    '0FPB00BBRHUYOE7DSK19','023135106','BBG000B6R530','US037833100', '2345-6789-0123-4567-890', 70, 30.0) -- invalid isin -- invalid sedol
+                    '0FPB00BBRHUYOE7DSK19','023135106','BBG000B6R530','US037833100', '2345-6789-0123-4567-890', 70, 30.0,'2021-06-15T08:22:33.123Z',
+                    '2024-10-10T12:00:00Z','2024-09-05T23:59:59Z') -- invalid isin -- invalid sedol
             """
 
             postgresql_connection.execute(text(insert_query))
@@ -641,3 +650,48 @@ def test_should_return_percent_negative(
             table=self.TABLE_NAME, field="price", operation="percent"
         )
         assert round(percent_negative, 2) == 50.0
+
+    def test_should_return_row_count_for_valid_timestamp_string(
+        self, postgres_datasource: PostgresDataSource
+    ):
+        """
+        query_timestamp_metric must report 4 valid timestamps out of 6 rows.
+        """
+        valid_count, total_row_count = postgres_datasource.query_timestamp_metric(
+            table=self.TABLE_NAME,
+            field="timestamp",
+            operation="count",
+            predefined_regex="timestamp_iso",
+        )
+        assert valid_count == 4
+        assert total_row_count == 6
+
+    def test_should_return_row_count_for_valid_timestamp_not_in_future(
+        self, postgres_datasource: PostgresDataSource
+    ):
+        """Expect 2 of the 6 fixture rows to be valid and not in the future."""
+        counts = postgres_datasource.query_timestamp_not_in_future_metric(
+            table=self.TABLE_NAME,
+            field="not_in_future",
+            operation="count",
+            predefined_regex="timestamp_iso",
+        )
+        # NOTE(review): fixture dates are hard-coded, so this depends on "now".
+        valid_count, total_row_count = counts
+        assert valid_count == 2
+        assert total_row_count == 6
+
+    def test_should_return_row_count_for_valid_timestamp_date_not_in_future(
+        self, postgres_datasource: PostgresDataSource
+    ):
+        """Expect 5 of the 6 fixture rows to have a date that is not in the future."""
+        counts = postgres_datasource.query_timestamp_date_not_in_future_metric(
+            table=self.TABLE_NAME,
+            field="date_not_in_future",
+            operation="count",
+            predefined_regex="timestamp_iso",
+        )
+        # NOTE(review): fixture dates are hard-coded, so this depends on "now".
+        valid_count, total_row_count = counts
+        assert valid_count == 5
+        assert total_row_count == 6