From cb0f3976a5567faf51688b96493522cf2891d62d Mon Sep 17 00:00:00 2001
From: aman-waterdip <162797207+aman-waterdip@users.noreply.github.com>
Date: Wed, 4 Sep 2024 15:11:45 +0530
Subject: [PATCH] feat: negative value numeric validation function

---
 dcs_core/core/common/models/validation.py     |  2 +
 dcs_core/core/datasource/sql_datasource.py    | 25 +++++++++++
 dcs_core/core/validation/manager.py           |  4 ++
 .../core/validation/numeric_validation.py     | 28 +++++++++++++
 docs/validations/validity.md                  | 32 ++++++++++++++-
 .../postgres/example_postgres_config.yaml     |  9 ++++
 .../configuration/test_configuration_v1.py    | 32 +++++++++++++++
 .../datasource/test_sql_datasource.py         | 41 +++++++++++++------
 8 files changed, 160 insertions(+), 13 deletions(-)

diff --git a/dcs_core/core/common/models/validation.py b/dcs_core/core/common/models/validation.py
index e4587223..cf9bd70a 100644
--- a/dcs_core/core/common/models/validation.py
+++ b/dcs_core/core/common/models/validation.py
@@ -80,6 +80,8 @@ class ValidationFunction(str, Enum):
     PERCENTILE_60 = "percentile_60"
     PERCENTILE_80 = "percentile_80"
     PERCENTILE_90 = "percentile_90"
+    COUNT_NEGATIVE = "count_negative"
+    PERCENT_NEGATIVE = "percent_negative"
 
     # Reliability validations 3
     COUNT_ROWS = "count_rows"
diff --git a/dcs_core/core/datasource/sql_datasource.py b/dcs_core/core/datasource/sql_datasource.py
index 1020192d..3862ea3b 100644
--- a/dcs_core/core/datasource/sql_datasource.py
+++ b/dcs_core/core/datasource/sql_datasource.py
@@ -622,3 +622,28 @@ def query_get_percentile(
         if filters:
             query += f" WHERE {filters}"
         return round(self.fetchone(query)[0], 2)
+
+    def query_negative_metric(
+        self, table: str, field: str, operation: str, filters: str = None
+    ) -> Union[int, float]:
+        """
+        Count the rows whose ``field`` is negative, or their percentage of all rows.
+
+        :param operation: "percent" for the percentage; anything else returns the count
+        :param filters: optional SQL condition restricting the rows considered
+        :return: the count, or the percentage rounded to 2 decimals (0.0 for no rows)
+        """
+        qualified_table_name = self.qualified_table_name(table)
+        # Parenthesise the filter so an OR inside it cannot bypass the "< 0" test.
+        where = f"{field} < 0 AND ({filters})" if filters else f"{field} < 0"
+        negative_query = f"SELECT COUNT(*) FROM {qualified_table_name} WHERE {where}"
+        if operation != "percent":
+            return self.fetchone(negative_query)[0]
+        total_count_query = f"SELECT COUNT(*) FROM {qualified_table_name}"
+        if filters:
+            total_count_query += f" WHERE {filters}"
+        # NULLIF yields NULL instead of a division-by-zero error when no rows match.
+        divisor = f"NULLIF(CAST(({total_count_query}) AS float), 0)"
+        query = f"SELECT (CAST(({negative_query}) AS float) / {divisor}) * 100"
+        result = self.fetchone(query)[0]
+        return round(result, 2) if result is not None else 0.0
diff --git a/dcs_core/core/validation/manager.py b/dcs_core/core/validation/manager.py
index 2da766fa..36d4430f 100644
--- a/dcs_core/core/validation/manager.py
+++ b/dcs_core/core/validation/manager.py
@@ -32,6 +32,7 @@
 )
 from dcs_core.core.validation.numeric_validation import (  # noqa F401 this is used in globals
     AvgValidation,
+    CountNegativeValidation,
     MaxValidation,
     MinValidation,
     Percentile20Validation,
@@ -39,6 +40,7 @@
     Percentile60Validation,
     Percentile80Validation,
     Percentile90Validation,
+    PercentNegativeValidation,
     StdDevValidation,
     SumValidation,
     VarianceValidation,
@@ -157,6 +159,8 @@ class ValidationManager:
         ValidationFunction.PERCENTILE_60.value: "Percentile60Validation",
         ValidationFunction.PERCENTILE_80.value: "Percentile80Validation",
         ValidationFunction.PERCENTILE_90.value: "Percentile90Validation",
+        ValidationFunction.COUNT_NEGATIVE.value: "CountNegativeValidation",
+        ValidationFunction.PERCENT_NEGATIVE.value: "PercentNegativeValidation",
     }
 
     def __init__(
diff --git a/dcs_core/core/validation/numeric_validation.py b/dcs_core/core/validation/numeric_validation.py
index 14617292..b28b4fb8 100644
--- a/dcs_core/core/validation/numeric_validation.py
+++ b/dcs_core/core/validation/numeric_validation.py
@@ -190,3 +190,31 @@ def _generate_metric_value(self, **kwargs) -> float:
             )
         else:
             raise ValueError("Unsupported data source type for Percentile90Validation")
+
+
+class CountNegativeValidation(Validation):
+    def _generate_metric_value(self, **kwargs) -> int:
+        """Return the number of negative values found in the validated field."""
+        if not isinstance(self.data_source, SQLDataSource):
+            raise ValueError("Unsupported data source type for CountNegativeValidation")
+        return self.data_source.query_negative_metric(
+            table=self.dataset_name,
+            field=self.field_name,
+            operation="count",
+            filters=self.where_filter,
+        )
+
+
+class PercentNegativeValidation(Validation):
+    def _generate_metric_value(self, **kwargs) -> float:
+        """Return the percentage of negative values found in the validated field."""
+        if not isinstance(self.data_source, SQLDataSource):
+            raise ValueError(
+                "Unsupported data source type for PercentNegativeValidation"
+            )
+        return self.data_source.query_negative_metric(
+            table=self.dataset_name,
+            field=self.field_name,
+            operation="percent",
+            filters=self.where_filter,
+        )
diff --git a/docs/validations/validity.md b/docs/validations/validity.md
index 05f70575..39061cbe 100644
--- a/docs/validations/validity.md
+++ b/docs/validations/validity.md
@@ -445,4 +445,34 @@ The percent permid validation checks the percentage of valid permid in a dataset
 validations for product_db.products:
   - percent_permid_of_user:
       on: percent_permid(perm_id)
-```
\ No newline at end of file
+```
+
+# **Numeric Negative Value Validations**
+
+The Numeric Negative Value Validations measure how many values in a numeric field are negative, so that the resulting count or percentage can be checked against a specified threshold.
+
+## **COUNT_NEGATIVE**
+
+This validation counts the number of negative values present in a given numeric field.
+
+**Example**
+
+```yaml
+validations for product_db.products:
+  - negative value count should be less than 2:
+      on: count_negative(price)
+      threshold: "< 2"
+```
+
+## **PERCENT_NEGATIVE**
+
+This validation calculates the percentage of negative values in a numeric field, relative to the total number of records.
+
+**Example**
+
+```yaml
+validations for product_db.products:
+  - negative value percentage should be less than 40%:
+      on: percent_negative(price)
+      threshold: "< 40"
+```
diff --git a/examples/configurations/postgres/example_postgres_config.yaml b/examples/configurations/postgres/example_postgres_config.yaml
index 84b96d6b..34d63a50 100644
--- a/examples/configurations/postgres/example_postgres_config.yaml
+++ b/examples/configurations/postgres/example_postgres_config.yaml
@@ -19,6 +19,15 @@ validations for iris_pgsql.dcs_iris:
       on: stddev(sepal_length)
       threshold: "< 0.5"
 
+  # Negative Value Metrics
+  - sepal_length negative value count:
+      on: count_negative(sepal_length)
+      threshold: "< 2"
+
+  - sepal_length negative value percentage:
+      on: percent_negative(sepal_length)
+      threshold: "< 40"
+
   # Uniqueness Metrics
   - species duplicate count:
       on: count_duplicate(species)
diff --git a/tests/core/configuration/test_configuration_v1.py b/tests/core/configuration/test_configuration_v1.py
index 5f31e82f..e48177b1 100644
--- a/tests/core/configuration/test_configuration_v1.py
+++ b/tests/core/configuration/test_configuration_v1.py
@@ -949,3 +949,35 @@ def test_should_parse_90th_percentile_validation():
         .get_validation_function
         == ValidationFunction.PERCENTILE_90
     )
+
+
+def test_should_parse_count_negative_validation():
+    """The ``count_negative(price)`` expression parses to COUNT_NEGATIVE."""
+    validation_name = "count_negative for price should be less than 2"
+    yaml_string = """
+    validations for product_db.products:
+      - count_negative for price should be less than 2:
+          on: count_negative(price)
+          threshold: "< 2"
+    """
+    configuration = load_configuration_from_yaml_str(yaml_string)
+    dataset_validations = configuration.validations["product_db.products"]
+    parsed = dataset_validations.validations[validation_name]
+
+    assert parsed.get_validation_function == ValidationFunction.COUNT_NEGATIVE
+
+
+def test_should_parse_percent_negative_validation():
+    """The ``percent_negative(price)`` expression parses to PERCENT_NEGATIVE."""
+    validation_name = "percent_negative for price should be less than 40%"
+    yaml_string = """
+    validations for product_db.products:
+      - percent_negative for price should be less than 40%:
+          on: percent_negative(price)
+          threshold: "< 40"
+    """
+    configuration = load_configuration_from_yaml_str(yaml_string)
+    dataset_validations = configuration.validations["product_db.products"]
+    parsed = dataset_validations.validations[validation_name]
+
+    assert parsed.get_validation_function == ValidationFunction.PERCENT_NEGATIVE
diff --git a/tests/integration/datasource/test_sql_datasource.py b/tests/integration/datasource/test_sql_datasource.py
index c196cf9d..02ffb9fc 100644
--- a/tests/integration/datasource/test_sql_datasource.py
+++ b/tests/integration/datasource/test_sql_datasource.py
@@ -130,7 +130,8 @@ def setup_tables(
                             cusip VARCHAR(9),
                             figi VARCHAR(12),
                             isin VARCHAR(12),
-                            perm_id VARCHAR(50)
+                            perm_id VARCHAR(50),
+                            price FLOAT
                         )
                     """
                 )
@@ -142,27 +143,27 @@ def setup_tables(
                 ('thor', '{(utc_now - datetime.timedelta(days=10)).strftime("%Y-%m-%d")}',
                     1500, NULL, 'thor hammer', 'e7194aaa-5516-4362-a5ff-6ff971976bec',
                     '123-456-7890', 'jane.doe@domain', 'C2', 'ABCDE', 40.0678, -7555555554.0060,'856-45-6789','0067340',
-                    'JRIK0092LOAUCXTR6042','03783310','BBG000B9XRY4','US0378331005', '1234--5678-9012--3456-789'), -- invalid email -- invalid usa_state_code  -- invalid usa_zip_code -- invalid cusip -- invalid perm_id
+                    'JRIK0092LOAUCXTR6042','03783310','BBG000B9XRY4','US0378331005', '1234--5678-9012--3456-789', 100.0), -- invalid email -- invalid usa_state_code  -- invalid usa_zip_code -- invalid cusip -- invalid perm_id
                 ('captain america', '{(utc_now - datetime.timedelta(days=3)).strftime("%Y-%m-%d")}',
                     90, 80, 'shield', 'e7194aaa-5516-4362-a5ff-6ff971976b', '(123) 456-7890',
                     'john.doe@.com ', 'NY', '12-345', 34.0522, -118.2437,'000-12-3456', 'B01HL06',
-                    'CDR300OS7OJENVEDDW89','037833100','BBG000BL2H25','US5949181045', '1234567890123456789'), -- invalid weapon_id --invalid email -- invalid usa_zip_code -- invalid ssn
+                    'CDR300OS7OJENVEDDW89','037833100','BBG000BL2H25','US5949181045', '1234567890123456789', -50.0), -- invalid weapon_id --invalid email -- invalid usa_zip_code -- invalid ssn
                 ('iron man', '{(utc_now - datetime.timedelta(days=4)).strftime("%Y-%m-%d")}',
                     50, 70, 'suit', '1739c676-6108-4dd2-8984-2459df744936', '123 456 7890',
                     'contact@company..org', 'XY', '85001', 37.7749, -122.4194,'859-99-9999','4155586',
-                    'VXQ400F1OBWAVPBJP86','594918104','BBG000B3YB97','US38259P5088', '123456789012345678'), -- invalid email -- invalid usa_state_code -- invalid lei -- invalid perm_id
+                    'VXQ400F1OBWAVPBJP86','594918104','BBG000B3YB97','US38259P5088', '123456789012345678', -150.0), -- invalid email -- invalid usa_state_code -- invalid lei -- invalid perm_id
                 ('hawk eye', '{(utc_now - datetime.timedelta(days=5)).strftime("%Y-%m-%d")}',
                     40, 60, 'bow', '1739c676-6108-4dd2-8984-2459df746', '+1 123-456-7890',
                     'user@@example.com', 'TX', '30301', 51.1657, 10.4515,'123-45-67890','12345',
-                    'FKRD00GCEYWDCNYLNF60','38259P508','BBG000B57Y12','US83165F1026', '5647382910564738291'), -- invalid weapon_id --invalid email -- invalid ssn -- invalid sedol
+                    'FKRD00GCEYWDCNYLNF60','38259P508','BBG000B57Y12','US83165F1026', '5647382910564738291', 50.0), -- invalid weapon_id --invalid email -- invalid ssn -- invalid sedol
                 ('clark kent', '{(utc_now - datetime.timedelta(days=6)).strftime("%Y-%m-%d")}',
                     35, 50, '', '7be61b2c-45dc-4889-97e3-9202e8', '09123.456.7890',
                     'contact@company.org', 'ZZ', '123456', 51.5074, -0.1278,'666-45-6789','34A56B7',
-                    '6R5J00FMIANQQH6JMN56','83165F102','BBG000B9XRY','US0231351067', '1234-5678-9012-3456-78X'), -- invalid weapon_id -- invalid phone -- invalid usa_state_code -- invalid usa_zip_code -- invalid ssn -- invalid sedol -- invalid figi -- invalid perm_id
+                    '6R5J00FMIANQQH6JMN56','83165F102','BBG000B9XRY','US0231351067', '1234-5678-9012-3456-78X', -25.0), -- invalid weapon_id -- invalid phone -- invalid usa_state_code -- invalid usa_zip_code -- invalid ssn -- invalid sedol -- invalid figi -- invalid perm_id
                 ('black widow', '{(utc_now - datetime.timedelta(days=6)).strftime("%Y-%m-%d")}',
                     35, 50, '', '7be61b2c-45dc-4889-97e3-9202e8032c73', '+1 (123) 456-7890',
                     'jane_smith123@domain.co.uk', 'FL', '90210', 483.8566, 2.3522,'001-01-0001','456VGHY',
-                    '0FPB00BBRHUYOE7DSK19','023135106','BBG000B6R530','US037833100', '2345-6789-0123-4567-890') -- invalid isin -- invalid sedol
+                    '0FPB00BBRHUYOE7DSK19','023135106','BBG000B6R530','US037833100', '2345-6789-0123-4567-890', 30.0) -- invalid isin -- invalid sedol
             """
 
             postgresql_connection.execute(text(insert_query))
@@ -578,7 +579,7 @@ def test_should_return_20th_percentile_age(
         percentile_20 = postgres_datasource.query_get_percentile(
             table=self.TABLE_NAME, field="age", percentile=0.2
         )
-        assert percentile_20 == 35  # Expected 20th percentile value.
+        assert percentile_20 == 35
 
     def test_should_return_40th_percentile_age(
         self, postgres_datasource: PostgresDataSource
@@ -586,7 +587,7 @@ def test_should_return_40th_percentile_age(
         percentile_40 = postgres_datasource.query_get_percentile(
             table=self.TABLE_NAME, field="age", percentile=0.4
         )
-        assert percentile_40 == 40  # Expected 40th percentile value.
+        assert percentile_40 == 40
 
     def test_should_return_60th_percentile_age(
         self, postgres_datasource: PostgresDataSource
@@ -594,7 +595,7 @@ def test_should_return_60th_percentile_age(
         percentile_60 = postgres_datasource.query_get_percentile(
             table=self.TABLE_NAME, field="age", percentile=0.6
         )
-        assert percentile_60 == 50  # Expected 60th percentile value.
+        assert percentile_60 == 50
 
     def test_should_return_80th_percentile_age(
         self, postgres_datasource: PostgresDataSource
@@ -602,7 +603,7 @@ def test_should_return_80th_percentile_age(
         percentile_80 = postgres_datasource.query_get_percentile(
             table=self.TABLE_NAME, field="age", percentile=0.8
         )
-        assert percentile_80 == 90  # Expected 80th percentile value.
+        assert percentile_80 == 90
 
     def test_should_return_90th_percentile_age(
         self, postgres_datasource: PostgresDataSource
@@ -610,4 +611,20 @@ def test_should_return_90th_percentile_age(
         percentile_90 = postgres_datasource.query_get_percentile(
             table=self.TABLE_NAME, field="age", percentile=0.9
         )
-        assert percentile_90 == 1500  # Expected 90th percentile value.
+        assert percentile_90 == 1500
+
+    def test_should_return_count_negative(
+        self, postgres_datasource: PostgresDataSource
+    ):
+        # Expects exactly three fixture rows with a negative price.
+        query = postgres_datasource.query_negative_metric
+        negatives = query(table=self.TABLE_NAME, field="price", operation="count")
+        assert negatives == 3
+
+    def test_should_return_percent_negative(
+        self, postgres_datasource: PostgresDataSource
+    ):
+        # Expects half of the fixture rows to have a negative price.
+        query = postgres_datasource.query_negative_metric
+        percent = query(table=self.TABLE_NAME, field="price", operation="percent")
+        assert round(percent, 2) == 50.0