diff --git a/soda/core/soda/execution/metric/numeric_query_metric.py b/soda/core/soda/execution/metric/numeric_query_metric.py index acc51b1f0..5e7c70acf 100644 --- a/soda/core/soda/execution/metric/numeric_query_metric.py +++ b/soda/core/soda/execution/metric/numeric_query_metric.py @@ -306,7 +306,7 @@ def create_failed_rows_sample_query(self) -> SampleQuery | None: passing_where_clauses.append(resolved_filter) if self.name == "missing_count": - where_clauses.append(self.build_missing_condition()) + where_clauses.append(f"({self.build_missing_condition()})") passing_where_clauses.append(f"NOT ({self.build_missing_condition()})") elif self.name == "invalid_count": where_clauses.append(f"NOT ({self.build_missing_condition()})") @@ -315,16 +315,16 @@ def create_failed_rows_sample_query(self) -> SampleQuery | None: valid_condition = self.build_valid_condition() if valid_condition: where_clauses.append(f"NOT ({valid_condition})") - passing_where_clauses.append(valid_condition) + passing_where_clauses.append(f"({valid_condition})") invalid_condition = self.build_invalid_condition() if invalid_condition: passing_where_clauses.append(f"NOT ({invalid_condition})") - where_clauses.append(invalid_condition) + where_clauses.append(f"({invalid_condition})") if self.filter: - where_clauses.append(self.filter) - passing_where_clauses.append(self.filter) + where_clauses.append(f"({self.filter})") + passing_where_clauses.append(f"({self.filter})") where_sql = " AND ".join(where_clauses) passing_where_sql = " AND ".join(passing_where_clauses) diff --git a/soda/core/tests/data_source/test_metric_check_filter.py b/soda/core/tests/data_source/test_metric_check_filter.py index bfaac00e8..74c12fe9a 100644 --- a/soda/core/tests/data_source/test_metric_check_filter.py +++ b/soda/core/tests/data_source/test_metric_check_filter.py @@ -39,6 +39,31 @@ def test_missing_filtered(data_source_fixture: DataSourceFixture): scan.assert_all_checks_pass() +def test_missing_filtered_sample_query(data_source_fixture: DataSourceFixture): + table_name = data_source_fixture.ensure_test_table(customers_test_table) + + # Row count is 10 + scan = data_source_fixture.create_test_scan() + mock_soda_cloud = scan.enable_mock_soda_cloud() + scan.add_sodacl_yaml_str( + f""" + checks for {table_name}: + - missing_count(pct) = 1: + missing values: [No value, N/A, error] + filter: country = 'NL' + """ + ) + scan.execute() + + scan.assert_all_checks_fail() + + failing_rows_query_condition = mock_soda_cloud.find_failed_rows_sample_query(0, "failingRowsQueryName") + assert "(pct is null or pct in ('no value','n/a','error')) and (country = 'nl')" in failing_rows_query_condition + + passing_rows_query_condition = mock_soda_cloud.find_failed_rows_sample_query(0, "passingRowsQueryName") + assert "not (pct is null or pct in ('no value','n/a','error')) and (country = 'nl')" in passing_rows_query_condition + + @pytest.mark.skipif( test_data_source == "sqlserver", reason="Full regex support is not supported by SQLServer. 'Percentage' format is supported but with limited functionality.", diff --git a/soda/core/tests/data_source/test_pass_fail_queries.py b/soda/core/tests/data_source/test_pass_fail_queries.py index b868421ca..60d2310d9 100644 --- a/soda/core/tests/data_source/test_pass_fail_queries.py +++ b/soda/core/tests/data_source/test_pass_fail_queries.py @@ -115,11 +115,11 @@ def test_with_multiple_aggregate_checks(data_source_fixture: DataSourceFixture): assert "cat.failed_rows[missing_count].failing_sql" in block_cat["failingRowsQueryName"] assert "cat.failed_rows[missing_count].passing_sql" in block_cat["passingRowsQueryName"] cat_failing_query = _find_query_by_name(result["queries"], block_cat["failingRowsQueryName"]) - assert "where cat is null" in cat_failing_query["sql"].lower() + assert "where (cat is null)" in cat_failing_query["sql"].lower() block_id = mock_soda_cloud.find_failed_rows_diagnostics_block(1) assert block_cat["type"] == "failedRowsAnalysis" assert "id.failed_rows[missing_count].failing_sql" in block_id["failingRowsQueryName"] assert "id.failed_rows[missing_count].passing_sql" in block_id["passingRowsQueryName"] id_failing_query = _find_query_by_name(result["queries"], block_id["failingRowsQueryName"]) - assert "where id is null" in id_failing_query["sql"].lower() + assert "where (id is null)" in id_failing_query["sql"].lower() diff --git a/soda/core/tests/helpers/mock_soda_cloud.py b/soda/core/tests/helpers/mock_soda_cloud.py index 56a6bad16..345367821 100644 --- a/soda/core/tests/helpers/mock_soda_cloud.py +++ b/soda/core/tests/helpers/mock_soda_cloud.py @@ -134,6 +134,16 @@ def get_check_attributes_schema(self): def pop_scan_result(self) -> dict: return self.scan_results.pop() + def find_queries(self, query_name: str): + assert len(self.scan_results) > 0 + scan_result = self.scan_results[0] + self.assert_key("queries", scan_result) + queries = scan_result["queries"] + for query in queries: + if query["name"] == query_name: + return query + return None + def find_check(self, check_index: int) -> dict | None: assert len(self.scan_results) > 0 scan_result = self.scan_results[0] @@ -175,6 +185,13 @@ def find_failed_rows_line_count(self, check_index: int) -> int: file_contents = self.find_failed_rows_content(check_index) return file_contents.count("\n") + def find_failed_rows_sample_query(self, check_index: int, query_type: str = "failingRowsQueryName"): + block = self.find_failed_rows_diagnostics_block(check_index) + assert block[query_type] + sample_query = self.find_queries(block[query_type]) + assert sample_query["sql"] + return sample_query["sql"].lower() + def assert_no_failed_rows_block_present(self, check_index: int): diagnostics = self.find_check_diagnostics(check_index)