Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix wrong sample query for missing count. #2110

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions soda/core/soda/execution/metric/numeric_query_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ def create_failed_rows_sample_query(self) -> SampleQuery | None:
passing_where_clauses.append(resolved_filter)

if self.name == "missing_count":
where_clauses.append(self.build_missing_condition())
where_clauses.append(f"({self.build_missing_condition()})")
passing_where_clauses.append(f"NOT ({self.build_missing_condition()})")
elif self.name == "invalid_count":
where_clauses.append(f"NOT ({self.build_missing_condition()})")
Expand All @@ -315,16 +315,16 @@ def create_failed_rows_sample_query(self) -> SampleQuery | None:
valid_condition = self.build_valid_condition()
if valid_condition:
where_clauses.append(f"NOT ({valid_condition})")
passing_where_clauses.append(valid_condition)
passing_where_clauses.append(f"({valid_condition})")

invalid_condition = self.build_invalid_condition()
if invalid_condition:
passing_where_clauses.append(f"NOT ({invalid_condition})")
where_clauses.append(invalid_condition)
where_clauses.append(f"({invalid_condition})")

if self.filter:
where_clauses.append(self.filter)
passing_where_clauses.append(self.filter)
where_clauses.append(f"({self.filter})")
passing_where_clauses.append(f"({self.filter})")

where_sql = " AND ".join(where_clauses)
passing_where_sql = " AND ".join(passing_where_clauses)
Expand Down
25 changes: 25 additions & 0 deletions soda/core/tests/data_source/test_metric_check_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,31 @@ def test_missing_filtered(data_source_fixture: DataSourceFixture):
scan.assert_all_checks_pass()


def test_missing_filtered_sample_query(data_source_fixture: DataSourceFixture):
# Checks that the failing/passing sample queries produced for a filtered
# missing_count check wrap the missing condition and the check filter in
# their own parentheses before joining them with AND.
table_name = data_source_fixture.ensure_test_table(customers_test_table)

# Row count is 10
scan = data_source_fixture.create_test_scan()
mock_soda_cloud = scan.enable_mock_soda_cloud()
# missing_count on column pct with custom missing values, restricted by a
# check-level filter on country.
scan.add_sodacl_yaml_str(
f"""
checks for {table_name}:
- missing_count(pct) = 1:
missing values: [No value, N/A, error]
filter: country = 'NL'
"""
)
scan.execute()

# The scan is expected to report the check as failed, so that sample
# queries are looked up for check index 0 below.
scan.assert_all_checks_fail()

# find_failed_rows_sample_query returns the SQL lower-cased, hence the
# lower-case literals in the expected fragments.
failing_rows_query_condition = mock_soda_cloud.find_failed_rows_sample_query(0, "failingRowsQueryName")
assert "(pct is null or pct in ('no value','n/a','error')) and (country = 'nl')" in failing_rows_query_condition

# The passing-rows query must negate the whole parenthesised missing
# condition while keeping the same filter.
passing_rows_query_condition = mock_soda_cloud.find_failed_rows_sample_query(0, "passingRowsQueryName")
assert "not (pct is null or pct in ('no value','n/a','error')) and (country = 'nl')" in passing_rows_query_condition


@pytest.mark.skipif(
test_data_source == "sqlserver",
reason="Full regex support is not supported by SQLServer. 'Percentage' format is supported but with limited functionality.",
Expand Down
4 changes: 2 additions & 2 deletions soda/core/tests/data_source/test_pass_fail_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,11 @@ def test_with_multiple_aggregate_checks(data_source_fixture: DataSourceFixture):
assert "cat.failed_rows[missing_count].failing_sql" in block_cat["failingRowsQueryName"]
assert "cat.failed_rows[missing_count].passing_sql" in block_cat["passingRowsQueryName"]
cat_failing_query = _find_query_by_name(result["queries"], block_cat["failingRowsQueryName"])
assert "where cat is null" in cat_failing_query["sql"].lower()
assert "where (cat is null)" in cat_failing_query["sql"].lower()

block_id = mock_soda_cloud.find_failed_rows_diagnostics_block(1)
assert block_cat["type"] == "failedRowsAnalysis"
assert "id.failed_rows[missing_count].failing_sql" in block_id["failingRowsQueryName"]
assert "id.failed_rows[missing_count].passing_sql" in block_id["passingRowsQueryName"]
id_failing_query = _find_query_by_name(result["queries"], block_id["failingRowsQueryName"])
assert "where id is null" in id_failing_query["sql"].lower()
assert "where (id is null)" in id_failing_query["sql"].lower()
17 changes: 17 additions & 0 deletions soda/core/tests/helpers/mock_soda_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,16 @@ def get_check_attributes_schema(self):
def pop_scan_result(self) -> dict:
# Removes an entry from self.scan_results via pop() and returns it
# (the most recently added one if scan_results is a list — TODO confirm).
return self.scan_results.pop()

def find_queries(self, query_name: str) -> dict | None:
    """Return the query called *query_name* from the first recorded scan result.

    Args:
        query_name: Value to match against each query's ``"name"`` entry.

    Returns:
        The first matching query dict, or ``None`` when no query has that name.

    Raises:
        AssertionError: If no scan result has been recorded yet.
    """
    assert len(self.scan_results) > 0, "No scan results have been recorded"
    scan_result = self.scan_results[0]
    # Delegates the "queries" key check to the class's own assert_key helper.
    self.assert_key("queries", scan_result)
    return next(
        (query for query in scan_result["queries"] if query["name"] == query_name),
        None,
    )

def find_check(self, check_index: int) -> dict | None:
assert len(self.scan_results) > 0
scan_result = self.scan_results[0]
Expand Down Expand Up @@ -175,6 +185,13 @@ def find_failed_rows_line_count(self, check_index: int) -> int:
file_contents = self.find_failed_rows_content(check_index)
return file_contents.count("\n")

def find_failed_rows_sample_query(self, check_index: int, query_type: str = "failingRowsQueryName") -> str:
    """Return the lower-cased SQL of a failed-rows sample query for a check.

    Args:
        check_index: Index of the check whose failed-rows diagnostics block
            is looked up.
        query_type: Key in the diagnostics block holding the query name;
            the tests in this change use ``"failingRowsQueryName"`` and
            ``"passingRowsQueryName"``.

    Returns:
        The query's SQL text converted to lower case.

    Raises:
        AssertionError: If the block has no query name under *query_type*,
            no query with that name exists, or the query has no SQL.
    """
    block = self.find_failed_rows_diagnostics_block(check_index)
    query_name = block[query_type]
    assert query_name, f"Diagnostics block has an empty '{query_type}'"
    sample_query = self.find_queries(query_name)
    # find_queries returns None when nothing matches; fail with a clear
    # message instead of a TypeError from subscripting None.
    assert sample_query is not None, f"No query named '{query_name}' found in scan result"
    sql = sample_query["sql"]
    assert sql, f"Query '{query_name}' has no SQL"
    return sql.lower()

def assert_no_failed_rows_block_present(self, check_index: int):
diagnostics = self.find_check_diagnostics(check_index)

Expand Down
Loading