diff --git a/sdmetrics/column_pairs/statistical/contingency_similarity.py b/sdmetrics/column_pairs/statistical/contingency_similarity.py index 5d2c801d..e41075e0 100644 --- a/sdmetrics/column_pairs/statistical/contingency_similarity.py +++ b/sdmetrics/column_pairs/statistical/contingency_similarity.py @@ -44,7 +44,7 @@ def compute(cls, real_data, synthetic_data): contingency_synthetic = synthetic.groupby(list(columns), dropna=False).size() / len( synthetic ) - combined_index = contingency_real.index.union(contingency_synthetic.index) + combined_index = contingency_real.index.union(contingency_synthetic.index, sort=False) contingency_synthetic = contingency_synthetic.reindex(combined_index, fill_value=0) contingency_real = contingency_real.reindex(combined_index, fill_value=0) diff = abs(contingency_real - contingency_synthetic).fillna(0) diff --git a/tests/integration/reports/multi_table/test_quality_report.py b/tests/integration/reports/multi_table/test_quality_report.py index 9c3b79b1..fb571e81 100644 --- a/tests/integration/reports/multi_table/test_quality_report.py +++ b/tests/integration/reports/multi_table/test_quality_report.py @@ -342,7 +342,7 @@ def test_quality_report_with_errors(): None, ], }) - assert score == 0.7249603174603174 + assert score == 0.7249603174603175 pd.testing.assert_frame_equal(properties, expected_properties) pd.testing.assert_frame_equal(details_column_shapes, expected_details) diff --git a/tests/unit/column_pairs/statistical/test_contingency_similarity.py b/tests/unit/column_pairs/statistical/test_contingency_similarity.py index 713aab7d..dc7bafdd 100644 --- a/tests/unit/column_pairs/statistical/test_contingency_similarity.py +++ b/tests/unit/column_pairs/statistical/test_contingency_similarity.py @@ -1,6 +1,7 @@ from unittest.mock import patch import pandas as pd +import pytest from sdmetrics.column_pairs.statistical import ContingencySimilarity @@ -53,3 +54,15 @@ def test_normalize(self, normalize_mock): # Assert normalize_mock.assert_called_once_with(raw_score) assert result == normalize_mock.return_value + + @pytest.mark.filterwarnings('error:.*The values in the array are unorderable.*:RuntimeWarning') + def test_no_runtime_warning_raised(self): + """Test that no RuntimeWarning warning is raised when the metric is computed.""" + # Setup + real_data = pd.DataFrame(data={'A': ['value'] * 4, 'B': ['1', '2', '3', pd.NA]}) + synthetic_data = pd.DataFrame(data={'A': ['value'] * 3, 'B': ['1', '2', pd.NA]}) + + # Run and Assert + ContingencySimilarity.compute( + real_data=real_data[['A', 'B']], synthetic_data=synthetic_data[['A', 'B']] + )