From 838c2001989a733602a4a41286691030c679bd6c Mon Sep 17 00:00:00 2001 From: R-Palazzo <116157184+R-Palazzo@users.noreply.github.com> Date: Thu, 31 Oct 2024 09:04:39 -0400 Subject: [PATCH 1/4] Run fix-lint during latest dependency check workflow (#650) --- .github/workflows/dependency_checker.yml | 1 + pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/dependency_checker.yml b/.github/workflows/dependency_checker.yml index 7900502e..589ce20d 100644 --- a/.github/workflows/dependency_checker.yml +++ b/.github/workflows/dependency_checker.yml @@ -16,6 +16,7 @@ jobs: run: | python -m pip install .[dev] make check-deps OUTPUT_FILEPATH=latest_requirements.txt + make fix-lint - name: Create pull request id: cpr uses: peter-evans/create-pull-request@v4 diff --git a/pyproject.toml b/pyproject.toml index d64d78f7..5a5bdeb5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,7 +76,7 @@ dev = [ 'watchdog>=1.0.1,<5', # style check - 'ruff>=0.3.2,<0.7.2', + 'ruff>=0.3.2,<1', # distribute on PyPI 'twine>=1.10.0,<6', From 27c7e1d6d10fef246c1f4080789f69cbbedf0ef4 Mon Sep 17 00:00:00 2001 From: SDV Team <98988753+sdv-team@users.noreply.github.com> Date: Mon, 4 Nov 2024 09:46:11 -0500 Subject: [PATCH 2/4] Automated Latest Dependency Updates (#651) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- latest_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/latest_requirements.txt b/latest_requirements.txt index 4bb9945c..691f176d 100644 --- a/latest_requirements.txt +++ b/latest_requirements.txt @@ -4,4 +4,4 @@ pandas==2.2.3 plotly==5.24.1 scikit-learn==1.5.2 scipy==1.13.1 -tqdm==4.66.5 +tqdm==4.66.6 From 369613ba3c83d1afc275438debc5cc88c0eb7e3c Mon Sep 17 00:00:00 2001 From: SDV Team <98988753+sdv-team@users.noreply.github.com> Date: Tue, 12 Nov 2024 09:10:12 -0500 Subject: [PATCH 3/4] Automated Latest Dependency Updates (#655) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- latest_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/latest_requirements.txt b/latest_requirements.txt index 691f176d..aed0be91 100644 --- a/latest_requirements.txt +++ b/latest_requirements.txt @@ -4,4 +4,4 @@ pandas==2.2.3 plotly==5.24.1 scikit-learn==1.5.2 scipy==1.13.1 -tqdm==4.66.6 +tqdm==4.67.0 From 838e81db5556630d82195d93c2d16a9bfa94c7a8 Mon Sep 17 00:00:00 2001 From: R-Palazzo <116157184+R-Palazzo@users.noreply.github.com> Date: Thu, 14 Nov 2024 13:34:43 -0500 Subject: [PATCH 4/4] When running Quality Report, ContingencySimilarity produces a RuntimeWarning (`The values in the array are unorderable.`) (#657) --- .../statistical/contingency_similarity.py | 2 +- .../reports/multi_table/test_quality_report.py | 2 +- .../statistical/test_contingency_similarity.py | 13 +++++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/sdmetrics/column_pairs/statistical/contingency_similarity.py b/sdmetrics/column_pairs/statistical/contingency_similarity.py index 5d2c801d..e41075e0 100644 --- a/sdmetrics/column_pairs/statistical/contingency_similarity.py +++ b/sdmetrics/column_pairs/statistical/contingency_similarity.py @@ -44,7 +44,7 @@ def compute(cls, real_data, synthetic_data): contingency_synthetic = synthetic.groupby(list(columns), dropna=False).size() / len( synthetic ) - combined_index = contingency_real.index.union(contingency_synthetic.index) + combined_index = contingency_real.index.union(contingency_synthetic.index, sort=False) contingency_synthetic = contingency_synthetic.reindex(combined_index, fill_value=0) contingency_real = contingency_real.reindex(combined_index, fill_value=0) diff = abs(contingency_real - contingency_synthetic).fillna(0) diff --git a/tests/integration/reports/multi_table/test_quality_report.py b/tests/integration/reports/multi_table/test_quality_report.py index 9c3b79b1..fb571e81 100644 --- a/tests/integration/reports/multi_table/test_quality_report.py +++ b/tests/integration/reports/multi_table/test_quality_report.py @@ -342,7 +342,7 @@ def test_quality_report_with_errors(): None, ], }) - assert score == 0.7249603174603174 + assert score == 0.7249603174603175 pd.testing.assert_frame_equal(properties, expected_properties) pd.testing.assert_frame_equal(details_column_shapes, expected_details) diff --git a/tests/unit/column_pairs/statistical/test_contingency_similarity.py b/tests/unit/column_pairs/statistical/test_contingency_similarity.py index 713aab7d..dc7bafdd 100644 --- a/tests/unit/column_pairs/statistical/test_contingency_similarity.py +++ b/tests/unit/column_pairs/statistical/test_contingency_similarity.py @@ -1,6 +1,7 @@ from unittest.mock import patch import pandas as pd +import pytest from sdmetrics.column_pairs.statistical import ContingencySimilarity @@ -53,3 +54,15 @@ def test_normalize(self, normalize_mock): # Assert normalize_mock.assert_called_once_with(raw_score) assert result == normalize_mock.return_value + + @pytest.mark.filterwarnings('error:.*The values in the array are unorderable.*:RuntimeWarning') + def test_no_runtime_warning_raised(self): + """Test that no RuntimeWarning warning is raised when the metric is computed.""" + # Setup + real_data = pd.DataFrame(data={'A': ['value'] * 4, 'B': ['1', '2', '3', pd.NA]}) + synthetic_data = pd.DataFrame(data={'A': ['value'] * 3, 'B': ['1', '2', pd.NA]}) + + # Run and Assert + ContingencySimilarity.compute( + real_data=real_data[['A', 'B']], synthetic_data=synthetic_data[['A', 'B']] + )