From 7b4fc9fd3cfc7b2ac38dedef9d212674265fecb2 Mon Sep 17 00:00:00 2001 From: John La Date: Wed, 26 Jun 2024 11:12:33 -0500 Subject: [PATCH] Fix tests failing due to scipy 1.14 (#601) --- pyproject.toml | 6 +-- .../multi_table/test_quality_report.py | 2 +- .../_properties/test_column_pair_trends.py | 17 +++++-- .../single_table/test_quality_report.py | 44 +++++++++++-------- tests/utils.py | 7 +++ 5 files changed, 50 insertions(+), 26 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0e4603b0..eb62b32b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,9 +30,9 @@ dependencies = [ "scikit-learn>=1.1.0;python_version>='3.10' and python_version<'3.11'", "scikit-learn>=1.1.3;python_version>='3.11' and python_version<'3.12'", "scikit-learn>=1.3.1;python_version>='3.12'", - "scipy>=1.7.3,<1.14.0;python_version<'3.10'", - "scipy>=1.9.2,<1.14.0;python_version>='3.10' and python_version<'3.12'", - "scipy>=1.12.0,<1.14.0;python_version>='3.12'", + "scipy>=1.7.3;python_version<'3.10'", + "scipy>=1.9.2;python_version>='3.10' and python_version<'3.12'", + "scipy>=1.12.0;python_version>='3.12'", 'copulas>=0.11.0', 'tqdm>=4.29', 'plotly>=5.19.0', diff --git a/tests/integration/reports/multi_table/test_quality_report.py b/tests/integration/reports/multi_table/test_quality_report.py index eba1d3fe..07593d9f 100644 --- a/tests/integration/reports/multi_table/test_quality_report.py +++ b/tests/integration/reports/multi_table/test_quality_report.py @@ -107,7 +107,7 @@ def test_multi_table_quality_report(): details.append(report.get_details(property_)) # Assert score - assert score == 0.649582127409184 + assert round(score, 15) == 0.649582127409184 pd.testing.assert_frame_equal( properties, pd.DataFrame({ diff --git a/tests/integration/reports/single_table/_properties/test_column_pair_trends.py b/tests/integration/reports/single_table/_properties/test_column_pair_trends.py index 3f34ed07..ef6bd116 100644 --- 
a/tests/integration/reports/single_table/_properties/test_column_pair_trends.py +++ b/tests/integration/reports/single_table/_properties/test_column_pair_trends.py @@ -1,5 +1,6 @@ import numpy as np import pandas as pd +from tests.utils import get_error_type from sdmetrics.demos import load_demo from sdmetrics.reports.single_table._properties.column_pair_trends import ColumnPairTrends @@ -79,17 +80,25 @@ def test_get_score_warnings(self, recwarn): # Run column_pair_trends = ColumnPairTrends() - exp_message_1 = "ValueError: could not convert string to float: 'a'" + exp_message_1 = 'ValueError' - exp_message_2 = "TypeError: '<=' not supported between instances of 'float' and 'str'" + exp_message_2 = 'TypeError' - exp_error_serie = pd.Series([exp_message_1, None, None, exp_message_2, exp_message_2, None]) + exp_error_series = pd.Series([ + exp_message_1, + None, + None, + exp_message_2, + exp_message_2, + None, + ]) score = column_pair_trends.get_score(real_data, synthetic_data, metadata) # Assert details = column_pair_trends.details - pd.testing.assert_series_equal(details['Error'], exp_error_serie, check_names=False) + details['Error'] = details['Error'].apply(get_error_type) + pd.testing.assert_series_equal(details['Error'], exp_error_series, check_names=False) assert score == 0.7751937984496124 def test_only_categorical_columns(self): diff --git a/tests/integration/reports/single_table/test_quality_report.py b/tests/integration/reports/single_table/test_quality_report.py index 27698100..50891413 100644 --- a/tests/integration/reports/single_table/test_quality_report.py +++ b/tests/integration/reports/single_table/test_quality_report.py @@ -6,6 +6,7 @@ from sdmetrics.demos import load_demo from sdmetrics.reports.single_table import QualityReport +from tests.utils import get_error_type class TestQualityReport: @@ -262,7 +263,7 @@ def test_report_end_to_end_with_errors(self): 'Score': [0.6621621621621622, np.nan, 0.9953488372093023, 0.9395348837209302], 'Error': [ 
None, - "TypeError: '<' not supported between instances of 'str' and 'float'", + 'TypeError', None, None, ], @@ -304,23 +305,25 @@ def test_report_end_to_end_with_errors(self): 'Real Correlation': [np.nan] * 6, 'Synthetic Correlation': [np.nan] * 6, 'Error': [ - "ValueError: could not convert string to float: 'a'", + 'ValueError', None, None, - "TypeError: '<=' not supported between instances of 'float' and 'str'", - "TypeError: '<=' not supported between instances of 'float' and 'str'", + 'TypeError', + 'TypeError', None, ], } expected_details_column_shapes = pd.DataFrame(expected_details_column_shapes_dict) expected_details_cpt = pd.DataFrame(expected_details_cpt__dict) - pd.testing.assert_frame_equal( - report.get_details('Column Shapes'), expected_details_column_shapes - ) - pd.testing.assert_frame_equal( - report.get_details('Column Pair Trends'), expected_details_cpt - ) + # Errors may change based on versions of scipy installed. + col_shape_report = report.get_details('Column Shapes') + col_pair_report = report.get_details('Column Pair Trends') + col_shape_report['Error'] = col_shape_report['Error'].apply(get_error_type) + col_pair_report['Error'] = col_pair_report['Error'].apply(get_error_type) + + pd.testing.assert_frame_equal(col_shape_report, expected_details_column_shapes) + pd.testing.assert_frame_equal(col_pair_report, expected_details_cpt) assert report.get_score() == 0.8204378797402054 def test_report_with_column_nan(self): @@ -446,10 +449,10 @@ def test_report_with_column_nan(self): None, None, None, - 'ValueError: x and y must have length at least 2.', + 'ValueError', None, None, - 'ValueError: x and y must have length at least 2.', + 'ValueError', None, None, None, @@ -458,12 +461,17 @@ def test_report_with_column_nan(self): expected_details_column_shapes = pd.DataFrame(expected_details_column_shapes_dict) expected_details_cpt = pd.DataFrame(expected_details_cpt__dict) - pd.testing.assert_frame_equal( - report.get_details('Column Shapes'), 
expected_details_column_shapes - ) - pd.testing.assert_frame_equal( - report.get_details('Column Pair Trends'), expected_details_cpt - ) + col_shape_report = report.get_details('Column Shapes') + if 'Error' not in col_shape_report: + # Errors may not occur in certain scipy versions + expected_details_column_shapes.drop(columns=['Error'], inplace=True) + + # Errors may change based on versions of library installed. + col_pair_report = report.get_details('Column Pair Trends') + col_pair_report['Error'] = col_pair_report['Error'].apply(get_error_type) + + pd.testing.assert_frame_equal(col_shape_report, expected_details_column_shapes) + pd.testing.assert_frame_equal(col_pair_report, expected_details_cpt) def test_report_with_verbose(self, capsys): """Test the report with verbose. diff --git a/tests/utils.py b/tests/utils.py index fa583a27..a006ef43 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -40,3 +40,10 @@ def __eq__(self, other): """Assert equality by expanding the iterator.""" assert all(x == y for x, y in zip(self.iterator, other)) return True + + +def get_error_type(error): + """Return the error's type name (text before the first ':'), or None when there is no error.""" + if error is None: + return None + return error.partition(':')[0]