From f58f4437663fd721f91947ac2a46005ef7f90494 Mon Sep 17 00:00:00 2001 From: R-Palazzo <116157184+R-Palazzo@users.noreply.github.com> Date: Mon, 28 Oct 2024 10:07:26 -0400 Subject: [PATCH] Fix lint + Pin ruff version (#646) --- pyproject.toml | 11 ++++++----- sdmetrics/base.py | 3 +-- sdmetrics/reports/base_report.py | 5 +++-- sdmetrics/reports/single_table/plot_utils.py | 2 +- sdmetrics/reports/utils.py | 4 +++- sdmetrics/single_table/privacy/base.py | 2 +- sdmetrics/single_table/privacy/util.py | 2 +- sdmetrics/visualization.py | 7 ++++--- .../statistical/test_cardinality_shape_similarity.py | 3 +-- .../reports/multi_table/_properties/test_structure.py | 2 +- .../single_table/_properties/test_column_shapes.py | 3 +-- .../single_table/_properties/test_structure.py | 6 +++--- .../single_table/_properties/test_synthesis.py | 8 ++------ .../statistical/test_category_adherence.py | 3 +-- 14 files changed, 29 insertions(+), 32 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 23b42eb6..d64d78f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,7 +76,7 @@ dev = [ 'watchdog>=1.0.1,<5', # style check - 'ruff>=0.3.2,<1', + 'ruff>=0.3.2,<0.7.2', # distribute on PyPI 'twine>=1.10.0,<6', @@ -186,7 +186,7 @@ exclude = [ ".git", "__pycache__", ".ipynb_checkpoints", - ".ipynb", + "*.ipynb", "tasks.py", ] @@ -204,10 +204,11 @@ select = [ # print statements "T201", # pandas-vet - "PD" + "PD", + # numpy 2.0 + "NPY201" ] ignore = [ - "E501", # pydocstyle "D107", # Missing docstring in __init__ "D417", # Missing argument descriptions in the docstring, this is a bug from pydocstyle: https://github.com/PyCQA/pydocstyle/issues/449 @@ -229,7 +230,7 @@ lines-between-types = 0 [tool.ruff.lint.per-file-ignores] "__init__.py" = ["F401", "E402", "F403", "F405", "E501", "I001"] "errors.py" = ["D105"] -"tests/**.py" = ["D", "W505"] +"tests/**.py" = ["D"] [tool.ruff.lint.pydocstyle] convention = "google" diff --git a/sdmetrics/base.py b/sdmetrics/base.py index 
c8ac9033..650a76c0 100644 --- a/sdmetrics/base.py +++ b/sdmetrics/base.py @@ -99,8 +99,7 @@ def normalize(cls, raw_score): if score is None or score < 0 or score > 1: raise AssertionError( - f'This should be unreachable. The score {score} should be' - f'a value between 0 and 1.' + f'This should be unreachable. The score {score} should bea value between 0 and 1.' ) if cls.goal == Goal.MINIMIZE: diff --git a/sdmetrics/reports/base_report.py b/sdmetrics/reports/base_report.py index a179baed..614b8083 100644 --- a/sdmetrics/reports/base_report.py +++ b/sdmetrics/reports/base_report.py @@ -50,7 +50,7 @@ def _validate_metadata_matches_data(self, real_data, synthetic_data, metadata): error_message = ( 'The metadata does not match the data. The following columns are missing' ' in the real/synthetic data or in the metadata: ' - f"{', '.join(sorted(missing_columns))}" + f'{", ".join(sorted(missing_columns))}' ) raise ValueError(error_message) @@ -145,7 +145,8 @@ def generate(self, real_data, synthetic_data, metadata, verbose=True): if not isinstance(metadata, dict): raise TypeError( f"Expected a dictionary but received a '{type(metadata).__name__}' instead." - " For SDV metadata objects, please use the 'to_dict' function to convert it to a dictionary." + " For SDV metadata objects, please use the 'to_dict' function to convert it" + ' to a dictionary.' ) self._validate(real_data, synthetic_data, metadata) diff --git a/sdmetrics/reports/single_table/plot_utils.py b/sdmetrics/reports/single_table/plot_utils.py index 202cf060..995ccb85 100644 --- a/sdmetrics/reports/single_table/plot_utils.py +++ b/sdmetrics/reports/single_table/plot_utils.py @@ -313,7 +313,7 @@ def get_column_pairs_plot(score_breakdowns, average_score=None): xaxis='x', yaxis='y', hovertemplate=( - '<b>Column Pair</b><br>(%{x},%{y})<br><br>Similarity: ' '%{z}<extra></extra>' + '<b>Column Pair</b><br>(%{x},%{y})<br><br>Similarity: %{z}<extra></extra>' ), ), 1, diff --git a/sdmetrics/reports/utils.py b/sdmetrics/reports/utils.py index 66d80239..c019e0ab 100644 --- a/sdmetrics/reports/utils.py +++ b/sdmetrics/reports/utils.py @@ -222,7 +222,9 @@ def _validate_categorical_values(real_data, synthetic_data, metadata, table=None The name of the current table, if one exists """ if table: - warning_format = 'Unexpected values ({values}) in column "{column}" ' f'and table "{table}"' + warning_format = ( + f'Unexpected values ({{values}}) in column "{{column}}" and table "{table}"' + ) else: warning_format = 'Unexpected values ({values}) in column "{column}"' diff --git a/sdmetrics/single_table/privacy/base.py b/sdmetrics/single_table/privacy/base.py index e67327a9..08a8d600 100644 --- a/sdmetrics/single_table/privacy/base.py +++ b/sdmetrics/single_table/privacy/base.py @@ -366,5 +366,5 @@ def score(self, key_data, sensitive_data): The sensitive data. """ raise NotImplementedError( - 'Posterior probability based scoring not supported' 'for this attacker!' + 'Posterior probability based scoring not supportedfor this attacker!' ) diff --git a/sdmetrics/single_table/privacy/util.py b/sdmetrics/single_table/privacy/util.py index 9873c42b..918bd969 100644 --- a/sdmetrics/single_table/privacy/util.py +++ b/sdmetrics/single_table/privacy/util.py @@ -70,7 +70,7 @@ def hamming_distance(target, test): """ dist = 0 assert len(target) == len(test), ( - 'Tuples must have the same length in the' 'calculation of hamming distance!' + 'Tuples must have the same length in thecalculation of hamming distance!'
) for target_entry, test_entry in zip(target, test): diff --git a/sdmetrics/visualization.py b/sdmetrics/visualization.py index 52377991..24427b5a 100644 --- a/sdmetrics/visualization.py +++ b/sdmetrics/visualization.py @@ -32,7 +32,7 @@ def wrapper(*args, **kwargs): ipython_interpreter = str(get_ipython()) if 'ZMQInteractiveShell' in ipython_interpreter and 'iframe' in renderers: # This means we are using jupyter notebook - pio.renderers.default = 'iframe' + pio.renderers.default = 'vscode' except Exception: pass @@ -554,8 +554,9 @@ def get_column_plot(real_data, synthetic_data, column_name, plot_type=None): plot_type = 'bar' elif plot_type == 'distplot' and column_is_constant: raise ValueError( - f"Plot type 'distplot' cannot be created because column '{column_name}' has a constant value inside" - " the real or synthetic data. To render a visualization, please update the plot_type to 'bar'." + f"Plot type 'distplot' cannot be created because column '{column_name}'" + ' has a constant value inside the real or synthetic data. To render a' + " visualization, please update the plot_type to 'bar'." ) fig = _generate_column_plot(real_column, synthetic_column, plot_type) diff --git a/tests/unit/multi_table/statistical/test_cardinality_shape_similarity.py b/tests/unit/multi_table/statistical/test_cardinality_shape_similarity.py index 9fc41112..9234577f 100644 --- a/tests/unit/multi_table/statistical/test_cardinality_shape_similarity.py +++ b/tests/unit/multi_table/statistical/test_cardinality_shape_similarity.py @@ -197,8 +197,7 @@ def test_compute_breakdown_no_relationships(self): assert result == expected_metric_breakdown @patch( - 'sdmetrics.multi_table.statistical.cardinality_shape_similarity.MultiTableMetric.' - 'normalize' + 'sdmetrics.multi_table.statistical.cardinality_shape_similarity.MultiTableMetric.normalize' ) def test_normalize(self, normalize_mock): """Test the ``normalize`` method. 
diff --git a/tests/unit/reports/multi_table/_properties/test_structure.py b/tests/unit/reports/multi_table/_properties/test_structure.py index d8b1dc2e..81a14635 100644 --- a/tests/unit/reports/multi_table/_properties/test_structure.py +++ b/tests/unit/reports/multi_table/_properties/test_structure.py @@ -52,7 +52,7 @@ def test_get_visualization(mock_px): 'x': 'Table', 'y': 'Score', 'title': ( - 'Data Diagnostic: Structure (Average ' f'Score={mock__compute_average.return_value})' + f'Data Diagnostic: Structure (Average Score={mock__compute_average.return_value})' ), 'category_orders': {'group': mock_df['Table'].tolist()}, 'color': 'Metric', diff --git a/tests/unit/reports/single_table/_properties/test_column_shapes.py b/tests/unit/reports/single_table/_properties/test_column_shapes.py index 2aea852a..f9c688cf 100644 --- a/tests/unit/reports/single_table/_properties/test_column_shapes.py +++ b/tests/unit/reports/single_table/_properties/test_column_shapes.py @@ -147,8 +147,7 @@ def test_get_visualization(self, mock_px): 'x': 'Column', 'y': 'Score', 'title': ( - 'Data Quality: Column Shapes (Average ' - f'Score={mock__compute_average.return_value})' + f'Data Quality: Column Shapes (Average Score={mock__compute_average.return_value})' ), 'category_orders': {'group': mock_df['Column'].tolist()}, 'color': 'Metric', diff --git a/tests/unit/reports/single_table/_properties/test_structure.py b/tests/unit/reports/single_table/_properties/test_structure.py index cd82a7ed..32869132 100644 --- a/tests/unit/reports/single_table/_properties/test_structure.py +++ b/tests/unit/reports/single_table/_properties/test_structure.py @@ -9,7 +9,7 @@ class TestStructure: - @patch('sdmetrics.reports.single_table._properties.structure.' 
'TableStructure.compute') + @patch('sdmetrics.reports.single_table._properties.structure.TableStructure.compute') def test__generate_details(self, table_format_mock): """Test the ``_generate_details`` method.""" # Setup @@ -49,7 +49,7 @@ def test__generate_details(self, table_format_mock): ) pd.testing.assert_frame_equal(result, expected_details) - @patch('sdmetrics.reports.single_table._properties.structure.' 'TableStructure.compute') + @patch('sdmetrics.reports.single_table._properties.structure.TableStructure.compute') def test__generate_details_with_id_column(self, table_format_mock): """Test the ``_generate_details`` method.""" # Setup @@ -96,7 +96,7 @@ def test_get_visualization(self): # Run and Assert expected_message = ( - 'The single table Structure property does not have a' ' supported visualization.' + 'The single table Structure property does not have a supported visualization.' ) with pytest.raises(VisualizationUnavailableError, match=expected_message): structure_property.get_visualization() diff --git a/tests/unit/reports/single_table/_properties/test_synthesis.py b/tests/unit/reports/single_table/_properties/test_synthesis.py index 7b8da813..68a4eb7c 100644 --- a/tests/unit/reports/single_table/_properties/test_synthesis.py +++ b/tests/unit/reports/single_table/_properties/test_synthesis.py @@ -7,9 +7,7 @@ class TestSynthesis: - @patch( - 'sdmetrics.reports.single_table._properties.synthesis.' 'NewRowSynthesis.compute_breakdown' - ) + @patch('sdmetrics.reports.single_table._properties.synthesis.NewRowSynthesis.compute_breakdown') def test__generate_details(self, newrowsynthesis_mock): """Test the ``_generate_details`` method. @@ -64,9 +62,7 @@ def test__generate_details(self, newrowsynthesis_mock): pd.testing.assert_frame_equal(details, expected__details) - @patch( - 'sdmetrics.reports.single_table._properties.synthesis.' 
'NewRowSynthesis.compute_breakdown' - ) + @patch('sdmetrics.reports.single_table._properties.synthesis.NewRowSynthesis.compute_breakdown') def test__generate_details_error(self, newrowsynthesis_mock): """Test the ``_generate_details`` method when the metric raises an error.""" # Setup diff --git a/tests/unit/single_column/statistical/test_category_adherence.py b/tests/unit/single_column/statistical/test_category_adherence.py index f4af0bd8..c77f0820 100644 --- a/tests/unit/single_column/statistical/test_category_adherence.py +++ b/tests/unit/single_column/statistical/test_category_adherence.py @@ -36,8 +36,7 @@ def test_compute_breakdown_with_nans(self): assert result == {'score': 0.9} @patch( - 'sdmetrics.single_column.statistical.category_adherence.' - 'CategoryAdherence.compute_breakdown' + 'sdmetrics.single_column.statistical.category_adherence.CategoryAdherence.compute_breakdown' ) def test_compute(self, compute_breakdown_mock): """Test the ``compute`` method."""