From f58f4437663fd721f91947ac2a46005ef7f90494 Mon Sep 17 00:00:00 2001
From: R-Palazzo <116157184+R-Palazzo@users.noreply.github.com>
Date: Mon, 28 Oct 2024 10:07:26 -0400
Subject: [PATCH] Fix lint + Pin ruff version (#646)
---
pyproject.toml | 11 ++++++-----
sdmetrics/base.py | 3 +--
sdmetrics/reports/base_report.py | 5 +++--
sdmetrics/reports/single_table/plot_utils.py | 2 +-
sdmetrics/reports/utils.py | 4 +++-
sdmetrics/single_table/privacy/base.py | 2 +-
sdmetrics/single_table/privacy/util.py | 2 +-
sdmetrics/visualization.py | 7 ++++---
.../statistical/test_cardinality_shape_similarity.py | 3 +--
.../reports/multi_table/_properties/test_structure.py | 2 +-
.../single_table/_properties/test_column_shapes.py | 3 +--
.../single_table/_properties/test_structure.py | 6 +++---
.../single_table/_properties/test_synthesis.py | 8 ++------
.../statistical/test_category_adherence.py | 3 +--
14 files changed, 29 insertions(+), 32 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index 23b42eb6..d64d78f7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -76,7 +76,7 @@ dev = [
'watchdog>=1.0.1,<5',
# style check
- 'ruff>=0.3.2,<1',
+ 'ruff>=0.3.2,<0.7.2',
# distribute on PyPI
'twine>=1.10.0,<6',
@@ -186,7 +186,7 @@ exclude = [
".git",
"__pycache__",
".ipynb_checkpoints",
- ".ipynb",
+ "*.ipynb",
"tasks.py",
]
@@ -204,10 +204,11 @@ select = [
# print statements
"T201",
# pandas-vet
- "PD"
+ "PD",
+ # numpy 2.0
+ "NPY201"
]
ignore = [
- "E501",
# pydocstyle
"D107", # Missing docstring in __init__
"D417", # Missing argument descriptions in the docstring, this is a bug from pydocstyle: https://github.com/PyCQA/pydocstyle/issues/449
@@ -229,7 +230,7 @@ lines-between-types = 0
[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401", "E402", "F403", "F405", "E501", "I001"]
"errors.py" = ["D105"]
-"tests/**.py" = ["D", "W505"]
+"tests/**.py" = ["D"]
[tool.ruff.lint.pydocstyle]
convention = "google"
diff --git a/sdmetrics/base.py b/sdmetrics/base.py
index c8ac9033..650a76c0 100644
--- a/sdmetrics/base.py
+++ b/sdmetrics/base.py
@@ -99,8 +99,7 @@ def normalize(cls, raw_score):
if score is None or score < 0 or score > 1:
raise AssertionError(
- f'This should be unreachable. The score {score} should be'
- f'a value between 0 and 1.'
+ f'This should be unreachable. The score {score} should be a value between 0 and 1.'
)
if cls.goal == Goal.MINIMIZE:
diff --git a/sdmetrics/reports/base_report.py b/sdmetrics/reports/base_report.py
index a179baed..614b8083 100644
--- a/sdmetrics/reports/base_report.py
+++ b/sdmetrics/reports/base_report.py
@@ -50,7 +50,7 @@ def _validate_metadata_matches_data(self, real_data, synthetic_data, metadata):
error_message = (
'The metadata does not match the data. The following columns are missing'
' in the real/synthetic data or in the metadata: '
- f"{', '.join(sorted(missing_columns))}"
+ f'{", ".join(sorted(missing_columns))}'
)
raise ValueError(error_message)
@@ -145,7 +145,8 @@ def generate(self, real_data, synthetic_data, metadata, verbose=True):
if not isinstance(metadata, dict):
raise TypeError(
f"Expected a dictionary but received a '{type(metadata).__name__}' instead."
- " For SDV metadata objects, please use the 'to_dict' function to convert it to a dictionary."
+ " For SDV metadata objects, please use the 'to_dict' function to convert it"
+ ' to a dictionary.'
)
self._validate(real_data, synthetic_data, metadata)
diff --git a/sdmetrics/reports/single_table/plot_utils.py b/sdmetrics/reports/single_table/plot_utils.py
index 202cf060..995ccb85 100644
--- a/sdmetrics/reports/single_table/plot_utils.py
+++ b/sdmetrics/reports/single_table/plot_utils.py
@@ -313,7 +313,7 @@ def get_column_pairs_plot(score_breakdowns, average_score=None):
xaxis='x',
yaxis='y',
hovertemplate=(
- 'Column Pair<br>(%{x},%{y})<br>Similarity: ' '%{z}'
+ 'Column Pair<br>(%{x},%{y})<br>Similarity: %{z}'
),
),
1,
diff --git a/sdmetrics/reports/utils.py b/sdmetrics/reports/utils.py
index 66d80239..c019e0ab 100644
--- a/sdmetrics/reports/utils.py
+++ b/sdmetrics/reports/utils.py
@@ -222,7 +222,9 @@ def _validate_categorical_values(real_data, synthetic_data, metadata, table=None
The name of the current table, if one exists
"""
if table:
- warning_format = 'Unexpected values ({values}) in column "{column}" ' f'and table "{table}"'
+ warning_format = (
+ f'Unexpected values ({{values}}) in column "{{column}}" and table "{table}"'
+ )
else:
warning_format = 'Unexpected values ({values}) in column "{column}"'
diff --git a/sdmetrics/single_table/privacy/base.py b/sdmetrics/single_table/privacy/base.py
index e67327a9..08a8d600 100644
--- a/sdmetrics/single_table/privacy/base.py
+++ b/sdmetrics/single_table/privacy/base.py
@@ -366,5 +366,5 @@ def score(self, key_data, sensitive_data):
The sensitive data.
"""
raise NotImplementedError(
- 'Posterior probability based scoring not supported' 'for this attacker!'
+ 'Posterior probability based scoring not supported for this attacker!'
)
diff --git a/sdmetrics/single_table/privacy/util.py b/sdmetrics/single_table/privacy/util.py
index 9873c42b..918bd969 100644
--- a/sdmetrics/single_table/privacy/util.py
+++ b/sdmetrics/single_table/privacy/util.py
@@ -70,7 +70,7 @@ def hamming_distance(target, test):
"""
dist = 0
assert len(target) == len(test), (
- 'Tuples must have the same length in the' 'calculation of hamming distance!'
+ 'Tuples must have the same length in the calculation of hamming distance!'
)
for target_entry, test_entry in zip(target, test):
diff --git a/sdmetrics/visualization.py b/sdmetrics/visualization.py
index 52377991..24427b5a 100644
--- a/sdmetrics/visualization.py
+++ b/sdmetrics/visualization.py
@@ -32,7 +32,7 @@ def wrapper(*args, **kwargs):
ipython_interpreter = str(get_ipython())
if 'ZMQInteractiveShell' in ipython_interpreter and 'iframe' in renderers:
# This means we are using jupyter notebook
- pio.renderers.default = 'iframe'
+ pio.renderers.default = 'vscode'
except Exception:
pass
@@ -554,8 +554,9 @@ def get_column_plot(real_data, synthetic_data, column_name, plot_type=None):
plot_type = 'bar'
elif plot_type == 'distplot' and column_is_constant:
raise ValueError(
- f"Plot type 'distplot' cannot be created because column '{column_name}' has a constant value inside"
- " the real or synthetic data. To render a visualization, please update the plot_type to 'bar'."
+ f"Plot type 'distplot' cannot be created because column '{column_name}'"
+ ' has a constant value inside the real or synthetic data. To render a'
+ " visualization, please update the plot_type to 'bar'."
)
fig = _generate_column_plot(real_column, synthetic_column, plot_type)
diff --git a/tests/unit/multi_table/statistical/test_cardinality_shape_similarity.py b/tests/unit/multi_table/statistical/test_cardinality_shape_similarity.py
index 9fc41112..9234577f 100644
--- a/tests/unit/multi_table/statistical/test_cardinality_shape_similarity.py
+++ b/tests/unit/multi_table/statistical/test_cardinality_shape_similarity.py
@@ -197,8 +197,7 @@ def test_compute_breakdown_no_relationships(self):
assert result == expected_metric_breakdown
@patch(
- 'sdmetrics.multi_table.statistical.cardinality_shape_similarity.MultiTableMetric.'
- 'normalize'
+ 'sdmetrics.multi_table.statistical.cardinality_shape_similarity.MultiTableMetric.normalize'
)
def test_normalize(self, normalize_mock):
"""Test the ``normalize`` method.
diff --git a/tests/unit/reports/multi_table/_properties/test_structure.py b/tests/unit/reports/multi_table/_properties/test_structure.py
index d8b1dc2e..81a14635 100644
--- a/tests/unit/reports/multi_table/_properties/test_structure.py
+++ b/tests/unit/reports/multi_table/_properties/test_structure.py
@@ -52,7 +52,7 @@ def test_get_visualization(mock_px):
'x': 'Table',
'y': 'Score',
'title': (
- 'Data Diagnostic: Structure (Average ' f'Score={mock__compute_average.return_value})'
+ f'Data Diagnostic: Structure (Average Score={mock__compute_average.return_value})'
),
'category_orders': {'group': mock_df['Table'].tolist()},
'color': 'Metric',
diff --git a/tests/unit/reports/single_table/_properties/test_column_shapes.py b/tests/unit/reports/single_table/_properties/test_column_shapes.py
index 2aea852a..f9c688cf 100644
--- a/tests/unit/reports/single_table/_properties/test_column_shapes.py
+++ b/tests/unit/reports/single_table/_properties/test_column_shapes.py
@@ -147,8 +147,7 @@ def test_get_visualization(self, mock_px):
'x': 'Column',
'y': 'Score',
'title': (
- 'Data Quality: Column Shapes (Average '
- f'Score={mock__compute_average.return_value})'
+ f'Data Quality: Column Shapes (Average Score={mock__compute_average.return_value})'
),
'category_orders': {'group': mock_df['Column'].tolist()},
'color': 'Metric',
diff --git a/tests/unit/reports/single_table/_properties/test_structure.py b/tests/unit/reports/single_table/_properties/test_structure.py
index cd82a7ed..32869132 100644
--- a/tests/unit/reports/single_table/_properties/test_structure.py
+++ b/tests/unit/reports/single_table/_properties/test_structure.py
@@ -9,7 +9,7 @@
class TestStructure:
- @patch('sdmetrics.reports.single_table._properties.structure.' 'TableStructure.compute')
+ @patch('sdmetrics.reports.single_table._properties.structure.TableStructure.compute')
def test__generate_details(self, table_format_mock):
"""Test the ``_generate_details`` method."""
# Setup
@@ -49,7 +49,7 @@ def test__generate_details(self, table_format_mock):
)
pd.testing.assert_frame_equal(result, expected_details)
- @patch('sdmetrics.reports.single_table._properties.structure.' 'TableStructure.compute')
+ @patch('sdmetrics.reports.single_table._properties.structure.TableStructure.compute')
def test__generate_details_with_id_column(self, table_format_mock):
"""Test the ``_generate_details`` method."""
# Setup
@@ -96,7 +96,7 @@ def test_get_visualization(self):
# Run and Assert
expected_message = (
- 'The single table Structure property does not have a' ' supported visualization.'
+ 'The single table Structure property does not have a supported visualization.'
)
with pytest.raises(VisualizationUnavailableError, match=expected_message):
structure_property.get_visualization()
diff --git a/tests/unit/reports/single_table/_properties/test_synthesis.py b/tests/unit/reports/single_table/_properties/test_synthesis.py
index 7b8da813..68a4eb7c 100644
--- a/tests/unit/reports/single_table/_properties/test_synthesis.py
+++ b/tests/unit/reports/single_table/_properties/test_synthesis.py
@@ -7,9 +7,7 @@
class TestSynthesis:
- @patch(
- 'sdmetrics.reports.single_table._properties.synthesis.' 'NewRowSynthesis.compute_breakdown'
- )
+ @patch('sdmetrics.reports.single_table._properties.synthesis.NewRowSynthesis.compute_breakdown')
def test__generate_details(self, newrowsynthesis_mock):
"""Test the ``_generate_details`` method.
@@ -64,9 +62,7 @@ def test__generate_details(self, newrowsynthesis_mock):
pd.testing.assert_frame_equal(details, expected__details)
- @patch(
- 'sdmetrics.reports.single_table._properties.synthesis.' 'NewRowSynthesis.compute_breakdown'
- )
+ @patch('sdmetrics.reports.single_table._properties.synthesis.NewRowSynthesis.compute_breakdown')
def test__generate_details_error(self, newrowsynthesis_mock):
"""Test the ``_generate_details`` method when the metric raises an error."""
# Setup
diff --git a/tests/unit/single_column/statistical/test_category_adherence.py b/tests/unit/single_column/statistical/test_category_adherence.py
index f4af0bd8..c77f0820 100644
--- a/tests/unit/single_column/statistical/test_category_adherence.py
+++ b/tests/unit/single_column/statistical/test_category_adherence.py
@@ -36,8 +36,7 @@ def test_compute_breakdown_with_nans(self):
assert result == {'score': 0.9}
@patch(
- 'sdmetrics.single_column.statistical.category_adherence.'
- 'CategoryAdherence.compute_breakdown'
+ 'sdmetrics.single_column.statistical.category_adherence.CategoryAdherence.compute_breakdown'
)
def test_compute(self, compute_breakdown_mock):
"""Test the ``compute`` method."""