Merge branch 'main' into issue-638-sequence-similarity

sdv-dev · Oct 28, 2024 · b9fab0e · b9fab0e
2 parents d5a3725 + f58f443
commit b9fab0e
Show file tree

Hide file tree

Showing 14 changed files with 29 additions and 32 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -76,7 +76,7 @@ dev = [
     'watchdog>=1.0.1,<5',
 
     # style check
-    'ruff>=0.3.2,<1',
+    'ruff>=0.3.2,<0.7.2',
 
     # distribute on PyPI
     'twine>=1.10.0,<6',
@@ -186,7 +186,7 @@ exclude = [
     ".git",
     "__pycache__",
     ".ipynb_checkpoints",
-    ".ipynb",
+    "*.ipynb",
     "tasks.py",
 ]
 
@@ -204,10 +204,11 @@ select = [
     # print statements
     "T201",
     # pandas-vet
-    "PD"
+    "PD",
+    # numpy 2.0
+    "NPY201"
 ]
 ignore = [
-    "E501",
     # pydocstyle
     "D107",  # Missing docstring in __init__
     "D417",   # Missing argument descriptions in the docstring, this is a bug from pydocstyle: https://github.com/PyCQA/pydocstyle/issues/449
@@ -229,7 +230,7 @@ lines-between-types = 0
 [tool.ruff.lint.per-file-ignores]
 "__init__.py" = ["F401", "E402", "F403", "F405", "E501", "I001"]
 "errors.py" = ["D105"]
-"tests/**.py" = ["D", "W505"]
+"tests/**.py" = ["D"]
 
 [tool.ruff.lint.pydocstyle]
 convention = "google"

diff --git a/sdmetrics/base.py b/sdmetrics/base.py
@@ -99,8 +99,7 @@ def normalize(cls, raw_score):
 
         if score is None or score < 0 or score > 1:
             raise AssertionError(
-                f'This should be unreachable. The score {score} should be'
-                f'a value between 0 and 1.'
+                f'This should be unreachable. The score {score} should be a value between 0 and 1.'
             )
 
         if cls.goal == Goal.MINIMIZE:

diff --git a/sdmetrics/reports/base_report.py b/sdmetrics/reports/base_report.py
@@ -50,7 +50,7 @@ def _validate_metadata_matches_data(self, real_data, synthetic_data, metadata):
             error_message = (
                 'The metadata does not match the data. The following columns are missing'
                 ' in the real/synthetic data or in the metadata: '
-                f"{', '.join(sorted(missing_columns))}"
+                f'{", ".join(sorted(missing_columns))}'
             )
             raise ValueError(error_message)
 
@@ -145,7 +145,8 @@ def generate(self, real_data, synthetic_data, metadata, verbose=True):
         if not isinstance(metadata, dict):
             raise TypeError(
                 f"Expected a dictionary but received a '{type(metadata).__name__}' instead."
-                " For SDV metadata objects, please use the 'to_dict' function to convert it to a dictionary."
+                " For SDV metadata objects, please use the 'to_dict' function to convert it"
+                ' to a dictionary.'
             )
 
         self._validate(real_data, synthetic_data, metadata)

diff --git a/sdmetrics/reports/single_table/plot_utils.py b/sdmetrics/reports/single_table/plot_utils.py
@@ -313,7 +313,7 @@ def get_column_pairs_plot(score_breakdowns, average_score=None):
             xaxis='x',
             yaxis='y',
             hovertemplate=(
-                '<b>Column Pair</b><br>(%{x},%{y})<br><br>Similarity: ' '%{z}<extra></extra>'
+                '<b>Column Pair</b><br>(%{x},%{y})<br><br>Similarity: %{z}<extra></extra>'
             ),
         ),
         1,

diff --git a/sdmetrics/reports/utils.py b/sdmetrics/reports/utils.py
@@ -222,7 +222,9 @@ def _validate_categorical_values(real_data, synthetic_data, metadata, table=None
             The name of the current table, if one exists
     """
     if table:
-        warning_format = 'Unexpected values ({values}) in column "{column}" ' f'and table "{table}"'
+        warning_format = (
+            f'Unexpected values ({{values}}) in column "{{column}}" and table "{table}"'
+        )
     else:
         warning_format = 'Unexpected values ({values}) in column "{column}"'
 

diff --git a/sdmetrics/single_table/privacy/base.py b/sdmetrics/single_table/privacy/base.py
@@ -366,5 +366,5 @@ def score(self, key_data, sensitive_data):
                 The sensitive data.
         """
         raise NotImplementedError(
-            'Posterior probability based scoring not supported' 'for this attacker!'
+            'Posterior probability based scoring not supported for this attacker!'
         )
diff --git a/sdmetrics/single_table/privacy/util.py b/sdmetrics/single_table/privacy/util.py
@@ -70,7 +70,7 @@ def hamming_distance(target, test):
     """
     dist = 0
     assert len(target) == len(test), (
-        'Tuples must have the same length in the' 'calculation of hamming distance!'
+        'Tuples must have the same length in the calculation of hamming distance!'
     )
 
     for target_entry, test_entry in zip(target, test):

diff --git a/sdmetrics/visualization.py b/sdmetrics/visualization.py
@@ -32,7 +32,7 @@ def wrapper(*args, **kwargs):
             ipython_interpreter = str(get_ipython())
             if 'ZMQInteractiveShell' in ipython_interpreter and 'iframe' in renderers:
                 # This means we are using jupyter notebook
-                pio.renderers.default = 'iframe'
+                pio.renderers.default = 'vscode'
 
         except Exception:
             pass
@@ -554,8 +554,9 @@ def get_column_plot(real_data, synthetic_data, column_name, plot_type=None):
             plot_type = 'bar'
     elif plot_type == 'distplot' and column_is_constant:
         raise ValueError(
-            f"Plot type 'distplot' cannot be created because column '{column_name}' has a constant value inside"
-            " the real or synthetic data. To render a visualization, please update the plot_type to 'bar'."
+            f"Plot type 'distplot' cannot be created because column '{column_name}'"
+            ' has a constant value inside the real or synthetic data. To render a'
+            " visualization, please update the plot_type to 'bar'."
         )
 
     fig = _generate_column_plot(real_column, synthetic_column, plot_type)

diff --git a/tests/unit/multi_table/statistical/test_cardinality_shape_similarity.py b/tests/unit/multi_table/statistical/test_cardinality_shape_similarity.py
@@ -197,8 +197,7 @@ def test_compute_breakdown_no_relationships(self):
         assert result == expected_metric_breakdown
 
     @patch(
-        'sdmetrics.multi_table.statistical.cardinality_shape_similarity.MultiTableMetric.'
-        'normalize'
+        'sdmetrics.multi_table.statistical.cardinality_shape_similarity.MultiTableMetric.normalize'
     )
     def test_normalize(self, normalize_mock):
         """Test the ``normalize`` method.

diff --git a/tests/unit/reports/multi_table/_properties/test_structure.py b/tests/unit/reports/multi_table/_properties/test_structure.py
@@ -52,7 +52,7 @@ def test_get_visualization(mock_px):
         'x': 'Table',
         'y': 'Score',
         'title': (
-            'Data Diagnostic: Structure (Average ' f'Score={mock__compute_average.return_value})'
+            f'Data Diagnostic: Structure (Average Score={mock__compute_average.return_value})'
         ),
         'category_orders': {'group': mock_df['Table'].tolist()},
         'color': 'Metric',

diff --git a/tests/unit/reports/single_table/_properties/test_column_shapes.py b/tests/unit/reports/single_table/_properties/test_column_shapes.py
@@ -147,8 +147,7 @@ def test_get_visualization(self, mock_px):
             'x': 'Column',
             'y': 'Score',
             'title': (
-                'Data Quality: Column Shapes (Average '
-                f'Score={mock__compute_average.return_value})'
+                f'Data Quality: Column Shapes (Average Score={mock__compute_average.return_value})'
             ),
             'category_orders': {'group': mock_df['Column'].tolist()},
             'color': 'Metric',

diff --git a/tests/unit/reports/single_table/_properties/test_structure.py b/tests/unit/reports/single_table/_properties/test_structure.py
@@ -9,7 +9,7 @@
 
 
 class TestStructure:
-    @patch('sdmetrics.reports.single_table._properties.structure.' 'TableStructure.compute')
+    @patch('sdmetrics.reports.single_table._properties.structure.TableStructure.compute')
     def test__generate_details(self, table_format_mock):
         """Test the ``_generate_details`` method."""
         # Setup
@@ -49,7 +49,7 @@ def test__generate_details(self, table_format_mock):
         )
         pd.testing.assert_frame_equal(result, expected_details)
 
-    @patch('sdmetrics.reports.single_table._properties.structure.' 'TableStructure.compute')
+    @patch('sdmetrics.reports.single_table._properties.structure.TableStructure.compute')
     def test__generate_details_with_id_column(self, table_format_mock):
         """Test the ``_generate_details`` method."""
         # Setup
@@ -96,7 +96,7 @@ def test_get_visualization(self):
 
         # Run and Assert
         expected_message = (
-            'The single table Structure property does not have a' ' supported visualization.'
+            'The single table Structure property does not have a supported visualization.'
         )
         with pytest.raises(VisualizationUnavailableError, match=expected_message):
             structure_property.get_visualization()
diff --git a/tests/unit/reports/single_table/_properties/test_synthesis.py b/tests/unit/reports/single_table/_properties/test_synthesis.py
@@ -7,9 +7,7 @@
 
 
 class TestSynthesis:
-    @patch(
-        'sdmetrics.reports.single_table._properties.synthesis.' 'NewRowSynthesis.compute_breakdown'
-    )
+    @patch('sdmetrics.reports.single_table._properties.synthesis.NewRowSynthesis.compute_breakdown')
     def test__generate_details(self, newrowsynthesis_mock):
         """Test the ``_generate_details`` method.
 
@@ -64,9 +62,7 @@ def test__generate_details(self, newrowsynthesis_mock):
 
         pd.testing.assert_frame_equal(details, expected__details)
 
-    @patch(
-        'sdmetrics.reports.single_table._properties.synthesis.' 'NewRowSynthesis.compute_breakdown'
-    )
+    @patch('sdmetrics.reports.single_table._properties.synthesis.NewRowSynthesis.compute_breakdown')
     def test__generate_details_error(self, newrowsynthesis_mock):
         """Test the ``_generate_details`` method when the metric raises an error."""
         # Setup

diff --git a/tests/unit/single_column/statistical/test_category_adherence.py b/tests/unit/single_column/statistical/test_category_adherence.py
@@ -36,8 +36,7 @@ def test_compute_breakdown_with_nans(self):
         assert result == {'score': 0.9}
 
     @patch(
-        'sdmetrics.single_column.statistical.category_adherence.'
-        'CategoryAdherence.compute_breakdown'
+        'sdmetrics.single_column.statistical.category_adherence.CategoryAdherence.compute_breakdown'
     )
     def test_compute(self, compute_breakdown_mock):
         """Test the ``compute`` method."""