Skip to content

Commit

Permalink
Merge branch 'main' into issue-638-sequence-similarity
Browse files Browse the repository at this point in the history
  • Loading branch information
fealho authored Oct 28, 2024
2 parents d5a3725 + f58f443 commit b9fab0e
Show file tree
Hide file tree
Showing 14 changed files with 29 additions and 32 deletions.
11 changes: 6 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ dev = [
'watchdog>=1.0.1,<5',

# style check
'ruff>=0.3.2,<1',
'ruff>=0.3.2,<0.7.2',

# distribute on PyPI
'twine>=1.10.0,<6',
Expand Down Expand Up @@ -186,7 +186,7 @@ exclude = [
".git",
"__pycache__",
".ipynb_checkpoints",
".ipynb",
"*.ipynb",
"tasks.py",
]

Expand All @@ -204,10 +204,11 @@ select = [
# print statements
"T201",
# pandas-vet
"PD"
"PD",
# numpy 2.0
"NPY201"
]
ignore = [
"E501",
# pydocstyle
"D107", # Missing docstring in __init__
"D417", # Missing argument descriptions in the docstring, this is a bug from pydocstyle: https://github.com/PyCQA/pydocstyle/issues/449
Expand All @@ -229,7 +230,7 @@ lines-between-types = 0
[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401", "E402", "F403", "F405", "E501", "I001"]
"errors.py" = ["D105"]
"tests/**.py" = ["D", "W505"]
"tests/**.py" = ["D"]

[tool.ruff.lint.pydocstyle]
convention = "google"
Expand Down
3 changes: 1 addition & 2 deletions sdmetrics/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,7 @@ def normalize(cls, raw_score):

if score is None or score < 0 or score > 1:
raise AssertionError(
f'This should be unreachable. The score {score} should be'
f'a value between 0 and 1.'
f'This should be unreachable. The score {score} should be a value between 0 and 1.'
)

if cls.goal == Goal.MINIMIZE:
Expand Down
5 changes: 3 additions & 2 deletions sdmetrics/reports/base_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def _validate_metadata_matches_data(self, real_data, synthetic_data, metadata):
error_message = (
'The metadata does not match the data. The following columns are missing'
' in the real/synthetic data or in the metadata: '
f"{', '.join(sorted(missing_columns))}"
f'{", ".join(sorted(missing_columns))}'
)
raise ValueError(error_message)

Expand Down Expand Up @@ -145,7 +145,8 @@ def generate(self, real_data, synthetic_data, metadata, verbose=True):
if not isinstance(metadata, dict):
raise TypeError(
f"Expected a dictionary but received a '{type(metadata).__name__}' instead."
" For SDV metadata objects, please use the 'to_dict' function to convert it to a dictionary."
" For SDV metadata objects, please use the 'to_dict' function to convert it"
' to a dictionary.'
)

self._validate(real_data, synthetic_data, metadata)
Expand Down
2 changes: 1 addition & 1 deletion sdmetrics/reports/single_table/plot_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ def get_column_pairs_plot(score_breakdowns, average_score=None):
xaxis='x',
yaxis='y',
hovertemplate=(
'<b>Column Pair</b><br>(%{x},%{y})<br><br>Similarity: ' '%{z}<extra></extra>'
'<b>Column Pair</b><br>(%{x},%{y})<br><br>Similarity: %{z}<extra></extra>'
),
),
1,
Expand Down
4 changes: 3 additions & 1 deletion sdmetrics/reports/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,9 @@ def _validate_categorical_values(real_data, synthetic_data, metadata, table=None
The name of the current table, if one exists
"""
if table:
warning_format = 'Unexpected values ({values}) in column "{column}" ' f'and table "{table}"'
warning_format = (
f'Unexpected values ({{values}}) in column "{{column}}" and table "{table}"'
)
else:
warning_format = 'Unexpected values ({values}) in column "{column}"'

Expand Down
2 changes: 1 addition & 1 deletion sdmetrics/single_table/privacy/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,5 +366,5 @@ def score(self, key_data, sensitive_data):
The sensitive data.
"""
raise NotImplementedError(
'Posterior probability based scoring not supported' 'for this attacker!'
'Posterior probability based scoring not supported for this attacker!'
)
2 changes: 1 addition & 1 deletion sdmetrics/single_table/privacy/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def hamming_distance(target, test):
"""
dist = 0
assert len(target) == len(test), (
'Tuples must have the same length in the' 'calculation of hamming distance!'
'Tuples must have the same length in the calculation of hamming distance!'
)

for target_entry, test_entry in zip(target, test):
Expand Down
7 changes: 4 additions & 3 deletions sdmetrics/visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def wrapper(*args, **kwargs):
ipython_interpreter = str(get_ipython())
if 'ZMQInteractiveShell' in ipython_interpreter and 'iframe' in renderers:
# This means we are using jupyter notebook
pio.renderers.default = 'iframe'
pio.renderers.default = 'vscode'

except Exception:
pass
Expand Down Expand Up @@ -554,8 +554,9 @@ def get_column_plot(real_data, synthetic_data, column_name, plot_type=None):
plot_type = 'bar'
elif plot_type == 'distplot' and column_is_constant:
raise ValueError(
f"Plot type 'distplot' cannot be created because column '{column_name}' has a constant value inside"
" the real or synthetic data. To render a visualization, please update the plot_type to 'bar'."
f"Plot type 'distplot' cannot be created because column '{column_name}'"
' has a constant value inside the real or synthetic data. To render a'
" visualization, please update the plot_type to 'bar'."
)

fig = _generate_column_plot(real_column, synthetic_column, plot_type)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,7 @@ def test_compute_breakdown_no_relationships(self):
assert result == expected_metric_breakdown

@patch(
'sdmetrics.multi_table.statistical.cardinality_shape_similarity.MultiTableMetric.'
'normalize'
'sdmetrics.multi_table.statistical.cardinality_shape_similarity.MultiTableMetric.normalize'
)
def test_normalize(self, normalize_mock):
"""Test the ``normalize`` method.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def test_get_visualization(mock_px):
'x': 'Table',
'y': 'Score',
'title': (
'Data Diagnostic: Structure (Average ' f'Score={mock__compute_average.return_value})'
f'Data Diagnostic: Structure (Average Score={mock__compute_average.return_value})'
),
'category_orders': {'group': mock_df['Table'].tolist()},
'color': 'Metric',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,7 @@ def test_get_visualization(self, mock_px):
'x': 'Column',
'y': 'Score',
'title': (
'Data Quality: Column Shapes (Average '
f'Score={mock__compute_average.return_value})'
f'Data Quality: Column Shapes (Average Score={mock__compute_average.return_value})'
),
'category_orders': {'group': mock_df['Column'].tolist()},
'color': 'Metric',
Expand Down
6 changes: 3 additions & 3 deletions tests/unit/reports/single_table/_properties/test_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


class TestStructure:
@patch('sdmetrics.reports.single_table._properties.structure.' 'TableStructure.compute')
@patch('sdmetrics.reports.single_table._properties.structure.TableStructure.compute')
def test__generate_details(self, table_format_mock):
"""Test the ``_generate_details`` method."""
# Setup
Expand Down Expand Up @@ -49,7 +49,7 @@ def test__generate_details(self, table_format_mock):
)
pd.testing.assert_frame_equal(result, expected_details)

@patch('sdmetrics.reports.single_table._properties.structure.' 'TableStructure.compute')
@patch('sdmetrics.reports.single_table._properties.structure.TableStructure.compute')
def test__generate_details_with_id_column(self, table_format_mock):
"""Test the ``_generate_details`` method."""
# Setup
Expand Down Expand Up @@ -96,7 +96,7 @@ def test_get_visualization(self):

# Run and Assert
expected_message = (
'The single table Structure property does not have a' ' supported visualization.'
'The single table Structure property does not have a supported visualization.'
)
with pytest.raises(VisualizationUnavailableError, match=expected_message):
structure_property.get_visualization()
8 changes: 2 additions & 6 deletions tests/unit/reports/single_table/_properties/test_synthesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@


class TestSynthesis:
@patch(
'sdmetrics.reports.single_table._properties.synthesis.' 'NewRowSynthesis.compute_breakdown'
)
@patch('sdmetrics.reports.single_table._properties.synthesis.NewRowSynthesis.compute_breakdown')
def test__generate_details(self, newrowsynthesis_mock):
"""Test the ``_generate_details`` method.
Expand Down Expand Up @@ -64,9 +62,7 @@ def test__generate_details(self, newrowsynthesis_mock):

pd.testing.assert_frame_equal(details, expected__details)

@patch(
'sdmetrics.reports.single_table._properties.synthesis.' 'NewRowSynthesis.compute_breakdown'
)
@patch('sdmetrics.reports.single_table._properties.synthesis.NewRowSynthesis.compute_breakdown')
def test__generate_details_error(self, newrowsynthesis_mock):
"""Test the ``_generate_details`` method when the metric raises an error."""
# Setup
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ def test_compute_breakdown_with_nans(self):
assert result == {'score': 0.9}

@patch(
'sdmetrics.single_column.statistical.category_adherence.'
'CategoryAdherence.compute_breakdown'
'sdmetrics.single_column.statistical.category_adherence.CategoryAdherence.compute_breakdown'
)
def test_compute(self, compute_breakdown_mock):
"""Test the ``compute`` method."""
Expand Down

0 comments on commit b9fab0e

Please sign in to comment.