Skip to content

Commit

Permalink
Return nan if baseline is None and fix warning message
Browse files: browse the repository at this point in the history
  • Loading branch information
frances-h committed Dec 12, 2024
1 parent 77df479 commit ce563ed
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 18 deletions.
8 changes: 4 additions & 4 deletions sdmetrics/single_table/privacy/disclosure_protection.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
CategoricalZeroCAP,
)

MAX_NUM_ROWS = 50000
MAX_NUM_ROWS = 10000

CAP_METHODS = {
'CAP': CategoricalCAP,
Expand Down Expand Up @@ -212,7 +212,7 @@ def compute_breakdown(
computation_method = computation_method.upper()
if len(real_data) > MAX_NUM_ROWS or len(synthetic_data) > MAX_NUM_ROWS:
warnings.warn(
f'Data exceeds {MAX_NUM_ROWS} rows, perfomance may be slow.'
f'Data exceeds {MAX_NUM_ROWS} rows, perfomance may be slow. '
'Consider using the `DisclosureProtectionEstimate` for faster computation.'
)

Expand All @@ -238,7 +238,7 @@ def compute_breakdown(
)

if baseline_protection == 0:
score = 0 if cap_protection == 0 else 1
score = np.nan
else:
score = min(cap_protection / baseline_protection, 1)

Expand Down Expand Up @@ -363,7 +363,7 @@ def _compute_estimated_cap_metric(
estimated_score_sum += estimated_cap_protection
average_computed_score = estimated_score_sum / (i + 1.0)
if baseline_protection == 0:
average_score = 0 if average_computed_score == 0 else 1
average_score = np.nan
else:
average_score = min(average_computed_score / baseline_protection, 1)

Expand Down
19 changes: 5 additions & 14 deletions tests/unit/single_table/privacy/test_disclosure_protection.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,28 +271,19 @@ def test_compute_breakdown_zero_baseline(self, CAPMethodsMock):
CAPMethodsMock.get.return_value = CAPMock

# Run
score_breakdown_with_cap = DisclosureProtection.compute_breakdown(
real_data=real_data,
synthetic_data=synthetic_data,
known_column_names=['col1'],
sensitive_column_names=['col2'],
)

CAPMock._compute.return_value = 0
score_breakdown_no_cap = DisclosureProtection.compute_breakdown(
score_breakdown = DisclosureProtection.compute_breakdown(
real_data=real_data,
synthetic_data=synthetic_data,
known_column_names=['col1'],
sensitive_column_names=['col2'],
)

# Assert
assert score_breakdown_with_cap == {
'score': 1,
assert score_breakdown == {
'score': np.nan,
'baseline_protection': 0,
'cap_protection': 0.5,
}
assert score_breakdown_no_cap == {'score': 0, 'baseline_protection': 0, 'cap_protection': 0}

@patch('sdmetrics.single_table.privacy.disclosure_protection.CAP_METHODS')
@patch(
Expand Down Expand Up @@ -323,7 +314,7 @@ def test_compute_breakdown_warns_too_large(

# Run
expected_warning = re.escape(
'Data exceeds 50000 rows, perfomance may be slow.'
'Data exceeds 10000 rows, perfomance may be slow. '
'Consider using the `DisclosureProtectionEstimate` for faster computation.'
)
with pytest.warns(UserWarning, match=expected_warning):
Expand Down Expand Up @@ -486,7 +477,7 @@ def test__compute_estimated_cap_metric_zero_baseline(self, CAPMethodsMock):
)

# Assert
assert avg_score == 1
assert np.isnan(avg_score)
assert avg_computed_score == 0.38

@patch(
Expand Down

0 comments on commit ce563ed

Please sign in to comment.