diff --git a/sdmetrics/single_column/statistical/key_uniqueness.py b/sdmetrics/single_column/statistical/key_uniqueness.py index 052d9339..be25c0e8 100644 --- a/sdmetrics/single_column/statistical/key_uniqueness.py +++ b/sdmetrics/single_column/statistical/key_uniqueness.py @@ -8,7 +8,7 @@ class KeyUniqueness(SingleColumnMetric): """Key uniqueness metric. - Compute the fraction of rows in the synthetic data that are unique. + The proportion of data points in the synthetic data that are unique. Attributes: name (str): @@ -51,7 +51,7 @@ def compute_breakdown(cls, real_data, synthetic_data): @classmethod def compute(cls, real_data, synthetic_data): - """Compute the key uniqueness of two columns. + """Compute the key uniqueness metric. Args: real_data (pandas.Series): @@ -61,7 +61,7 @@ def compute(cls, real_data, synthetic_data): Returns: float: - The key uniqueness of the two columns. + The proportion of data points in the synthetic data that are unique. """ return cls.compute_breakdown(real_data, synthetic_data)['score'] diff --git a/tests/unit/single_column/statistical/test_key_uniqueness.py b/tests/unit/single_column/statistical/test_key_uniqueness.py index 07ff7765..797e75e9 100644 --- a/tests/unit/single_column/statistical/test_key_uniqueness.py +++ b/tests/unit/single_column/statistical/test_key_uniqueness.py @@ -31,7 +31,7 @@ def test_compute_breakdown_with_duplicates_in_real_data(self): synthetic_data = pd.Series([1, 2, np.nan, 3, np.nan, 5, 2, np.nan, 6, None]) metric = KeyUniqueness() - # Run + # Run and Assert expected_message = 'The real data contains NA or duplicate values.' with pytest.raises(InvalidDataError, match=expected_message): metric.compute_breakdown(real_data, synthetic_data) @@ -49,6 +49,7 @@ def test_compute(self, compute_breakdown_mock): result = metric.compute(real_data, synthetic_data) # Assert + compute_breakdown_mock.assert_called_once_with(real_data, synthetic_data) assert result == 0.6 @patch('sdmetrics.single_column.statistical.key_uniqueness.SingleColumnMetric.normalize')