From 0894da3e0f2c724c053a2c2aea118b9e28d17fcf Mon Sep 17 00:00:00 2001
From: lajohn4747
Date: Thu, 26 Oct 2023 12:42:39 -0500
Subject: [PATCH 1/2] Don't call string replace on an empty dataframe

---
 .../reports/multi_table/_properties/inter_table_trends.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sdmetrics/reports/multi_table/_properties/inter_table_trends.py b/sdmetrics/reports/multi_table/_properties/inter_table_trends.py
index 012043f0..4dcad9e1 100644
--- a/sdmetrics/reports/multi_table/_properties/inter_table_trends.py
+++ b/sdmetrics/reports/multi_table/_properties/inter_table_trends.py
@@ -130,12 +130,13 @@ def _generate_details(self, real_data, synthetic_data, metadata, progress_bar=No
                 denormalized_real, denormalized_synthetic, merged_metadata,
                 progress_bar=progress_bar, column_pairs=parent_child_pairs
             )
+
             details['Parent Table'] = parent
             details['Child Table'] = child
             details['Foreign Key'] = foreign_key
-            details['Column 1'] = details['Column 1'].str.replace(f'{parent}.', '', n=1)
-            details['Column 2'] = details['Column 2'].str.replace(f'{child}.', '', n=1)
-
+            if not details.empty:
+                details['Column 1'] = details['Column 1'].str.replace(f'{parent}.', '', n=1)
+                details['Column 2'] = details['Column 2'].str.replace(f'{child}.', '', n=1)
             all_details.append(details)
 
         self.details = pd.concat(all_details, axis=0).reset_index(drop=True)

From 888897c9072c4a1445e5f0dd26f8f9a21622f7fc Mon Sep 17 00:00:00 2001
From: lajohn4747
Date: Fri, 27 Oct 2023 00:21:33 -0500
Subject: [PATCH 2/2] Add unit test to catch empty dataframe due to no categorical data

---
 .../_properties/test_inter_table_trends.py | 120 ++++++++++++++++++
 1 file changed, 120 insertions(+)

diff --git a/tests/unit/reports/multi_table/_properties/test_inter_table_trends.py b/tests/unit/reports/multi_table/_properties/test_inter_table_trends.py
index b2f3144b..fe4398b1 100644
--- a/tests/unit/reports/multi_table/_properties/test_inter_table_trends.py
+++ b/tests/unit/reports/multi_table/_properties/test_inter_table_trends.py
@@ -164,6 +164,126 @@ def test__generate_details(column_pair_trends_mock):
     pd.testing.assert_frame_equal(instance.details, expected_details)
 
 
+@patch('sdmetrics.reports.multi_table._properties.inter_table_trends.SingleTableColumnPairTrends')
+def test__generate_details_empty_column_generate(column_pair_trends_mock):
+    """Test ``_generate_details`` when the column pair details are empty."""
+    # Setup
+    instance = InterTableTrends()
+    real_user_df = pd.DataFrame({
+        'user_id': ['user1', 'user2'],
+    })
+    synthetic_user_df = pd.DataFrame({
+        'user_id': ['user1', 'user2'],
+    })
+    real_session_df = pd.DataFrame({
+        'session_id': ['session1', 'session2', 'session3'],
+        'user_id': ['user1', 'user1', 'user2'],
+    })
+    synthetic_session_df = pd.DataFrame({
+        'session_id': ['session1', 'session2', 'session3'],
+        'user_id': ['user1', 'user1', 'user2'],
+    })
+
+    metadata = {
+        'tables': {
+            'users': {
+                'primary_key': 'user_id',
+                'columns': {
+                    'user_id': {'sdtype': 'id'}
+                },
+            },
+            'sessions': {
+                'primary_key': 'session_id',
+                'columns': {
+                    'session_id': {'sdtype': 'id'},
+                    'user_id': {'sdtype': 'id'}
+                }
+            }
+        },
+        'relationships': [
+            {
+                'parent_table_name': 'users',
+                'child_table_name': 'sessions',
+                'parent_primary_key': 'user_id',
+                'child_foreign_key': 'user_id'
+            }
+        ]
+    }
+    instanced_mock = column_pair_trends_mock.return_value
+    instanced_mock._generate_details.return_value = pd.DataFrame({
+        'Column 1': [],
+        'Column 2': [],
+        'Metric': [],
+        'Score': [],
+        'Real Correlation': [],
+        'Synthetic Correlation': [],
+        'Error': []
+    })
+
+    # Run
+    instance._generate_details(
+        real_data={'users': real_user_df, 'sessions': real_session_df},
+        synthetic_data={'users': synthetic_user_df, 'sessions': synthetic_session_df},
+        metadata=metadata
+    )
+
+    # Assert
+    expected_denormalized_real = pd.DataFrame({
+        'sessions.session_id': ['session1', 'session2', 'session3'],
+        'sessions.user_id': ['user1', 'user1', 'user2'],
+        'users.user_id': ['user1', 'user1', 'user2'],
+    })
+    expected_denormalized_synthetic = pd.DataFrame({
+        'sessions.session_id': ['session1', 'session2', 'session3'],
+        'sessions.user_id': ['user1', 'user1', 'user2'],
+        'users.user_id': ['user1', 'user1', 'user2'],
+    })
+    expected_merged_metadata = {
+        'primary_key': 'sessions.session_id',
+        'columns': {
+            'sessions.session_id': {'sdtype': 'id'},
+            'sessions.user_id': {'sdtype': 'id'},
+            'users.user_id': {'sdtype': 'id'},
+        },
+    }
+    expected_column_pairs = itertools.product(
+        ['users.user_id'],
+        ['sessions.session_id', 'sessions.user_id']
+    )
+    expected_details = pd.DataFrame({
+        'Parent Table': [],
+        'Child Table': [],
+        'Foreign Key': [],
+        'Column 1': [],
+        'Column 2': [],
+        'Metric': [],
+        'Score': [],
+        'Real Correlation': [],
+        'Synthetic Correlation': [],
+        'Error': []
+    }).astype({
+        'Parent Table': 'object',
+        'Child Table': 'object',
+        'Foreign Key': 'object',
+        'Column 1': 'float64',
+        'Column 2': 'float64',
+        'Metric': 'float64',
+        'Score': 'float64',
+        'Real Correlation': 'float64',
+        'Synthetic Correlation': 'float64',
+        'Error': 'float64'
+    })
+
+    instanced_mock._generate_details.assert_called_once_with(
+        DataFrameMatcher(expected_denormalized_real),
+        DataFrameMatcher(expected_denormalized_synthetic),
+        expected_merged_metadata,
+        progress_bar=None,
+        column_pairs=IteratorMatcher(expected_column_pairs)
+    )
+    pd.testing.assert_frame_equal(instance.details, expected_details)
+
+
 @patch('sdmetrics.reports.multi_table._properties.inter_table_trends.px')
 def test_get_visualization(plotly_mock):
     """Test the ``get_visualization`` method."""