Skip to content

Commit

Permalink
definition 2
Browse files Browse the repository at this point in the history
  • Loading branch information
R-Palazzo committed Oct 30, 2023
1 parent c8407b9 commit 05a800e
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 62 deletions.
2 changes: 2 additions & 0 deletions sdmetrics/reports/multi_table/_properties/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from sdmetrics.reports.multi_table._properties.column_shapes import ColumnShapes
from sdmetrics.reports.multi_table._properties.coverage import Coverage
from sdmetrics.reports.multi_table._properties.inter_table_trends import InterTableTrends
from sdmetrics.reports.multi_table._properties.relationship_validity import RelationshipValidity
from sdmetrics.reports.multi_table._properties.synthesis import Synthesis

__all__ = [
Expand All @@ -18,4 +19,5 @@
'Coverage',
'InterTableTrends',
'Synthesis',
'RelationshipValidity',
]
24 changes: 8 additions & 16 deletions sdmetrics/reports/multi_table/_properties/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ def _get_num_iterations(self, metadata):
iterations += (len(parent_columns) * len(child_columns))
return iterations

@staticmethod
def _extract_tuple(data, relation):
    """Return the parent primary key and child foreign key columns of a relationship.

    Args:
        data:
            Object indexed by table name; each table is in turn indexed by column name.
        relation:
            Object indexed by the ``parent_table_name``, ``child_table_name``,
            ``parent_primary_key`` and ``child_foreign_key`` entries.

    Returns:
        tuple:
            ``(parent primary key column, child foreign key column)``.
    """
    # Both tables are resolved before either key column is looked up.
    parent_table = data[relation['parent_table_name']]
    child_table = data[relation['child_table_name']]

    primary_key_values = parent_table[relation['parent_primary_key']]
    foreign_key_values = child_table[relation['child_foreign_key']]
    return primary_key_values, foreign_key_values

def _compute_average(self):
"""Average the scores for each column."""
is_dataframe = isinstance(self.details, pd.DataFrame)
Expand Down Expand Up @@ -137,22 +145,6 @@ def get_visualization(self, table_name):

return self._properties[table_name].get_visualization()

def _get_details_for_table_name_with_relationships(self, table_name):
    """Filter the details down to the relationships that involve ``table_name``.

    Args:
        table_name (str):
            Name of the table whose relationships should be kept.

    Returns:
        pandas.DataFrame or None:
            Copy of the rows of ``self.details`` in which the table is the child or the
            parent. ``None`` when ``self.details`` lacks either the ``'Child Table'`` or
            the ``'Parent Table'`` column.
    """
    details = self.details
    required_columns = ('Child Table', 'Parent Table')
    if not all(column in details.columns for column in required_columns):
        # Details without relationship columns cannot be filtered by table.
        return None

    involved = (details['Child Table'] == table_name) | (details['Parent Table'] == table_name)
    return details[involved].copy()

def get_details(self, table_name=None):
"""Return the details table for the property for the given table.
Expand Down
55 changes: 9 additions & 46 deletions sdmetrics/reports/multi_table/_properties/relationship_validity.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,12 @@
import pandas as pd
import plotly.express as px

from sdmetrics.multi_table.statistical import CardinalityShapeSimilarity
from sdmetrics.column_pairs.statistical import ReferentialIntegrity
from sdmetrics.column_pairs.statistical import CardinalityBoundaryAdherence
from sdmetrics.column_pairs.statistical import CardinalityBoundaryAdherence, ReferentialIntegrity
from sdmetrics.reports.multi_table._properties.base import BaseMultiTableProperty
from sdmetrics.reports.utils import PlotConfig


class Relationship_Validity(BaseMultiTableProperty):
class RelationshipValidity(BaseMultiTableProperty):
"""``Relationship Validity`` class.
This property measures the validity of the relationship
Expand All @@ -19,13 +17,6 @@ class Relationship_Validity(BaseMultiTableProperty):

_num_iteration_case = 'relationship'

@staticmethod
def _extract_tuple(data, relation):
    """Return the parent primary key and child foreign key columns of a relationship.

    Declared as a static method because the signature takes no ``self``: without the
    decorator, calling it through an instance would bind the instance to ``data`` and
    fail with a ``TypeError``.

    Args:
        data:
            Object indexed by table name; each table is in turn indexed by column name.
        relation:
            Object indexed by the ``parent_table_name``, ``child_table_name``,
            ``parent_primary_key`` and ``child_foreign_key`` entries.

    Returns:
        tuple:
            ``(parent primary key column, child foreign key column)``.
    """
    parent_data = data[relation['parent_table_name']]
    child_data = data[relation['child_table_name']]
    return (
        parent_data[relation['parent_primary_key']], child_data[relation['child_foreign_key']]
    )

def _generate_details(self, real_data, synthetic_data, metadata, progress_bar=None):
"""Get the average score of relationship validity in the given tables.
Expand Down Expand Up @@ -77,44 +68,15 @@ def _generate_details(self, real_data, synthetic_data, metadata, progress_bar=No
self.details = pd.DataFrame({
'Parent Table': parent_tables,
'Child Table': child_tables,
'Primary key': primary_key,
'Foreign key': foreign_key,
'Primary Key': primary_key,
'Foreign Key': foreign_key,
'Metric': metric_names,
'Score': scores,
'Error': error_messages,
})

def _get_details_for_table_name_with_relationships(self, table_name):
    """Filter the details down to the relationships that involve ``table_name``.

    Args:
        table_name (str):
            Name of the table whose relationships should be kept.

    Returns:
        pandas.DataFrame:
            Copy of the rows of ``self.details`` in which the table appears in the
            ``'Child Table'`` or the ``'Parent Table'`` column.
    """
    details = self.details
    as_child = details['Child Table'] == table_name
    as_parent = details['Parent Table'] == table_name

    selected_rows = details[as_child | as_parent]
    return selected_rows.copy()

def get_details(self, table_name=None):
    """Return the property details, optionally restricted to one table.

    Args:
        table_name (str):
            Table name to get the details for. When ``None`` (the default) the full
            details are returned.

    Returns:
        pandas.DataFrame:
            A copy of ``self.details`` when no table name is given; otherwise the result
            of ``_get_details_for_table_name_with_relationships`` for that table.
    """
    if table_name is not None:
        return self._get_details_for_table_name_with_relationships(table_name)

    # Hand back a copy of the stored details rather than the stored frame itself.
    return self.details.copy()
if self.details['Error'].isna().all():
self.details = self.details.drop('Error', axis=1)

def _get_table_relationships_plot(self, table_name):
"""Get the table relationships plot from the parent child relationship scores for a table.
Expand All @@ -126,7 +88,7 @@ def _get_table_relationships_plot(self, table_name):
Returns:
plotly.graph_objects._figure.Figure
"""
plot_data = self._get_details_for_table_name_with_relationships(table_name).copy()
plot_data = self.get_details(table_name).copy()
column_name = 'Child → Parent Relationship'
plot_data[column_name] = plot_data['Child Table'] + ' → ' + plot_data['Parent Table']
plot_data = plot_data.drop(['Child Table', 'Parent Table'], axis=1)
Expand All @@ -139,13 +101,14 @@ def _get_table_relationships_plot(self, table_name):
y='Score',
title=f'Table Relationships (Average Score={average_score})',
color='Metric',
color_discrete_sequence=[PlotConfig.DATACEBO_DARK],
color_discrete_sequence=[PlotConfig.DATACEBO_DARK, PlotConfig.DATACEBO_BLUE],
hover_name='Child → Parent Relationship',
hover_data={
'Child → Parent Relationship': False,
'Metric': True,
'Score': True,
},
barmode='group'
)

fig.update_yaxes(range=[0, 1])
Expand Down

0 comments on commit 05a800e

Please sign in to comment.