From 4c33c31746f9acfbe7419fae9f003138907ea0e4 Mon Sep 17 00:00:00 2001 From: Jan Bachmann Date: Wed, 11 Sep 2024 22:06:37 +0300 Subject: [PATCH] Update label query --- netin/stats/distributions.py | 2 +- netin/stats/networks.py | 7 ++++--- netin/stats/ranking.py | 2 +- netin/viz/handlers.py | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/netin/stats/distributions.py b/netin/stats/distributions.py index 812103e2..78155512 100644 --- a/netin/stats/distributions.py +++ b/netin/stats/distributions.py @@ -146,7 +146,7 @@ def get_disparity(df: pd.DataFrame, x: str, total: float = None) -> (np.ndarray, gx, gy = get_gini_coefficient(df, x, total) fx, fy = get_fraction_of_minority(df, x, total) - f_m = df.query("class_label == @const.MINORITY_LABEL").shape[0] / df.shape[0] + f_m = df.query("real_label == @const.MINORITY_LABEL").shape[0] / df.shape[0] inequality_y = ranking.get_ranking_inequality(gy) inequity_x = ranking.get_ranking_inequity(f_m, fy) diff --git a/netin/stats/networks.py b/netin/stats/networks.py index f867624b..6b6c4c88 100644 --- a/netin/stats/networks.py +++ b/netin/stats/networks.py @@ -101,9 +101,10 @@ def get_node_metadata_as_dataframe( .get_node_class(node_class_values) class_values = node_class_values.get_class_values() - obj = {'node': list(graph.nodes()), - 'class_label': [node_class_values[n] for n in graph.nodes()], - 'real_label': [class_values[n] for n in graph.nodes()]} + l_nodes = list(graph.nodes()) + obj = {'node': l_nodes, + 'class_label': [node_class_values[n] for n in l_nodes], + 'real_label': [class_values[n] for n in l_nodes]} # include graph metadata if include_graph_metadata: diff --git a/netin/stats/ranking.py b/netin/stats/ranking.py index 66db56bd..389ba181 100644 --- a/netin/stats/ranking.py +++ b/netin/stats/ranking.py @@ -84,7 +84,7 @@ def get_fraction_of_minority_in_ranking(df: pd.DataFrame, x: str) -> \ column = f"{x}_rank" tmp = df.query(f"{column} <= @rank").copy() total = tmp.shape[0] - efm = np.nan if total == 0 else tmp.query("class_label == @const.MINORITY_LABEL").shape[0] / total + efm = np.nan if total == 0 else tmp.query("real_label == @const.MINORITY_LABEL").shape[0] / total ys.append(efm) return xs, ys diff --git a/netin/viz/handlers.py b/netin/viz/handlers.py index b0f07873..ccd8cc54 100644 --- a/netin/viz/handlers.py +++ b/netin/viz/handlers.py @@ -494,7 +494,7 @@ def plot_distribution(data: Union[pd.DataFrame, List[pd.DataFrame]], class_label: str iter_groups = df.groupby(hue) if hue is not None else [(None, df)] - f_m = df.query("class_label == @const.MINORITY_LABEL").shape[0] / df.shape[0] + f_m = df.query("real_label == @const.MINORITY_LABEL").shape[0] / df.shape[0] for class_label, group in iter_groups: total = df[_col_name].sum()\ if common_norm else group[_col_name].sum() @@ -717,7 +717,7 @@ def get_me_label(f_m, ys, beta=None) -> Tuple[str, float, str]: def show_minority(axline, data): axline( - data.query("class_label==@const.MINORITY_LABEL")\ + data.query("real_label==@const.MINORITY_LABEL")\ .shape[0] / data.shape[0], color="black", linestyle='--')