From 56c807ec663ac73aed60644dd89ad4bd3debbf9f Mon Sep 17 00:00:00 2001 From: valosekj Date: Tue, 16 Jul 2024 11:19:14 -0400 Subject: [PATCH 1/3] Compute Mann-Whitney U test (nonparametric, independent samples) between site 1 and site 2 --- baselines/generate_figures.py | 40 +++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/baselines/generate_figures.py b/baselines/generate_figures.py index 113cd70..823d8f7 100644 --- a/baselines/generate_figures.py +++ b/baselines/generate_figures.py @@ -37,7 +37,7 @@ import ptitprince as pt from functools import reduce -from scipy.stats import wilcoxon, normaltest, kruskal +from scipy.stats import wilcoxon, mannwhitneyu, normaltest, kruskal from statsmodels.stats.multitest import multipletests # Initialize logging @@ -485,6 +485,38 @@ def compute_wilcoxon_test(df_concat, list_of_metrics): f'formatted p{format_pvalue(p)}, unformatted p={p:0.6f}') +def compute_mann_whitney_u_test(df_concat, list_of_metrics): + """ + Compute Mann-Whitney U test (nonparametric, independent samples) between site 1 and site 2 + https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.mannwhitneyu.html + :param df_concat: dataframe containing all the data + :param list_of_metrics: list of metrics to compute the Mann-Whitney U test for + :return: + """ + + logger.info('') + + # Remove 'NbTestedLesions' and 'VolTestedLesions' from the list of metrics + list_of_metrics = [metric for metric in list_of_metrics if metric not in ['NbTestedLesions', 'VolTestedLesions']] + + # Loop across nnunet_2d, nnunet_3d + for method in df_concat['method'].unique(): + # Loop across metrics + for metric in list_of_metrics: + # Prepare the data + df_site1_metric = df_concat[(df_concat['site'] == 'zurich') & (df_concat['method'] == method)][metric] + df_site2_metric = df_concat[(df_concat['site'] == 'colorado') & (df_concat['method'] == method)][metric] + + # Drop nan + df_site1_metric = df_site1_metric.dropna() + 
df_site2_metric = df_site2_metric.dropna() + + # Compute Mann-Whitney U test + stat, p = mannwhitneyu(df_site1_metric, df_site2_metric, alternative='two-sided') + logger.info(f'{metric}, {method}: Mann-Whitney U test between Zurich and Colorado: ' + f'formatted p{format_pvalue(p)}, unformatted p={p:0.6f}') + + def compute_kruskal_wallis_test(df_concat, list_of_metrics): """ Compute Kruskal-Wallis H-test (non-parametric version of ANOVA) @@ -648,9 +680,13 @@ def main(): # Print colorado subjects with Dice=0 print_colorado_subjects_with_dice_0(df_concat) - # For lesions, compute Wilcoxon signed-rank test test between nnunet_3d and nnunet_2d + # For lesions: + # - compute Wilcoxon signed-rank test (nonparametric, paired) between nnunet_3d and nnunet_2d + # - compute Mann-Whitney U test (nonparametric, independent samples) between site 1 and site 2 if pred_type == 'lesion': compute_wilcoxon_test(df_concat, list_of_metrics) + # site 1 vs site 2 + compute_mann_whitney_u_test(df_concat, list_of_metrics) # For SC, compute Kruskal-Wallis H-test (we have 6 methods) else: compute_kruskal_wallis_test(df_concat, list_of_metrics) From c53237e8c58069ababd8cec158cadf7846fa7f9e Mon Sep 17 00:00:00 2001 From: valosekj Date: Tue, 16 Jul 2024 11:19:35 -0400 Subject: [PATCH 2/3] Drop filename column (to avoid error for the following '.groupby' and '.mean()' commands) --- baselines/generate_figures.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/baselines/generate_figures.py b/baselines/generate_figures.py index 823d8f7..5717060 100644 --- a/baselines/generate_figures.py +++ b/baselines/generate_figures.py @@ -666,6 +666,9 @@ def main(): # Concatenate the list of dataframes into a single dataframe df_concat = pd.concat(list_of_df, ignore_index=True) + # Drop filename column (to avoid error for the following '.groupby' and '.mean()' commands) + df_concat = df_concat.drop(columns=['filename']) + # If a participant_id is duplicated (because the test image is presented across 
multiple seeds), average the # metrics across seeds for the same subject. df_concat = df_concat.groupby(['participant_id', 'session_id', 'site', 'method']).mean().reset_index() From 68dcd95006d1d29a125003dbdcfa68dfc7247bed Mon Sep 17 00:00:00 2001 From: valosekj Date: Tue, 16 Jul 2024 11:19:51 -0400 Subject: [PATCH 3/3] Improve docstring; add comment --- baselines/generate_figures.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/baselines/generate_figures.py b/baselines/generate_figures.py index 5717060..8631b0f 100644 --- a/baselines/generate_figures.py +++ b/baselines/generate_figures.py @@ -440,7 +440,8 @@ def print_colorado_subjects_with_dice_0(df_concat): def compute_wilcoxon_test(df_concat, list_of_metrics): """ - Compute Wilcoxon signed-rank test (two related paired samples -- a same subject for nnunet_3d vs nnunet_2d) + Compute Wilcoxon signed-rank test (nonparametric, paired -- two related paired samples -- the same subject for + nnunet_3d vs nnunet_2d) https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wilcoxon.html :param df_concat: dataframe containing all the data :param list_of_metrics: list of metrics to compute the Wilcoxon test for @@ -687,6 +688,7 @@ def main(): # - compute Wilcoxon signed-rank test (nonparametric, paired) between nnunet_3d and nnunet_2d # - compute Mann-Whitney U test (nonparametric, independent samples) between site 1 and site 2 if pred_type == 'lesion': + # nnunet_3d and nnunet_2d compute_wilcoxon_test(df_concat, list_of_metrics) # site 1 vs site 2 compute_mann_whitney_u_test(df_concat, list_of_metrics)