From fec818b68a88ff8f71b29e3438572ad300e6161c Mon Sep 17 00:00:00 2001 From: Francesco Pisu Date: Sat, 23 Sep 2023 19:00:42 +0200 Subject: [PATCH] docs: improve docstring docs #13 --- src/modelsight/_typing.py | 36 +++- src/modelsight/calibration/calib.py | 42 +++-- src/modelsight/curves/_delong.py | 259 ++++++++++++++++------------ src/modelsight/curves/compare.py | 245 ++++++++++++++++++++++---- 4 files changed, 429 insertions(+), 153 deletions(-) diff --git a/src/modelsight/_typing.py b/src/modelsight/_typing.py index e9ef2e4..a87c1ba 100644 --- a/src/modelsight/_typing.py +++ b/src/modelsight/_typing.py @@ -1,3 +1,7 @@ +""" +This file deals with the implementation of custom types. +""" + import sys import random import numpy as np @@ -24,7 +28,37 @@ @dataclass -class CVModellingOutput: +class CVModellingOutput: + """This class stores the data generated by a cross-validation + process for a single estimator. + + Arguments + --------- + gts_train: ArrayLike + A (n_repetitions * n_outer_splits) list of arrays representing training ground-truth. + gts_val: ArrayLike + A (n_repetitions * n_outer_splits) list of arrays representing validation ground-truth. + gts_train_conc: ArrayLike + A list of (n_repetitions * n_outer_splits) data points representing pooled training ground-truth. + gts_val_conc: ArrayLike + A list of (n_repetitions * n_outer_splits) data points representing pooled validation ground-truth. + probas_train: ArrayLike + A (n_repetitions * n_outer_splits) list of arrays representing training predicted probabilities. + probas_val: ArrayLike + A (n_repetitions * n_outer_splits) list of arrays representing validation predicted probabilities. + probas_train_conc: ArrayLike + A list of (n_repetitions * n_outer_splits) data points representing pooled training predicted probabilities. + probas_val_conc: ArrayLike + A list of (n_repetitions * n_outer_splits) data points representing pooled validation predicted probabilities. + models: List[Estimator] + A list of (n_repetitions * n_outer_splits) fitted estimators. + errors: Optional[ArrayLike] + A (n_repetitions * n_outer_splits) list of validation prediction errors. + correct: Optional[ArrayLike] + A (n_repetitions * n_outer_splits) list of validation correct predictions. + features: Optional[ArrayLike] + A (n_repetitions * n_outer_splits) list of subsets of selected features. + """ gts_train: ArrayLike gts_val: ArrayLike gts_train_conc: ArrayLike diff --git a/src/modelsight/calibration/calib.py b/src/modelsight/calibration/calib.py index ba9cbd4..37f9a60 100644 --- a/src/modelsight/calibration/calib.py +++ b/src/modelsight/calibration/calib.py @@ -1,3 +1,8 @@ +""" +This file deals with the implementation of the Hosmer-Lemeshow plot for the +assessment of calibration of predicted probabilities. +""" + import numpy as np from typing import Tuple import matplotlib.pyplot as plt @@ -6,11 +11,14 @@ def ntile_name(n: int) -> str: - """Returns the ntile name corresponding to an ntile integer. + """ + Returns the ntile name corresponding to an ntile integer. + Parameters ---------- n : int An ntile integer. + Returns ------- ntile_name : str @@ -30,13 +38,16 @@ def ntile_name(n: int) -> str: def make_recarray(y_true: ArrayLike, y_pred: ArrayLike) -> np.recarray: - """Combines arrays into a recarray. + """ + Combines arrays into a recarray. + Parameters ---------- y_true : array Observed labels, either 0 or 1. y_pred : array Predicted probabilities, floats on [0, 1]. 
+ Returns ------- table : recarray @@ -53,7 +64,9 @@ def make_recarray(y_true: ArrayLike, def hosmer_lemeshow_table(y_true: ArrayLike, y_pred: ArrayLike, n_bins: int = 10) -> np.recarray: - """Constructs a Hosmer–Lemeshow table. + """ + Constructs a Hosmer–Lemeshow table. + Parameters ---------- y_true : array @@ -63,6 +76,7 @@ def hosmer_lemeshow_table(y_true: ArrayLike, n_bins : int, optional The number of groups to create. The default value is 10, which corresponds to deciles of predicted probabilities. + Returns ------- table : recarray @@ -100,26 +114,28 @@ def hosmer_lemeshow_plot(y_true: ArrayLike, Parameters ---------- - y_true: ArrayLike + y_true : ArrayLike (n_obs,) shaped array of ground-truth values - y_pred: ArrayLike + y_pred : ArrayLike (n_obs,) shaped array of predicted probabilities - n_bins: int + n_bins : int Number of bins to group observed and predicted probabilities into - colors: Tuple[str, str] + colors : Tuple[str, str] Pair of colors for observed (line) and predicted (vertical bars) probabilities. - annotate_bars: bool + annotate_bars : bool Whether bars should be annotated with the number of observed probabilities in each bin. - title: str + title : str Title to display on top of the calibration plot. - brier_score_annot: str + brier_score_annot : str Optional brier score (95% CI) annotation on the top-left corner. - ax: plt.Axes + ax : plt.Axes A matplotlib Axes object to draw the calibration plot into. If None, an Axes object is created by default. + Returns ------- - Tuple[plt.Figure, plt.Axes]: - Corresponding figure and Axes + f, ax : Tuple[plt.Figure, plt.Axes] + f: pyplot figure + ax: pyplot Axes """ table = hosmer_lemeshow_table(y_true, y_pred, n_bins) # transform observed and predicted frequencies in percentage relative to the bin dimension diff --git a/src/modelsight/curves/_delong.py b/src/modelsight/curves/_delong.py index 77dd1f3..bdffa34 100644 --- a/src/modelsight/curves/_delong.py +++ b/src/modelsight/curves/_delong.py @@ -1,124 +1,169 @@ +""" +This file deals with the implementation of the DeLong test for the comparison of +pairs of correlated areas under the receiver-operating characteristics curves. +""" + import pandas as pd import numpy as np import scipy.stats +from typing import Tuple # AUC comparison adapted from # https://github.com/Netflix/vmaf/ -def compute_midrank(x): - """Computes midranks. - Args: - x - a 1D numpy array - Returns: - array of midranks - """ - J = np.argsort(x) - Z = x[J] - N = len(x) - T = np.zeros(N, dtype=np.float64) - i = 0 - while i < N: - j = i - while j < N and Z[j] == Z[i]: - j += 1 - T[i:j] = 0.5*(i + j - 1) - i = j - T2 = np.empty(N, dtype=np.float64) - # Note(kazeevn) +1 is due to Python using 0-based indexing - # instead of 1-based in the AUC formula in the paper - T2[J] = T + 1 - return T2 +def compute_midrank(x: np.ndarray) -> np.ndarray: + """ + Computes midranks. + + Parameters + ---------- + x : np.ndarray + a 1-d array of predicted probabilities. 
+ + Returns + ------- + T2 : np.ndarray + array of midranks + """ + J = np.argsort(x) + Z = x[J] + N = len(x) + T = np.zeros(N, dtype=np.float64) + i = 0 + while i < N: + j = i + while j < N and Z[j] == Z[i]: + j += 1 + T[i:j] = 0.5*(i + j - 1) + i = j + T2 = np.empty(N, dtype=np.float64) + # Note(kazeevn) +1 is due to Python using 0-based indexing + # instead of 1-based in the AUC formula in the paper + T2[J] = T + 1 + return T2 -def fastDeLong(predictions_sorted_transposed, label_1_count): - """ - The fast version of DeLong's method for computing the covariance of - unadjusted AUC. - Args: - predictions_sorted_transposed: a 2D numpy.array[n_classifiers, n_examples] - sorted such as the examples with label "1" are first - Returns: - (AUC value, DeLong covariance) - Reference: - @article{sun2014fast, - title={Fast Implementation of DeLong's Algorithm for - Comparing the Areas Under Correlated Receiver Operating Characteristic Curves}, - author={Xu Sun and Weichao Xu}, - journal={IEEE Signal Processing Letters}, - volume={21}, - number={11}, - pages={1389--1393}, - year={2014}, - publisher={IEEE} - } - """ - # Short variables are named as they are in the paper - m = label_1_count - n = predictions_sorted_transposed.shape[1] - m - positive_examples = predictions_sorted_transposed[:, :m] - negative_examples = predictions_sorted_transposed[:, m:] - k = predictions_sorted_transposed.shape[0] +def fastDeLong(predictions_sorted_transposed: np.ndarray, + label_1_count: int) -> Tuple[np.ndarray, np.ndarray]: + """ + The fast version of DeLong's method for computing the covariance of + unadjusted AUC. + + Parameters + ---------- + predictions_sorted_transposed : a (n_classifiers, n_obs) numpy array containing + the predicted probabilities by the two classifiers in the comparison. + These probabilities are sorted such that the examples with label "1" come first. 
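+    label_1_count : int
+        the number of examples with label "1"; these occupy the first `label_1_count`
+        columns of `predictions_sorted_transposed`.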
+
+    Returns
+    -------
+    aucs, delongcov : Tuple[np.ndarray, np.ndarray]
+        aucs: array of AUC values
+        delongcov: DeLong covariance matrix
+
+    References
+    ----------
+    @article{sun2014fast,
+      title={Fast Implementation of DeLong's Algorithm for
+             Comparing the Areas Under Correlated Receiver Operating Characteristic Curves},
+      author={Xu Sun and Weichao Xu},
+      journal={IEEE Signal Processing Letters},
+      volume={21},
+      number={11},
+      pages={1389--1393},
+      year={2014},
+      publisher={IEEE}
+    }
+    """
+    # Short variables are named as they are in the paper
+    m = label_1_count
+    n = predictions_sorted_transposed.shape[1] - m
+    positive_examples = predictions_sorted_transposed[:, :m]
+    negative_examples = predictions_sorted_transposed[:, m:]
+    k = predictions_sorted_transposed.shape[0]
-    tx = np.empty([k, m], dtype=np.float64)
-    ty = np.empty([k, n], dtype=np.float64)
-    tz = np.empty([k, m + n], dtype=np.float64)
-    for r in range(k):
-        tx[r, :] = compute_midrank(positive_examples[r, :])
-        ty[r, :] = compute_midrank(negative_examples[r, :])
-        tz[r, :] = compute_midrank(predictions_sorted_transposed[r, :])
-    aucs = tz[:, :m].sum(axis=1) / m / n - float(m + 1.0) / 2.0 / n
-    v01 = (tz[:, :m] - tx[:, :]) / n
-    v10 = 1.0 - (tz[:, m:] - ty[:, :]) / m
-    sx = np.cov(v01)
-    sy = np.cov(v10)
-    delongcov = sx / m + sy / n
-    return aucs, delongcov
+    tx = np.empty([k, m], dtype=np.float64)
+    ty = np.empty([k, n], dtype=np.float64)
+    tz = np.empty([k, m + n], dtype=np.float64)
+    for r in range(k):
+        tx[r, :] = compute_midrank(positive_examples[r, :])
+        ty[r, :] = compute_midrank(negative_examples[r, :])
+        tz[r, :] = compute_midrank(predictions_sorted_transposed[r, :])
+    aucs = tz[:, :m].sum(axis=1) / m / n - float(m + 1.0) / 2.0 / n
+    v01 = (tz[:, :m] - tx[:, :]) / n
+    v10 = 1.0 - (tz[:, m:] - ty[:, :]) / m
+    sx = np.cov(v01)
+    sy = np.cov(v10)
+    delongcov = sx / m + sy / n
+    return aucs, delongcov
-def calc_pvalue(aucs, sigma):
-    """Computes log(10) of p-values.
-    Args:
-       aucs: 1D array of AUCs
-       sigma: AUC DeLong covariances
-    Returns:
-       log10(pvalue)
-    """
-    l = np.array([[1, -1]])
-    z = np.abs(np.diff(aucs)) / np.sqrt(np.dot(np.dot(l, sigma), l.T))
-    return np.log10(2) + scipy.stats.norm.logsf(z, loc=0, scale=1) / np.log(10)
+def calc_pvalue(aucs: np.ndarray, sigma: np.ndarray) -> float:
+    """
+    Computes the base-10 logarithm of the DeLong test p-value.
+
+    Parameters
+    ----------
+    aucs : np.ndarray
+        a 1-d array of AUCs
+    sigma : np.ndarray
+        an array of AUC DeLong covariances
+
+    Returns
+    -------
+    p : float
+        log10 of the p-value
+    """
+    l = np.array([[1, -1]])
+    z = np.abs(np.diff(aucs)) / np.sqrt(np.dot(np.dot(l, sigma), l.T))
+    p = np.log10(2) + scipy.stats.norm.logsf(z, loc=0, scale=1) / np.log(10)
+    return p
-def compute_ground_truth_statistics(ground_truth):
-    assert np.array_equal(np.unique(ground_truth), [0, 1])
-    order = (-ground_truth).argsort()
-    label_1_count = int(ground_truth.sum())
-    return order, label_1_count
+def compute_ground_truth_statistics(ground_truth: np.ndarray) -> Tuple[np.ndarray, int]:
+    """
+    Compute statistics of the ground-truth array.
+
+    Parameters
+    ----------
+    ground_truth : np.ndarray
+        a (n_obs,) array of 0 and 1 values representing the ground-truth.
-def delong_roc_variance(ground_truth, predictions):
-    """
-    Computes ROC AUC variance for a single set of predictions
-    Args:
-       ground_truth: np.array of 0 and 1
-       predictions: np.array of floats of the probability of being class 1
-    """
-    order, label_1_count = compute_ground_truth_statistics(ground_truth)
-    predictions_sorted_transposed = predictions[np.newaxis, order]
-    aucs, delongcov = fastDeLong(predictions_sorted_transposed, label_1_count)
-    assert len(aucs) == 1, "There is a bug in the code, please forward this to the developers"
-    return aucs[0], delongcov
+
+    Returns
+    -------
+    order, label_1_count : Tuple[np.ndarray, int]
+        order is a numpy array of sorted indexes
+        label_1_count is the count of data points of the positive class.
+    """
+    assert np.array_equal(np.unique(ground_truth), [0, 1])
+    order = (-ground_truth).argsort()
+    label_1_count = int(ground_truth.sum())
+    return order, label_1_count
-def delong_roc_test(ground_truth, predictions_one, predictions_two):
-    """
-    Computes log(p-value) for hypothesis that two ROC AUCs are different
-    Args:
-       ground_truth: np.array of 0 and 1
-       predictions_one: predictions of the first model,
-          np.array of floats of the probability of being class 1
-       predictions_two: predictions of the second model,
-          np.array of floats of the probability of being class 1
-    """
-    order, label_1_count = compute_ground_truth_statistics(ground_truth)
-    predictions_sorted_transposed = np.vstack((predictions_one, predictions_two))[:, order]
-    aucs, delongcov = fastDeLong(predictions_sorted_transposed, label_1_count)
-    return 10**calc_pvalue(aucs, delongcov).item()
\ No newline at end of file
+def delong_roc_test(ground_truth: np.ndarray,
+                    predictions_one: np.ndarray,
+                    predictions_two: np.ndarray) -> float:
+    """
+    Compare areas-under-curve of two estimators using the DeLong test.
+    Concretely, it computes the p-value for the hypothesis that the two ROC AUCs are different.
+
+    Parameters
+    ----------
+    ground_truth : np.ndarray
+        a (n_obs,) array of 0 and 1 representing ground-truths.
+    predictions_one : np.ndarray
+        a (n_obs,) array of probabilities of class 1 predicted by the first model.
+    predictions_two : np.ndarray
+        a (n_obs,) array of probabilities of class 1 predicted by the second model.
+
+    Returns
+    -------
+    p : float
+        the p-value for the hypothesis that the two ROC AUCs are different.
+    """
+    order, label_1_count = compute_ground_truth_statistics(ground_truth)
+    predictions_sorted_transposed = np.vstack((predictions_one, predictions_two))[:, order]
+    aucs, delongcov = fastDeLong(predictions_sorted_transposed, label_1_count)
+
+    p = 10**calc_pvalue(aucs, delongcov).item()
+    return p
\ No newline at end of file
diff --git a/src/modelsight/curves/compare.py b/src/modelsight/curves/compare.py
index 4d748ec..aa298b8 100644
--- a/src/modelsight/curves/compare.py
+++ b/src/modelsight/curves/compare.py
@@ -1,37 +1,84 @@
+"""
+This file deals with the implementation of functions that allow annotating plots
+with statistical test results between pairs of estimators.
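+
+Illustrative usage (the estimator names and the `cv_preds`, `bars` and `ax` objects are
+hypothetical and must be provided by the caller):
+
+    comparisons = roc_comparisons(cv_preds, target_algo="model_a")
+    add_annotations(comparisons,
+                    alpha=0.05,
+                    bars=bars,
+                    direction="vertical",
+                    order=[("model_a", "model_b"), ("model_a", "model_c")],
+                    ax=ax)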
+""" + from typing import Callable, Dict, Tuple, List import matplotlib from matplotlib import patches import matplotlib.pyplot as plt -from scipy.stats import ttest_ind -from sklearn.metrics import average_precision_score from src.modelsight.curves._delong import delong_roc_test +from src.modelsight._typing import CVModellingOutput -def annot_stat_vertical(text, x, y1, y2, ww, - col='k', - fontsize=13, - voffset = 0, - n_elems = None, +def annot_stat_vertical(text:str, + x: float, + y1: float, y2: float, + ww: float = 0.02, + col: str = 'black', + fontsize: int = 13, + voffset: float = 0, + n_elems: int = None, ax=None, **kwargs): """ - ww: float - whisker width + Draw a vertical whisker at position `x` that spans through `y1` to `y2` with annotation specified + by `text`. + + Parameters + ---------- + text : str + Annotation for whisker. + x : float + x-position the whisker is positioned at. + y1 :float + starting y position. + y2 : float + ending y position. + ww : float, optional + whisker width, by default 0.02 + col : str, optional + whisker color, by default 'black' + fontsize : int, optional + fontsize for the annotation, by default 13 + voffset : float, optional + vertical offset for the annotation, by default 0. + Some font families and characters occupy different vertical spaces; + this parameter allows compensating for such variations. + n_elems : int, optional + number of discrete elements in the y-axis, by default None. + This value is precomputed by the caller (add_annotations) and passed + to this function as input. + ax : plt.Axes, optional + a pyplot Axes to draw annotations on, by default None + **kwargs + rect_h_base: float, optional + base height of rectangle patch for single-character annotations, by default 0.1 + fontsize_nonsignif, optional + fontsize for multi-character annotations (here called non significant annotations + to reflect the fact that single-character annotations most often use some kind + of symbol to denote statistical significance, e.g. *), by default `fontsize` (i.e., 13) """ ax = plt.gca() if ax is None else ax # we want the text to be centered on the whisker text_x_pos = x + ww - #+ 0.01 + text_y_pos = (y1+y2)/2 # draw whisker from y1 to y2 with width `ww` ax.plot([x, x + ww, x + ww, x], [y1, y1, y2, y2], lw=1, c=col) - if len(text) == 1: - #text_y_pos = (y1+y2)/2 - - # draw text at (text_x_pos, text_y_pos) # + 0.15 + # this is the case of a whisker being annotated with a single character. + # by default, symbols do not enforce a white background, hence when + # superimposed on whiskers the readibility is limited. + # here we enforce a white rectangle patch beneath the symbol to enhance + # readibility of annotations. + # the built-in bbox parameter of pyplot's .text() doesn't produce + # acceptable results, hence we came up with a custom implementation for + # single-character annotations. + if len(text) == 1: + # draw text at (text_x_pos, (text_y_pos - voffset) + 0.17) ax.text( text_x_pos, (text_y_pos - voffset) + 0.17, text, ha='center', va='center', color=col, @@ -61,6 +108,9 @@ def annot_stat_vertical(text, x, y1, y2, ww, ax.add_patch(rect) else: + # this is the case of multi-character annotations. + # here, we leverage the built-in bbox of pyplot's text method + # that allows drawing a bounding box beneath the annotation. 
         fontsize_nonsignif = kwargs.pop("fontsize_nonsignif", fontsize)
         ax.text(
             text_x_pos, text_y_pos, text,
             ha='center', va='center', color=col,
             fontsize=fontsize_nonsignif,
             bbox=dict(
             )
         )
 
-from matplotlib import patches
-def annot_stat_horizontal(text, x1, x2, y, wh, col='k', fontsize=13,
-                          voffset = 0,
-                          n_elems = None,
-                          ax=None,
-                          **kwargs):
+
+def annot_stat_horizontal(text: str,
+                          x1: float, x2: float,
+                          y: float,
+                          wh: float = 0.02,
+                          col: str = "black",
+                          fontsize: int = 13,
+                          voffset: float = 0,
+                          n_elems: int = None,
+                          ax: plt.Axes = None,
+                          **kwargs):
     """
-    ww: float
-        whisker width
+    Draw a horizontal whisker at position `y` that spans from `x1` to `x2`, with the annotation
+    specified by `text`.
+
+    Parameters
+    ----------
+    text : str
+        Annotation for whisker.
+    x1 : float
+        starting x position.
+    x2 : float
+        ending x position.
+    y : float
+        y-position the whisker is positioned at.
+    wh : float, optional
+        whisker height, by default 0.02
+    col : str, optional
+        whisker color, by default 'black'
+    fontsize : int, optional
+        fontsize for the annotation, by default 13
+    voffset : float, optional
+        vertical offset for the annotation, by default 0.
+        Some font families and characters occupy different vertical spaces;
+        this parameter allows compensating for such variations.
+    n_elems : int, optional
+        number of discrete elements in the y-axis, by default None.
+        This value is precomputed by the caller (add_annotations) and passed
+        to this function as input.
+    ax : plt.Axes, optional
+        a pyplot Axes to draw annotations on, by default None
+    **kwargs
+        fontsize_nonsignif : int, optional
+            fontsize for multi-character annotations (here called non-significant annotations
+            to reflect the fact that single-character annotations most often use some kind
+            of symbol to denote statistical significance, e.g. *), by default `fontsize` (i.e., 13)
     """
     ax = plt.gca() if ax is None else ax
@@ -93,10 +180,16 @@ def annot_stat_horizontal(text, x1, x2, y, wh, col='k', fontsize=13,
 
     # draw whisker from y1 to y2 with width `ww`
     ax.plot([x1, x1, x2, x2], [y, y + wh, y + wh, y], lw=1, c=col, clip_on=False)
-
-    if len(text) == 1:
-        #text_y_pos = (y1+y2)/2
+    # this is the case of a whisker being annotated with a single character.
+    # by default, symbols do not enforce a white background, hence when
+    # superimposed on whiskers the readability is limited.
+    # here we enforce a white rectangle patch beneath the symbol to enhance
+    # readability of annotations.
+    # the built-in bbox parameter of pyplot's .text() doesn't produce
+    # acceptable results, hence we came up with a custom implementation for
+    # single-character annotations.
+    if len(text) == 1:
         # draw text at (text_x_pos, text_y_pos) # + 0.15
         ax.text(
             text_x_pos, text_y_pos + voffset, text,
@@ -140,14 +233,12 @@ def annot_stat_horizontal(text, x1, x2, y, wh, col='k', fontsize=13,
         )
-from typing import Tuple, List, Dict
-
 def add_annotations(comparisons: Dict[str, Tuple[str, str, float]],
                     alpha: float,
                     bars: matplotlib.container.BarContainer,
                     direction: str,
                     order: List[Tuple[str, str]],
-                    symbol: str,
+                    symbol: str = "*",
                     symbol_fontsize: int = 22,
                     voffset: float = 0,
                     ext_voffset: float = 0,
@@ -155,6 +246,54 @@ def add_annotations(comparisons: Dict[str, Tuple[str, str, float]],
                     P_val_rounding: int = 2,
                     ax: plt.Axes = None,
                     **kwargs):
+    """
+    Annotate the specified plot (`ax`) with the provided comparison results, either vertically or
+    horizontally depending on the value of `direction`.
+
+    Parameters
+    ----------
+    comparisons : Dict[str, Tuple[str, str, float]]
+        The results of model comparisons.
+    alpha : float
+        The significance level used for formatting the P value of comparisons.
+    bars : matplotlib.container.BarContainer
+        A list of matplotlib's bars that is used to access the bar's width or height
+        when annotating horizontally and vertically, respectively.
+    direction : str
+        The direction for annotation. Possible values are "horizontal" and "vertical".
+    order : List[Tuple[str, str]]
+        The order in which the comparisons should be displayed.
+        Each entry of this list is a tuple whose elements are algorithm names.
+    symbol : str, optional
+        The symbol used in place of the P value when statistical significance is achieved
+        according to the specified alpha, by default "*".
+    symbol_fontsize : int, optional
+        Fontsize for the symbol used when statistical significance is achieved, by default 22
+    voffset : float, optional
+        vertical offset for the annotation, by default 0
+    ext_voffset : float, optional
+        Additional vertical offset for vertical annotations.
+        Ignored when direction = "horizontal", by default 0
+    ext_hoffset : float, optional
+        Additional horizontal offset for horizontal annotations.
+        Ignored when direction = "vertical", by default 0
+    P_val_rounding : int, optional
+        Number of decimal places to round P values to, by default 2
+    ax : plt.Axes, optional
+        The plot to be annotated, by default None
+
+    Returns
+    -------
+    ax : plt.Axes
+        The annotated plot.
+
+    Raises
+    ------
+    ValueError
+        When ax is None.
+    ValueError
+        Whenever a comparison key doesn't exist.
+    """
     if not ax:
         raise ValueError("I need an Axes to draw comparisons on.")
@@ -188,7 +327,7 @@ def add_annotations(comparisons: Dict[str, Tuple[str, str, float]],
                 wh=0.02,
                 col="black",
                 fontsize=symbol_fontsize,
-                voffset = -0.02,
+                voffset = voffset, #-0.02
                 ext_offset = ext_hoffset,
                 n_elems = len(entity_labels),
                 ax=ax,
@@ -219,16 +358,58 @@ def add_annotations(comparisons: Dict[str, Tuple[str, str, float]],
     return ax
 
-def roc_single_comparison(cv_preds, fst_algo, snd_algo):
+def roc_single_comparison(cv_preds: Dict[str, CVModellingOutput],
+                          fst_algo: str,
+                          snd_algo: str) -> Dict[str, Tuple[str, str, float]]:
+    """Perform a single comparison of two areas under Receiver Operating Characteristic curves
+    computed on the same set of data points, using the DeLong test.
+
+    Parameters
+    ----------
+    cv_preds : Dict[str, CVModellingOutput]
+        A mapping of algorithm names to the outputs of a cross-validation process
+        encompassing multiple (n>=2) models.
+    fst_algo : str
+        The name of the first algorithm for the comparison.
+        Must be an existing key of `cv_preds`.
+    snd_algo : str
+        The name of the second algorithm for the comparison.
+        Must be an existing key of `cv_preds`.
+
+    Returns
+    -------
+    comparison_result : Dict[str, Tuple[str, str, float]]
+        The output of the comparison. This is a dictionary where the key is
+        of the form "<fst_algo>_<snd_algo>" and the value is a tuple of three
+        elements: the first two are the names of the algorithms being compared
+        and the third element is the P value for the null hypothesis that
+        the two AUC values are equal.
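+
+    Examples
+    --------
+    A minimal, illustrative sketch; `cv_preds` and the keys "model_a" and "model_b"
+    are hypothetical and must be provided by the caller:
+
+    >>> cmp = roc_single_comparison(cv_preds, "model_a", "model_b")
+    >>> fst_algo, snd_algo, p_value = cmp["model_a_model_b"]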
+ """ ground_truths = cv_preds[fst_algo].gts_val_conc fst_algo_probas = cv_preds[fst_algo].probas_val_conc snd_algo_probas = cv_preds[snd_algo].probas_val_conc P = delong_roc_test(ground_truths, fst_algo_probas, snd_algo_probas) cmp_key = f"{fst_algo}_{snd_algo}" - return {cmp_key: (fst_algo, snd_algo, P)} + comparison_result = {cmp_key: (fst_algo, snd_algo, P)} + return comparison_result -def roc_comparisons(cv_preds, target_algo): +def roc_comparisons(cv_preds: CVModellingOutput, + target_algo: str): + """ + Compares the AUC of the specified algorithm with the AUCs of all other algorithms. + + Parameters + ---------- + cv_preds : CVModellingOutput + The output of a cross-validation process encompassing mulitple (n>=2) models. + target_algo : str + The name of the target algorithm's whose AUC will be compared with all other AUCs. + + Returns + ------- + comparisons : Dict[str, Tuple[str, str, float]] + A dictionary containing the results of all comparisons. See output of `roc_single_comparison`. + """ comparisons = dict() for algo_name in cv_preds.keys():