docs: improve docstring docs #13

francescopisu · Sep 23, 2023 · 2fd7c6b · 2fd7c6b
1 parent 0cabbb0
commit 2fd7c6b
Show file tree

Hide file tree

Showing 4 changed files with 429 additions and 153 deletions.
diff --git a/src/modelsight/_typing.py b/src/modelsight/_typing.py
@@ -1,3 +1,7 @@
+"""
+This file deals with the implementation of custom types.
+"""
+
 import sys
 import random
 import numpy as np
@@ -24,7 +28,37 @@
 
 
 @dataclass
-class CVModellingOutput: 
+class CVModellingOutput:
+ """This class stores the data generated by a cross-validation
+ process for a single estimator.
+ 
+ Attributes
+ ----------
+ gts_train: ArrayLike
+ A (n_repetitions * n_outer_splits) list of arrays representing training ground-truth.
+ gts_val: ArrayLike
+ A (n_repetitions * n_outer_splits) list of arrays representing validation ground-truth.
+ gts_train_conc: ArrayLike
+ A list of (n_repetitions * n_outer_splits) data points representing pooled training ground-truth.
+ gts_val_conc: ArrayLike
+ A list of (n_repetitions * n_outer_splits) data points representing pooled validation ground-truth.
+ probas_train: ArrayLike
+ A (n_repetitions * n_outer_splits) list of arrays representing training predicted probabilities.
+ probas_val: ArrayLike
+ A (n_repetitions * n_outer_splits) list of arrays representing validation predicted probabilities.
+ probas_train_conc: ArrayLike
+ A list of (n_repetitions * n_outer_splits) data points representing pooled training predicted probabilities.
+ probas_val_conc: ArrayLike
+ A list of (n_repetitions * n_outer_splits) data points representing pooled validation predicted probabilities.
+ models: List[Estimator]
+ A list of (n_repetitions * n_outer_splits) fitted estimators.
+ errors: Optional[ArrayLike]
+ A (n_repetitions * n_outer_splits) list of validation prediction errors.
+ correct: Optional[ArrayLike]
+ A (n_repetitions * n_outer_splits) list of validation correct predictions.
+ features: Optional[ArrayLike]
+ A (n_repetitions * n_outer_splits) list of subsets of selected features.
+ """
  gts_train: ArrayLike
  gts_val: ArrayLike
  gts_train_conc: ArrayLike

diff --git a/src/modelsight/calibration/calib.py b/src/modelsight/calibration/calib.py
@@ -1,3 +1,8 @@
+"""
+This file deals with the implementation of the Hosmer-Lemeshow plot for the
+assessment of calibration of predicted probabilities.
+"""
+
 import numpy as np
 from typing import Tuple
 import matplotlib.pyplot as plt
@@ -6,11 +11,14 @@
 
 
 def ntile_name(n: int) -> str:
- """Returns the ntile name corresponding to an ntile integer.
+ """
+ Returns the ntile name corresponding to an ntile integer.
+ 
  Parameters
  ----------
  n : int
  An ntile integer.
+ 
  Returns
  -------
  ntile_name : str
@@ -30,13 +38,16 @@ def ntile_name(n: int) -> str:
 
 def make_recarray(y_true: ArrayLike,
  y_pred: ArrayLike) -> np.recarray:
- """Combines arrays into a recarray.
+ """
+ Combines arrays into a recarray.
+ 
  Parameters
  ----------
  y_true : array
  Observed labels, either 0 or 1.
  y_pred : array
  Predicted probabilities, floats on [0, 1].
+ 
  Returns
  -------
  table : recarray
@@ -53,7 +64,9 @@ def make_recarray(y_true: ArrayLike,
 def hosmer_lemeshow_table(y_true: ArrayLike,
  y_pred: ArrayLike,
  n_bins: int = 10) -> np.recarray:
- """Constructs a Hosmer–Lemeshow table.
+ """
+ Constructs a Hosmer–Lemeshow table.
+ 
  Parameters
  ----------
  y_true : array
@@ -63,6 +76,7 @@ def hosmer_lemeshow_table(y_true: ArrayLike,
  n_bins : int, optional
  The number of groups to create. The default value is 10, which
  corresponds to deciles of predicted probabilities.
+ 
  Returns
  -------
  table : recarray
@@ -100,26 +114,28 @@ def hosmer_lemeshow_plot(y_true: ArrayLike,
 
  Parameters
  ----------
- y_true: ArrayLike
+ y_true : ArrayLike
  (n_obs,) shaped array of ground-truth values
- y_pred: ArrayLike
+ y_pred : ArrayLike
  (n_obs,) shaped array of predicted probabilities
- n_bins: int
+ n_bins : int
  Number of bins to group observed and predicted probabilities into
- colors: Tuple[str, str]
+ colors : Tuple[str, str]
  Pair of colors for observed (line) and predicted (vertical bars) probabilities.
- annotate_bars: bool
+ annotate_bars : bool
  Whether bars should be annotated with the number of observed probabilities in each bin.
- title: str
+ title : str
  Title to display on top of the calibration plot.
- brier_score_annot: str
+ brier_score_annot : str
  Optional Brier score (95% CI) annotation on the top-left corner.
- ax: plt.Axes
+ ax : plt.Axes
  A matplotlib Axes object to draw the calibration plot into. If None, an Axes object is created by default.
+ 
  Returns
  -------
- Tuple[plt.Figure, plt.Axes]:
- Corresponding figure and Axes
+ f, ax : Tuple[plt.Figure, plt.Axes]
+ f: the matplotlib Figure that holds the calibration plot
+ ax: the matplotlib Axes the calibration plot is drawn into
  """
  table = hosmer_lemeshow_table(y_true, y_pred, n_bins)
  # transform observed and predicted frequencies in percentage relative to the bin dimension