distributions documentation

ayrna · Jan 26, 2024 · 21c3b5d · 21c3b5d
1 parent fe2a8a3
commit 21c3b5d
Show file tree

Hide file tree

Showing 7 changed files with 232 additions and 55 deletions.
diff --git a/dlordinal/distributions/__init__.py b/dlordinal/distributions/__init__.py
@@ -1,7 +1,10 @@
 from .beta_distribution import get_beta_probabilities
 from .binomial_distribution import get_binomial_probabilities
 from .exponential_distribution import get_exponential_probabilities
-from .general_triangular_distribution import get_general_triangular_probabilities
+from .general_triangular_distribution import (
+    get_general_triangular_params,
+    get_general_triangular_probabilities,
+)
 from .poisson_distribution import get_poisson_probabilities
 from .triangular_distribution import get_triangular_probabilities
 
@@ -11,5 +14,6 @@
     "get_binomial_probabilities",
     "get_poisson_probabilities",
     "get_triangular_probabilities",
+    "get_general_triangular_params",
     "get_general_triangular_probabilities",
 ]
diff --git a/dlordinal/distributions/binomial_distribution.py b/dlordinal/distributions/binomial_distribution.py
@@ -3,7 +3,8 @@
 
 
 def get_binomial_probabilities(J):
-    """Get probabilities for the binomial distribution for ``J`` classes or splits.
+    """Get probabilities for the binomial distribution for ``J`` classes or splits
+    using the approach described in :footcite:t:`liu2020unimodal`.
     The :math:`[0,1]` interval is split into ``J`` intervals and the probability for
     each interval is computed as the difference between the value of the binomial
     probability function for the interval boundaries. The probability for the first

diff --git a/dlordinal/distributions/exponential_distribution.py b/dlordinal/distributions/exponential_distribution.py
@@ -3,7 +3,8 @@
 
 
 def get_exponential_probabilities(J, p=1.0, tau=1.0):
-    """Get probabilities from exponential distribution for ``J`` classes or splits.
+    """Get probabilities from exponential distribution for ``J`` classes or splits as
+    described in :footcite:t:`liu2020unimodal` and :footcite:t:`vargas2023exponential`.
     The :math:`[0,1]` interval is split into ``J`` intervals and the probability for
     each interval is computed as the difference between the value of the exponential
     function for the interval boundaries. The probability for the first interval is
@@ -38,6 +39,7 @@ def get_exponential_probabilities(J, p=1.0, tau=1.0):
 
     Example
     -------
+    >>> from dlordinal.distributions import get_exponential_probabilities
     >>> get_exponential_probabilities(5)
     array([[0.63640865, 0.23412166, 0.08612854, 0.03168492, 0.01165623],
     [0.19151597, 0.52059439, 0.19151597, 0.07045479, 0.02591887],

diff --git a/dlordinal/distributions/general_triangular_distribution.py b/dlordinal/distributions/general_triangular_distribution.py
@@ -9,60 +9,96 @@
 
 def get_general_triangular_params(J: int, alphas: np.ndarray, verbose: int = 0):
     """
-    Get the parameters for the general triangular distribution.
+    Get the parameters (:math:`a`, :math:`b` and :math:`c`) for the
+    general triangular distribution. The parameters are computed using
+    the :math:`\\boldsymbol{\\alpha}` values and the number of classes
+    :math:`J`. The :math:`\\boldsymbol{\\alpha}` vector contains two :math:`\\alpha`
+    values for each class or split. :math:`\\alpha_0` should always be 0, given that
+    the error on the left side of the first class is always 0. In the same way, the
+    :math:`\\alpha_{2J}` value should always be 0, given that the error on the right
+    side of the last class is always 0. The :math:`\\alpha` values for the other
+    classes should be between 0 and 1.
+
+    The parameters :math:`a`, :math:`b` and :math:`c` for class :math:`j` are computed
+    as described in :footcite:t:`vargas2023gentri`.
 
     Parameters
     ----------
     J : int
-        Number of classes.
+        Number of classes or splits.
     alphas : np.ndarray
-        Array of alphas.
+        Array that represents the error on the left and right side of each class.
+        It is the :math:`\\boldsymbol{\\alpha}` vector described
+        in :footcite:t:`vargas2023gentri`.
     verbose : int, optional
         Verbosity level, by default 0.
+
+    Raises
+    ------
+    ValueError
+        If the number of classes :math:`J` is less than 2.
+        If the :math:`\\boldsymbol{\\alpha}` vector is not a numpy array of shape
+        :math:`(2J,)`.
+
+    Returns
+    -------
+    list
+        List of dictionaries with the parameters for each class.
+
+    Example
+    -------
+    >>> from dlordinal.distributions import get_general_triangular_params
+    >>> get_general_triangular_params(5, [0, 0.1, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.1, 0])
+    [{'alpha2j_1': 0, 'alpha2j': 0.05, 'a': 0, 'b': 0.25760143110525874, 'c': 0}, {'alpha2j_1': 0.05, 'alpha2j': 0.05, 'a': 0.153752470442574, 'b': 0.446247529557426, 'c': 0.3}, {'alpha2j_1': 0.05, 'alpha2j': 0.05, 'a': 0.353752470442574, 'b': 0.646247529557426, 'c': 0.5}, {'alpha2j_1': 0.05, 'alpha2j': 0.1, 'a': 0.550779686438060, 'b': 0.875486049708105, 'c': 0.7}, {'alpha2j_1': 0.0, 'alpha2j': 0, 'a': 0.8, 'b': 1, 'c': 1}]
+
     """
+
     alphas = np.array(alphas)
     if not isinstance(alphas, np.ndarray) or alphas.shape != (2 * J,):
         raise ValueError(
             f"alphas must be a numpy array of shape (2 * n,), but got {alphas.shape}"
         )
 
-    def abc1(n, alpha):
+    if J < 2:
+        raise ValueError(f"J must be greater than 1, but got {J}")
+
+    def abc1(J, alpha):
         a = 0
-        b = 1.0 / ((1.0 - math.sqrt(alpha)) * n)
+        b = 1.0 / ((1.0 - math.sqrt(alpha)) * J)
         c = 0
         return a, b, c
 
-    def abcJ(n, alpha):
-        a = 1.0 - 1.0 / (n * (1.0 - math.sqrt(alpha)))
+    def abcJ(J, alpha):
+        a = 1.0 - 1.0 / (J * (1.0 - math.sqrt(alpha)))
         b = 1
         c = 1
         return a, b, c
 
-    def abcj(n, j, alpha2j_1, alpha2j):
+    def abcj(J, j, alpha2j_1, alpha2j):
         # Calculation of the terms of the system of equations
         # b = ...
-        numa2 = 2 * n**2 - 2 * n**2 * alpha2j_1
-        numa1 = 2 * j * n * alpha2j_1 - n * alpha2j_1 - 4 * n * j + 4 * n
+        numa2 = 2 * J**2 - 2 * J**2 * alpha2j_1
+        numa1 = 2 * j * J * alpha2j_1 - J * alpha2j_1 - 4 * J * j + 4 * J
         numa0 = 2 * j**2 - 4 * j + 2
-        dena0 = 2 * j * n * alpha2j_1 - n * alpha2j_1
-        dena1 = -(2 * n**2 * alpha2j_1)
+        dena0 = 2 * j * J * alpha2j_1 - J * alpha2j_1
+        dena1 = -(2 * J**2 * alpha2j_1)
 
         # a = ...
-        numb2 = 2 * n**2 * alpha2j - 2 * n**2
-        numb1 = 4 * n * j - 2 * n * j * alpha2j + n * alpha2j
+        numb2 = 2 * J**2 * alpha2j - 2 * J**2
+        numb1 = 4 * J * j - 2 * J * j * alpha2j + J * alpha2j
         numb0 = -(2 * j**2)
-        denb1 = 2 * n**2 * alpha2j
-        denb0 = -2 * n * j * alpha2j + n * alpha2j
+        denb1 = 2 * J**2 * alpha2j
+        denb0 = -2 * J * j * alpha2j + J * alpha2j
 
         a, b = symbols("a, b")
-        c = (2 * j - 1) / (2 * n)
+        c = (2 * j - 1) / (2 * J)
         eq1 = Eq((numa2 * a**2 + numa1 * a + numa0) / (dena0 + dena1 * a), b)
         eq2 = Eq((numb2 * b**2 + numb1 * b + numb0) / (denb1 * b + denb0), a)
 
         try:
             nsol = nsolve([eq1, eq2], [a, b], [1, 1])
 
-            if nsol[0] < (j - 1) / n and nsol[1] > j / n:
+            if nsol[0] < (j - 1) / J and nsol[1] > j / J:
                 return nsol[0], nsol[1], c
         except ValueError:
             pass
@@ -86,7 +122,7 @@ def abcj(n, j, alpha2j_1, alpha2j):
             if isinstance(s_b, Add):
                 s_b = s_b.args[0]
 
-            if s_a < (j - 1) / n and s_b > j / n:
+            if s_a < (j - 1) / J and s_b > j / J:
                 return s_a, s_b, c
 
         raise ValueError(f"Could not find solution for {j=}, {alpha2j_1=}, {alpha2j=}")
@@ -122,25 +158,70 @@ def abcj(n, j, alpha2j_1, alpha2j):
     return params
 
 
-def get_general_triangular_probabilities(n: int, alphas: np.ndarray, verbose: int = 0):
-    """
-    Get the probabilities for the general triangular distribution.
+def get_general_triangular_probabilities(J: int, alphas: np.ndarray, verbose: int = 0):
+    """Get probabilities from triangular distributions for ``J`` classes or splits.
+    The :math:`[0,1]` interval is split into ``J`` intervals and the probability for
+    each interval is computed as the difference between the value of the triangular
+    distribution function for the interval boundaries. The probability for the first
+    interval is computed as the value of the triangular distribution function for the
+    first interval boundary.
+
+    The triangular distribution function is denoted as :math:`\\text{f}(x, a, b, c)`.
+    The parameters :math:`a`, :math:`b` and :math:`c` for class :math:`j` are computed
+    as described in :footcite:t:`vargas2023gentri`.
 
     Parameters
     ----------
-    n : int
+    J : int
         Number of classes.
     alphas : np.ndarray
         Array of alphas.
     verbose : int, optional
         Verbosity level, by default 0.
+
+    Raises
+    ------
+    ValueError
+        If ``J`` is not a positive integer greater than 1.
+        If ``alphas`` is not a numpy array of shape :math:`(2J,)`.
+
+    Returns
+    -------
+    probs : 2d array-like of shape (J, J)
+        Matrix of probabilities where each row represents the true class
+        and each column the probability for class j.
+
+    Example
+    -------
+    >>> from dlordinal.distributions import get_general_triangular_probabilities
+    >>> get_general_triangular_probabilities(
+    ...     5,
+    ...     [0, 0.1, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.1, 0]
+    ... )
+    array([[0.95, 0.05, 0.  , 0.  , 0.  ],
+           [0.05, 0.9 , 0.05, 0.  , 0.  ],
+           [0.  , 0.05, 0.9 , 0.05, 0.  ],
+           [0.  , 0.  , 0.05, 0.85, 0.1 ],
+           [0.  , 0.  , 0.  , 0.  , 1.  ]], dtype=float32)
     """
 
-    intervals = get_intervals(n)
+    if J < 2:
+        raise ValueError(f"J must be greater than 1, but got {J}")
+
+    if isinstance(alphas, list):
+        alphas = np.array(alphas)
+
+    if not isinstance(alphas, np.ndarray) or alphas.shape != (2 * J,):
+        raise ValueError(
+            f"alphas must be a numpy array or list of shape (2 * n,),"
+            f" but got {{alphas.shape}}"
+        )
+
+    intervals = get_intervals(J)
     probs = []
 
     # Compute probability for each interval (class) using the distribution function.
-    params = get_general_triangular_params(n, alphas, verbose)
+    params = get_general_triangular_params(J, alphas, verbose)
 
     for param in params:
         a = param["a"]

diff --git a/dlordinal/distributions/poisson_distribution.py b/dlordinal/distributions/poisson_distribution.py
@@ -4,7 +4,8 @@
 
 
 def get_poisson_probabilities(J):
-    """Get probabilities from poisson distribution for ``J`` classes or splits.
+    """Get probabilities from poisson distribution for ``J`` classes or splits using the
+    methodology described in :footcite:t:`liu2020unimodal`.
     The :math:`[0,1]` interval is split into ``J`` intervals and the probability for
     each interval is computed as the difference between the value of the poisson
     probability function for the interval boundaries. The probability for the first
@@ -32,6 +33,17 @@ def get_poisson_probabilities(J):
     probs : 2d array-like of shape (J, J)
         Matrix of probabilities where each row represents the true class
         and each column the probability for class j.
+
+    Example
+    -------
+    >>> from dlordinal.distributions import get_poisson_probabilities
+    >>> get_poisson_probabilities(5)
+    array([[0.23414552, 0.23414552, 0.19480578, 0.17232403, 0.16457916],
+        [0.18896888, 0.21635436, 0.21635436, 0.19768881, 0.18063359],
+        [0.17822335, 0.19688341, 0.21214973, 0.21214973, 0.20059378],
+        [0.17919028, 0.18931175, 0.20370191, 0.21389803, 0.21389803],
+        [0.18400408, 0.18903075, 0.19882883, 0.21031236, 0.21782399]])
+
     """
 
     if J < 2 or not isinstance(J, int):