Skip to content

Commit

Permalink
distributions documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
victormvy committed Jan 26, 2024
1 parent fe2a8a3 commit 21c3b5d
Show file tree
Hide file tree
Showing 7 changed files with 232 additions and 55 deletions.
6 changes: 5 additions & 1 deletion dlordinal/distributions/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from .beta_distribution import get_beta_probabilities
from .binomial_distribution import get_binomial_probabilities
from .exponential_distribution import get_exponential_probabilities
from .general_triangular_distribution import get_general_triangular_probabilities
from .general_triangular_distribution import (
get_general_triangular_params,
get_general_triangular_probabilities,
)
from .poisson_distribution import get_poisson_probabilities
from .triangular_distribution import get_triangular_probabilities

Expand All @@ -11,5 +14,6 @@
"get_binomial_probabilities",
"get_poisson_probabilities",
"get_triangular_probabilities",
"get_general_triangular_params",
"get_general_triangular_probabilities",
]
3 changes: 2 additions & 1 deletion dlordinal/distributions/binomial_distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@


def get_binomial_probabilities(J):
"""Get probabilities for the binomial distribution for ``J`` classes or splits.
"""Get probabilities for the binomial distribution for ``J`` classes or splits
using the approach described in :footcite:t:`liu2020unimodal`.
The :math:`[0,1]` interval is split into ``J`` intervals and the probability for
each interval is computed as the difference between the value of the binomial
probability function for the interval boundaries. The probability for the first
Expand Down
4 changes: 3 additions & 1 deletion dlordinal/distributions/exponential_distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@


def get_exponential_probabilities(J, p=1.0, tau=1.0):
"""Get probabilities from exponential distribution for ``J`` classes or splits.
"""Get probabilities from exponential distribution for ``J`` classes or splits as
described in :footcite:t:`liu2020unimodal` and :footcite:t:`vargas2023exponential`.
The :math:`[0,1]` interval is split into ``J`` intervals and the probability for
each interval is computed as the difference between the value of the exponential
function for the interval boundaries. The probability for the first interval is
Expand Down Expand Up @@ -38,6 +39,7 @@ def get_exponential_probabilities(J, p=1.0, tau=1.0):
Example
-------
>>> from dlordinal.distributions import get_exponential_probabilities
>>> get_exponential_probabilities(5)
array([[0.63640865, 0.23412166, 0.08612854, 0.03168492, 0.01165623],
[0.19151597, 0.52059439, 0.19151597, 0.07045479, 0.02591887],
Expand Down
131 changes: 106 additions & 25 deletions dlordinal/distributions/general_triangular_distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,60 +9,96 @@

def get_general_triangular_params(J: int, alphas: np.ndarray, verbose: int = 0):
"""
Get the parameters for the general triangular distribution.
Get the parameters (:math:`a`, :math:`b` and :math:`c`) for the
general triangular distribution. The parameters are computed using
the :math:`\\boldsymbol{\\alpha}` values and the number of classes
:math:`J`. The :math:`\\boldsymbol{\\alpha}` vector contains two :math:`\\alpha`
values for each class or split. :math:`\\alpha_0` should always be 0, given that
the error on the left side of the first class is always 0. In the same way, the
:math:`\\alpha_{2J}` value should always be 0, given that the error on the right
side of the last class is always 0. The :math:`\\alpha` values for the other
classes should be between 0 and 1.
The parameters :math:`a`, :math:`b` and :math:`c` for class :math:`j` are computed
as described in :footcite:t:`vargas2023gentri`.
Parameters
----------
J : int
Number of classes.
Number of classes or splits.
alphas : np.ndarray
Array of alphas.
Array that represents the error on the left and right side of each class.
It is the :math:`\\boldsymbol{\\alpha}` vector described
in :footcite:t:`vargas2023gentri`.
verbose : int, optional
Verbosity level, by default 0.
Raises
------
ValueError
If the number of classes :math:`J` is less than 2.
If the :math:`\\boldsymbol{\\alpha}` vector is not a numpy array of shape
:math:`(2J,)`.
Returns
-------
list
List of dictionaries with the parameters for each class.
Example
-------
>>> from dlordinal.distributions import get_general_triangular_params
>>> get_general_triangular_params(5, [0, 0.1, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.1, 0])
[{'alpha2j_1': 0, 'alpha2j': 0.05, 'a': 0, 'b': 0.25760143110525874, 'c': 0}, {'alpha2j_1': 0.05, 'alpha2j': 0.05, 'a': 0.153752470442574, 'b': 0.446247529557426, 'c': 0.3}, {'alpha2j_1': 0.05, 'alpha2j': 0.05, 'a': 0.353752470442574, 'b': 0.646247529557426, 'c': 0.5}, {'alpha2j_1': 0.05, 'alpha2j': 0.1, 'a': 0.550779686438060, 'b': 0.875486049708105, 'c': 0.7}, {'alpha2j_1': 0.0, 'alpha2j': 0, 'a': 0.8, 'b': 1, 'c': 1}]
"""

alphas = np.array(alphas)
if not isinstance(alphas, np.ndarray) or alphas.shape != (2 * J,):
raise ValueError(
f"alphas must be a numpy array of shape (2 * n,), but got {alphas.shape}"
)

def abc1(n, alpha):
if J < 2:
raise ValueError(f"J must be greater than 1, but got {J}")

def abc1(J, alpha):
a = 0
b = 1.0 / ((1.0 - math.sqrt(alpha)) * n)
b = 1.0 / ((1.0 - math.sqrt(alpha)) * J)
c = 0
return a, b, c

def abcJ(n, alpha):
a = 1.0 - 1.0 / (n * (1.0 - math.sqrt(alpha)))
def abcJ(J, alpha):
a = 1.0 - 1.0 / (J * (1.0 - math.sqrt(alpha)))
b = 1
c = 1
return a, b, c

def abcj(n, j, alpha2j_1, alpha2j):
def abcj(J, j, alpha2j_1, alpha2j):
# Calculation of the terms of the system of equations
# b = ...
numa2 = 2 * n**2 - 2 * n**2 * alpha2j_1
numa1 = 2 * j * n * alpha2j_1 - n * alpha2j_1 - 4 * n * j + 4 * n
numa2 = 2 * J**2 - 2 * J**2 * alpha2j_1
numa1 = 2 * j * J * alpha2j_1 - J * alpha2j_1 - 4 * J * j + 4 * J
numa0 = 2 * j**2 - 4 * j + 2
dena0 = 2 * j * n * alpha2j_1 - n * alpha2j_1
dena1 = -(2 * n**2 * alpha2j_1)
dena0 = 2 * j * J * alpha2j_1 - J * alpha2j_1
dena1 = -(2 * J**2 * alpha2j_1)

# a = ...
numb2 = 2 * n**2 * alpha2j - 2 * n**2
numb1 = 4 * n * j - 2 * n * j * alpha2j + n * alpha2j
numb2 = 2 * J**2 * alpha2j - 2 * J**2
numb1 = 4 * J * j - 2 * J * j * alpha2j + J * alpha2j
numb0 = -(2 * j**2)
denb1 = 2 * n**2 * alpha2j
denb0 = -2 * n * j * alpha2j + n * alpha2j
denb1 = 2 * J**2 * alpha2j
denb0 = -2 * J * j * alpha2j + J * alpha2j

a, b = symbols("a, b")
c = (2 * j - 1) / (2 * n)
c = (2 * j - 1) / (2 * J)
eq1 = Eq((numa2 * a**2 + numa1 * a + numa0) / (dena0 + dena1 * a), b)
eq2 = Eq((numb2 * b**2 + numb1 * b + numb0) / (denb1 * b + denb0), a)

try:
nsol = nsolve([eq1, eq2], [a, b], [1, 1])

if nsol[0] < (j - 1) / n and nsol[1] > j / n:
if nsol[0] < (j - 1) / J and nsol[1] > j / J:
return nsol[0], nsol[1], c
except ValueError:
pass
Expand All @@ -86,7 +122,7 @@ def abcj(n, j, alpha2j_1, alpha2j):
if isinstance(s_b, Add):
s_b = s_b.args[0]

if s_a < (j - 1) / n and s_b > j / n:
if s_a < (j - 1) / J and s_b > j / J:
return s_a, s_b, c

raise ValueError(f"Could not find solution for {j=}, {alpha2j_1=}, {alpha2j=}")
Expand Down Expand Up @@ -122,25 +158,70 @@ def abcj(n, j, alpha2j_1, alpha2j):
return params


def get_general_triangular_probabilities(n: int, alphas: np.ndarray, verbose: int = 0):
"""
Get the probabilities for the general triangular distribution.
def get_general_triangular_probabilities(J: int, alphas: np.ndarray, verbose: int = 0):
"""Get probabilities from triangular distributions for ``J`` classes or splits.
The :math:`[0,1]` interval is split into ``J`` intervals and the probability for
each interval is computed as the difference between the value of the triangular
distribution function for the interval boundaries. The probability for the first
interval is computed as the value of the triangular distribution function for the
first interval boundary.
The triangular distribution function is denoted as :math:`\\text{f}(x, a, b, c)`.
The parameters :math:`a`, :math:`b` and :math:`c` for class :math:`j` are computed
as described in :footcite:t:`vargas2023gentri`.
Parameters
----------
n : int
J : int
Number of classes.
alphas : np.ndarray
Array of alphas.
verbose : int, optional
Verbosity level, by default 0.
Raises
------
ValueError
If ``J`` is not a positive integer greater than 1.
If ``alphas`` is not a numpy array of shape :math:`(2J,)`.
Returns
-------
probs : 2d array-like of shape (J, J)
Matrix of probabilities where each row represents the true class
and each column the probability for class j.
Example
-------
>>> from dlordinal.distributions import get_general_triangular_probabilities
>>> get_general_triangular_probabilities(
... 5,
... [0, 0.1, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.1, 0]
... )
array([[0.95, 0.05, 0. , 0. , 0. ],
[0.05, 0.9 , 0.05, 0. , 0. ],
[0. , 0.05, 0.9 , 0.05, 0. ],
[0. , 0. , 0.05, 0.85, 0.1 ],
[0. , 0. , 0. , 0. , 1. ]], dtype=float32)
"""

intervals = get_intervals(n)
if J < 2:
raise ValueError(f"J must be greater than 1, but got {J}")

if isinstance(alphas, list):
alphas = np.array(alphas)

if not isinstance(alphas, np.ndarray) or alphas.shape != (2 * J,):
raise ValueError(
f"alphas must be a numpy array or list of shape (2 * n,),"
f" but got {{alphas.shape}}"
)

intervals = get_intervals(J)
probs = []

# Compute probability for each interval (class) using the distribution function.
params = get_general_triangular_params(n, alphas, verbose)
params = get_general_triangular_params(J, alphas, verbose)

for param in params:
a = param["a"]
Expand Down
14 changes: 13 additions & 1 deletion dlordinal/distributions/poisson_distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@


def get_poisson_probabilities(J):
"""Get probabilities from poisson distribution for ``J`` classes or splits.
"""Get probabilities from poisson distribution for ``J`` classes or splits using the
methodology described in :footcite:t:`liu2020unimodal`.
The :math:`[0,1]` interval is split into ``J`` intervals and the probability for
each interval is computed as the difference between the value of the poisson
probability function for the interval boundaries. The probability for the first
Expand Down Expand Up @@ -32,6 +33,17 @@ def get_poisson_probabilities(J):
probs : 2d array-like of shape (J, J)
Matrix of probabilities where each row represents the true class
and each column the probability for class j.
Example
-------
>>> from dlordinal.distributions import get_poisson_probabilities
>>> get_poisson_probabilities(5)
array([[0.23414552, 0.23414552, 0.19480578, 0.17232403, 0.16457916],
[0.18896888, 0.21635436, 0.21635436, 0.19768881, 0.18063359],
[0.17822335, 0.19688341, 0.21214973, 0.21214973, 0.20059378],
[0.17919028, 0.18931175, 0.20370191, 0.21389803, 0.21389803],
[0.18400408, 0.18903075, 0.19882883, 0.21031236, 0.21782399]])
"""

if J < 2 or not isinstance(J, int):
Expand Down
Loading

0 comments on commit 21c3b5d

Please sign in to comment.