Skip to content

Commit

Permalink
Merge branch 'main' into es-tutorials
Browse files Browse the repository at this point in the history
  • Loading branch information
LimbeckKat authored Jan 10, 2025
2 parents 21143c7 + af754ba commit 9b0e8a1
Show file tree
Hide file tree
Showing 8 changed files with 192 additions and 60 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# magnipy: Metric Space Magnitude Computations 🔎
# magnipy: Metric Space Magnitude Computations 🔍

This is a repository for computing the **_magnitude of a metric space_**, which encodes the **effective size, diversity, and geometry** of a metric space. Given a dataset or distance matrix, **_magnitude_** measures the **effective number of distinct points** in the space at a scale of dissimilarity between observations.
We introduce the following codebase to compute and compare the magnitude of metric spaces.
Expand Down
85 changes: 73 additions & 12 deletions magnipy/diversipy.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ def __init__(
An object that can be used to compute and compare the magnitude functions of multiple spaces.
"""

if not isinstance(Xs, list):
raise Exception(
"Xs needs to be a list of one or multiple datasets."
)

self._Xs = Xs

if method is None:
Expand All @@ -97,8 +102,21 @@ def __init__(
self._p = p
self._n_neighbors = n_neighbors
self._target_prop = target_prop
self._Mags = None

if ref_space is not None:
if not isinstance(ref_space, int):
raise Exception(
"ref_space needs to be an integer index corresponding to the index of the reference dataset in the list of input datasets."
)
else:
if ref_space >= len(Xs):
raise Exception(
"ref_space needs to be an integer index corresponding to the index of the reference dataset in the list of input datasets."
)

self._ref_space = ref_space

self._Mags = None
self._t_convs = None
self._MagAreas = None
self._MagDiffs = None
Expand All @@ -120,15 +138,58 @@ def set_ref_space(self, ref_space):
self._ref_space = ref_space
return None

def get_t_convs(self):
    """
    Get the approximate convergence scales for all datasets.

    If magnitude objects are already stored in ``self._Mags`` they are
    reused. Otherwise a temporary ``Magnipy`` object is created for each
    dataset only to query its convergence scale; these temporary objects
    are not stored on the instance.

    Returns
    -------
    t_convs : list
        The value returned by ``get_t_conv()`` for each dataset, in the
        order of ``self._Xs``. The same list is stored in ``self._t_convs``.
    """
    cached_mags = self._Mags
    t_convs = []
    for i, X in enumerate(self._Xs):
        if cached_mags is not None:
            # Reuse the magnitude object that was already computed.
            mag = cached_mags[i]
        else:
            # Throwaway object: only used to look up the convergence scale.
            mag = Magnipy(
                X,
                ts=self._ts,
                scale_finding="convergence",
                target_prop=self._target_prop,
                n_ts=2,
                log_scale=False,
                method=self._method,
                metric=self._metric,
                p=self._p,
                one_point_property=True,
                return_log_scale=False,
                perturb_singularities=True,
                recompute=False,
                name=self._names[i],
                positive_magnitude=False,
            )
        t_convs.append(mag.get_t_conv())
    self._t_convs = t_convs
    return t_convs

def get_common_scales(self, quantile=0.5):
"""
Determine the shared evaluation interval for the magnitude functions.
To do this, the convergence scales of the magnitude functions are computed and
the shared scales are determined as a quantile (e.g. the median) of the convergence scales for all datasets.
To do this, the approximate convergence scale of the reference dataset is computed
and used as the common cutoff scale to define the evaluation interval.
Otherwise, if no reference space is set the convergence scales of all magnitude
functions are computed and the shared evaluation scales are determined as a
quantile (e.g. the median) of the convergence scales for all datasets.
Parameters
----------
quantile : float
The quantile to use for determining the common scales.
By default 0.5 (median convergence scale).
"""
if self._t_convs is None:
t_convs = self.get_t_convs()

if self._ref_space is not None:
t_cut = self._Mags[self._ref_space].get_t_conv()
t_cut = self._t_convs[self._ref_space]
else:
if self._q is None:
quantile = 0.5
Expand All @@ -141,13 +202,13 @@ def get_common_scales(self, quantile=0.5):
log_scale=False,
one_point_property=True,
)
# self._ts = ts
self._t_cut = t_cut
return ts

def change_scales(self, ts=None, t_cut=None):
"""
Change the evaluation scales of the magnitude functions.
If no scales are given, the evaluation interval is reset to None.
Parameters
----------
Expand Down Expand Up @@ -178,22 +239,23 @@ def change_scales(self, ts=None, t_cut=None):
# │ Compute Magnitude Functions │
# ╰──────────────────────────────────────────────────────────╯

def _compute_magnitude(self):
def _compute_magnitude(self, quantile=0.5):
"""
Compute the magnitude functions for all datasets.
"""

t_convs = []
Mags = []
if self._ts is None:
t_convs = self.get_t_convs()
ts = self.get_common_scales(quantile=quantile)
self._ts = ts

Mags = []
for i, X in enumerate(self._Xs):
Mag = Magnipy(
X,
ts=self._ts,
ts=ts,
scale_finding="convergence",
target_prop=self._target_prop,
n_ts=2,
log_scale=False,
method=self._method,
metric=self._metric,
p=self._p,
Expand All @@ -204,7 +266,6 @@ def _compute_magnitude(self):
name=self._names[i],
positive_magnitude=False,
)
t_convs.append(Mag.get_t_conv())
Mags.append(Mag)
self._Mags = Mags
self._t_convs = t_convs
Expand Down
95 changes: 75 additions & 20 deletions magnipy/magnipy.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
compute_t_conv,
)
from magnipy.magnitude.dimension import (
magitude_dimension_profile,
magitude_dimension_profile_interp,
magnitude_dimension,
magnitude_dimension_profile_exact,
)
Expand Down Expand Up @@ -73,7 +73,7 @@ def __init__(
ts : array_like, shape (`n_ts`, )
The scales at which to evaluate the magnitude functions. If None, the scales are computed automatically.
n_ts : int
The number of scales at which to evaluate the magnitude functions.
The number of scales at which to evaluate the magnitude functions. Computations are faster for fewer scales and more accurate for more scales.
log_scale : bool
Whether to use a log-scale for the evaluation scales.
return_log_scale : bool
Expand Down Expand Up @@ -102,7 +102,7 @@ def __init__(
Parameters for the computation of magnitude:
method : str
The method to use to compute the magnitude functions.
One of 'cholesky', 'scipy', 'scipy_sym', 'inv', 'pinv', 'conjugate_gradient_iteration', 'cg'.
One of 'cholesky', 'scipy', 'scipy_sym', 'naive', 'pinv', 'conjugate_gradient_iteration', 'cg'.
one_point_property : bool
Whether to enforce the one-point property.
perturb_singularities : bool
Expand All @@ -120,7 +120,50 @@ def __init__(
A Magnipy object.
"""

### Check if the input matrix X is valid
if not isinstance(X, np.ndarray):
raise Exception("The input matrix must be a numpy array.")

### Check if the inputs used for scale-finding are valid
if isinstance(target_prop, float):
min_mag = 1 / X.shape[0]
if (target_prop < min_mag) | (target_prop > 1):
raise Exception(
f"The target proportion must be between {min_mag} and 1."
)
else:
raise Exception("The target proportion must be a float.")

self._proportion_scattered = target_prop
if (scale_finding != "scattered") & (scale_finding != "convergence"):
raise Exception(
"The scale finding method must be either 'scattered' or 'convergence'."
)
self._scale_finding = scale_finding

### Check if the evaluation scales are valid
self._ts = ts
if not isinstance(n_ts, int):
raise Exception("n_ts must be an integer.")
self._n_ts = n_ts

### Check if the adjacency matrix is valid
if Adj is not None:
if not isinstance(Adj, np.ndarray):
raise Exception("The adjacency matrix must be a numpy array.")
if Adj.shape[0] != X.shape[0]:
raise Exception(
"The adjacency matrix must have the same number of rows as the dataset."
)
if Adj.shape[1] != X.shape[0]:
raise Exception(
"The adjacency matrix must have the same number of columns as the dataset."
)

### Setting up the distance computations and the similarity matrix
self._Adj = Adj
self._metric = metric

if metric != "precomputed":
self._X = X

Expand All @@ -141,31 +184,31 @@ def compute_distances(X, X2, Adj=None):
self._target_value = target_prop * self._D.shape[0]
self._Z = similarity_matrix(self._D)
else:

if X.shape[0] != X.shape[1]:
raise Exception(
"The precomputed distance matrix must be square."
)

self._X = None
self._D = X
self._n = self._D.shape[0]
self._Z = similarity_matrix(self._D)
self._target_value = target_prop * self._D.shape[0]

self._proportion_scattered = target_prop
if (scale_finding != "scattered") & (scale_finding != "convergence"):
raise Exception(
"The scale finding method must be either 'scattered' or 'convergence'."
)

### Check if the method for computing the magnitude is valid and set up the magnitude computations
if method not in [
"cholesky",
"scipy",
"scipy_sym",
"inv",
"naive",
"pinv",
"conjugate_gradient_iteration",
"cg",
"spread",
"naive",
]:
raise Exception(
"The computation method must be one of 'cholesky', 'scipy', 'scipy_sym', 'inv', 'pinv', 'conjugate_gradient_iteration', 'cg', 'naive', 'spread'."
"The computation method must be one of 'cholesky', 'scipy', 'scipy_sym', 'naive', 'pinv', 'conjugate_gradient_iteration', 'cg', 'spread'."
)

def compute_mag(Z, ts, n_ts=n_ts, get_weights=False):
Expand All @@ -183,22 +226,34 @@ def compute_mag(Z, ts, n_ts=n_ts, get_weights=False):
)

self._compute_mag = compute_mag

self._scale_finding = scale_finding
self._ts = ts
self._n_ts = n_ts
self._log_scale = log_scale
self._method = method
self._metric = metric
# self._p = p
# self._n_neighbors = n_neighbors

### Check if the boolean parameters are valid
for k, arg in enumerate(
[log_scale, return_log_scale, recompute, positive_magnitude]
):
arg_name = [
"log_scale",
"return_log_scale",
"recompute",
"positive_magnitude",
][k]
if not isinstance(arg, bool):
raise Exception(f"{arg_name} must be a boolean.")

self._log_scale = log_scale
self._one_point_property = one_point_property
self._perturb_singularities = perturb_singularities
# self._n_neighbors = n_neighbors
self._return_log_scale = return_log_scale
self._recompute = recompute
self._positive_magnitude = positive_magnitude

### Set the name of the Magnipy object
self._name = name

### Set the other parameters
self._magnitude = None
self._weights = None
self._magnitude_dimension_profile = None
Expand Down Expand Up @@ -484,7 +539,7 @@ def get_magnitude_dimension_profile(self, exact=False, h=None):
(
self._magnitude_dimension_profile,
self._ts_dim,
) = magitude_dimension_profile(
) = magitude_dimension_profile_interp(
mag=self._magnitude,
ts=self._ts,
return_log_scale=self._return_log_scale,
Expand Down
8 changes: 4 additions & 4 deletions magnipy/magnitude/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from magnipy.magnitude.convergence import guess_convergence_scale


def magnitude_from_distances(
def compute_magnitude_from_distances(
D,
ts=np.arange(0.01, 5, 0.01),
method="cholesky",
Expand Down Expand Up @@ -76,7 +76,7 @@ def magnitude_from_distances(
mag_fn = weights_naive
else:
raise Exception(
"The computation method must be one of 'cholesky', 'scipy', 'scipy_sym', 'inv', 'pinv', 'conjugate_gradient_iteration', 'cg', 'naive', 'spread'."
"The computation method must be one of 'cholesky', 'scipy', 'scipy_sym', 'pinv', 'conjugate_gradient_iteration', 'cg', 'naive', 'spread'."
)

weights = magnitude_weights(
Expand Down Expand Up @@ -171,7 +171,7 @@ def compute_magnitude_until_convergence(
)
# print(f"Evaluate magnitude at {self._n_ts} scales between 0 and the approximate convergence scale {self._t_conv}")
return (
magnitude_from_distances(
compute_magnitude_from_distances(
Z,
ts,
method=method,
Expand Down Expand Up @@ -319,7 +319,7 @@ def compute_t_conv(
)

def comp_mag(X, ts):
return magnitude_from_distances(
return compute_magnitude_from_distances(
X,
ts,
method=method,
Expand Down
Loading

0 comments on commit 9b0e8a1

Please sign in to comment.