-
Notifications
You must be signed in to change notification settings - Fork 66
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
PersistenceLengths and its unitary tests #1117
Changes from 5 commits
01ea034
d30299f
768774a
3bbd3af
af41754
009748c
e984496
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -73,7 +73,7 @@ def transform(self, X): | |
|
||
Parameters: | ||
X (list of n x 2 numpy arrays): input persistence diagrams. | ||
|
||
Returns: | ||
numpy array with shape (number of diagrams) x (number of pixels = **resolution[0]** x **resolution[1]**): output persistence images. | ||
""" | ||
|
@@ -196,7 +196,7 @@ def transform(self, X): | |
|
||
Parameters: | ||
X (list of n x 2 numpy arrays): input persistence diagrams. | ||
|
||
Returns: | ||
numpy array with shape (number of diagrams) x (number of samples = **num_landscapes** x **resolution**): output persistence landscapes. | ||
""" | ||
|
@@ -271,7 +271,7 @@ def transform(self, X): | |
|
||
Parameters: | ||
X (list of n x 2 numpy arrays): input persistence diagrams. | ||
|
||
Returns: | ||
numpy array with shape (number of diagrams) x (**resolution**): output persistence silhouettes. | ||
""" | ||
|
@@ -363,7 +363,7 @@ def fit(self, X, y = None): | |
|
||
if self.predefined_grid is None: | ||
if self.resolution is None: # Flexible/exact version | ||
self.grid_ = np.unique(np.concatenate([pd.ravel() for pd in X] + [[-np.inf]], axis=0)) | ||
self.grid_ = np.unique(np.concatenate([pd.ravel() for pd in X] + [[-np.inf]], axis=0)) | ||
else: | ||
_grid_from_sample_range(self, X) | ||
else: | ||
|
@@ -391,7 +391,7 @@ def transform(self, X): | |
|
||
print("Empty list: output has shape [0, len(grid)]") | ||
return np.zeros((N, len(self.grid_))) | ||
|
||
else: | ||
|
||
events = np.concatenate([pd.ravel(order="F") for pd in X], axis=0) | ||
|
@@ -413,7 +413,7 @@ def transform(self, X): | |
i += 1 | ||
for k in range(0, N): | ||
bettis[k].append(bettis[k][-1]) | ||
|
||
return np.array(bettis, dtype=int)[:, 0:-1] | ||
|
||
def fit_transform(self, X, y = None): | ||
|
@@ -485,7 +485,7 @@ def __init__(self, mode="scalar", normalized=True, resolution=100, sample_range= | |
|
||
Parameters: | ||
mode (string): what entropy to compute: either "scalar" for computing the entropy statistics, or "vector" for computing the entropy summary functions (default "scalar"). | ||
normalized (bool): whether to normalize the entropy summary function (default True). Used only if **mode** = "vector". | ||
normalized (bool): whether to normalize the entropy summary function (default True). Used only if **mode** = "vector". | ||
resolution (int): number of sample for the entropy summary function (default 100). Used only if **mode** = "vector". | ||
sample_range ([double, double]): minimum and maximum of the entropy summary function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. Used only if **mode** = "vector". | ||
keep_endpoints (bool): when computing `sample_range`, use the exact extremities. This is mostly useful for plotting, the default is to use a slightly smaller range. | ||
|
@@ -515,16 +515,16 @@ def transform(self, X): | |
|
||
Parameters: | ||
X (list of n x 2 numpy arrays): input persistence diagrams. | ||
|
||
Returns: | ||
numpy array with shape (number of diagrams) x (1 if **mode** = "scalar" else **resolution**): output entropy. | ||
""" | ||
num_diag, Xfit = len(X), [] | ||
new_X = BirthPersistenceTransform().fit_transform(X) | ||
new_X = BirthPersistenceTransform().fit_transform(X) | ||
|
||
for i in range(num_diag): | ||
orig_diagram, new_diagram, num_pts_in_diag = X[i], new_X[i], X[i].shape[0] | ||
|
||
p = new_diagram[:,1] | ||
p = p/np.sum(p) | ||
if self.mode == "scalar": | ||
|
@@ -566,7 +566,7 @@ def __init__(self, threshold=10): | |
Constructor for the TopologicalVector class. | ||
|
||
Parameters: | ||
threshold (int): number of distances to keep (default 10). This is the dimension of the topological vector. If -1, this threshold is computed from the list of persistence diagrams by considering the one with the largest number of points and using the dimension of its corresponding topological vector as threshold. | ||
threshold (int): number of distances to keep (default 10). This is the dimension of the topological vector. If -1, this threshold is computed from the list of persistence diagrams by considering the one with the largest number of points and using the dimension of its corresponding topological vector as threshold. | ||
""" | ||
self.threshold = threshold | ||
|
||
|
@@ -586,7 +586,7 @@ def transform(self, X): | |
|
||
Parameters: | ||
X (list of n x 2 numpy arrays): input persistence diagrams. | ||
|
||
Returns: | ||
numpy array with shape (number of diagrams) x (**threshold**): output topological vectors. | ||
""" | ||
|
@@ -638,7 +638,7 @@ def __init__(self, polynomial_type="R", threshold=10): | |
|
||
Parameters: | ||
polynomial_type (char): either "R", "S" or "T" (default "R"). Type of complex polynomial that is going to be computed (explained in https://link.springer.com/chapter/10.1007%2F978-3-319-23231-7_27). | ||
threshold (int): number of coefficients (default 10). This is the dimension of the complex vector of coefficients, i.e. the number of coefficients corresponding to the largest degree terms of the polynomial. If -1, this threshold is computed from the list of persistence diagrams by considering the one with the largest number of points and using the dimension of its corresponding complex vector of coefficients as threshold. | ||
threshold (int): number of coefficients (default 10). This is the dimension of the complex vector of coefficients, i.e. the number of coefficients corresponding to the largest degree terms of the polynomial. If -1, this threshold is computed from the list of persistence diagrams by considering the one with the largest number of points and using the dimension of its corresponding complex vector of coefficients as threshold. | ||
""" | ||
self.threshold, self.polynomial_type = threshold, polynomial_type | ||
|
||
|
@@ -658,7 +658,7 @@ def transform(self, X): | |
|
||
Parameters: | ||
X (list of n x 2 numpy arrays): input persistence diagrams. | ||
|
||
Returns: | ||
numpy array with shape (number of diagrams) x (**threshold**): output complex vectors of coefficients. | ||
""" | ||
|
@@ -681,9 +681,9 @@ def transform(self, X): | |
roots = np.multiply( (D[:,1]-D[:,0])/2, np.cos(alpha) - np.sin(alpha) + 1j * (np.cos(alpha) + np.sin(alpha)) ) | ||
coeff = [0] * (N+1) | ||
coeff[N] = 1 | ||
for i in range(1, N+1): | ||
for j in range(N-i-1, N): | ||
coeff[j] += ((-1) * roots[i-1] * coeff[j+1]) | ||
for i in range(1, N+1): | ||
for j in range(N-i-1, N): | ||
coeff[j] += ((-1) * roots[i-1] * coeff[j+1]) | ||
coeff = np.array(coeff[::-1])[1:] | ||
Xfit[d, :min(thresh, coeff.shape[0])] = coeff[:min(thresh, coeff.shape[0])] | ||
return Xfit | ||
|
@@ -885,3 +885,69 @@ def transform(self, X, sample_weight=None): | |
|
||
# Return the output feature names stored on the instance in `_running_transform_names`.
# NOTE(review): that attribute is assigned outside the visible hunk — presumably by transform(); confirm.
def get_feature_names_out(self): | |
return self._running_transform_names | |
|
||
class PersistenceLengths(BaseEstimator, TransformerMixin):
    """
    This is a class that returns the sorted N-longest persistence lengths. If the input does not contain enough values,
    the output will be filled with zeros.
    """

    def __init__(self, num_lengths=5):
        """
        Constructor for the PersistenceLengths class.

        Parameters:
            num_lengths (int): number of persistence lengths to return (default 5).

        :raises ValueError: If num_lengths is lower or equal to 0.
        """
        self._check_num_lengths(num_lengths)
        self.num_lengths = num_lengths

    @staticmethod
    def _check_num_lengths(num_lengths):
        """
        Raise a ValueError if num_lengths is not strictly positive.
        """
        if num_lengths <= 0:
            raise ValueError("num_lengths must be greater than 0.")

    def fit(self, X, y=None):
        """
        Fit the PersistenceLengths class on a list of persistence diagrams (this function actually does nothing but is
        useful when PersistenceLengths is included in a scikit-learn Pipeline).

        Parameters:
            X (list of n x 2 numpy arrays): input persistence diagrams.
            y: Ignored, present only for API consistency with scikit-learn.

        Returns:
            self: the unchanged estimator.
        """
        return self

    def transform(self, X):
        """
        Compute the persistence lengths for each persistence diagram individually.

        Parameters:
            X (list of n x 2 numpy arrays): input persistence diagrams.

        Returns:
            numpy array with shape (number of diagrams) x (num_lengths): output persistence lengths, sorted in
            decreasing order on each row and padded with zeros when a diagram has fewer than num_lengths points.

        :raises ValueError: If num_lengths is lower or equal to 0.
        """
        # The attribute can be reassigned after construction (e.g. through set_params), which bypasses the
        # constructor check; validate again so that the failure is explicit instead of a numpy shape error.
        self._check_num_lengths(self.num_lengths)

        pers_length_array = np.zeros((len(X), self.num_lengths))
        for idx, diag in enumerate(X):
            # Persistence length = death - birth
            lengths = diag[:, 1] - diag[:, 0]
            if len(lengths) > self.num_lengths:
                # Select the num_lengths biggest persistence bars length (unordered at this stage)
                lengths = np.partition(lengths, -self.num_lengths)[-self.num_lengths:]
            # Sort the kept persistence lengths in decreasing order
            lengths = np.sort(lengths)[::-1]
            # Remaining columns keep their initial zeros if there are not enough values
            pers_length_array[idx, :len(lengths)] = lengths

        return pers_length_array

    def __call__(self, diag):
        """
        Apply PersistenceLengths on a single persistence diagram and outputs the result.

        Parameters:
            diag (n x 2 numpy array): input persistence diagram.

        Returns:
            numpy 1d array of length num_lengths: output persistence lengths.
        """
        return self.transform([diag])[0]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why nx1?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, I missed this copy/paste leftover that had to be changed. I fixed it in e984496