[ENH] Use images in MovingAveragePCA instead of arrays (#32)
* Use images.

* Update test_mapca.py
tsalo authored Feb 24, 2021
1 parent 81bca85 commit c098329
Showing 2 changed files with 90 additions and 56 deletions.
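For context, a minimal sketch of the new niimg-based interface this commit introduces (the file names are hypothetical and not part of the commit):

import nibabel as nib

from mapca.mapca import MovingAveragePCA

img = nib.load("img.nii.gz")    # 4D data
mask = nib.load("mask.nii.gz")  # 3D binary mask

pca = MovingAveragePCA(criterion="mdl", normalize=True)
u_img = pca.fit_transform(img, mask)  # 4D niimg of component weight maps
print(pca.u_.shape)  # (n_masked_voxels, n_components)
print(pca.explained_variance_ratio_)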
131 changes: 83 additions & 48 deletions mapca/mapca.py
@@ -2,7 +2,9 @@
"""
import logging

import nibabel as nib
import numpy as np
from nilearn._utils import check_niimg_3d, check_niimg_4d
from scipy.stats import kurtosis
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
@@ -35,6 +37,10 @@ class MovingAveragePCA:
components_ : array, shape (n_components, n_features)
Principal axes in feature space, representing the directions of maximum
variance in the data. The components are sorted by explained_variance_.
u_ : array, shape (n_components, n_mask)
Component weight maps, limited to voxels in the mask.
u_nii_ : 4D nibabel.nifti1.Nifti1Image
Component weight maps, stored as a 4D niimg.
explained_variance_ : array, shape (n_components,)
The amount of variance explained by each of the selected components.
@@ -85,14 +91,26 @@ def __init__(self, criterion="mdl", normalize=True):
self.criterion = criterion
self.normalize = normalize

def _fit(self, X, shape_3d, mask_vec):
LGR.info("Performing dimensionality reduction based on GIFT "
"(https://trendscenter.org/software/gift/) and Li, Y. O., Adali, T., "
"& Calhoun, V. D. (2007). Estimating the number of independent components "
"for functional magnetic resonance imaging data. Human Brain Mapping, 28(11), "
"1251–1266. https://doi.org/10.1002/hbm.20359")
n_x, n_y, n_z = shape_3d
n_samples, n_timepoints = X.shape
def _fit(self, img, mask):
LGR.info(
"Performing dimensionality reduction based on GIFT "
"(https://trendscenter.org/software/gift/) and Li, Y. O., Adali, T., "
"& Calhoun, V. D. (2007). Estimating the number of independent components "
"for functional magnetic resonance imaging data. Human Brain Mapping, 28(11), "
"1251–1266. https://doi.org/10.1002/hbm.20359"
)

img = check_niimg_4d(img)
mask = check_niimg_3d(mask)
data = img.get_fdata()
mask = mask.get_fdata()

[n_x, n_y, n_z, n_timepoints] = data.shape
data_nib_V = np.reshape(data, (n_x * n_y * n_z, n_timepoints), order="F")
mask_vec = np.reshape(mask, n_x * n_y * n_z, order="F")
X = data_nib_V[mask_vec == 1, :]

n_samples = np.sum(mask_vec)
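The masking above flattens the volume in Fortran order (the GIFT/MATLAB convention) before selecting in-mask voxels; nilearn.masking.apply_mask returns comparable masked data but, presumably because it orders voxels in C order, the explicit reshape is kept here. A standalone toy sketch of the step (illustrative only, not part of the diff):

import numpy as np

n_x, n_y, n_z, n_t = 2, 2, 2, 3
data = np.random.rand(n_x, n_y, n_z, n_t)
mask = np.zeros((n_x, n_y, n_z))
mask[0, :, :] = 1  # keep half of the voxels

data_2d = np.reshape(data, (n_x * n_y * n_z, n_t), order="F")  # voxels x time
mask_vec = np.reshape(mask, n_x * n_y * n_z, order="F")
X = data_2d[mask_vec == 1, :]
assert X.shape == (4, 3)  # (n_masked_voxels, n_timepoints)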

self.scaler_ = StandardScaler(with_mean=True, with_std=True)
if self.normalize:
@@ -243,57 +261,65 @@ def _fit(self, X, shape_3d, mask_vec):
self.n_features_ = ppca.n_features_
self.n_samples_ = ppca.n_samples_
self.noise_variance_ = ppca.noise_variance_
self.u = np.dot(np.dot(X, self.components_.T), np.diag(1.0 / self.explained_variance_))

def fit(self, X, shape_3d, mask_vec):
component_maps = np.dot(
np.dot(X, self.components_.T), np.diag(1.0 / self.explained_variance_)
)
component_maps_3d = np.zeros((n_x * n_y * n_z, n_components))
component_maps_3d[mask_vec == 1, :] = component_maps
component_maps_3d = np.reshape(component_maps_3d, (n_x, n_y, n_z, n_components), order="F")
self.u_ = component_maps
self.u_nii_ = nib.Nifti1Image(component_maps_3d, img.affine, img.header)

def fit(self, img, mask):
"""Fit the model with X.
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training data, where n_samples is the number of samples and
n_features is the number of features.
img : 4D niimg_like
Data on which to apply PCA.
mask : 3D niimg_like
Mask to apply on ``img``.
Returns
-------
self : object
Returns the instance itself.
"""
self._fit(X, shape_3d, mask_vec)
self._fit(img, mask)
return self

def fit_transform(self, X, shape_3d, mask_vec):
def fit_transform(self, img, mask):
"""Fit the model with X and apply the dimensionality reduction on X.
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training data, where n_samples is the number of samples and
n_features is the number of features.
img : 4D niimg_like
Data on which to apply PCA.
mask : 3D niimg_like
Mask to apply on ``img``.
Returns
-------
X_new : array-like, shape (n_samples, n_components)
Transformed values.
X_new : 4D niimg_like
Component weight maps.
Notes
-----
The transformation step is different from scikit-learn's approach,
which ignores explained variance.
"""
self._fit(X, shape_3d, mask_vec)
return self.transform(X)
self._fit(img, mask)
return self.transform(img)
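As the Notes say, this projection differs from scikit-learn's transform (X @ components_.T): the weight maps are additionally rescaled by the inverse of the explained variance, U = X @ C.T @ diag(1 / var). A small illustrative comparison on random data (not part of the diff):

import numpy as np
from sklearn.decomposition import PCA

X = np.random.rand(100, 10)
X -= X.mean(axis=0)  # center so that X @ components_.T matches transform(X)
pca = PCA(n_components=3).fit(X)

proj_sklearn = X @ pca.components_.T
proj_mapca = X @ pca.components_.T @ np.diag(1.0 / pca.explained_variance_)

# The two differ only by a per-component scaling
assert np.allclose(proj_mapca * pca.explained_variance_, proj_sklearn)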

def transform(self, X):
def transform(self, img):
"""Apply dimensionality reduction to X.
X is projected on the first principal components previously extracted from a training set.
Parameters
----------
X : array-like, shape (n_samples, n_features)
New data, where n_samples is the number of samples and n_features
is the number of features.
img : 4D niimg_like
Data on which to apply PCA.
Returns
-------
@@ -305,34 +331,52 @@ def transform(self, X):
"""
# X = self.scaler_.fit_transform(X.T).T
# X_new = np.dot(np.dot(X, self.components_.T), np.diag(1.0 / self.explained_variance_))
return self.u
return self.u_nii_

def inverse_transform(self, X):
def inverse_transform(self, img, mask):
"""Transform data back to its original space.
In other words, return an input X_original whose transform would be X.
Parameters
----------
X : array-like, shape (n_samples, n_components)
New data, where n_samples is the number of samples and n_components
is the number of components.
img : 4D niimg_like
Component weight maps.
mask : 3D niimg_like
Mask to apply on ``img``.
Returns
-------
X_original : array-like, shape (n_samples, n_features)
img_orig : 4D niimg_like
Reconstructed original data, with fourth axis corresponding to time.
Notes
-----
This is different from scikit-learn's approach, which ignores explained variance.
"""
img = check_niimg_4d(img)
mask = check_niimg_3d(mask)
data = img.get_fdata()
mask = mask.get_fdata()

[n_x, n_y, n_z, n_components] = data.shape
data_nib_V = np.reshape(data, (n_x * n_y * n_z, n_components), order="F")
mask_vec = np.reshape(mask, n_x * n_y * n_z, order="F")
X = data_nib_V[mask_vec == 1, :]

X_orig = np.dot(np.dot(X, np.diag(self.explained_variance_)), self.components_)
if self.normalize:
X_orig = self.scaler_.inverse_transform(X_orig.T).T
return X_orig

n_t = X_orig.shape[1]
out_data = np.zeros((n_x * n_y * n_z, n_t))
out_data[mask_vec == 1, :] = X_orig
out_data = np.reshape(out_data, (n_x, n_y, n_z, n_t), order="F")
img_orig = nib.Nifti1Image(out_data, img.affine, img.header)
return img_orig
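fit_transform followed by inverse_transform therefore reconstructs the input from the retained components, undoing the variance scaling and (if normalize=True) the standardization. A hedged usage sketch, reusing the img and mask niimgs from the example above:

pca = MovingAveragePCA(criterion="mdl", normalize=True)
u_img = pca.fit_transform(img, mask)          # 4D component weight maps
img_rec = pca.inverse_transform(u_img, mask)  # 4D reconstruction; 4th axis is time
assert img_rec.shape == img.shape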


def ma_pca(img, mask_img, criterion="mdl", normalize=False):
def ma_pca(img, mask, criterion="mdl", normalize=False):
"""Perform moving average-based PCA on imaging data.
Run Singular Value Decomposition (SVD) on input data,
@@ -344,7 +388,7 @@ def ma_pca(img, mask_img, criterion="mdl", normalize=False):
----------
img : 4D niimg_like
Data on which to apply PCA.
mask_img : 3D niimg_like
mask : 3D niimg_like
Mask to apply on ``img``.
criterion : {'mdl', 'aic', 'kic'}, optional
Criterion used to select the number of components. Default is "mdl".
@@ -358,26 +402,17 @@
Returns
-------
u : array-like, shape (n_samples, n_components)
Component weight map for each component.
Component weight map for each component, after masking.
s : array-like, shape (n_components,)
Variance explained for each component.
varex_norm : array-like, shape (n_components,)
Explained variance ratio.
v : array-like, shape (n_timepoints, n_components)
Component timeseries.
"""
# from nilearn import masking

# data = masking.apply_mask(img, mask_img).T
# mask_vec = np.reshape(mask_img.get_fdata(), np.prod(mask_img.shape), order="F")
img = img.get_fdata()
mask_img = mask_img.get_fdata()
[Nx, Ny, Nz, Nt] = img.shape
data_nib_V = np.reshape(img, (Nx * Ny * Nz, Nt), order='F')
mask_vec = np.reshape(mask_img, Nx * Ny * Nz, order='F')
data = data_nib_V[mask_vec == 1, :]
pca = MovingAveragePCA(criterion=criterion, normalize=normalize)
u = pca.fit_transform(data, shape_3d=img.shape[:3], mask_vec=mask_vec)
_ = pca.fit_transform(img, mask)
u = pca.u_
s = pca.explained_variance_
varex_norm = pca.explained_variance_ratio_
v = pca.components_.T
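For reference, the wrapper can be called the same way (img and mask as in the earlier example), with return values matching its docstring:

from mapca.mapca import ma_pca

u, s, varex_norm, v = ma_pca(img, mask, criterion="mdl", normalize=True)
# u: (n_masked_voxels, n_components) component weight maps
# s: (n_components,) explained variance
# varex_norm: (n_components,) explained variance ratio
# v: (n_timepoints, n_components) component time series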
15 changes: 7 additions & 8 deletions mapca/tests/test_mapca.py
@@ -78,21 +78,20 @@ def test_MovingAveragePCA():
n_voxels_in_mask = np.sum(test_mask)

test_data = masking.apply_mask(test_img, test_mask_img).T
mask_vec = np.reshape(test_mask, np.prod(test_mask.shape), order="F")

# Testing the MDL option
pca = MovingAveragePCA(criterion="mdl", normalize=True)
u = pca.fit_transform(test_data, shape_3d=test_img.shape[:3], mask_vec=mask_vec)
assert u.shape[0] == n_voxels_in_mask
u = pca.fit_transform(test_img, test_mask_img)
assert pca.u_.shape[0] == n_voxels_in_mask
assert pca.explained_variance_.shape[0] == 1
assert pca.explained_variance_ratio_.shape[0] == 1
assert pca.components_.T.shape[0] == N_TIMEPOINTS

# Test that separate fit and transform calls match fit_transform
pca2 = MovingAveragePCA(criterion="mdl", normalize=True)
pca2.fit(test_data, shape_3d=test_img.shape[:3], mask_vec=mask_vec)
u2 = pca2.transform(test_data)
assert np.array_equal(u2, u)
pca2.fit(test_img, test_mask_img)
u2 = pca2.transform(test_img)
assert np.array_equal(u2.get_fdata(), u.get_fdata())

test_data_est = pca2.inverse_transform(u2)
assert test_data_est.shape == test_data.shape
test_data_est = pca2.inverse_transform(u2, test_mask_img)
assert test_data_est.shape == test_img.shape
