Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Simplify code of classification #28

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
332 changes: 32 additions & 300 deletions aroma/utils.py
Original file line number Diff line number Diff line change
@@ -1,256 +1,55 @@
# CHANGES
# -------
# Log of changes as mandated by the original Apache 2.0 License of ICA-AROMA
#
# * Drop ``runICA`` and ``register2MNI`` functions
# * Base ``classifier`` on Pandas, and revise signature (``predict(X)``)
# to make it more similar to scikit learn
# * Return classification labels directly on ``predict``
#
"""Utility functions for ICA-AROMA."""
import logging
import os
import os.path as op
import shutil

import nibabel as nib
import numpy as np
from nilearn import image, masking
from nilearn import masking

LGR = logging.getLogger(__name__)

# Define criteria needed for classification (thresholds and
# hyperplane-parameters)
THR_CSF = 0.10
THR_HFC = 0.35
HYPERPLANE = [-19.9751070082159, 9.95127547670627, 24.8333160239175]

def runICA(fsl_dir, in_file, out_dir, mel_dir_in, mask, dim, TR):
"""Run MELODIC and merge the thresholded ICs into a single 4D nifti file.

Parameters
----------
fsl_dir : str
Full path of the bin-directory of FSL
in_file : str
Full path to the fMRI data file (nii.gz) on which MELODIC
should be run
out_dir : str
Full path of the output directory
mel_dir_in : str or None
Full path of the MELODIC directory in case it has been run
before, otherwise None.
mask : str
Full path of the mask to be applied during MELODIC
dim : int
Dimensionality of ICA
TR : float
TR (in seconds) of the fMRI data

Output
------
melodic.ica/: MELODIC directory
melodic_IC_thr.nii.gz: Merged file containing the mixture modeling
thresholded Z-statistical maps located in
melodic.ica/stats/
def predict(X, thr_csf=THR_CSF, thr_hfc=THR_HFC, hplane=HYPERPLANE):
"""
# Define the 'new' MELODIC directory and predefine some associated files
mel_dir = op.join(out_dir, "melodic.ica")
mel_IC = op.join(mel_dir, "melodic_IC.nii.gz")
mel_IC_mix = op.join(mel_dir, "melodic_mix")
mel_IC_thr = op.join(out_dir, "melodic_IC_thr.nii.gz")

# When a MELODIC directory is specified,
# check whether all needed files are present.
# Otherwise... run MELODIC again
if (
mel_dir_in
and op.isfile(op.join(mel_dir_in, "melodic_IC.nii.gz"))
and op.isfile(op.join(mel_dir_in, "melodic_FTmix"))
and op.isfile(op.join(mel_dir_in, "melodic_mix"))
):
LGR.info(' - The existing/specified MELODIC directory will be used.')

# If a 'stats' directory is present (contains thresholded spatial maps)
# create a symbolic link to the MELODIC directory.
# Otherwise create specific links and
# run mixture modeling to obtain thresholded maps.
if op.isdir(op.join(mel_dir_in, "stats")):
os.symlink(mel_dir_in, mel_dir)
else:
LGR.warning(" - The MELODIC directory does not contain the "
"required 'stats' folder. Mixture modeling on the "
"Z-statistical maps will be run.")

# Create symbolic links to the items in the specified melodic
# directory
os.makedirs(mel_dir)
for item in os.listdir(mel_dir_in):
os.symlink(op.join(mel_dir_in, item), op.join(mel_dir, item))

# Run mixture modeling
melodic_command = ("{0} --in={1} --ICs={1} --mix={2} "
"--out_dir={3} --0stats --mmthresh=0.5").format(
op.join(fsl_dir, 'melodic'),
mel_IC,
mel_IC_mix,
mel_dir,
)
os.system(melodic_command)
else:
# If a melodic directory was specified, display that it did not
# contain all files needed for ICA-AROMA (or that the directory
# does not exist at all)
if mel_dir_in:
if not op.isdir(mel_dir_in):
LGR.warning(' - The specified MELODIC directory does not '
'exist. MELODIC will be run separately.')
else:
LGR.warning(' - The specified MELODIC directory does not '
'contain the required files to run ICA-AROMA. '
'MELODIC will be run separately.')

# Run MELODIC
melodic_command = (
"{0} --in={1} --outdir={2} --mask={3} --dim={4} "
"--Ostats --nobet --mmthresh=0.5 --report "
"--tr={5}"
).format(op.join(fsl_dir, "melodic"), in_file, mel_dir, mask, dim, TR)
os.system(melodic_command)

# Get number of components
mel_IC_img = nib.load(mel_IC)
nr_ICs = mel_IC_img.shape[3]

# Merge mixture modeled thresholded spatial maps. Note! In case that
# mixture modeling did not converge, the file will contain two spatial
# maps. The latter being the results from a simple null hypothesis test.
# In that case, this map will have to be used (first one will be empty).
zstat_imgs = []
for i in range(1, nr_ICs + 1):
# Define thresholded zstat-map file
z_temp = op.join(mel_dir, "stats", "thresh_zstat{0}.nii.gz".format(i))
Classify components as motion or non-motion based on four features.

# Get number of volumes in component's thresholded image
z_temp_img = nib.load(z_temp)
if z_temp_img.ndim == 4:
len_IC = z_temp_img.shape[3]
# Extract last spatial map within the thresh_zstat file
zstat_img = image.index_img(z_temp_img, len_IC - 1)
else:
zstat_img = z_temp_img

zstat_imgs.append(zstat_img)

# Merge to 4D
zstat_4d_img = image.concat_imgs(zstat_imgs)

# Apply the mask to the merged image (in case a melodic-directory was
# predefined and run with a different mask)
zstat_4d_img = image.math_img(
"stat * mask[:, :, :, None]", stat=zstat_4d_img, mask=mask
)
zstat_4d_img.to_filename(mel_IC_thr)


def register2MNI(fsl_dir, in_file, out_file, affmat, warp):
"""Register an image (or time-series of images) to MNI152 T1 2mm.

If no affmat is defined, it only warps (i.e. it assumes that the data has
been registered to the structural scan associated with the warp-file
already). If no warp is defined either, it only resamples the data to 2mm
isotropic if needed (i.e. it assumes that the data has been registered to
a MNI152 template). In case only an affmat file is defined, it assumes that
the data has to be linearly registered to MNI152 (i.e. the user has a
reason not to use non-linear registration on the data).
The four features used for classification are: maximum RP correlation,
high-frequency content, edge-fraction, and CSF-fraction.

Parameters
----------
fsl_dir : str
Full path of the bin-directory of FSL
in_file : str
Full path to the data file (nii.gz) which has to be registerd to
MNI152 T1 2mm
out_file : str
Full path of the output file
affmat : str
Full path of the mat file describing the linear registration (if data
is still in native space)
warp : str
Full path of the warp file describing the non-linear registration (if
data has not been registered to MNI152 space yet)

Output
------
melodic_IC_mm_MNI2mm.nii.gz : merged file containing the mixture modeling
thresholded Z-statistical maps registered to
MNI152 2mm
"""
# Define the MNI152 T1 2mm template
fslnobin = fsl_dir.rsplit("/", 2)[0]
ref = op.join(fslnobin, "data", "standard", "MNI152_T1_2mm_brain.nii.gz")

# If the no affmat- or warp-file has been specified, assume that the data
# is already in MNI152 space. In that case only check if resampling to
# 2mm is needed
if not affmat and not warp:
in_img = nib.load(in_file)
# Get 3D voxel size
pixdim1, pixdim2, pixdim3 = in_img.header.get_zooms()[:3]

# If voxel size is not 2mm isotropic, resample the data, otherwise
# copy the file
if (pixdim1 != 2) or (pixdim2 != 2) or (pixdim3 != 2):
os.system(
" ".join(
[
op.join(fsl_dir, "flirt"),
" -ref " + ref,
" -in " + in_file,
" -out " + out_file,
" -applyisoxfm 2 -interp trilinear",
]
)
)
else:
os.copyfile(in_file, out_file)
X : :obj:`pandas.DataFrame`
Features table (C x 4), must contain the following columns:
"edge_fract", "csf_fract", "max_RP_corr", and "HFC".

# If only a warp-file has been specified, assume that the data has already
# been registered to the structural scan. In that case apply the warping
# without a affmat
elif not affmat and warp:
# Apply warp
os.system(
" ".join(
[
op.join(fsl_dir, "applywarp"),
"--ref=" + ref,
"--in=" + in_file,
"--out=" + out_file,
"--warp=" + warp,
"--interp=trilinear",
]
)
)
Returns
-------
y : array_like
Classification (``True`` if the component is a motion one).

# If only a affmat-file has been specified perform affine registration to
# MNI
elif affmat and not warp:
os.system(
" ".join(
[
op.join(fsl_dir, "flirt"),
"-ref " + ref,
"-in " + in_file,
"-out " + out_file,
"-applyxfm -init " + affmat,
"-interp trilinear",
]
)
)
"""
# Project edge & max_RP_corr feature scores to new 1D space
x = X[["max_RP_corr", "edge_fract"]].values
proj = (hplane[0] + np.dot(x.T, hplane[1:])) > 0

# If both a affmat- and warp-file have been defined, apply the warping
# accordingly
else:
os.system(
" ".join(
[
op.join(fsl_dir, "applywarp"),
"--ref=" + ref,
"--in=" + in_file,
"--out=" + out_file,
"--warp=" + warp,
"--premat=" + affmat,
"--interp=trilinear",
]
)
)
# Classify the ICs
return (X["csf_fract"] > thr_csf) | (X["HFC"] > thr_hfc) | proj


def cross_correlation(a, b):
Expand All @@ -275,73 +74,6 @@ def cross_correlation(a, b):
return np.corrcoef(a.T, b.T)[:ncols_a, ncols_a:]


def classification(features_df, out_dir):
"""Classify components as motion or non-motion based on four features.

The four features used for classification are: maximum RP correlation,
high-frequency content, edge-fraction, and CSF-fraction.

Parameters
----------
features_df : (C x 4) pandas.DataFrame
DataFrame with the following columns:
"edge_fract", "csf_fract", "max_RP_corr", and "HFC".
out_dir : str
Full path of the output directory

Returns
-------
motion_ICs : array_like
Array containing the indices of the components identified as motion
components

Output
------
classified_motion_ICs.txt : A text file containing the indices of the
components identified as motion components
"""
# Put the feature scores in a text file
features_df.to_csv(op.join(out_dir, "feature_scores.tsv"), sep="\t", index=False)

# Classify the ICs as motion or non-motion

# Define criteria needed for classification (thresholds and
# hyperplane-parameters)
THR_CSF = 0.10
THR_HFC = 0.35
HYPERPLANE = np.array([-19.9751070082159, 9.95127547670627, 24.8333160239175])

# Project edge & max_RP_corr feature scores to new 1D space
x = features_df[["max_RP_corr", "edge_fract"]].values
proj = HYPERPLANE[0] + np.dot(x.T, HYPERPLANE[1:])

# Classify the ICs
is_motion = (
(features_df["csf_fract"] > THR_CSF)
| (features_df["HFC"] > THR_HFC)
| (proj > 0)
)
features_df["classification"] = is_motion
features_df["classification"] = features_df["classification"].map(
{True: "rejected", False: "accepted"}
)

# Put the indices of motion-classified ICs in a text file (starting with 1)
motion_ICs = features_df[
"classification", features_df["classification"] == "rejected"
].index.values
with open(op.join(out_dir, "classified_motion_ICs.txt"), "w") as fo:
out_str = ",".join(motion_ICs.astype(str))
fo.write(out_str)

# Create a summary overview of the classification
features_df.to_csv(
op.join(out_dir, "classification_overview.txt"), sep="\t", index_label="IC"
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this file is no longer written out here, it would be good to write it out in the workflow. We also need a corresponding change in the workflow function, I think.


return motion_ICs


def denoising(fsl_dir, in_file, out_dir, mixing, den_type, den_idx):
"""Remove noise components from fMRI data.

Expand Down