diff --git a/aroma/aroma.py b/aroma/aroma.py index 3ac2677..bb98c46 100644 --- a/aroma/aroma.py +++ b/aroma/aroma.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -from aroma import _version, features, utils +from aroma import _version, classification, features, io, utils LGR = logging.getLogger(__name__) @@ -77,25 +77,22 @@ def aroma_workflow( ) return elif op.isdir(out_dir) and overwrite: - LGR.warning( - "Output directory {} exists and will be overwritten." - "\n".format(out_dir) - ) + LGR.warning("Output directory {} exists and will be overwritten.\n".format(out_dir)) shutil.rmtree(out_dir) os.makedirs(out_dir) else: os.makedirs(out_dir) # Create logfile name - basename = 'aroma_' - extension = 'tsv' - isotime = datetime.datetime.now().strftime('%Y-%m-%dT%H%M%S') - logname = os.path.join(out_dir, (basename + isotime + '.' + extension)) + basename = "aroma_" + extension = "tsv" + isotime = datetime.datetime.now().strftime("%Y-%m-%dT%H%M%S") + logname = os.path.join(out_dir, (basename + isotime + "." + extension)) # Set logging format log_formatter = logging.Formatter( - '%(asctime)s\t%(name)-12s\t%(levelname)-8s\t%(message)s', - datefmt='%Y-%m-%dT%H:%M:%S') + "%(asctime)s\t%(name)-12s\t%(levelname)-8s\t%(message)s", datefmt="%Y-%m-%dT%H:%M:%S" + ) # Set up logging file and open it for writing log_handler = logging.FileHandler(logname) @@ -104,20 +101,22 @@ def aroma_workflow( # add logger mode options if quiet: - logging.basicConfig(level=logging.WARNING, - handlers=[log_handler, sh], - format='%(levelname)-10s %(message)s') + logging.basicConfig( + level=logging.WARNING, + handlers=[log_handler, sh], + format="%(levelname)-10s %(message)s", + ) elif debug: - logging.basicConfig(level=logging.DEBUG, - handlers=[log_handler, sh], - format='%(levelname)-10s %(message)s') + logging.basicConfig( + level=logging.DEBUG, handlers=[log_handler, sh], format="%(levelname)-10s %(message)s" + ) else: - logging.basicConfig(level=logging.INFO, - handlers=[log_handler, sh], - format='%(levelname)-10s %(message)s') + logging.basicConfig( + level=logging.INFO, handlers=[log_handler, sh], format="%(levelname)-10s %(message)s" + ) - version_number = _version.get_versions()['version'] - LGR.info(f'Currently running ICA-AROMA version {version_number}') + version_number = _version.get_versions()["version"] + LGR.info(f"Currently running ICA-AROMA version {version_number}") # Check if the type of denoising is correctly specified, when specified if den_type not in ("nonaggr", "aggr", "both", "no"): @@ -135,11 +134,7 @@ def aroma_workflow( # Check TR if TR == 1: - LGR.warning( - "Please check whether the determined TR (of " - + str(TR) - + "s) is correct!\n" - ) + LGR.warning("Please check whether the determined TR (of " + str(TR) + "s) is correct!\n") elif TR == 0: raise Exception( "TR is zero. ICA-AROMA requires a valid TR and will therefore " @@ -170,7 +165,7 @@ def aroma_workflow( ( features_df["edge_fract"], features_df["csf_fract"], - metric_metadata + metric_metadata, ) = features.feature_spatial(component_maps, metric_metadata) LGR.info(" - extracting the Maximum RP correlation feature") @@ -190,17 +185,19 @@ def aroma_workflow( ) LGR.info(" - classification") - features_df, metric_metadata = utils.classification(features_df, metric_metadata) - motion_ICs = utils.write_metrics(features_df, out_dir, metric_metadata) + classification_labels = classification.predict(features_df, metric_metadata=metric_metadata) + features_df["classification"] = classification_labels + motion_ICs = io.write_metrics(features_df, out_dir, metric_metadata) if generate_plots: - from . import plotting - plotting.classification_plot( - op.join(out_dir, "desc-AROMA_metrics.tsv"), out_dir - ) + from aroma import plotting + + plotting.classification_plot(op.join(out_dir, "desc-AROMA_metrics.tsv"), out_dir) if den_type != "no": LGR.info("Step 3) Data denoising") - utils.denoising(in_file, out_dir, mixing, den_type, motion_ICs) + # Index of the components that were classified as "rejected" + rejected_components = np.where(classification_labels == "rejected")[0] + utils.denoising(in_file, out_dir, mixing, den_type, rejected_components) LGR.info("Finished") diff --git a/aroma/classification.py b/aroma/classification.py new file mode 100644 index 0000000..e450e5b --- /dev/null +++ b/aroma/classification.py @@ -0,0 +1,110 @@ +# CHANGES +# ------- +# Log of changes as mandated by the original Apache 2.0 License of ICA-AROMA +# +# * Drop ``runICA`` and ``register2MNI`` functions +# * Base ``classifier`` on Pandas, and revise signature (``predict(X)``) +# to make it more similar to scikit learn +# * Return classification labels directly on ``predict`` +# +"""Classification functions for ICA-AROMA.""" +import logging + +import numpy as np + +LGR = logging.getLogger(__name__) + +# Define criteria needed for classification (thresholds and +# hyperplane-parameters) +THR_CSF = 0.10 +THR_HFC = 0.35 +HYPERPLANE = np.array([-19.9751070082159, 9.95127547670627, 24.8333160239175]) + + +def hfc_criteria(x, thr_hfc=THR_HFC): + """ + Compute the HFC criteria for classification. + + Parameters + ---------- + x : numpy.ndarray + Projection of HFC feature scores to new 1D space. + + Returns + ------- + numpy.ndarray + Classification (``True`` if the component is a motion one). + """ + return x > thr_hfc + + +def csf_criteria(x, thr_csf=THR_CSF): + """ + Compute the CSF criteria for classification. + + Parameters + ---------- + x : numpy.ndarray + Projection of CSF-fraction feature scores to new 1D space. + + Returns + ------- + numpy.ndarray + Classification (``True`` if the component is a CSF one). + """ + return x > thr_csf + + +def hplane_criteria(x, hplane=HYPERPLANE): + """ + Compute the hyperplane criteria for classification. + + Parameters + ---------- + x : numpy.ndarray + Projection of edge & max_RP_corr feature scores to new 1D space. + + Returns + ------- + :obj:`pandas.DataFrame` + Features table with additional column "classification". + + """ + return (hplane[0] + np.dot(x, hplane[1:])) > 0 + + +def predict(X, thr_csf=THR_CSF, thr_hfc=THR_HFC, hplane=HYPERPLANE, metric_metadata=None): + """ + Classify components as motion or non-motion based on four features. + + The four features used for classification are: maximum RP correlation, + high-frequency content, edge-fraction, and CSF-fraction. + + Parameters + ---------- + X : :obj:`pandas.DataFrame` + Features table (C x 4), must contain the following columns: + "edge_fract", "csf_fract", "max_RP_corr", and "HFC". + + Returns + ------- + y : array_like + Classification (``True`` if the component is a motion one). + """ + # Project edge & max_RP_corr feature scores to new 1D space + proj = hplane_criteria(X[["max_RP_corr", "edge_fract"]].values, hplane=hplane) + + # Compute the CSF criteria + csf = csf_criteria(X["csf_fract"].values, thr_csf=thr_csf) + + # Compute the HFC criteria + hfc = hfc_criteria(X["HFC"].values, thr_hfc=thr_hfc) + + # Combine the criteria + classification = csf | hfc | proj + + #  Turn classification into a list of string labels with rejected if true, accepted if false + classification = ["rejected" if c else "accepted" for c in classification] + + # Classify the ICs + return classification diff --git a/aroma/io.py b/aroma/io.py new file mode 100644 index 0000000..a663b4f --- /dev/null +++ b/aroma/io.py @@ -0,0 +1,45 @@ +"""Input/output functions for the Aroma project.""" +import json +import os.path as op + + +def write_metrics(features_df, out_dir, metric_metadata=None): + """Write out feature/classification information and metadata. + Parameters + ---------- + features_df : (C x 5) :obj:`pandas.DataFrame` + DataFrame with metric values and classifications. + Must have the following columns: "edge_fract", "csf_fract", "max_RP_corr", "HFC", and + "classification". + out_dir : :obj:`str` + Output directory. + metric_metadata : :obj:`dict` or None, optional + Metric metadata in a dictionary. + Returns + ------- + motion_ICs : array_like + Array containing the indices of the components identified as motion components. + Output + ------ + AROMAnoiseICs.csv : A text file containing the indices of the + components identified as motion components + desc-AROMA_metrics.tsv + desc-AROMA_metrics.json + """ + # Put the indices of motion-classified ICs in a text file (starting with 1) + motion_ICs = features_df["classification"][features_df["classification"] == "rejected"].index + motion_ICs = motion_ICs.values + + with open(op.join(out_dir, "AROMAnoiseICs.csv"), "w") as fo: + out_str = ",".join(motion_ICs.astype(str)) + fo.write(out_str) + + # Create a summary overview of the classification + out_file = op.join(out_dir, "desc-AROMA_metrics.tsv") + features_df.to_csv(out_file, sep="\t", index_label="IC") + + if isinstance(metric_metadata, dict): + with open(op.join(out_dir, "desc-AROMA_metrics.json"), "w") as fo: + json.dump(metric_metadata, fo, sort_keys=True, indent=4) + + return motion_ICs diff --git a/aroma/tests/test_classification.py b/aroma/tests/test_classification.py new file mode 100644 index 0000000..6bfe6d2 --- /dev/null +++ b/aroma/tests/test_classification.py @@ -0,0 +1,12 @@ +"""Tests for aroma.classification.""" +import pandas as pd +from aroma import classification + + +def test_classification(classification_overview): + """Test aroma.utils.classification and ensure classifications come out the same.""" + clf_overview_df = pd.read_table(classification_overview) + test_df = clf_overview_df[["edge_fract", "csf_fract", "max_RP_corr", "HFC"]] + test_classifications = classification.predict(test_df, metric_metadata={}) + true_classifications = clf_overview_df["classification"].tolist() + assert true_classifications == test_classifications diff --git a/aroma/tests/test_utils.py b/aroma/tests/test_utils.py index 985d1d2..9da71cf 100644 --- a/aroma/tests/test_utils.py +++ b/aroma/tests/test_utils.py @@ -1,22 +1,10 @@ """Tests for aroma.utils.""" -import os - import numpy as np import pandas as pd import pytest from aroma import utils -def test_classification(classification_overview): - """Test aroma.utils.classification and ensure classifications come out the same.""" - clf_overview_df = pd.read_table(classification_overview) - test_df = clf_overview_df[["edge_fract", "csf_fract", "max_RP_corr", "HFC"]] - test_df, metadata = utils.classification(test_df, {}) - true_classifications = clf_overview_df["classification"].tolist() - test_classifications = test_df["classification"].tolist() - assert true_classifications == test_classifications - - def test_load_motpars_manual(motion_parameters): """Test aroma.utils.load_motpars with manual source determination.""" fsl = utils.load_motpars(motion_parameters["FSL"], source="fsl") diff --git a/aroma/utils.py b/aroma/utils.py index 0e8ea4b..0ea8856 100644 --- a/aroma/utils.py +++ b/aroma/utils.py @@ -1,5 +1,4 @@ """Utility functions for ICA-AROMA.""" -import json import logging import os.path as op import shutil @@ -40,134 +39,6 @@ def cross_correlation(a, b): return np.corrcoef(a.T, b.T)[:ncols_a, ncols_a:] -def classification(features_df, metric_metadata=None): - """Classify components as motion or non-motion based on four features. - - The four features used for classification are: maximum RP correlation, - high-frequency content, edge-fraction, and CSF-fraction. - - Parameters - ---------- - features_df : (C x 4) :obj:`pandas.DataFrame` - DataFrame with the following columns: - "edge_fract", "csf_fract", "max_RP_corr", and "HFC". - metric_metadata : :obj:`dict` or None, optional - Metric metadata in a dictionary. - - Returns - ------- - features_df - metric_metadata - """ - # Define criteria needed for classification (thresholds and - # hyperplane-parameters) - THR_CSF = 0.10 - THR_HFC = 0.35 - HYPERPLANE = np.array([-19.9751070082159, 9.95127547670627, 24.8333160239175]) - - if isinstance(metric_metadata, dict): - metric_metadata["classification"] = { - "LongName": "Component classification", - "Description": ("Classification from the classification procedure."), - "Levels": { - "accepted": "A component that is determined not to be associated with motion.", - "rejected": "A motion-related component.", - }, - } - metric_metadata["rationale"] = { - "LongName": "Rationale for component classification", - "Description": ( - "The reason for the classification. " - "In cases where components are classified based on more than one criterion, " - "they are listed sequentially, separated by semicolons." - ), - "Levels": { - "CSF": f"The csf_fract value is higher than {THR_CSF}", - "HFC": f"The HFC value is higher than {THR_HFC}", - "hyperplane": ( - "After the max_RP_corr and edge_fract values are projected " - "to a hyperplane, the projected point is less than zero." - ), - }, - } - - # Classify the ICs as motion (rejected) or non-motion (accepted) - features_df["classification"] = "accepted" - features_df["rationale"] = "" - - # CSF - rej_csf = features_df["csf_fract"] > THR_CSF - features_df.loc[rej_csf, "classification"] = "rejected" - features_df.loc[rej_csf, "rationale"] += "CSF;" - - # HFC - rej_hfc = features_df["HFC"] > THR_HFC - features_df.loc[rej_hfc, "classification"] = "rejected" - features_df.loc[rej_hfc, "rationale"] += "HFC;" - - # Hyperplane - # Project edge & max_RP_corr feature scores to new 1D space - x = features_df[["max_RP_corr", "edge_fract"]].values - proj = HYPERPLANE[0] + np.dot(x, HYPERPLANE[1:]) - rej_hyperplane = proj > 0 - features_df.loc[rej_hyperplane, "classification"] = "rejected" - features_df.loc[rej_hyperplane, "rationale"] += "hyperplane;" - - # Classify the ICs - is_motion = (features_df["csf_fract"] > THR_CSF) | (features_df["HFC"] > THR_HFC) | (proj > 0) - assert np.array_equal(is_motion, (features_df["classification"] == "rejected").values) - - # Reorder columns and remove trailing semicolons - features_df = clean_dataframe(features_df) - - return features_df, metric_metadata - - -def write_metrics(features_df, out_dir, metric_metadata=None): - """Write out feature/classification information and metadata. - - Parameters - ---------- - features_df : (C x 5) :obj:`pandas.DataFrame` - DataFrame with metric values and classifications. - Must have the following columns: "edge_fract", "csf_fract", "max_RP_corr", "HFC", and - "classification". - out_dir : :obj:`str` - Output directory. - metric_metadata : :obj:`dict` or None, optional - Metric metadata in a dictionary. - - Returns - ------- - motion_ICs : array_like - Array containing the indices of the components identified as motion components. - - Output - ------ - AROMAnoiseICs.csv : A text file containing the indices of the - components identified as motion components - desc-AROMA_metrics.tsv - desc-AROMA_metrics.json - """ - # Put the indices of motion-classified ICs in a text file (starting with 1) - motion_ICs = features_df["classification"][features_df["classification"] == "rejected"].index - motion_ICs = motion_ICs.values - - with open(op.join(out_dir, "AROMAnoiseICs.csv"), "w") as fo: - out_str = ",".join(motion_ICs.astype(str)) - fo.write(out_str) - - # Create a summary overview of the classification - out_file = op.join(out_dir, "desc-AROMA_metrics.tsv") - features_df.to_csv(out_file, sep="\t", index_label="IC") - - if isinstance(metric_metadata, dict): - with open(op.join(out_dir, "desc-AROMA_metrics.json"), "w") as fo: - json.dump(metric_metadata, fo, sort_keys=True, indent=4) - - return motion_ICs - - def denoising(in_file, out_dir, mixing, den_type, den_idx): """Remove noise components from fMRI data.