From 97e88f94d79367573c068f8eb798994d721407bb Mon Sep 17 00:00:00 2001
From: Oscar Esteban <code@oscaresteban.es>
Date: Thu, 3 Sep 2020 08:49:33 -0700
Subject: [PATCH 1/2] FIX: Improved control over correlations plot

This PR adds a ``columns`` argument to the interface and underlying
function so that fMRIPrep can select what confounds will be used to
estimate the correlations.

The PR also reduces the default maximum number of signals that are
plotted from 70 to 20.

References: #1746
References: https://github.com/poldracklab/fmriprep/issues/2245#issuecomment-682618147
---
 niworkflows/interfaces/plotting.py |  9 ++++--
 niworkflows/viz/plots.py           | 45 ++++++++++++++++++++----------
 2 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/niworkflows/interfaces/plotting.py b/niworkflows/interfaces/plotting.py
index f23c37eabbb..8561dda7a1c 100644
--- a/niworkflows/interfaces/plotting.py
+++ b/niworkflows/interfaces/plotting.py
@@ -140,8 +140,12 @@ class _ConfoundsCorrelationPlotInputSpec(BaseInterfaceInputSpec):
         "which all correlation magnitudes "
         "should be ranked and plotted",
     )
+    columns = traits.List(
+        traits.Str,
+        desc="Filter out all regressors not found in this list."
+    )
     max_dim = traits.Int(
-        70,
+        20,
         usedefault=True,
         desc="Maximum number of regressors to include in "
         "plot. Regressors with highest magnitude of "
@@ -172,8 +176,9 @@ def _run_interface(self, runtime):
             self._results["out_file"] = self.inputs.out_file
         confounds_correlation_plot(
             confounds_file=self.inputs.confounds_file,
+            columns=self.inputs.columns if isdefined(self.inputs.columns) else None,
+            max_dim=self.inputs.max_dim,
             output_file=self._results["out_file"],
             reference=self.inputs.reference_column,
-            max_dim=self.inputs.max_dim,
         )
         return runtime
diff --git a/niworkflows/viz/plots.py b/niworkflows/viz/plots.py
index c5dea7282d4..dd38001b198 100644
--- a/niworkflows/viz/plots.py
+++ b/niworkflows/viz/plots.py
@@ -903,40 +903,55 @@ def compcor_variance_plot(
 
 
 def confounds_correlation_plot(
-    confounds_file, output_file=None, figure=None, reference="global_signal", max_dim=70
+    confounds_file,
+    columns=None,
+    figure=None,
+    max_dim=20,
+    output_file=None,
+    reference="global_signal",
 ):
     """
+    Generate a bar plot with the correlation of confounds.
+
     Parameters
     ----------
-    confounds_file: str
+    confounds_file: :obj:`str`
         File containing all confound regressors to be included in the
         correlation plot.
-    output_file: str or None
+    figure: figure or None
+        Existing figure on which to plot.
+    columns: :obj:`list` or :obj:`None`
+        Select a list of columns from the dataset (``reference`` is always kept).
+    max_dim: :obj:`int`
+        The maximum number of regressors to be included in the output plot.
+        Reductions (e.g., CompCor) of high-dimensional data can yield so many
+        regressors that the correlation structure becomes obfuscated. This
+        criterion selects the ``max_dim`` regressors that have the largest
+        correlation magnitude with ``reference`` for inclusion in the plot.
+    output_file: :obj:`str` or :obj:`None`
         Path where the output figure should be saved. If this is not defined,
         then the plotting axes will be returned instead of the saved figure
         path.
-    figure: figure or None
-        Existing figure on which to plot.
-    reference: str
-        `confounds_correlation_plot` prepares a bar plot of the correlations
+    reference: :obj:`str`
+        ``confounds_correlation_plot`` prepares a bar plot of the correlations
         of each confound regressor with a reference column. By default, this
         is the global signal (so that collinearities with the global signal
         can readily be assessed).
-    max_dim: int
-        The maximum number of regressors to be included in the output plot.
-        Reductions (e.g., CompCor) of high-dimensional data can yield so many
-        regressors that the correlation structure becomes obfuscated. This
-        criterion selects the `max_dim` regressors that have the largest
-        correlation magnitude with `reference` for inclusion in the plot.
 
     Returns
     -------
     axes and gridspec
-        Plotting axes and gridspec. Returned only if `output_file` is None.
-    output_file: str
+        Plotting axes and gridspec. Returned only if ``output_file`` is ``None``.
+    output_file: :obj:`str`
         The file where the figure is saved.
     """
     confounds_data = pd.read_table(confounds_file)
+
+    if columns:
+        columns = set(columns)  # Drop duplicates
+        columns.add(reference)  # Make sure the reference is included
+        confounds_data = confounds_data[[el for el in columns]]
+
     confounds_data = confounds_data.loc[
         :, np.logical_not(np.isclose(confounds_data.var(skipna=True), 0))
     ]

From b2e84d9bf6c6cbb53cece9c24e67615c802574e8 Mon Sep 17 00:00:00 2001
From: Oscar Esteban <code@oscaresteban.es>
Date: Thu, 3 Sep 2020 10:15:06 -0700
Subject: [PATCH 2/2] enh(tests): improve coverage

---
 niworkflows/tests/test_confounds.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/niworkflows/tests/test_confounds.py b/niworkflows/tests/test_confounds.py
index d11a3b442de..929a7c36193 100644
--- a/niworkflows/tests/test_confounds.py
+++ b/niworkflows/tests/test_confounds.py
@@ -160,6 +160,15 @@ def test_ConfoundsCorrelationPlot():
     """confounds correlation report test"""
     confounds_file = os.path.join(datadir, "confounds_test.tsv")
     cc_rpt = ConfoundsCorrelationPlot(
-        confounds_file=confounds_file, reference_column="a"
+        confounds_file=confounds_file, reference_column="a",
     )
     _smoke_test_report(cc_rpt, "confounds_correlation.svg")
+
+
+def test_ConfoundsCorrelationPlotColumns():
+    """confounds correlation report test with a ``columns`` selection"""
+    confounds_file = os.path.join(datadir, "confounds_test.tsv")
+    cc_rpt = ConfoundsCorrelationPlot(
+        confounds_file=confounds_file, reference_column="a", columns=["b", "d", "f"],
+    )
+    _smoke_test_report(cc_rpt, "confounds_correlation_cols.svg")