bids-standard · leej3 · May 11, 2021 · May 11, 2021 · May 11, 2021 · May 11, 2021
diff --git a/bids/__init__.py b/bids/__init__.py
@@ -13,7 +13,8 @@
     "layout",
     "reports",
     "utils",
-    "variables"
+    "variables",
+    "statsmodels_design_synthesizer",
 ]
 
 due.cite(Doi("10.1038/sdata.2016.44"),

diff --git a/bids/modeling/transformations/base.py b/bids/modeling/transformations/base.py
@@ -11,10 +11,9 @@
 import pandas as pd
 
 from bids.utils import listify, convert_JSON
-from bids.variables import SparseRunVariable
+from bids.variables import SparseRunVariable, BIDSRunVariableCollection
 from bids.modeling import transformations as pbt
 
-
 class Transformation(metaclass=ABCMeta):
 
     ### Class-level settings ###
@@ -405,13 +404,13 @@ class TransformerManager(object):
             If None, the PyBIDS transformations module is used.
     """
 
-    def __init__(self, default=None):
+    def __init__(self, default=None, save_pre_dense=False):
         self.transformations = {}
         if default is None:
             # Default to PyBIDS transformations
             default = pbt
         self.default = default
-
+        self.save_pre_dense = save_pre_dense
     def _sanitize_name(self, name):
         """ Replace any invalid/reserved transformation names with acceptable
         equivalents.
@@ -448,6 +447,7 @@ def transform(self, collection, transformations):
         transformations : list
             List of transformations to apply.
         """
+        changed_vars = []
         for t in transformations:
             t = convert_JSON(t) # make sure all keys are snake case
             kwargs = dict(t)
@@ -462,5 +462,32 @@ def transform(self, collection, transformations):
                                      "explicitly register a handler, or pass a"
                                      " default module that supports it." % name)
                 func = getattr(self.default, name)
-                func(collection, cols, **kwargs)
+
+            # check for sparse variables here and save them
+            matching_sparse_cols = []
+            if self.save_pre_dense:
+                for variable in collection.match_variables(cols, return_type='variable'):
+                    if isinstance(variable, SparseRunVariable):
+                        matching_sparse_cols.append(variable.clone())
+
+            func(collection, cols, **kwargs)
+
+            # check here to see if those variables are still sparse
+            # if so, continue, if not, save the sparse variables prior to transformation
+            if len(matching_sparse_cols) > 0:
+                for variable in matching_sparse_cols:
+                    name = variable.name
+                    matching_post_tfm = collection.match_variables(name, return_type='variable')
+                    assert len(matching_post_tfm) < 2
+                    if (len(matching_post_tfm) == 0) or not isinstance(matching_post_tfm[0], SparseRunVariable):
+                        changed_vars.append(variable)
+
+        if self.save_pre_dense:
+            if len(changed_vars) > 0:
+                changed_vars = BIDSRunVariableCollection(changed_vars)
+                assert np.all([isinstance(vv, SparseRunVariable) for vv in changed_vars.variables.values()])
+                return collection, changed_vars
+            else:
+                return collection, None
         return collection
+
diff --git a/bids/statsmodels_design_synthesizer.py b/bids/statsmodels_design_synthesizer.py
@@ -0,0 +1,125 @@
+#! /usr/bin/env python
+import argparse
+import sys
+import json
+from pathlib import Path
+import pandas as pd
+import numpy as np
+from collections import namedtuple
+from bids.modeling import transformations
+from bids.utils import convert_JSON
+from bids.variables import BIDSRunVariableCollection, SparseRunVariable
+from bids.layout.utils import parse_file_entities
+from bids.variables.io import get_events_collection
+from bids.variables.entities import RunNode
+
+
+def statsmodels_design_synthesizer(params):
+    """Apply a model's first-node transformations to one events file.
+
+    Reads the events TSV, runs the transformations listed in the model JSON
+    through a ``TransformerManager`` and writes the resulting variables as
+    tab-separated files into the output directory.
+
+    Parameters
+    ----------
+    params : dict
+        Run settings. The keys read here are:
+
+        - ``"events_tsv"`` : path to the events TSV (required).
+        - ``"transforms"`` : path to the model JSON file (required).
+        - ``"nvol"`` and ``"tr"`` : number of volumes and repetition time;
+          their product is used as the run duration (required).
+        - ``"output_dir"`` : directory receiving the written files
+          (default ``'design_synthesizer'``).
+        - ``"output_sampling_rate"`` : when set to a non-zero value, the
+          full design matrix is also written, sampled at this rate.
+
+    Raises
+    ------
+    ValueError
+        If the transforms file does not exist, or if the model (after key
+        conversion by ``convert_JSON``) has neither a ``"nodes"`` nor a
+        ``"steps"`` key.
+
+    Notes
+    -----
+    Files written to the output directory:
+
+    - ``transformed_events.tsv`` : the sparse variables saved before a
+      transformation changed them when the transformer reports any,
+      otherwise the sparse variables of the transformed collection.
+    - ``transformed_time_series.tsv`` : the dense variables; skipped when
+      exporting them raises ``ValueError``.
+    - ``aggregated_design.tsv`` : the full design matrix; only written when
+      an output sampling rate was given.
+    """
+
+    # Sampling rate of output
+    sampling_rate_out = params.get("output_sampling_rate")
+    output_dir = Path(params.get("output_dir", 'design_synthesizer'))
+    # parents=True so that a nested output location need not pre-exist.
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Process transformations file
+    # TODO: abstract transforms file reading into a function.
+    # TODO: add transforms functionality, for now only model.json is handled
+    # TODO: some basic error checking to confirm the correct level of
+    # transformations has been obtained. This will most likely be the case since
+    # transformations at higher levels will no longer be required when the new
+    # "flow" approach is used.
+    transforms_file = Path(params["transforms"])
+    if not transforms_file.exists():
+        raise ValueError(f"Cannot find {transforms_file}")
+    model = convert_JSON(json.loads(transforms_file.read_text()))
+
+    # The list of nodes is stored under either "nodes" or "steps".
+    if "nodes" in model:
+        nodes_key = "nodes"
+    elif "steps" in model:
+        nodes_key = "steps"
+    else:
+        raise ValueError("Cannot find a key for nodes in the model file")
+    # Only the transformations of the first node are applied.
+    model_transforms = model[nodes_key][0]["transformations"]
+
+    duration = params["nvol"] * params["tr"]
+
+    # Get relevant collection
+    coll_df = pd.read_csv(params["events_tsv"], delimiter="\t")
+
+    # TODO: this will need to be implemented without RunNode to break cyclic
+    # dependencies if transformations is to be extracted
+    run = RunNode(parse_file_entities(params["events_tsv"]), None, duration, params["tr"], params["nvol"])
+    coll = get_events_collection(coll_df, run, output='collection')
+
+    # perform transformations, additionally save variables that were changed
+    # TODO: need to consider sparse to sparse
+    colls, colls_pre_densification = transformations.TransformerManager(save_pre_dense=True).transform(coll, model_transforms)
+
+    # Save sparse vars. The transformer hands back None when no sparse
+    # variable was changed; test for that explicitly so that a genuine
+    # AttributeError raised inside to_df() is not silently swallowed.
+    if colls_pre_densification is None:
+        sparse_source = colls
+    else:
+        sparse_source = colls_pre_densification
+    df_sparse = sparse_source.to_df(include_dense=False)
+    df_sparse.to_csv(output_dir / "transformed_events.tsv", index=False, sep="\t", na_rep="n/a")
+
+    # Save dense vars. Best effort: to_df() raising ValueError is treated as
+    # "nothing dense to export" (presumably no dense variables exist --
+    # TODO confirm). Only the export is guarded, not the file write.
+    try:
+        df_dense = colls.to_df(include_sparse=False)
+    except ValueError:
+        df_dense = None
+    if df_dense is not None:
+        df_dense.to_csv(output_dir / "transformed_time_series.tsv", index=False, sep="\t", na_rep="n/a")
+
+    # Save full design_matrix
+    if sampling_rate_out:
+        df_full = colls.to_df(sampling_rate=sampling_rate_out)
+        df_full.to_csv(output_dir / "aggregated_design.tsv", index=False, sep="\t", na_rep="n/a")
+
+
+def create_parser():
+    """Build the command-line parser for the design synthesizer.
+
+    Returns
+    -------
+    argparse.ArgumentParser
+        Parser with the required ``--events-tsv``, ``--transforms`` and
+        ``--output-dir`` options, the optional ``--output-sampling-rate``
+        option, and a group of required timing options (``--nvol``,
+        ``--tr`` and ``--ta``).
+    """
+    parser = argparse.ArgumentParser()
+
+    # (flag, add_argument keyword arguments), registered in this order.
+    general_options = [
+        ("--events-tsv", dict(required=True, help="Path to events TSV")),
+        ("--transforms", dict(required=True, help="Path to transform or model json")),
+        (
+            "--output-sampling-rate",
+            dict(
+                required=False,
+                type=float,
+                help="Output sampling rate in Hz when a full design matrix is desired.",
+            ),
+        ),
+        (
+            "--output-dir",
+            dict(required=True, help="Path to directory to write processed event files."),
+        ),
+    ]
+    for flag, options in general_options:
+        parser.add_argument(flag, **options)
+
+    timing_group = parser.add_argument_group(
+        "Specify some essential details about the time series."
+    )
+    timing_options = [
+        ("--nvol", dict(required=True, type=int, help="Number of volumes in func time-series")),
+        ("--tr", dict(required=True, type=float, help="TR for func time series")),
+        ("--ta", dict(required=True, type=float, help="TA for events")),
+    ]
+    for flag, options in timing_options:
+        timing_group.add_argument(flag, **options)
+
+    return parser
+
+
+def main(user_args=None):
+    """Run the design synthesizer from the command line or from a mapping.
+
+    Parameters
+    ----------
+    user_args : dict, optional
+        Ready-made parameters that are passed on unchanged. When None, the
+        parameters are parsed from ``sys.argv[1:]`` instead.
+    """
+    cli_parser = create_parser()
+    params = vars(cli_parser.parse_args(sys.argv[1:])) if user_args is None else user_args
+    statsmodels_design_synthesizer(params)
+
+
+if __name__ == "__main__":
+    # Run the CLI when executed as a script; main() has no return value, so
+    # sys.exit() receives None.
+    sys.exit(main())  # pragma: no cover
diff --git a/bids/tests/test_statsmodels-design-synthesizer.py b/bids/tests/test_statsmodels-design-synthesizer.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+
+"""Tests for `bids_statsmodels_design_synthesizer` package."""
+
+import pytest
+import subprocess as sp
+from pathlib import Path
+import tempfile
+
+SYNTHESIZER = "statsmodels-design-synthesizer"
+from bids import statsmodels_design_synthesizer as synth_mod
+
+# from bids_statsmodels_design_synthesizer import Path(SYNTHESIZER).stem as synth_mod
+DATA_DIR = Path(__file__).parent.joinpath("data/ds005").absolute()
+
+# Example user-argument sets. "output_dir" is deliberately left out: it is
+# better defined in the scope of each test.
+EXAMPLE_USER_ARGS = dict(
+    transforms=f"{DATA_DIR}/models/ds-005_type-mfx_model.json",
+    events_tsv=f"{DATA_DIR}/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv",
+    tr=2,
+    ta=2,
+    nvol=160,
+)
+# Same events file with the "test" model, plus an output sampling rate.
+EXAMPLE_USER_ARGS_2 = dict(
+    transforms=f"{DATA_DIR}/models/ds-005_type-test_model.json",
+    events_tsv=f"{DATA_DIR}/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv",
+    tr=2,
+    ta=2,
+    nvol=160,
+    output_sampling_rate=10,
+)
+
+def test_cli_help():
+    """The console script accepts ``-h`` and rejects an unknown option."""
+    help_cmd = [SYNTHESIZER, "-h"]
+    unknown_option_cmd = [SYNTHESIZER, "--non-existent"]
+
+    # check_output raises CalledProcessError on a non-zero exit status, so
+    # this call returning normally means the help request succeeded.
+    sp.check_output(help_cmd)
+    with pytest.raises(sp.CalledProcessError):
+        sp.check_output(unknown_option_cmd)
+
+
+@pytest.mark.parametrize(
+    "test_case,user_args",
+    [
+        # Labels name the model file each argument set points at.
+        ("Model type mfx", EXAMPLE_USER_ARGS),
+        ("Model type test", EXAMPLE_USER_ARGS_2),
+    ]
+)
+def test_design_aggregation_function(tmp_path, test_case, user_args):
+    """Run the synthesizer through its Python entry point.
+
+    Parameters
+    ----------
+    tmp_path : pathlib.Path
+        Per-test temporary directory, used as the output directory.
+    test_case : str
+        Human-readable label of the parametrized case.
+    user_args : dict
+        Example arguments without ``"output_dir"``.
+    """
+    # Work on a copy: the example dicts are module-level and shared between
+    # tests, so they must not pick up this test's output directory.
+    run_args = dict(user_args, output_dir=str(tmp_path))
+    synth_mod.main(run_args)
+    # The sparse events table is written on every successful run.
+    assert (tmp_path / "transformed_events.tsv").exists()
+
+@pytest.mark.parametrize(
+    "test_case,user_args",
+    [
+        # Labels name the model file each argument set points at.
+        ("Model type mfx", EXAMPLE_USER_ARGS),
+        ("Model type test", EXAMPLE_USER_ARGS_2),
+    ]
+)
+def test_minimal_cli_functionality(tmp_path, test_case, user_args):
+    """
+    Run the synthesizer through its console script.
+
+    We roughly want to implement the equivalent of the following:
+    from bids.analysis import Analysis
+    from bids.layout import BIDSLayout
+
+    layout = BIDSLayout("data/ds000003")
+    analysis = Analysis(model="data/ds000003/models/model-001_smdl.json",layout=layout)
+    analysis.setup()
+
+    more specifically we want to reimplement this line
+    https://github.com/bids-standard/pybids/blob/b6cd0f6787230ce976a374fbd5fce650865752a3/bids/analysis/analysis.py#L282
+    """
+    # Work on a copy: the example dicts are module-level and shared between
+    # tests, so they must not pick up this test's output directory.
+    run_args = dict(user_args, output_dir=str(tmp_path))
+    # One "--some-key=value" token per argument. The command is passed as a
+    # list rather than split from a string, so paths containing whitespace
+    # stay intact.
+    cli_options = [
+        f"--{key.lower().replace('_', '-')}={value}"
+        for key, value in run_args.items()
+    ]
+    sp.check_output([SYNTHESIZER, *cli_options])
+    # The sparse events table is written on every successful run.
+    assert (tmp_path / "transformed_events.tsv").exists()
+
+