Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Add support for experimental design aggregation #724

Draft
wants to merge 38 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
27c21bf
add initial prototype of design aggregation tool
leej3 May 11, 2021
bc6cd91
tidy arg parsing
leej3 May 11, 2021
bba5284
move get_events_collection_to variables.io
Shotgunosine May 11, 2021
0d2a888
fix data path during testing
leej3 May 11, 2021
4e41306
fix event file reading
leej3 May 11, 2021
14d32df
tidy get_events_collection
leej3 May 12, 2021
0fb031b
move loading of regressor into function
leej3 May 12, 2021
8ba2902
move loading of phys and stim files into function
leej3 May 12, 2021
605c2c0
output sampling rate not needed for reading input
leej3 May 12, 2021
b115bc0
move source logic out of get_rec_collection
leej3 May 12, 2021
63f84a5
will not drop na in records or reg collections for now
leej3 May 12, 2021
1ee5de1
use tempdir for output during test
leej3 May 12, 2021
31776fa
remove output-tsv arg and start sparse/dense saving
leej3 May 12, 2021
bc39cd6
have tfm manager check for densification or deletion of sparse variables
Shotgunosine May 12, 2021
044386b
parametrize tests
leej3 May 13, 2021
ae83df9
remove stutter
leej3 May 13, 2021
a8fb923
add test for sampling rate with associated fix
leej3 May 13, 2021
ce7a50b
move test output to the pytest temp dir
leej3 May 13, 2021
810f29e
oops
leej3 May 13, 2021
76c0c54
consider the sparse variables
leej3 May 13, 2021
a2fba92
correct indentation bug
leej3 May 13, 2021
4a6dac0
update TODOs
leej3 May 13, 2021
1094c2f
fix sparse var saving
Shotgunosine May 13, 2021
e1a977a
more fixes for sparse/dense
leej3 May 13, 2021
1889e41
add model with convolution
leej3 May 13, 2021
a1764f1
Fix sparse variable filtering
Shotgunosine May 13, 2021
5369664
fix check columns in output dataframes
Shotgunosine May 13, 2021
34a209f
use click for cli
leej3 May 13, 2021
3c57020
enh don't rely on run node for get events collection
Shotgunosine May 13, 2021
979ec10
enh remove run node from rec and reg loading
Shotgunosine May 13, 2021
69c3720
remove params, kwargs no longer captured in params
leej3 May 14, 2021
02cd6fc
add transforms reading function
leej3 May 14, 2021
a9ae623
add additional support for transformation parsing
leej3 May 14, 2021
857c5e7
Apply suggestions from code review
May 29, 2021
520bab8
Merge remote-tracking branch 'origin/master' into add_design_aggregator
leej3 May 29, 2021
6344816
rename and move to cli
leej3 May 29, 2021
bb47b4c
make ta default to tr
leej3 May 29, 2021
14391a9
improve parsing of transforms_in
leej3 May 29, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion bids/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
"layout",
"reports",
"utils",
"variables"
"variables",
"statsmodels_design_synthesizer",
]

due.cite(Doi("10.1038/sdata.2016.44"),
Expand Down
37 changes: 32 additions & 5 deletions bids/modeling/transformations/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,9 @@
import pandas as pd

from bids.utils import listify, convert_JSON
from bids.variables import SparseRunVariable
from bids.variables import SparseRunVariable, BIDSRunVariableCollection
from bids.modeling import transformations as pbt


class Transformation(metaclass=ABCMeta):

### Class-level settings ###
Expand Down Expand Up @@ -405,13 +404,13 @@ class TransformerManager(object):
If None, the PyBIDS transformations module is used.
"""

def __init__(self, default=None):
def __init__(self, default=None, save_pre_dense=False):
self.transformations = {}
if default is None:
# Default to PyBIDS transformations
default = pbt
self.default = default

self.save_pre_dense = save_pre_dense
def _sanitize_name(self, name):
""" Replace any invalid/reserved transformation names with acceptable
equivalents.
Expand Down Expand Up @@ -448,6 +447,7 @@ def transform(self, collection, transformations):
transformations : list
List of transformations to apply.
"""
changed_vars = []
for t in transformations:
t = convert_JSON(t) # make sure all keys are snake case
kwargs = dict(t)
Expand All @@ -462,5 +462,32 @@ def transform(self, collection, transformations):
"explicitly register a handler, or pass a"
" default module that supports it." % name)
func = getattr(self.default, name)
func(collection, cols, **kwargs)

# check for sparse variables here and save them
matching_sparse_cols = []
if self.save_pre_dense:
for variable in collection.match_variables(cols, return_type='variable'):
if isinstance(variable, SparseRunVariable):
matching_sparse_cols.append(variable.clone())

func(collection, cols, **kwargs)

# check here to see if those variables are still sparse
# if so, continue, if not, save the sparse variables prior to transformation
if len(matching_sparse_cols) > 0:
for variable in matching_sparse_cols:
name = variable.name
matching_post_tfm = collection.match_variables(name, return_type='variable')
assert len(matching_post_tfm) < 2
if (len(matching_post_tfm) == 0) or not isinstance(matching_post_tfm[0], SparseRunVariable):
changed_vars.append(variable)

if self.save_pre_dense:
if len(changed_vars) > 0:
changed_vars = BIDSRunVariableCollection(changed_vars)
assert np.all([isinstance(vv, SparseRunVariable) for vv in changed_vars.variables.values()])
return collection, changed_vars
else:
return collection, None
return collection

125 changes: 125 additions & 0 deletions bids/statsmodels_design_synthesizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#! /usr/bin/env python
import argparse
import sys
import json
from pathlib import Path
import pandas as pd
import numpy as np
from collections import namedtuple
from bids.modeling import transformations
from bids.utils import convert_JSON
from bids.variables import BIDSRunVariableCollection, SparseRunVariable
from bids.layout.utils import parse_file_entities
from bids.variables.io import get_events_collection
from bids.variables.entities import RunNode


def statsmodels_design_synthesizer(params):
    """Apply a model's transformations to one events file and save the results.

    Parameters
    ----------
    params : dict
        Run configuration. Keys read by this function:

        - ``"events_tsv"``: path to a tab-separated events file.
        - ``"transforms"``: path to a model JSON file. It must contain a
          ``"nodes"`` or ``"steps"`` list (looked up after the keys have been
          passed through ``convert_JSON``); the ``"transformations"`` of the
          first entry are applied.
        - ``"nvol"`` and ``"tr"``: their product is used as the run duration.
        - ``"output_dir"`` (optional): directory to write to, defaulting to
          ``"design_synthesizer"``. It is created, including missing parent
          directories, if it does not exist.
        - ``"output_sampling_rate"`` (optional): when truthy, the full design
          matrix is additionally written at this sampling rate.

    Outputs
    -------
    ``transformed_events.tsv``
        Sparse variables. When the transformer reports variables that stopped
        being sparse, their pre-transformation versions are written instead.
    ``transformed_time_series.tsv``
        Dense variables; skipped if building the dense frame raises
        ``ValueError``.
    ``aggregated_design.tsv``
        Full design matrix; only written when an output sampling rate is given.

    Missing values are written as ``n/a`` in all files.

    Raises
    ------
    ValueError
        If the transforms file does not exist, or the model has neither a
        ``"nodes"`` nor a ``"steps"`` key.
    """

    # Sampling rate of output
    sampling_rate_out = params.get("output_sampling_rate")
    output_dir = Path(params.get("output_dir", 'design_synthesizer'))
    # parents=True so that a nested output path whose parents do not exist
    # yet is created instead of raising FileNotFoundError.
    output_dir.mkdir(parents=True, exist_ok=True)

    # Process transformations file
    # TODO: abstract transforms file reading into a function.
    # TODO: add transforms functionality, for now only model.json is handled
    # TODO: some basic error checking to confirm the correct level of
    # transformations has been obtained. This will most likely be the case since
    # transformations at higher levels will no longer be required when the new
    # "flow" approach is used.
    transforms_file = Path(params["transforms"])
    if not transforms_file.exists():
        raise ValueError(f"Cannot find {transforms_file}")
    model = convert_JSON(json.loads(transforms_file.read_text()))

    if "nodes" in model:
        nodes_key = "nodes"
    elif "steps" in model:
        nodes_key = "steps"
    else:
        raise ValueError("Cannot find a key for nodes in the model file")
    model_transforms = model[nodes_key][0]["transformations"]

    duration = params["nvol"] * params["tr"]

    # Get relevant collection
    coll_df = pd.read_csv(params["events_tsv"], delimiter="\t")

    # TODO: this will need to be implemented without RunNode to break cyclic
    # dependencies if transformations is to be extracted
    run = RunNode(
        parse_file_entities(params["events_tsv"]),
        None,
        duration,
        params["tr"],
        params["nvol"],
    )
    coll = get_events_collection(coll_df, run, output='collection')

    # perform transformations, additionally save variables that were changed
    # TODO: need to consider sparse to sparse
    manager = transformations.TransformerManager(save_pre_dense=True)
    colls, colls_pre_densification = manager.transform(coll, model_transforms)

    # Save sparse vars. The second return value is None when no sparse
    # variable was changed, in which case the transformed collection is used.
    if colls_pre_densification is None:
        sparse_source = colls
    else:
        sparse_source = colls_pre_densification
    df_sparse = sparse_source.to_df(include_dense=False)
    df_sparse.to_csv(
        output_dir / "transformed_events.tsv", index=False, sep="\t", na_rep="n/a"
    )

    # Save dense vars. Best effort: a ValueError from to_df is tolerated
    # (presumably raised when the collection has no dense variables -- TODO
    # confirm). Only the to_df call is guarded so that errors while writing
    # the file are not silently swallowed.
    try:
        df_dense = colls.to_df(include_sparse=False)
    except ValueError:
        df_dense = None
    if df_dense is not None:
        df_dense.to_csv(
            output_dir / "transformed_time_series.tsv",
            index=False,
            sep="\t",
            na_rep="n/a",
        )

    # Save full design_matrix
    if sampling_rate_out:
        df_full = colls.to_df(sampling_rate=sampling_rate_out)
        df_full.to_csv(
            output_dir / "aggregated_design.tsv", index=False, sep="\t", na_rep="n/a"
        )

def create_parser():
    """Build the command-line parser for the design synthesizer.

    Returns
    -------
    argparse.ArgumentParser
        Parser exposing ``--events-tsv``, ``--transforms``,
        ``--output-sampling-rate`` (optional float), ``--output-dir`` and, in
        a separate argument group, the required timing options ``--nvol``
        (int), ``--tr`` (float) and ``--ta`` (float).
    """
    parser = argparse.ArgumentParser()

    # Input and output locations.
    parser.add_argument("--events-tsv", help="Path to events TSV", required=True)
    parser.add_argument(
        "--transforms", help="Path to transform or model json", required=True
    )
    parser.add_argument(
        "--output-sampling-rate",
        type=float,
        required=False,
        help="Output sampling rate in Hz when a full design matrix is desired.",
    )
    parser.add_argument(
        "--output-dir",
        help="Path to directory to write processed event files.",
        required=True,
    )

    # Timing options share the same shape, so register them from a table.
    timing_group = parser.add_argument_group(
        "Specify some essential details about the time series."
    )
    timing_options = (
        ("--nvol", int, "Number of volumes in func time-series"),
        ("--tr", float, "TR for func time series"),
        ("--ta", float, "TA for events"),
    )
    for flag, caster, description in timing_options:
        timing_group.add_argument(flag, required=True, type=caster, help=description)

    return parser


def main(user_args=None):
    """Entry point: resolve the run parameters and synthesize the design.

    Parameters
    ----------
    user_args : dict, optional
        Ready-made parameter mapping. When omitted (``None``), the parameters
        are parsed from ``sys.argv[1:]`` with the parser from
        ``create_parser`` and converted to a dict.
    """
    cli_parser = create_parser()
    params = (
        vars(cli_parser.parse_args(sys.argv[1:]))
        if user_args is None
        else user_args
    )
    statsmodels_design_synthesizer(params)


if __name__ == "__main__":
    # Run the CLI when executed as a script; main()'s return value is passed
    # to sys.exit as the exit status.
    sys.exit(main())  # pragma: no cover
76 changes: 76 additions & 0 deletions bids/tests/test_statsmodels-design-synthesizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/usr/bin/env python

"""Tests for `bids_statsmodels_design_synthesizer` package."""

import pytest
import subprocess as sp
from pathlib import Path
import tempfile

SYNTHESIZER = "statsmodels-design-synthesizer"
from bids import statsmodels_design_synthesizer as synth_mod

# from bids_statsmodels_design_synthesizer import Path(SYNTHESIZER).stem as synth_mod
# Absolute path of the ds005 example dataset located next to this test module.
DATA_DIR = (Path(__file__).parent / "data" / "ds005").absolute()

# Example user argument combinations. ``output_dir`` is deliberately left out:
# it is better defined in the scope of each test.
EXAMPLE_USER_ARGS = dict(
    transforms=f"{DATA_DIR}/models/ds-005_type-mfx_model.json",
    events_tsv=f"{DATA_DIR}/sub-01/func/sub-01_task-mixedgamblestask_run-01_events.tsv",
    tr=2,
    ta=2,
    nvol=160,
)
# Same run, but with the "test" model and a full design matrix requested.
EXAMPLE_USER_ARGS_2 = {
    **EXAMPLE_USER_ARGS,
    "transforms": f"{DATA_DIR}/models/ds-005_type-test_model.json",
    "output_sampling_rate": 10,
}

def test_cli_help():
    """The console script answers ``-h`` and fails on an unknown option."""
    help_cmd = [SYNTHESIZER, "-h"]
    bogus_cmd = [SYNTHESIZER, "--non-existent"]

    # check_output raises CalledProcessError on a non-zero exit status, so a
    # plain call is enough to assert that asking for help succeeds.
    sp.check_output(help_cmd)
    with pytest.raises(sp.CalledProcessError):
        sp.check_output(bogus_cmd)


@pytest.mark.parametrize(
    "test_case,user_args",
    [
        # Labels name the model file each argument set points to.
        ("Model type mfx", EXAMPLE_USER_ARGS),
        ("Model type test", EXAMPLE_USER_ARGS_2),
    ]
)
def test_design_aggregation_function(tmp_path, test_case, user_args):
    """Run the synthesizer in-process through ``synth_mod.main``.

    ``test_case`` is only a human-readable label for the parametrized case;
    output is written to pytest's per-test ``tmp_path``.
    """
    # Work on a copy: the parametrized dicts are module-level objects shared
    # with other tests, so they must not be mutated here.
    run_args = {**user_args, "output_dir": str(tmp_path)}
    synth_mod.main(run_args)

@pytest.mark.parametrize(
    "test_case,user_args",
    [
        # Labels name the model file each argument set points to.
        ("Model type mfx", EXAMPLE_USER_ARGS),
        ("Model type test", EXAMPLE_USER_ARGS_2),
    ]
)
def test_minimal_cli_functionality(tmp_path, test_case, user_args):
    """
    We roughly want to implement the equivalent of the following:
    from bids.analysis import Analysis
    from bids.layout import BIDSLayout

    layout = BIDSLayout("data/ds000003")
    analysis = Analysis(model="data/ds000003/models/model-001_smdl.json",layout=layout)
    analysis.setup()

    more specifically we want to reimplement this line
    https://github.com/bids-standard/pybids/blob/b6cd0f6787230ce976a374fbd5fce650865752a3/bids/analysis/analysis.py#L282
    """
    # Work on a copy: the parametrized dicts are module-level objects shared
    # with other tests, so they must not be mutated here.
    run_args = {**user_args, "output_dir": str(tmp_path)}

    # Build the argv list directly (one "--key=value" item per argument)
    # rather than joining into a string and splitting on whitespace, which
    # would break any path that contains a space.
    cmd = [SYNTHESIZER]
    cmd.extend(
        f"--{key.lower().replace('_', '-')}={value}"
        for key, value in run_args.items()
    )
    sp.check_output(cmd)


Loading