forked from rphlypo/parietalretreat
-
Notifications
You must be signed in to change notification settings - Fork 0
/
setup_data_paths.py
42 lines (34 loc) · 1.31 KB
/
setup_data_paths.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import load_data
import glob
import re
import os.path
def get_subject_ids(data_set):
"""
for a given dataset, get the different subjects as a list
this allows for setting up the lists of the data paths
but is also convenient for sampling of train-test splitting based
on the subject list
arguments:
---------
data_set: string
at this moment the only datasets that are supported are
"hcp", "henson2010faces", or openfmri datasets "ds???"
returns:
-------
subj_ids: list of strings with subject ids in the dataset
"""
if data_set == "hcp":
# subjects from HCP
base_dir = "/home/storage/data/HCP/Q2"
subj_str = "[0-9]" * 6
elif data_set.lower()[:2] == "ds" or data_set == "henson2010faces":
# subject from any openfmri data set
base_dir = os.path.join("/home/storage/workspace/brainpedia/preproc",
data_set)
subj_str = "sub" + "[0-9]" * 3
return _subject_ids(base_dir, subj_str)
def _subject_ids(base_dir, subj_str):
regexp = re.compile(os.path.join(base_dir, format(subj_str)))
subject_paths = glob.glob(os.path.join(base_dir, "*"))
subject_paths = [p for p in subject_paths if regexp.match(p)]
return [os.path.basename(subj_path) for subj_path in subject_paths]