From e25244b20a9f24bed29163039e24838e93faa109 Mon Sep 17 00:00:00 2001
From: lwehbe
Date: Sun, 9 Oct 2022 21:17:16 -0400
Subject: [PATCH] Add files via upload

---
 make_predictions.py | 103 ++++++++++
 ridge_tools.py      | 102 ++++++++++
 utils.py            | 455 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 660 insertions(+)
 create mode 100644 make_predictions.py
 create mode 100644 ridge_tools.py
 create mode 100644 utils.py

diff --git a/make_predictions.py b/make_predictions.py
new file mode 100644
index 0000000..078fefb
--- /dev/null
+++ b/make_predictions.py
@@ -0,0 +1,103 @@
import argparse
import numpy as np
import warnings

from utils import delay_mat, delay_one, zscore_word_data, run_class_time_CV_crossval_ridge, run_class_time_CV_fmri_crossval_ridge
from utils import subject_runs, surfaces, transforms, load_transpose_zscore, smooth_run_not_masked, CV_ind

from sklearn.decomposition import PCA
from scipy.stats import zscore
import scipy.io as sio

import time as tm
from scipy import signal
import pickle as pk

SKIP_WORDS = 20
END_WORDS = 5176

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--subject", required=True)
    parser.add_argument("--predict_feat_type", required=True)
    parser.add_argument("--nlp_feat_type", required=True)
    parser.add_argument("--nlp_feat_dir", required=True)
    parser.add_argument("--layer", type=int, required=False)
    parser.add_argument("--sequence_length", type=int, required=False)
    parser.add_argument("--output_fname_prefix", required=True)
    parser.add_argument("--output_dir", required=True)
    parser.add_argument("--delay", type=int, default=0)
    parser.add_argument("--recording_type", required=True)
    parser.add_argument("--regress_feat_types", default='0')  # a string: it is compared against '0' and split on '+' below
    parser.add_argument("--encoding", type=int, default=1)

    args = parser.parse_args()
    print(args)

    if args.regress_feat_types != '0':
        regress_feat_names_list = np.sort(args.regress_feat_types.split('+'))
    else:
        regress_feat_names_list = []

    predict_feat_dict = {'feat_type':args.predict_feat_type,
                         'nlp_feat_type':args.nlp_feat_type,
                         'nlp_feat_dir':args.nlp_feat_dir,
                         'layer':args.layer,
                         'seq_len':args.sequence_length}
    warnings.filterwarnings("ignore")

    if args.recording_type == 'meg':

        # predict the previous n words, current word, and the next n words
        delays = list(np.arange(args.delay,-args.delay-1,-1))

        mat = sio.loadmat('./data/meg/{}_HP_notDetrended_25ms.mat'.format(args.subject))
        data = mat['data']

        n_words = data.shape[0]
        n_time = data.shape[2]
        n_sensor = data.shape[1]

        data = data[SKIP_WORDS:END_WORDS]
        data = delay_mat(data,delays)
        print(data.shape)

        corrs_t, preds_t, test_t = run_class_time_CV_crossval_ridge(data,
                                                                    predict_feat_dict,
                                                                    regress_feat_names_list = regress_feat_names_list,
                                                                    delays = delays,
                                                                    encoding = args.encoding,
                                                                    detrend = False,
                                                                    do_correct = [],
                                                                    splay = [],
                                                                    do_acc = False,
                                                                    SKIP_WORDS = SKIP_WORDS,
                                                                    END_WORDS = END_WORDS)
    elif args.recording_type == 'fmri':

        # load fMRI data
        data = np.load('./data/fmri/data_subject_{}.npy'.format(args.subject))
        corrs_t, _, _, preds_t, test_t = run_class_time_CV_fmri_crossval_ridge(data,
                                                                               predict_feat_dict,
                                                                               regress_feat_names_list=regress_feat_names_list)
    else:
        # fail early: the save step below needs the results computed above
        raise ValueError('Unrecognized recording_type {}. Options: meg, fmri'.format(args.recording_type))

    if args.predict_feat_type == 'elmo' or args.predict_feat_type == 'bert':
        if args.regress_feat_types != '0':
            fname = 'predict_{}_with_{}_layer_{}_len_{}_regress_out_{}'.format(args.output_fname_prefix, args.predict_feat_type, args.layer, args.sequence_length, '+'.join(regress_feat_names_list))
        else:
            fname = 'predict_{}_with_{}_layer_{}_len_{}'.format(args.output_fname_prefix, args.predict_feat_type, args.layer, args.sequence_length)
    else:
        if args.regress_feat_types != '0':
            fname = 'predict_{}_with_{}_regress_out_{}'.format(args.output_fname_prefix, args.predict_feat_type, '+'.join(regress_feat_names_list))
        else:
            fname = 'predict_{}_with_{}'.format(args.output_fname_prefix, args.predict_feat_type)

    print('saving: {}'.format(args.output_dir + fname))
    np.save(args.output_dir + fname + '.npy', {'corrs_t':corrs_t,'preds_t':preds_t,'test_t':test_t})
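# Since a dict is saved above, it can be read back with numpy's pickle support;
# a minimal sketch (the path is illustrative):
#
#   results = np.load('./results/predict_<prefix>_with_bert_layer_6_len_25.npy',
#                     allow_pickle=True).item()
#   corrs_t = results['corrs_t']   # one row of correlations per CV fold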
diff --git a/ridge_tools.py b/ridge_tools.py
new file mode 100644
index 0000000..3b7b843
--- /dev/null
+++ b/ridge_tools.py
@@ -0,0 +1,102 @@
from numpy.linalg import inv, svd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.kernel_ridge import KernelRidge
import time
from scipy.stats import zscore

def corr(X,Y):
    # column-wise Pearson correlation
    return np.mean(zscore(X)*zscore(Y),0)

def R2(Pred,Real):
    SSres = np.mean((Real-Pred)**2,0)
    SStot = np.var(Real,0)
    return np.nan_to_num(1-SSres/SStot)

def R2r(Pred,Real):
    # signed square root of R^2 (a negative R^2 keeps its sign)
    R2rs = R2(Pred,Real)
    ind_neg = R2rs<0
    R2rs = np.abs(R2rs)
    R2rs = np.sqrt(R2rs)
    R2rs[ind_neg] *= -1
    return R2rs

def ridge(X,Y,lmbda):
    # closed-form (primal) ridge: (X^T X + lambda I)^-1 X^T Y
    return np.dot(inv(X.T.dot(X)+lmbda*np.eye(X.shape[1])),X.T.dot(Y))

def ridge_by_lambda(X, Y, Xval, Yval, lambdas=np.array([0.1,1,10,100,1000])):
    # validation error (1 - R^2) of plain ridge for each candidate lambda
    error = np.zeros((lambdas.shape[0],Y.shape[1]))
    for idx,lmbda in enumerate(lambdas):
        weights = ridge(X,Y,lmbda)
        error[idx] = 1 - R2(np.dot(Xval,weights),Yval)
    return error

def ridge_sk(X,Y,lmbda):
    rd = Ridge(alpha = lmbda)
    rd.fit(X,Y)
    return rd.coef_.T

def ridgeCV_sk(X,Y,lmbdas):
    rd = RidgeCV(alphas = lmbdas)
    rd.fit(X,Y)
    return rd.coef_.T

def ridge_by_lambda_sk(X, Y, Xval, Yval, lambdas=np.array([0.1,1,10,100,1000])):
    error = np.zeros((lambdas.shape[0],Y.shape[1]))
    for idx,lmbda in enumerate(lambdas):
        weights = ridge_sk(X,Y,lmbda)
        error[idx] = 1 - R2(np.dot(Xval,weights),Yval)
    return error
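# utils.py calls cross_val_ridge with method='kernel_ridge', which this upload
# leaves undefined. The pair below is a minimal sketch of the missing method,
# assuming a linear kernel and the standard dual closed form
# alpha = (X X^T + lambda I)^-1 Y with predictions K alpha; the implementation
# the authors intended may differ (e.g. an SVD-based solver).
def kernel_ridge(X, Y, Xtest, lmbda):
    # dual (kernel-space) ridge: one coefficient per training sample and target
    alpha = np.dot(inv(X.dot(X.T) + lmbda*np.eye(X.shape[0])), Y)
    preds_test = Xtest.dot(X.T).dot(alpha)   # K_test @ alpha
    preds_train = X.dot(X.T).dot(alpha)      # K_train @ alpha
    return preds_test, preds_train, alpha

def kernel_ridge_by_lambda(X, Y, Xval, Yval, lambdas=np.array([0.1,1,10,100,1000])):
    # validation error (1 - R^2) of kernel ridge for each candidate lambda
    error = np.zeros((lambdas.shape[0], Y.shape[1]))
    for idx, lmbda in enumerate(lambdas):
        preds_val, _, _ = kernel_ridge(X, Y, Xval, lmbda)
        error[idx] = 1 - R2(preds_val, Yval)
    return error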
def cross_val_ridge(train_features, train_data, test_features, n_splits = 10,
                    lambdas = np.array([10**i for i in range(-6,10)]),
                    method = 'plain',
                    do_plot = False):

    # per-lambda validation routine and final solver for each method
    # ('kernel_ridge' refers to the sketch defined above)
    ridge_1 = dict(plain = ridge_by_lambda,
                   ridge_sk = ridge_by_lambda_sk,
                   kernel_ridge = kernel_ridge_by_lambda)[method]
    ridge_2 = dict(plain = ridge,
                   ridge_sk = ridge_sk,
                   kernel_ridge = kernel_ridge)[method]

    n_voxels = train_data.shape[1]
    nL = lambdas.shape[0]
    r_cv = np.zeros((nL, n_voxels))

    print(train_features.shape, train_data.shape)

    # accumulate validation error over folds for every candidate lambda
    kf = KFold(n_splits=n_splits)
    start_t = time.time()
    for icv, (trn, val) in enumerate(kf.split(train_data)):
        cost = ridge_1(train_features[trn], train_data[trn],
                       train_features[val], train_data[val],
                       lambdas=lambdas)
        if do_plot:
            import matplotlib.pyplot as plt
            plt.figure()
            plt.imshow(cost, aspect='auto')
        r_cv += cost
    if do_plot:
        plt.figure()
        plt.imshow(r_cv, aspect='auto', cmap='RdBu_r')

    # refit with the best lambda per voxel, grouping voxels that share a lambda
    argmin_lambda = np.argmin(r_cv, axis=0)
    preds = np.zeros((test_features.shape[0], train_data.shape[1]))
    preds_train = np.zeros((train_features.shape[0], train_data.shape[1]))
    weights_kernel_space = np.zeros((train_features.shape[0], train_data.shape[1]))
    for idx_lambda in range(lambdas.shape[0]): # this is much faster than iterating over voxels!
        idx_vox = argmin_lambda == idx_lambda
        if sum(idx_vox) == 0:
            continue
        if method == 'kernel_ridge':
            preds[:,idx_vox], preds_train[:,idx_vox], weights_kernel_space[:,idx_vox] = ridge_2(
                train_features, train_data[:,idx_vox], test_features, lambdas[idx_lambda])
        else:
            # the primal solvers return only the weights; derive the predictions here
            # (weights_kernel_space is filled in only for the kernel method)
            weights = ridge_2(train_features, train_data[:,idx_vox], lambdas[idx_lambda])
            preds[:,idx_vox] = test_features.dot(weights)
            preds_train[:,idx_vox] = train_features.dot(weights)
    if do_plot:
        plt.figure()
        plt.imshow(weights_kernel_space, aspect='auto', cmap='RdBu_r', vmin=-0.5, vmax=0.5)

    return preds, preds_train, weights_kernel_space, np.array([lambdas[i] for i in argmin_lambda])

diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..2696092
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,455 @@
import numpy as np
from sklearn.decomposition import PCA
from scipy.stats import zscore
import time
import csv
import os
import nibabel
from sklearn.metrics.pairwise import euclidean_distances
from scipy.ndimage import gaussian_filter  # scipy.ndimage.filters is deprecated
import scipy.io as sio

from ridge_tools import cross_val_ridge, corr
import time as tm

subject_runs = dict(F = [4,5,6,7],
                    G = [3,4,5,6],
                    H = [3,4,9,10],
                    I = [7,8,9,10],
                    J = [7,8,9,10],
                    K = [7,8,9,10],
                    L = [7,8,9,10],
                    M = [7,8,9,10],
                    N = [7,8,9,10])

surfaces = dict(F = 'fMRI_story_F',
                G = 'fMRI_story_G',
                H = 'fMRI_story_H',
                I = 'fMRI_story_I',
                J = 'fMRI_story_J',
                K = 'fMRI_story_K',
                L = 'fMRI_story_L',
                M = 'fMRI_story_M',
                N = 'fMRI_story_N')

transforms = dict(F = 'F_ars_auto2',
                  G = 'G_ars_auto2',
                  H = 'H_ars_auto2',
                  I = 'I_ars_auto2',
                  J = 'J_ars_auto2',
                  K = 'K_ars_auto2',
                  L = 'L_ars_auto2',
                  M = 'M_ars_auto2',
                  N = 'N_ars_auto2')

def load_transpose_zscore(file):
    dat = nibabel.load(file).get_fdata()  # get_data() was removed in recent nibabel
    dat = dat.T
    return zscore(dat, axis=0)

def smooth_run_not_masked(data, smooth_factor):
    smoothed_data = np.zeros_like(data)
    for i,d in enumerate(data):
        smoothed_data[i] = gaussian_filter(data[i], sigma=smooth_factor, order=0, output=None,
                                           mode='reflect', cval=0.0, truncate=4.0)
    return smoothed_data

def zscore_word_data(data):
    # z-scores each (sensor, time-window) column across words and returns the
    # data flattened to 2D
    n_words = data.shape[0]
    data = np.reshape(data, [n_words, -1])
    data = np.nan_to_num(zscore(data))
    return data

def delay_one(mat, d):
    # delays a matrix by a delay d. Positive d ==> row t has row t-d
    new_mat = np.zeros_like(mat)
    if d > 0:
        new_mat[d:] = mat[:-d]
    elif d < 0:
        new_mat[:d] = mat[-d:]
    else:
        new_mat = mat
    return new_mat

def delay_mat(mat, delays):
    # delays a matrix by a set of delays d.
    # a row t in the returned matrix is the concatenation of
    # row(t-delays[0]), row(t-delays[1]), ..., row(t-delays[last])
    new_mat = np.concatenate([delay_one(mat, d) for d in delays], axis=-1)
    return new_mat
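# A quick sanity check of the delay helpers (hypothetical values, not used by
# the pipeline): with delays [1, 0], row t of the result is [row t-1, row t],
# with zeros where the delayed row falls outside the matrix.
def _demo_delay_mat():
    mat = np.arange(6).reshape(3, 2)    # rows: [0,1], [2,3], [4,5]
    delayed = delay_mat(mat, [1, 0])
    # delayed[0] == [0, 0, 0, 1] and delayed[1] == [0, 1, 2, 3]
    return delayed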
# train/test is the full NLP feature
# train/test_pca is the NLP feature reduced to 10 dimensions via PCA that has been fit on the training data
# feat_dir is the directory where the NLP features are stored
# train_indicator is a boolean array indicating whether the word at this index is in the training set
def get_nlp_features_fixed_length(layer, seq_len, feat_type, feat_dir, train_indicator, SKIP_WORDS=20, END_WORDS=5176):

    if layer == -1 and feat_type == 'bert':
        # layer -1 means all 13 BERT layers; note that in this branch the first
        # two return values are the *last* layer's PCA features
        all_layers_train = []
        all_layers_test = []
        for layer2 in range(13):
            loaded = np.load(feat_dir + feat_type + '_length_'+str(seq_len)+ '_layer_' + str(layer2) + '.npy')
            train = loaded[SKIP_WORDS:END_WORDS,:][train_indicator]
            test = loaded[SKIP_WORDS:END_WORDS,:][~train_indicator]

            pca = PCA(n_components=10, svd_solver='full')
            pca.fit(train)
            train_pca = pca.transform(train)
            test_pca = pca.transform(test)

            all_layers_train.append(train_pca)
            all_layers_test.append(test_pca)

        return train_pca, test_pca, np.hstack(all_layers_train), np.hstack(all_layers_test)

    loaded = np.load(feat_dir + feat_type + '_length_'+str(seq_len)+ '_layer_' + str(layer) + '.npy')
    if feat_type == 'elmo':
        train = loaded[SKIP_WORDS:END_WORDS,:][:,:512][train_indicator] # only forward LSTM
        test = loaded[SKIP_WORDS:END_WORDS,:][:,:512][~train_indicator] # only forward LSTM
    elif feat_type == 'gpt2' or feat_type == 'bert':  # a single BERT layer is loaded the same way as gpt2
        train = loaded[SKIP_WORDS:END_WORDS,:][train_indicator]
        test = loaded[SKIP_WORDS:END_WORDS,:][~train_indicator]
    else:
        raise ValueError('Unrecognized NLP feature type {}. Available options: elmo, gpt2, bert'.format(feat_type))

    pca = PCA(n_components=10, svd_solver='full')
    pca.fit(train)
    train_pca = pca.transform(train)
    test_pca = pca.transform(test)

    return train, test, train_pca, test_pca

def load_features(feat_name_split, delay, train_indicator, feat_type='', feat_dir=''):
    SKIP_WORDS = 20
    END_WORDS = 5176
    if 'emb' in feat_name_split:
        train, test, _, _ = get_nlp_features_fixed_length(0, 1, feat_type, feat_dir, train_indicator)
    elif 'context' in feat_name_split and feat_type == 'elmo':
        train, test, _, _ = get_nlp_features_fixed_length(1, 25, feat_type, feat_dir, train_indicator)
    elif 'context' in feat_name_split and feat_type == 'gpt2':
        train, test, _, _ = get_nlp_features_fixed_length(11, 25, feat_type, feat_dir, train_indicator)
    else:
        raise ValueError('Unrecognized feat type {}'.format(feat_name_split))

    # note that np.roll wraps around the ends of the array
    feature_train = np.roll(train, delay, axis=0)
    feature_test = np.roll(test, delay, axis=0)

    return feature_train, feature_test
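# Illustrative only: the word-level train/test split is passed around as a
# boolean mask; e.g. holding out fold 0 of a 4-fold split over the
# 5176 - 20 = 5156 usable words (the feature directory is a placeholder):
#
#   word_ind = CV_ind(5156, n_folds=4)     # CV_ind is defined below
#   train_indicator = word_ind != 0
#   train, test = load_features(['emb'], 0, train_indicator,
#                               feat_type='bert', feat_dir='./features/')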
# train_indicator is a boolean array indicating whether the word at this index is in the training set
def load_features_to_regress_out(feat_name_list, train_indicator, feat_type='', feat_dir='', SKIP_WORDS=20, END_WORDS=5176):

    if len(feat_name_list) == 0:
        return [],[]

    regress_features_train = []
    regress_features_test = []

    print(feat_name_list)
    for feat_name in feat_name_list:

        feat_name_split = feat_name.split('-')

        # the second token of the feature name encodes the word-delay distance,
        # if any (e.g. '<feat>-2-prev')
        if 'prev' in feat_name_split or 'back' in feat_name_split:
            delay = int(feat_name_split[1])
        elif 'next' in feat_name_split or 'fwd' in feat_name_split:
            delay = -int(feat_name_split[1])
        else:
            delay = 0

        if delay == 0:
            print('using delay of {}'.format(delay))
            feature_train, feature_test = load_features(feat_name_split, delay, train_indicator, feat_type, feat_dir)

            regress_features_train.append(feature_train)
            regress_features_test.append(feature_test)
        else:
            while abs(delay) > 0:
                print('using delay of {}'.format(delay))

                feature_train, feature_test = load_features(feat_name_split, delay, train_indicator, feat_type, feat_dir)

                regress_features_train.append(feature_train)
                regress_features_test.append(feature_test)

                delay = delay - np.sign(delay)

                if 'back' in feat_name_split or 'fwd' in feat_name_split: # only want the features at a particular delayed position, not all feats up to that point
                    break

    return np.hstack(regress_features_train), np.hstack(regress_features_test)


def CV_ind(n, n_folds):
    # assigns each of n items to one of n_folds contiguous folds;
    # the remainder goes to the last fold
    ind = np.zeros((n))
    n_items = int(np.floor(n/n_folds))
    for i in range(0, n_folds-1):
        ind[i*n_items:(i+1)*n_items] = i
    ind[(n_folds-1)*n_items:] = (n_folds-1)
    return ind

def TR_to_word_CV_ind(TR_train_indicator, SKIP_WORDS=20, END_WORDS=5176):
    time = np.load('./data/time_fmri.npy')
    runs = np.load('./data/runs_fmri.npy')
    time_words = np.load('./data/time_words_fmri.npy')
    time_words = time_words[SKIP_WORDS:END_WORDS]

    word_train_indicator = np.zeros([len(time_words)], dtype=bool)
    words_id = np.zeros([len(time_words)], dtype=int)
    # find which TR each word belongs to
    for i in range(len(time_words)):
        words_id[i] = np.where(time_words[i] > time)[0][-1]

        if words_id[i] <= len(runs) - 15:  # ignore words in the trimmed tail of the session
            # account for the 20 TRs trimmed from the start of each run (including
            # this one) and the 15 trimmed from the end of each preceding run
            # (see prepare_fmri_features below)
            offset = runs[int(words_id[i])]*20 + (runs[int(words_id[i])]-1)*15
            if TR_train_indicator[int(words_id[i])-offset-1] == 1:
                word_train_indicator[i] = True
    return word_train_indicator
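# For example, CV_ind(10, n_folds=4) returns [0, 0, 1, 1, 2, 2, 3, 3, 3, 3]:
# folds are contiguous blocks of items rather than a random shuffle.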
def run_class_time_CV_crossval_ridge(data, predict_feat_dict,
                    regress_feat_names_list=[], SKIP_WORDS = 5, END_WORDS = 5176,
                    delays = [0], encoding=1, detrend = True, do_correct = [], n_folds = 4, splay = [],
                    do_acc = True, frequency= 0, downsampled=1, seed=0):

    # data = word-locked MEG data (words x sensors x time)
    # predict_feat_dict = specification of the NLP features to predict (see make_predictions.py)
    # regress_feat_names_list = features to first regress out of the predicted features
    # SKIP_WORDS, END_WORDS = how many words to skip from the beginning/end in case the features are not good there
    # delays = look at current word + which other words? 0 = current, -1 previous, +1 next.
    #          most common is [-2,-1,0,1,2]
    # encoding = 1 predicts brain data from features; 0 runs the decoding direction
    # detrend = remove the running mean of the last 5 words?
    #           (note: this should probably be False when looking at context features)
    # do_correct = not used now
    # n_folds = number of CV folds
    # splay = only do the analysis on the words in this array
    # do_acc = run single subject classification

    predict_feat_type = predict_feat_dict['feat_type']
    nlp_feat_type = predict_feat_dict['nlp_feat_type']
    feat_dir = predict_feat_dict['nlp_feat_dir']
    layer = predict_feat_dict['layer']
    seq_len = predict_feat_dict['seq_len']

    n_words = data.shape[0]
    if detrend:
        # subtract a running mean of the previous 5 words from each sensor/time point
        running_mean = np.vstack([np.mean(np.mean(data[i-5:i,:,:],2),0) for i in range(5,n_words)])
        data[5:] = np.stack([(data[5:,:,i]-running_mean).T for i in range(data.shape[2])]).T

    n_words = data.shape[0]
    n_time = data.shape[2]
    n_sensor = data.shape[1]

    ind = CV_ind(n_words, n_folds=n_folds)

    corrs = np.zeros((n_folds, n_time))
    acc = np.zeros((n_folds, n_time))

    all_preds = []
    all_test_data = []

    for ind_num in range(n_folds):
        start_time = time.time()

        train_ind = ind!=ind_num
        test_ind = ind==ind_num

        if predict_feat_type == 'elmo' or predict_feat_type == 'bert':
            train_features,test_features,_,_ = get_nlp_features_fixed_length(layer, seq_len, nlp_feat_type, feat_dir, train_ind)
        else:
            train_features,test_features = load_features_to_regress_out([predict_feat_type], train_ind, nlp_feat_type, feat_dir)

        # split data
        train_data = data[train_ind]
        test_data = data[test_ind]

        # normalize data
        train_data = zscore_word_data(train_data)
        test_data = zscore_word_data(test_data)

        all_test_data.append(test_data)

        train_features = np.nan_to_num(zscore(train_features))
        test_features = np.nan_to_num(zscore(test_features))

        # if regressing out features, do it now: predict the target features
        # from the nuisance features and keep only the residuals
        if len(regress_feat_names_list) > 0:

            print('working on regressing out {}'.format(regress_feat_names_list))

            regress_train_features, regress_test_features = load_features_to_regress_out(regress_feat_names_list, train_ind, nlp_feat_type, feat_dir)

            preds_test,preds_train,_,_ = cross_val_ridge(regress_train_features,train_features,regress_test_features, n_splits = 10, lambdas = np.array([10**i for i in range(-6,10)]), method = 'kernel_ridge',do_plot = False)

            train_features = np.reshape(train_features-preds_train, train_features.shape)
            test_features = np.reshape(test_features-preds_test, test_features.shape)
            print('done regressing out')

        if encoding == 0: # decoding experiment: predict features from brain data
            preds,_,_,_ = cross_val_ridge(train_data,train_features,test_data,n_splits = 10,lambdas = np.array([10**i for i in range(-6,10)]), method = 'kernel_ridge',do_plot = False)
        else:
            preds,_,_,_ = cross_val_ridge(train_features,train_data,test_features,n_splits = 10,lambdas = np.array([10**i for i in range(-6,10)]), method = 'kernel_ridge',do_plot = False)

        if encoding == 0:
            corrs[ind_num,:] = corr(preds,test_features).mean(0)
        elif encoding == 1:
            corrs[ind_num,:] = corr(preds,test_data).reshape(n_sensor,n_time).mean(0)

        all_preds.append(preds)

        print('CV fold ' + str(ind_num) + ' ' + str(time.time()-start_time))

    return corrs, np.vstack(all_preds), np.vstack(all_test_data)
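# The "regress out" step above is plain residualization; a standalone sketch of
# the idea (shapes are arbitrary, names hypothetical):
def _residualize(nuisance_train, features_train, nuisance_test, features_test):
    # predict the target features from the nuisance features with the same
    # cross-validated ridge, then keep only what the nuisance cannot explain
    preds_test, preds_train, _, _ = cross_val_ridge(
        nuisance_train, features_train, nuisance_test,
        n_splits=10, method='kernel_ridge', do_plot=False)
    return features_train - preds_train, features_test - preds_test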
def prepare_fmri_features(train_features, test_features, word_train_indicator, TR_train_indicator, SKIP_WORDS=20, END_WORDS=5176):

    time = np.load('./data/time_fmri.npy')
    runs = np.load('./data/runs_fmri.npy')
    time_words = np.load('./data/time_words_fmri.npy')
    time_words = time_words[SKIP_WORDS:END_WORDS]

    words_id = np.zeros([len(time_words)], dtype=int)
    # find which TR each word belongs to
    for i in range(len(time_words)):
        words_id[i] = np.where(time_words[i] > time)[0][-1]

    all_features = np.zeros([time_words.shape[0], train_features.shape[1]])
    all_features[word_train_indicator] = train_features
    all_features[~word_train_indicator] = test_features

    # average the features of the words that fall within each TR
    p = all_features.shape[1]
    tmp = np.zeros([time.shape[0], p])
    for i in range(time.shape[0]):
        tmp[i] = np.mean(all_features[(words_id<=i)*(words_id>i-1)],0)
    # concatenate copies delayed by 1-4 TRs to account for the hemodynamic lag
    tmp = delay_mat(tmp, np.arange(1,5))

    # remove the edges of each run
    tmp = np.vstack([zscore(tmp[runs==i][20:-15]) for i in range(1,5)])
    tmp = np.nan_to_num(tmp)

    return tmp[TR_train_indicator], tmp[~TR_train_indicator]
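# With p feature dimensions, the TR-level design matrix built above has 4*p
# columns (one copy per delay of 1-4 TRs), so the regression can weight each
# delay separately instead of assuming a fixed hemodynamic response shape.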
def run_class_time_CV_fmri_crossval_ridge(data, predict_feat_dict,
                    regress_feat_names_list = [], method = 'kernel_ridge',
                    lambdas = np.array([0.1,1,10,100,1000]),
                    detrend = False, n_folds = 4, skip=5):

    predict_feat_type = predict_feat_dict['feat_type']
    nlp_feat_type = predict_feat_dict['nlp_feat_type']
    feat_dir = predict_feat_dict['nlp_feat_dir']
    layer = predict_feat_dict['layer']
    seq_len = predict_feat_dict['seq_len']

    n_words = data.shape[0]
    n_voxels = data.shape[1]

    print(n_words)

    ind = CV_ind(n_words, n_folds=n_folds)

    corrs = np.zeros((n_folds, n_voxels))
    # acc and acc_std are kept for interface compatibility but are not filled in
    acc = np.zeros((n_folds, n_voxels))
    acc_std = np.zeros((n_folds, n_voxels))

    all_test_data = []
    all_preds = []

    for ind_num in range(n_folds):
        train_ind = ind!=ind_num
        test_ind = ind==ind_num

        # translate the TR-level split into a word-level split
        word_CV_ind = TR_to_word_CV_ind(train_ind)
        if nlp_feat_type == 'brain':
            word_CV_ind = train_ind

        if predict_feat_type == 'elmo' or predict_feat_type == 'bert':
            tmp_train_features,tmp_test_features,_,_ = get_nlp_features_fixed_length(layer, seq_len, nlp_feat_type, feat_dir, word_CV_ind)
        else:
            tmp_train_features,tmp_test_features = load_features_to_regress_out(predict_feat_type.split('+'), word_CV_ind, nlp_feat_type, feat_dir)

        if nlp_feat_type != 'brain':
            train_features,test_features = prepare_fmri_features(tmp_train_features, tmp_test_features, word_CV_ind, train_ind)
        else: # no need to concatenate multiple TRs for brain to brain predictions
            train_features = tmp_train_features
            test_features = tmp_test_features

        if len(regress_feat_names_list) > 0:
            tmp_regress_train_features, tmp_regress_test_features = load_features_to_regress_out(regress_feat_names_list, word_CV_ind, nlp_feat_type, feat_dir)

            if nlp_feat_type != 'brain':
                regress_train_features,regress_test_features = prepare_fmri_features(tmp_regress_train_features, tmp_regress_test_features, word_CV_ind, train_ind)
            else:
                regress_train_features = tmp_regress_train_features
                regress_test_features = tmp_regress_test_features

        # split data
        train_data = data[train_ind]
        test_data = data[test_ind]

        # skip TRs between train and test data
        if ind_num == 0: # just remove from front end
            train_data = train_data[skip:,:]
            train_features = train_features[skip:,:]
        elif ind_num == n_folds-1: # just remove from back end
            train_data = train_data[:-skip,:]
            train_features = train_features[:-skip,:]
        else:
            test_data = test_data[skip:-skip,:]
            test_features = test_features[skip:-skip,:]

        # normalize data
        train_data = np.nan_to_num(zscore(np.nan_to_num(train_data)))
        test_data = np.nan_to_num(zscore(np.nan_to_num(test_data)))
        all_test_data.append(test_data)

        train_features = np.nan_to_num(zscore(train_features))
        test_features = np.nan_to_num(zscore(test_features))

        print('features size: {}, data size: {}'.format(train_features.shape, train_data.shape))

        # if regressing out features, do it now
        if len(regress_feat_names_list) > 0:

            # skip TRs between train and test data
            if ind_num == 0: # just remove from front end
                regress_train_features = regress_train_features[skip:,:]
            elif ind_num == n_folds-1: # just remove from back end
                regress_train_features = regress_train_features[:-skip,:]
            else:
                regress_test_features = regress_test_features[skip:-skip,:]

            print('regressor features size: {}, features size: {}'.format(regress_train_features.shape, train_features.shape))

            preds_test,preds_train,_,_ = cross_val_ridge(regress_train_features,train_features,regress_test_features, n_splits = 10, lambdas = np.array([10**i for i in range(-6,10)]), method = 'kernel_ridge',do_plot = False)

            train_features = np.reshape(train_features-preds_train, train_features.shape)
            test_features = np.reshape(test_features-preds_test, test_features.shape)
            print('done regressing out')

        start_time = tm.time()

        preds,_,_,chosen_lambdas = cross_val_ridge(train_features,train_data,test_features, n_splits = 10, lambdas = np.array([10**i for i in range(-6,10)]), method = 'kernel_ridge',do_plot = False)
        corrs[ind_num,:] = corr(preds,test_data)
        all_preds.append(preds)

        print('fold {} completed, took {} seconds'.format(ind_num, tm.time()-start_time))

    return corrs, acc, acc_std, np.vstack(all_preds), np.vstack(all_test_data)
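# Example invocations (values are illustrative; the flags are defined at the
# top of make_predictions.py):
#
#   python make_predictions.py --subject F --recording_type fmri \
#       --predict_feat_type bert --nlp_feat_type bert --nlp_feat_dir ./features/ \
#       --layer 6 --sequence_length 25 \
#       --output_fname_prefix F_fmri --output_dir ./results/
#
#   python make_predictions.py --subject A --recording_type meg --delay 2 \
#       --predict_feat_type elmo --nlp_feat_type elmo --nlp_feat_dir ./features/ \
#       --layer 1 --sequence_length 25 \
#       --output_fname_prefix A_meg --output_dir ./results/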