Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
lwehbe authored Oct 10, 2022
1 parent c3f94e1 commit e25244b
Show file tree
Hide file tree
Showing 3 changed files with 660 additions and 0 deletions.
103 changes: 103 additions & 0 deletions make_predictions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import argparse
import numpy as np
import warnings

from utils import delay_mat, delay_one, zscore_word_data, run_class_time_CV_crossval_ridge, run_class_time_CV_fmri_crossval_ridge
from utils import subject_runs, surfaces, transforms, load_transpose_zscore, smooth_run_not_masked, CV_ind


from sklearn.decomposition import PCA
from scipy.stats import zscore
import scipy.io as sio

import time as tm
from scipy import signal
import pickle as pk

SKIP_WORDS = 20
END_WORDS = 5176

if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("--subject", required=True)
parser.add_argument("--predict_feat_type", required=True)
parser.add_argument("--nlp_feat_type", required=True)
parser.add_argument("--nlp_feat_dir", required=True)
parser.add_argument("--layer", type=int, required=False)
parser.add_argument("--sequence_length", type=int, required=False)
parser.add_argument("--output_fname_prefix", required=True)
parser.add_argument("--output_dir", required=True)
parser.add_argument("--delay", type=int, default=0)
parser.add_argument("--recording_type", required=True)
parser.add_argument("--regress_feat_types", default=0)
parser.add_argument("--encoding", type=int, default=1)

args = parser.parse_args()
print(args)

if args.regress_feat_types != '0':
regress_feat_names_list = np.sort(args.regress_feat_types.split('+'))
else:
regress_feat_names_list = []

predict_feat_dict = {'feat_type':args.predict_feat_type,
'nlp_feat_type':args.nlp_feat_type,
'nlp_feat_dir':args.nlp_feat_dir,
'layer':args.layer,
'seq_len':args.sequence_length}
warnings.filterwarnings("ignore")

if args.recording_type == 'meg':

# predict the previous n words, current word, and the next n words
delays = list(np.arange(args.delay,-args.delay-1,-1))
#print(delays)

mat = sio.loadmat('./data/meg/{}_HP_notDetrended_25ms.mat'.format(args.subject))
data = mat['data']

n_words = data.shape[0]
n_time = data.shape[2]
n_sensor = data.shape[1]

data = data[SKIP_WORDS:END_WORDS]
data = delay_mat(data,delays)
print(data.shape)


corrs_t, preds_t, test_t = run_class_time_CV_crossval_ridge(data,
predict_feat_dict,
regress_feat_names_list = regress_feat_names_list,
delays = delays,
encoding=args.encoding,
detrend = False,
do_correct = [],
splay = [],
do_acc = False,
SKIP_WORDS = SKIP_WORDS,
END_WORDS = END_WORDS)
elif args.recording_type == 'fmri':

# load fMRI data
data = np.load('./data/fmri/data_subject_{}.npy'.format(args.subject))
corrs_t, _, _, preds_t, test_t = run_class_time_CV_fmri_crossval_ridge(data,
predict_feat_dict,
regress_feat_names_list=regress_feat_names_list)

else:
print('Unrecognized recording_type {}. Options: meg, fmri'.format(args.recording_type))


if args.predict_feat_type == 'elmo' or args.predict_feat_type == 'bert':
if args.regress_feat_types != '0':
fname = 'predict_{}_with_{}_layer_{}_len_{}_regress_out_{}'.format(args.output_fname_prefix, args.predict_feat_type, args.layer, args.sequence_length, '+'.join(regress_feat_names_list))
else:
fname = 'predict_{}_with_{}_layer_{}_len_{}'.format(args.output_fname_prefix, args.predict_feat_type, args.layer, args.sequence_length)
else:
if args.regress_feat_types != '0':
fname = 'predict_{}_with_{}_regress_out_{}'.format(args.output_fname_prefix, args.predict_feat_type, '+'.join(regress_feat_names_list))
else:
fname = 'predict_{}_with_{}'.format(args.output_fname_prefix, args.predict_feat_type)

print('saving: {}'.format(args.output_dir + fname))
np.save(args.output_dir + fname + '.npy', {'corrs_t':corrs_t,'preds_t':preds_t,'test_t':test_t})
102 changes: 102 additions & 0 deletions ridge_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
from numpy.linalg import inv, svd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.kernel_ridge import KernelRidge
import time
from scipy.stats import zscore

def corr(X,Y):
return np.mean(zscore(X)*zscore(Y),0)

def R2(Pred,Real):
SSres = np.mean((Real-Pred)**2,0)
SStot = np.var(Real,0)
return np.nan_to_num(1-SSres/SStot)

def R2r(Pred,Real):
R2rs = R2(Pred,Real)
ind_neg = R2rs<0
R2rs = np.abs(R2rs)
R2rs = np.sqrt(R2rs)
R2rs[ind_neg] *= - 1
return R2rs

def ridge(X,Y,lmbda):
return np.dot(inv(X.T.dot(X)+lmbda*np.eye(X.shape[1])),X.T.dot(Y))

def ridge_by_lambda(X, Y, Xval, Yval, lambdas=np.array([0.1,1,10,100,1000])):
error = np.zeros((lambdas.shape[0],Y.shape[1]))
for idx,lmbda in enumerate(lambdas):
weights = ridge(X,Y,lmbda)
error[idx] = 1 - R2(np.dot(Xval,weights),Yval)
return error

def ridge_sk(X,Y,lmbda):
rd = Ridge(alpha = lmbda)
rd.fit(X,Y)
return rd.coef_.T

def ridgeCV_sk(X,Y,lmbdas):
rd = RidgeCV(alphas = lmbdas)
rd.fit(X,Y)
return rd.coef_.T

def ridge_by_lambda_sk(X, Y, Xval, Yval, lambdas=np.array([0.1,1,10,100,1000])):
error = np.zeros((lambdas.shape[0],Y.shape[1]))
for idx,lmbda in enumerate(lambdas):
weights = ridge_sk(X,Y,lmbda)
error[idx] = 1 - R2(np.dot(Xval,weights),Yval)
return error


def cross_val_ridge(train_features,train_data, test_features, n_splits = 10,
lambdas = np.array([10**i for i in range(-6,10)]),
method = 'plain',
do_plot = False):

ridge_1 = dict(plain = ridge_by_lambda,
ridge_sk = ridge_by_lambda_sk)[method]
ridge_2 = dict(plain = ridge,
ridge_sk = ridge_sk)[method]

n_voxels = train_data.shape[1]
nL = lambdas.shape[0]
r_cv = np.zeros((nL, train_data.shape[1]))

print(train_features.shape, train_data.shape)

kf = KFold(n_splits=n_splits)
start_t = time.time()
for icv, (trn, val) in enumerate(kf.split(train_data)):
#print('ntrain = {}'.format(train_features[trn].shape[0]))
cost = ridge_1(train_features[trn],train_data[trn],
train_features[val],train_data[val],
lambdas=lambdas)
if do_plot:
import matplotlib.pyplot as plt
plt.figure()
plt.imshow(cost,aspect = 'auto')
r_cv += cost
if do_plot:
plt.figure()
plt.imshow(r_cv,aspect='auto',cmap = 'RdBu_r');

argmin_lambda = np.argmin(r_cv,axis = 0)
preds = np.zeros((test_features.shape[0],train_data.shape[1]))
preds_train = np.zeros((train_features.shape[0],train_data.shape[1]))
weights_kernel_space = np.zeros((train_features.shape[0],train_data.shape[1]))
for idx_lambda in range(lambdas.shape[0]): # this is much faster than iterating over voxels!
idx_vox = argmin_lambda == idx_lambda
print(idx_lambda, argmin_lambda)
if sum(idx_vox) == 0:
continue


print(train_features.shape, train_data[:,idx_vox].shape, test_features.shape)
preds[:,idx_vox], preds_train[:,idx_vox], weights_kernel_space[:,idx_vox] = ridge_2(train_features, train_data[:,idx_vox], test_features, lambdas[idx_lambda])
if do_plot:
plt.figure()
plt.imshow(weights,aspect='auto',cmap = 'RdBu_r',vmin = -0.5,vmax = 0.5);

return preds,preds_train, weights_kernel_space, np.array([lambdas[i] for i in argmin_lambda])
Loading

0 comments on commit e25244b

Please sign in to comment.