-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
660 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
import argparse | ||
import numpy as np | ||
import warnings | ||
|
||
from utils import delay_mat, delay_one, zscore_word_data, run_class_time_CV_crossval_ridge, run_class_time_CV_fmri_crossval_ridge | ||
from utils import subject_runs, surfaces, transforms, load_transpose_zscore, smooth_run_not_masked, CV_ind | ||
|
||
|
||
from sklearn.decomposition import PCA | ||
from scipy.stats import zscore | ||
import scipy.io as sio | ||
|
||
import time as tm | ||
from scipy import signal | ||
import pickle as pk | ||
|
||
SKIP_WORDS = 20 | ||
END_WORDS = 5176 | ||
|
||
if __name__ == '__main__': | ||
parser = argparse.ArgumentParser() | ||
parser.add_argument("--subject", required=True) | ||
parser.add_argument("--predict_feat_type", required=True) | ||
parser.add_argument("--nlp_feat_type", required=True) | ||
parser.add_argument("--nlp_feat_dir", required=True) | ||
parser.add_argument("--layer", type=int, required=False) | ||
parser.add_argument("--sequence_length", type=int, required=False) | ||
parser.add_argument("--output_fname_prefix", required=True) | ||
parser.add_argument("--output_dir", required=True) | ||
parser.add_argument("--delay", type=int, default=0) | ||
parser.add_argument("--recording_type", required=True) | ||
parser.add_argument("--regress_feat_types", default=0) | ||
parser.add_argument("--encoding", type=int, default=1) | ||
|
||
args = parser.parse_args() | ||
print(args) | ||
|
||
if args.regress_feat_types != '0': | ||
regress_feat_names_list = np.sort(args.regress_feat_types.split('+')) | ||
else: | ||
regress_feat_names_list = [] | ||
|
||
predict_feat_dict = {'feat_type':args.predict_feat_type, | ||
'nlp_feat_type':args.nlp_feat_type, | ||
'nlp_feat_dir':args.nlp_feat_dir, | ||
'layer':args.layer, | ||
'seq_len':args.sequence_length} | ||
warnings.filterwarnings("ignore") | ||
|
||
if args.recording_type == 'meg': | ||
|
||
# predict the previous n words, current word, and the next n words | ||
delays = list(np.arange(args.delay,-args.delay-1,-1)) | ||
#print(delays) | ||
|
||
mat = sio.loadmat('./data/meg/{}_HP_notDetrended_25ms.mat'.format(args.subject)) | ||
data = mat['data'] | ||
|
||
n_words = data.shape[0] | ||
n_time = data.shape[2] | ||
n_sensor = data.shape[1] | ||
|
||
data = data[SKIP_WORDS:END_WORDS] | ||
data = delay_mat(data,delays) | ||
print(data.shape) | ||
|
||
|
||
corrs_t, preds_t, test_t = run_class_time_CV_crossval_ridge(data, | ||
predict_feat_dict, | ||
regress_feat_names_list = regress_feat_names_list, | ||
delays = delays, | ||
encoding=args.encoding, | ||
detrend = False, | ||
do_correct = [], | ||
splay = [], | ||
do_acc = False, | ||
SKIP_WORDS = SKIP_WORDS, | ||
END_WORDS = END_WORDS) | ||
elif args.recording_type == 'fmri': | ||
|
||
# load fMRI data | ||
data = np.load('./data/fmri/data_subject_{}.npy'.format(args.subject)) | ||
corrs_t, _, _, preds_t, test_t = run_class_time_CV_fmri_crossval_ridge(data, | ||
predict_feat_dict, | ||
regress_feat_names_list=regress_feat_names_list) | ||
|
||
else: | ||
print('Unrecognized recording_type {}. Options: meg, fmri'.format(args.recording_type)) | ||
|
||
|
||
if args.predict_feat_type == 'elmo' or args.predict_feat_type == 'bert': | ||
if args.regress_feat_types != '0': | ||
fname = 'predict_{}_with_{}_layer_{}_len_{}_regress_out_{}'.format(args.output_fname_prefix, args.predict_feat_type, args.layer, args.sequence_length, '+'.join(regress_feat_names_list)) | ||
else: | ||
fname = 'predict_{}_with_{}_layer_{}_len_{}'.format(args.output_fname_prefix, args.predict_feat_type, args.layer, args.sequence_length) | ||
else: | ||
if args.regress_feat_types != '0': | ||
fname = 'predict_{}_with_{}_regress_out_{}'.format(args.output_fname_prefix, args.predict_feat_type, '+'.join(regress_feat_names_list)) | ||
else: | ||
fname = 'predict_{}_with_{}'.format(args.output_fname_prefix, args.predict_feat_type) | ||
|
||
print('saving: {}'.format(args.output_dir + fname)) | ||
np.save(args.output_dir + fname + '.npy', {'corrs_t':corrs_t,'preds_t':preds_t,'test_t':test_t}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
from numpy.linalg import inv, svd | ||
import numpy as np | ||
from sklearn.model_selection import KFold | ||
from sklearn.linear_model import Ridge, RidgeCV | ||
from sklearn.kernel_ridge import KernelRidge | ||
import time | ||
from scipy.stats import zscore | ||
|
||
def corr(X,Y): | ||
return np.mean(zscore(X)*zscore(Y),0) | ||
|
||
def R2(Pred,Real): | ||
SSres = np.mean((Real-Pred)**2,0) | ||
SStot = np.var(Real,0) | ||
return np.nan_to_num(1-SSres/SStot) | ||
|
||
def R2r(Pred,Real): | ||
R2rs = R2(Pred,Real) | ||
ind_neg = R2rs<0 | ||
R2rs = np.abs(R2rs) | ||
R2rs = np.sqrt(R2rs) | ||
R2rs[ind_neg] *= - 1 | ||
return R2rs | ||
|
||
def ridge(X,Y,lmbda): | ||
return np.dot(inv(X.T.dot(X)+lmbda*np.eye(X.shape[1])),X.T.dot(Y)) | ||
|
||
def ridge_by_lambda(X, Y, Xval, Yval, lambdas=np.array([0.1,1,10,100,1000])): | ||
error = np.zeros((lambdas.shape[0],Y.shape[1])) | ||
for idx,lmbda in enumerate(lambdas): | ||
weights = ridge(X,Y,lmbda) | ||
error[idx] = 1 - R2(np.dot(Xval,weights),Yval) | ||
return error | ||
|
||
def ridge_sk(X,Y,lmbda): | ||
rd = Ridge(alpha = lmbda) | ||
rd.fit(X,Y) | ||
return rd.coef_.T | ||
|
||
def ridgeCV_sk(X,Y,lmbdas): | ||
rd = RidgeCV(alphas = lmbdas) | ||
rd.fit(X,Y) | ||
return rd.coef_.T | ||
|
||
def ridge_by_lambda_sk(X, Y, Xval, Yval, lambdas=np.array([0.1,1,10,100,1000])): | ||
error = np.zeros((lambdas.shape[0],Y.shape[1])) | ||
for idx,lmbda in enumerate(lambdas): | ||
weights = ridge_sk(X,Y,lmbda) | ||
error[idx] = 1 - R2(np.dot(Xval,weights),Yval) | ||
return error | ||
|
||
|
||
def cross_val_ridge(train_features,train_data, test_features, n_splits = 10, | ||
lambdas = np.array([10**i for i in range(-6,10)]), | ||
method = 'plain', | ||
do_plot = False): | ||
|
||
ridge_1 = dict(plain = ridge_by_lambda, | ||
ridge_sk = ridge_by_lambda_sk)[method] | ||
ridge_2 = dict(plain = ridge, | ||
ridge_sk = ridge_sk)[method] | ||
|
||
n_voxels = train_data.shape[1] | ||
nL = lambdas.shape[0] | ||
r_cv = np.zeros((nL, train_data.shape[1])) | ||
|
||
print(train_features.shape, train_data.shape) | ||
|
||
kf = KFold(n_splits=n_splits) | ||
start_t = time.time() | ||
for icv, (trn, val) in enumerate(kf.split(train_data)): | ||
#print('ntrain = {}'.format(train_features[trn].shape[0])) | ||
cost = ridge_1(train_features[trn],train_data[trn], | ||
train_features[val],train_data[val], | ||
lambdas=lambdas) | ||
if do_plot: | ||
import matplotlib.pyplot as plt | ||
plt.figure() | ||
plt.imshow(cost,aspect = 'auto') | ||
r_cv += cost | ||
if do_plot: | ||
plt.figure() | ||
plt.imshow(r_cv,aspect='auto',cmap = 'RdBu_r'); | ||
|
||
argmin_lambda = np.argmin(r_cv,axis = 0) | ||
preds = np.zeros((test_features.shape[0],train_data.shape[1])) | ||
preds_train = np.zeros((train_features.shape[0],train_data.shape[1])) | ||
weights_kernel_space = np.zeros((train_features.shape[0],train_data.shape[1])) | ||
for idx_lambda in range(lambdas.shape[0]): # this is much faster than iterating over voxels! | ||
idx_vox = argmin_lambda == idx_lambda | ||
print(idx_lambda, argmin_lambda) | ||
if sum(idx_vox) == 0: | ||
continue | ||
|
||
|
||
print(train_features.shape, train_data[:,idx_vox].shape, test_features.shape) | ||
preds[:,idx_vox], preds_train[:,idx_vox], weights_kernel_space[:,idx_vox] = ridge_2(train_features, train_data[:,idx_vox], test_features, lambdas[idx_lambda]) | ||
if do_plot: | ||
plt.figure() | ||
plt.imshow(weights,aspect='auto',cmap = 'RdBu_r',vmin = -0.5,vmax = 0.5); | ||
|
||
return preds,preds_train, weights_kernel_space, np.array([lambdas[i] for i in argmin_lambda]) |
Oops, something went wrong.