wrapper_regression.py
# -*- coding: utf-8 -*-
from __future__ import print_function, division

import argparse
import itertools
import os
import random
import sys
from collections import defaultdict

import matplotlib as mpl
mpl.use('Agg')  # non-interactive backend; must be set before importing pyplot
import matplotlib.pyplot as plt
from matplotlib import rcParams
import numpy as np
import pandas as pd
import scipy
from scipy.stats import norm
import seaborn as sns
from sklearn.cluster import KMeans
import tensorflow as tf

from GP import main_DeepGP
from uci_datasets_fetcher import *

DTYPE = tf.float64
sys.setrecursionlimit(10000)
def RBF_np(X1, X2, log_lengthscales, log_kernel_variance):
    """Squared-exponential (RBF) kernel matrix between X1 and X2 in NumPy."""
    # Rescale inputs by the per-dimension lengthscales.
    X1 = X1 / np.exp(log_lengthscales)
    X2 = X2 / np.exp(log_lengthscales)
    # Squared row norms, used to expand the pairwise squared distances.
    X1s = np.sum(np.square(X1), 1)
    X2s = np.sum(np.square(X2), 1)
    sq_dist = -2.0 * np.matmul(X1, np.transpose(X2)) + np.reshape(X1s, (-1, 1)) + np.reshape(X2s, (1, -1))
    return np.exp(log_kernel_variance) * np.exp(-sq_dist / 2.0)
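# The function above computes k(x, x') = s^2 * exp(-||x - x'||^2 / 2) on the
# lengthscale-rescaled inputs, expanding the pairwise squared distances as
# ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b. A quick sanity check (illustrative,
# not part of the original script):
#   X = np.array([[0.0], [1.0]])
#   K = RBF_np(X, X, log_lengthscales=[0.0], log_kernel_variance=0.0)
#   # K is symmetric with K[0, 0] == K[1, 1] == 1.0 (the unit kernel variance).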
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_layers', type=int, help='total number of GP layers')
    parser.add_argument('--num_inducing', type=int, help='number of inducing points in the first layer')
    parser.add_argument('--dim_layers', type=int, help='size of each hidden GP layer (a single scalar shared by all hidden layers of the DGP)')
    parser.add_argument('--learning_rate', type=float, help='constant learning rate used for optimization')
    parser.add_argument('--num_iterations', type=int, help='number of iterations of the training process')
    parser.add_argument('--num_batch', type=int, help='minibatch size during training')
    parser.add_argument('--base_seed', type=int, help='seed for selecting the minibatch')
    parser.add_argument('--dataset_name', type=str, help='name of the UCI dataset to use')
    parser.add_argument('--dim_output', type=int, help='dimensionality of the outputs')
    args = parser.parse_args()
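    # Example invocation (hypothetical values, shown for illustration only):
    #   python wrapper_regression.py --num_layers 2 --num_inducing 100 \
    #       --dim_layers 5 --learning_rate 1e-3 --num_iterations 10000 \
    #       --num_batch 100 --base_seed 0 --dataset_name boston --dim_output 1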
    #####################################
    ######### Fetch UCI dataset #########
    #####################################
    X_training, Y_training, X_testing, Y_testing = fetch_dataset(name=args.dataset_name)
    ### TODO -- remove this once you figure out a computationally efficient version of this model ###
    X_training = X_training[:1000, ...]
    Y_training = Y_training[:1000, ...]
    print('shape of training set')
    print(X_training.shape)
    print(Y_training.shape)
    print('shape of testing set')
    print(X_testing.shape)
    print(Y_testing.shape)
    # One inducing-point count per layer, and one dimensionality per layer
    # (input dim, then the hidden dims, then the output dim).
    num_inducing = [args.num_inducing]
    num_inducing.extend([args.num_inducing for _ in range(args.num_layers - 1)])
    dim_layers = [X_training.shape[1]]
    dim_layers.extend([args.dim_layers for _ in range(args.num_layers - 1)])
    dim_layers.append(args.dim_output)
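    # Worked example (hypothetical values): with an 8-dimensional input,
    # --num_layers 3, --dim_layers 5 and --dim_output 1, this yields
    # num_inducing == [M, M, M] and dim_layers == [8, 5, 5, 1].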
    ####################################################################################
    ######## get initialization for first layer inducing points locations #############
    ####################################################################################
    # Run k-means on (a subsample of) the training inputs so the clustering stays cheap.
    decimal = int(X_training.shape[0] / 10)
    if decimal > args.num_inducing:
        # A 10% subsample still contains more points than inducing points.
        k_mean_output = get_kmeans(X=X_training[:decimal, ...], num_inducing=args.num_inducing)
    elif X_training.shape[0] > 10000:
        # Large dataset but few points relative to num_inducing: cap the subsample at 10k.
        k_mean_output = get_kmeans(X=X_training[:10000, ...], num_inducing=args.num_inducing)
    else:
        k_mean_output = get_kmeans(X=X_training, num_inducing=args.num_inducing)
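    # `get_kmeans` is imported from uci_datasets_fetcher, which is not shown here.
    # A minimal sketch of what it plausibly does (an assumption, not the actual
    # implementation), using the sklearn KMeans imported above:
    #   def get_kmeans(X, num_inducing):
    #       return KMeans(n_clusters=num_inducing).fit(X).cluster_centers_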
    X_training = X_training.astype('float64')
    X_testing = X_testing.astype('float64')
    Y_training = Y_training.astype('float64')
    Y_testing = Y_testing.astype('float64')
    k_mean_output = k_mean_output.astype('float64')
    # Centre the training targets; the mean is added back at prediction time.
    Y_training_mean = np.mean(Y_training)
    Y_training = Y_training - Y_training_mean
    ### Create the dictionaries holding the per-layer posterior Cholesky factors.
    dict_chol_Kuu = defaultdict()
    dict_chol_Kuu_inv = defaultdict()
    Kuu = RBF_np(X1=k_mean_output, X2=k_mean_output,
                 log_lengthscales=[-0.301 for _ in range(X_training.shape[1])],
                 log_kernel_variance=0.301)
    Kuu += np.eye(args.num_inducing) * 1e-1  # jitter for numerical stability
    Kuu_inv = np.linalg.inv(Kuu)
    chol_Kuu_inv = np.linalg.cholesky(Kuu_inv)
    chol_Kuu = np.linalg.cholesky(Kuu)
    dict_chol_Kuu[1] = chol_Kuu
    dict_chol_Kuu_inv[1] = chol_Kuu_inv
    print('*** Cholesky *****')
    print(chol_Kuu)
    dict_chol_Schur = defaultdict()
    Kff = RBF_np(X1=X_training, X2=X_training,
                 log_lengthscales=[-0.301 for _ in range(X_training.shape[1])],
                 log_kernel_variance=0.301)
    Kff += np.eye(X_training.shape[0]) * 1e-1  # jitter for numerical stability
    Kfu = RBF_np(X1=X_training, X2=k_mean_output,
                 log_lengthscales=[-0.301 for _ in range(X_training.shape[1])],
                 log_kernel_variance=0.301)
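    # Schur complement of Kuu in the joint covariance of (f, u):
    #   Schur = Kff - Kfu Kuu^{-1} Kuf,
    # i.e. the covariance of the GP at the training inputs conditioned on the
    # inducing outputs. It inherits Kff's jitter, which keeps the Cholesky
    # factorization below well defined.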
    Schur = Kff - np.matmul(np.matmul(Kfu, Kuu_inv), np.transpose(Kfu))
    chol_Schur = np.linalg.cholesky(Schur)
    dict_chol_Schur[1] = chol_Schur
    print('*** Cholesky *****')
    print(chol_Schur)
    main_DeepGP(num_data=X_training.shape[0], dim_input=X_training.shape[1],
                dim_output=args.dim_output, num_iterations=args.num_iterations,
                num_inducing=num_inducing, type_var='full',
                num_layers=args.num_layers, dim_layers=dim_layers,
                num_batch=args.num_batch, Z_init=k_mean_output,
                num_test=X_testing.shape[0], learning_rate=args.learning_rate,
                base_seed=args.base_seed, mean_Y_training=Y_training_mean,
                dataset_name=args.dataset_name, use_diagnostics=True,
                task_type='regression',
                X_training=X_training, Y_training=Y_training,
                X_testing=X_testing, Y_testing=Y_testing,
                posterior_cholesky_Kmm=dict_chol_Kuu,
                posterior_cholesky_Kmm_inv=dict_chol_Kuu_inv,
                posterior_cholesky_Schur=dict_chol_Schur,
                num_samples_testing=1,
                num_samples_hut_trace_est=500,
                student_or_gaussian='student')