Commit

merge with dev branch
sergey-tomin committed Mar 20, 2019
2 parents 2b6f81a + fe3b6ca commit 41cde26
Showing 100 changed files with 16,957 additions and 1,768 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1,3 +1,4 @@
log.txt
.idea/

# PyDev
51 changes: 51 additions & 0 deletions GP/BasicInterfaces.py
@@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
"""
Contains simple interfaces for the Bayes optimization class.
Each interface must have the getState and setX methods as used below.
"""

import numpy as np

# Basically a dummy interface that just holds x-values
class TestInterface(object):
def __init__(self, x_init, y_init=0):
self.x = np.array(x_init,ndmin=2)
self.y = y_init

def getState(self):
return self.x, self.y

def setX(self, x_new):
self.x = x_new

# an interface that evaluates a function to give y-values
class fint(object):
def __init__(self, x_init, noise=0):
self.x = np.array(x_init,ndmin=2)
self.y = -1
self.noise = noise

def getState(self):
return self.x, self.f(self.x)

def f(self, x):
res = 20 - np.reshape(np.sum((x - 1)**2,axis=1) * np.sum((x + 1)**2,axis=1),x.shape) + x
res[np.abs(x) > 3.0] = 0
return res

def setX(self, x_new):
self.x = x_new

# uses a GP model's predictions to give y-values
class GPint(object):
def __init__(self, x_init, model):
self.model = model
self.x = x_init
self.y = model.predict(np.array(x_init,ndmin=2))

def getState(self):
return self.x, self.model.predict(np.array(self.x,ndmin=2))[0]

def setX(self, x_new):
self.x = x_new
73 changes: 73 additions & 0 deletions GP/DKL/README.md
@@ -0,0 +1,73 @@
# Deep-Kernel-GP

## Dependencies
The package depends on numpy and scipy.linalg.
The examples also use matplotlib and scikit-learn.

## Introduction



Instead of learning a mapping X-->Y directly with a neural network or GP regression, we learn the composite mapping
X-->Z-->Y, where the first step is performed by a neural net and the second by GP regression.

This way we can use GP regression to learn functions on data where the assumption that y(x) is a Gaussian process with covariance given by one of the standard covariance functions may not be a fair one.
For instance, we can learn functions with image pixels as inputs, or functions whose length scale varies with the input.


The parameters of the neural net are trained by maximizing the log marginal likelihood implied by z(x_train) and y_train.
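In other words, if K_z denotes the kernel matrix built from z(x_train) and sigma^2 the learned noise level, the objective is the standard GP log marginal likelihood (stated here for reference):

```
log p(y_train | x_train) = -0.5 * y^T (K_z + sigma^2 I)^(-1) y
                           - 0.5 * log det(K_z + sigma^2 I)
                           - (n/2) * log(2*pi)
```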

[Deep Kernel Learning - A.G. Wilson ++ ](https://arxiv.org/pdf/1511.02222.pdf)

[Using Deep Belief Nets to Learn Covariance Kernels for Gaussian Processes - G. Hinton ++](http://www.cs.toronto.edu/~fritz/absps/dbngp.pdf)

## Examples
Basic usage follows a scikit-learn-style API:

```python
# Imports assume the dknet package from this repository is on the Python path
from dknet import NNRegressor
from dknet.layers import Dense, CovMat
from dknet.optimizers import Adam, SciPyMin

layers=[]
layers.append(Dense(32,activation='tanh'))
layers.append(Dense(1))
layers.append(CovMat(kernel='rbf'))

opt=Adam(1e-3) # or opt=SciPyMin('l-bfgs-b')

gp=NNRegressor(layers,opt=opt,batch_size=x_train.shape[0],maxiter=1000,gp=True,verbose=True)
gp.fit(x_train,y_train)
y_pred,std=gp.predict(x_test)

```

The example creates a mapping z(x), where both x and z are 1-d vectors, using a neural network with one hidden layer.
The CovMat layer builds a covariance matrix from z using the covariance function v\*exp(-0.5*|z1-z2|**2) plus an additive noise term, where both the amplitude v and the noise level are learned during training.

After training they are available as gp.layers[-1].var and gp.layers[-1].s_alpha, respectively.
The gp.fast_forward() function can be used to extract the z(x) mapping (it skips the last layer, which produces an array of size [batch_size, batch_size]).
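As a minimal sketch (assuming `gp` has been fit as above, that `x_test` is the user's own test input, and that `fast_forward` accepts a batch of inputs), the learned quantities can be inspected like this:

```python
# Learned kernel amplitude and noise level, via the attributes named above
amplitude = gp.layers[-1].var
noise = gp.layers[-1].s_alpha

# Warped inputs z(x): runs the net but skips the final CovMat layer
z_test = gp.fast_forward(x_test)

# Predictive mean and standard deviation
y_pred, std = gp.predict(x_test)
```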

### Learning a function with varying length scale

In the example.py script, deep kernel learning (DKL) is used to learn from samples of the function sin(64(x+0.5)**4).

Learning this function with a plain neural network would be hard, since NNs struggle to fit rapidly oscillating functions.
Learning it with GP regression and a squared-exponential covariance function would also be suboptimal, since we have to commit to one fixed length scale.
Unless we have a lot of samples, we would be forced to give up precision on the slowly varying part of the function.
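A rough sketch of such an experiment, reusing the API from the usage example above (sample count, layer width, and optimizer settings are illustrative, not the exact values in example.py):

```python
import numpy as np

# Draw a modest number of noise-free samples of the target function
x_train = np.random.rand(60, 1) - 0.5
y_train = np.sin(64.0 * (x_train + 0.5)**4)

layers = [Dense(32, activation='tanh'), Dense(1), CovMat(kernel='rbf')]
gp = NNRegressor(layers, opt=Adam(1e-3), batch_size=x_train.shape[0],
                 maxiter=1000, gp=True, verbose=False)
gp.fit(x_train, y_train)

# Predict on a dense grid to visualize the mean and uncertainty
x_test = np.linspace(-0.5, 0.5, 200).reshape(-1, 1)
y_pred, std = gp.predict(x_test)
```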

DKL Prediction:

<p align="center">
<figure align="center">

<img src="ex1_1.png" width="350"/>
<figcaption>DKL Prediction</figcaption>


</figure>
<figure>
<img src="ex1_2.png" width="350"/>
<figcaption> z(x) function learned by neural network.</figcaption>
</figure>
</p>

We see that DKL solves the problem quite nicely given the limited data. We also see that for x < -0.5 the standard deviation of the DKL model does not capture the prediction error.
4 changes: 4 additions & 0 deletions GP/DKL/__init__.py
@@ -0,0 +1,4 @@
from __future__ import absolute_import

from . import dknet

41 changes: 41 additions & 0 deletions GP/DKL/collect_data.py
@@ -0,0 +1,41 @@

import os
import numpy as np
import scipy.io as sio


base_path = '/u1/lcls/matlab/data/2018/2018-01/'
quadlist = ['620', '640', '660', '680']
quadlist = sorted(['QUAD_LTU1_' + x + '_BCTRL' for x in quadlist])
gdet = 'GDET_FEE1_241_ENRCHSTBR'
energy = 'BEND_DMP1_400_BDES'

X = np.zeros((0,len(quadlist)+1))

for dir in os.listdir(base_path):
path = base_path + dir + '/'
for f in os.listdir(path):
if f[:3]=='Oce':
try:
rawdat = sio.loadmat(path+f)['data']
except:
continue
if set(quadlist).issubset(set(rawdat.dtype.names)):
y = rawdat[gdet][0][0]
es = rawdat[energy][0][0]
qs = [rawdat[q][0][0] for q in quadlist]
shps = [x.shape[1] for x in qs]
if y.shape[1] < 3 or min(shps) != max(shps) or shps[0] < 3:
continue
if y.shape[1] > shps[0]:
y = y[:,:-1]
new_stack = np.zeros((y.shape[1], X.shape[1]))
for i,q in enumerate(quadlist):
new_stack[:,i] = qs[i] / es

new_stack[:,-1] = y
X = np.concatenate((X,new_stack),axis=0)

np.savetxt('ltus_enormed.csv', X)


8 changes: 8 additions & 0 deletions GP/DKL/dknet/__init__.py
@@ -0,0 +1,8 @@
from __future__ import absolute_import

from . import layers
from . import models
from . import optimizers
from . import utils
from . import loss
from .models import NNRegressor
16 changes: 16 additions & 0 deletions GP/DKL/dknet/layers/__init__.py
@@ -0,0 +1,16 @@
from __future__ import absolute_import
from . import layer
from . import activation
from . import convolutional
from . import dense
from . import reshape
from . import pooling
from . import dropout


from .pooling import MaxPool2D,AveragePool2D
from .dense import Dense,RNNCell,CovMat,Parametrize,Scale
from .convolutional import Conv2D
from .activation import Activation
from .reshape import Flatten
from .dropout import Dropout
60 changes: 60 additions & 0 deletions GP/DKL/dknet/layers/activation.py
@@ -0,0 +1,60 @@
import numpy
from numpy import unravel_index
from .layer import Layer
def relu(x,dtype=numpy.float64):
tmp=(x>=0)
return x*tmp,1*tmp

def sigmoid(x,dtype=numpy.float64):
a=1.0/(numpy.exp(-x)+1.0)
return a, a*(1-a)

def linear(x,dtype=numpy.float64):
return x,1.0#numpy.ones_like(x,dtype=dtype)

def tanh(x,dtype=numpy.float64):
a=numpy.tanh(x)
return a, 1.0-a**2

def lrelu(x,dtype=numpy.float64):
y=(x>=0)*1.0+(x<0)*0.01
return y*x,y

def softplus(x,dtype=numpy.float64):
tmp=numpy.exp(x)
return numpy.log(tmp+1.0), tmp/(1.0+tmp)

def softmax(x,dtype=numpy.float64):
s=numpy.exp(x)
s=s/numpy.sum(s,1)[:,numpy.newaxis]
return s,s*(1.0-s)

def rbf(x,dtype=numpy.float64):

s=numpy.exp(-0.5*numpy.sum(x**2,-1))
print(x.shape,s.shape)
return s, -x*s[:,:,numpy.newaxis]

class Activation(Layer):

dict={'linear':linear,'relu':relu,'sigmoid':sigmoid,'tanh':tanh,'softmax':softmax,'lrelu':lrelu,'softplus':softplus,'rbf':rbf}

def __init__(self,strr):

if strr in self.dict.keys():
self.afstr=strr
self.af=self.dict[strr]
else:
print("Error. Undefined activation function '" + str(strr)+"'. Using linear activation.")
print("Available activations: " + str(list(self.dict.keys())))
self.af=linear
self.afstr='linear'
self.trainable=False
def forward(self,X):
self.inp=X
self.a=self.af(X,dtype=self.dtype)
self.out=self.a[0]
return self.out
def backward(self,err):
return self.a[1]*err

52 changes: 52 additions & 0 deletions GP/DKL/dknet/layers/convolutional.py
@@ -0,0 +1,52 @@
import numpy
from numpy import unravel_index
from .activation import Activation
from .layer import Layer

class Conv2D(Layer):
def __init__(self,n_out,kernel_size,activation=None):
self.n_out=n_out
self.activation=activation
self.kernel_size=kernel_size
self.trainable=True
def initialize_ws(self):
self.W=numpy.random.randn(self.kernel_size[0],self.kernel_size[1],self.n_inp,self.n_out).astype(dtype=self.dtype)*numpy.sqrt(1.0/(self.n_inp*numpy.prod(self.kernel_size)))
self.b=numpy.zeros((1,self.n_out),dtype=self.dtype)
self.dW=numpy.zeros_like(self.W,dtype=self.dtype)
self.db=numpy.zeros_like(self.b,dtype=self.dtype)
assert(self.W.shape[0]%2!=0) #Odd filter size pls
assert(self.W.shape[1]%2!=0) #Odd fiter size pls
def forward(self,X):
self.inp=X

hpad,wpad=int(self.W.shape[0]/2),int(self.W.shape[1]/2)
X2=numpy.zeros((X.shape[0],X.shape[1]+2*hpad,X.shape[2]+2*wpad,X.shape[3]),dtype=self.dtype)
X2[:,hpad:X2.shape[1]-hpad,wpad:X2.shape[2]-wpad,:]=numpy.copy(X)
A=numpy.zeros((X.shape[0],X.shape[1],X.shape[2],self.n_out),dtype=self.dtype)
M,N=X.shape[1],X.shape[2]
for i in range(0,M):
for j in range(0,N):
A[:,i,j,:]=numpy.sum(X2[:,hpad+i-hpad:hpad+i+hpad+1,wpad+j-wpad:wpad+j+wpad+1,:][:,:,:,:,numpy.newaxis]*self.W[numpy.newaxis,:,:,:,:],axis=(1,2,3))
A+=self.b[0,:]

self.out=A
return self.out

def backward(self,err):

X=self.inp
hpad,wpad=int(self.W.shape[0]/2),int(self.W.shape[1]/2)
X2=numpy.zeros((X.shape[0],X.shape[1]+2*hpad,X.shape[2]+2*wpad,X.shape[3]),dtype=self.dtype)
X2[:,hpad:X2.shape[1]-hpad,wpad:X2.shape[2]-wpad,:]=numpy.copy(X)

tmpdW=numpy.zeros_like(self.dW,dtype=self.dtype)
dodi=numpy.zeros_like(X2,dtype=self.dtype)
M,N=X.shape[1],X.shape[2]
for i in range(0,M):
for j in range(0,N):
tmpdW+=numpy.sum(err[:,i,j,:][:,numpy.newaxis,numpy.newaxis,numpy.newaxis,:]*X2[:,i:i+2*hpad+1,j:j+2*wpad+1,:][:,:,:,:,numpy.newaxis],0)
dodi[:,i:i+2*hpad+1,j:j+2*wpad+1,:]+=numpy.sum(err[:,i,j,:][:,numpy.newaxis,numpy.newaxis,numpy.newaxis,:]*self.W[numpy.newaxis,:,:,:,:],-1)
self.dW=tmpdW
self.db[0,:]=numpy.sum(err,(0,1,2))

return dodi[:,hpad:dodi.shape[1]-hpad,wpad:dodi.shape[2]-wpad,:]
