Commit

merge with dev branch
sergey-tomin committed Mar 20, 2019
2 parents 2b6f81a + fe3b6ca commit 41cde26
Showing 100 changed files with 16,957 additions and 1,768 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1,3 +1,4 @@
log.txt
.idea/

# PyDev
51 changes: 51 additions & 0 deletions GP/BasicInterfaces.py
@@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
"""
Contains simple interfaces for the Bayes optimization class.
Each interface must have the getState and setX methods as used below.
"""

import numpy as np

# Basically a dummy interface that just holds x-values
class TestInterface(object):
def __init__(self, x_init, y_init=0):
self.x = np.array(x_init,ndmin=2)
self.y = y_init

def getState(self):
return self.x, self.y

def setX(self, x_new):
self.x = x_new

# an interface that evaluates a function to give y-values
class fint(object):
def __init__(self, x_init, noise=0):
self.x = np.array(x_init,ndmin=2)
self.y = -1
self.noise = noise

def getState(self):
return self.x, self.f(self.x)

def f(self, x):
res = 20 - np.reshape(np.sum((x - 1)**2,axis=1) * np.sum((x + 1)**2,axis=1),x.shape) + x
res[np.abs(x) > 3.0] = 0
return res

def setX(self, x_new):
self.x = x_new

# uses a GP model's predictions to give y-values
class GPint(object):
def __init__(self, x_init, model):
self.model = model
self.x = x_init
self.y = model.predict(np.array(x_init,ndmin=2))

def getState(self):
return self.x, self.model.predict(np.array(self.x,ndmin=2))[0]

def setX(self, x_new):
self.x = x_new
73 changes: 73 additions & 0 deletions GP/DKL/README.md
@@ -0,0 +1,73 @@
# Deep-Kernel-GP

## Dependencies
The package depends on numpy and scipy.linalg.
The examples also use matplotlib and scikit-learn.

## Introduction



Instead of learning a mapping X-->Y directly with a neural network or GP regression, we learn the composite mapping
X-->Z-->Y, where the first step is performed by a neural net and the second by GP regression.

This way we can use GP regression to learn functions on data where the assumption that y(x) is a Gaussian process with covariance given by one of the standard covariance functions may not be a fair one.
For instance, we can learn functions with image pixels as inputs, or functions whose length scale varies with the input.


The parameters of the neural net are trained by maximizing the log marginal likelihood implied by z(x_train) and y_train.
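In other words, if K_z denotes the kernel matrix built from z(x_train) and sigma^2 the learned noise level, the objective is the standard GP log marginal likelihood (stated here for reference):

```
log p(y_train | x_train) = -0.5 * y^T (K_z + sigma^2 I)^(-1) y
                           - 0.5 * log det(K_z + sigma^2 I)
                           - (n/2) * log(2*pi)
```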

[Deep Kernel Learning - A.G. Wilson ++ ](https://arxiv.org/pdf/1511.02222.pdf)

[Using Deep Belief Nets to Learn Covariance Kernels for Gaussian Processes - G. Hinton ++](http://www.cs.toronto.edu/~fritz/absps/dbngp.pdf)

## Examples
Basic usage follows a scikit-learn-style API:

```python
# Imports assume the dknet package from this repository is on the Python path
from dknet import NNRegressor
from dknet.layers import Dense, CovMat
from dknet.optimizers import Adam, SciPyMin

layers=[]
layers.append(Dense(32,activation='tanh'))
layers.append(Dense(1))
layers.append(CovMat(kernel='rbf'))

opt=Adam(1e-3) # or opt=SciPyMin('l-bfgs-b')

gp=NNRegressor(layers,opt=opt,batch_size=x_train.shape[0],maxiter=1000,gp=True,verbose=True)
gp.fit(x_train,y_train)
y_pred,std=gp.predict(x_test)

```

The example creates a mapping z(x), where both x and z are 1-d vectors, using a neural network with one hidden layer.
The CovMat layer builds a covariance matrix from z using the covariance function v\*exp(-0.5*|z1-z2|**2) plus an additive noise term, where both the amplitude v and the noise level are learned during training.

After training they are available as gp.layers[-1].var and gp.layers[-1].s_alpha, respectively.
The gp.fast_forward() function can be used to extract the z(x) mapping (it skips the last layer, which produces an array of size [batch_size, batch_size]).
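As a minimal sketch (assuming `gp` has been fit as above, that `x_test` is the user's own test input, and that `fast_forward` accepts a batch of inputs), the learned quantities can be inspected like this:

```python
# Learned kernel amplitude and noise level, via the attributes named above
amplitude = gp.layers[-1].var
noise = gp.layers[-1].s_alpha

# Warped inputs z(x): runs the net but skips the final CovMat layer
z_test = gp.fast_forward(x_test)

# Predictive mean and standard deviation
y_pred, std = gp.predict(x_test)
```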

### Learning a function with varying length scale

In the example.py script, deep kernel learning (DKL) is used to learn from samples of the function sin(64(x+0.5)**4).

Learning this function with a plain neural network would be hard, since NNs struggle to fit rapidly oscillating functions.
Learning it with GP regression and a squared-exponential covariance function would also be suboptimal, since we have to commit to one fixed length scale.
Unless we have a lot of samples, we would be forced to give up precision on the slowly varying part of the function.
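A rough sketch of such an experiment, reusing the API from the usage example above (sample count, layer width, and optimizer settings are illustrative, not the exact values in example.py):

```python
import numpy as np

# Draw a modest number of noise-free samples of the target function
x_train = np.random.rand(60, 1) - 0.5
y_train = np.sin(64.0 * (x_train + 0.5)**4)

layers = [Dense(32, activation='tanh'), Dense(1), CovMat(kernel='rbf')]
gp = NNRegressor(layers, opt=Adam(1e-3), batch_size=x_train.shape[0],
                 maxiter=1000, gp=True, verbose=False)
gp.fit(x_train, y_train)

# Predict on a dense grid to visualize the mean and uncertainty
x_test = np.linspace(-0.5, 0.5, 200).reshape(-1, 1)
y_pred, std = gp.predict(x_test)
```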

DKL Prediction:

<p align="center">
<figure align="center">

<img src="ex1_1.png" width="350"/>
<figcaption>DKL Prediction</figcaption>


</figure>
<figure>
<img src="ex1_2.png" width="350"/>
<figcaption> z(x) function learned by neural network.</figcaption>
</figure>
</p>

We see that DKL solves the problem quite nicely given the limited data. We also see that for x < -0.5 the standard deviation of the DKL model does not capture the prediction error.
4 changes: 4 additions & 0 deletions GP/DKL/__init__.py
@@ -0,0 +1,4 @@
from __future__ import absolute_import

from . import dknet

41 changes: 41 additions & 0 deletions GP/DKL/collect_data.py
@@ -0,0 +1,41 @@

import os
import numpy as np
import scipy.io as sio


base_path = '/u1/lcls/matlab/data/2018/2018-01/'
quadlist = ['620', '640', '660', '680']
quadlist = sorted(['QUAD_LTU1_' + x + '_BCTRL' for x in quadlist])
gdet = 'GDET_FEE1_241_ENRCHSTBR'
energy = 'BEND_DMP1_400_BDES'

X = np.zeros((0,len(quadlist)+1))

for dir in os.listdir(base_path):
path = base_path + dir + '/'
for f in os.listdir(path):
if f[:3]=='Oce':
try:
rawdat = sio.loadmat(path+f)['data']
except:
continue
if set(quadlist).issubset(set(rawdat.dtype.names)):
y = rawdat[gdet][0][0]
es = rawdat[energy][0][0]
qs = [rawdat[q][0][0] for q in quadlist]
shps = [x.shape[1] for x in qs]
if y.shape[1] < 3 or min(shps) != max(shps) or shps[0] < 3:
continue
if y.shape[1] > shps[0]:
y = y[:,:-1]
new_stack = np.zeros((y.shape[1], X.shape[1]))
for i,q in enumerate(quadlist):
new_stack[:,i] = qs[i] / es

new_stack[:,-1] = y
X = np.concatenate((X,new_stack),axis=0)

np.savetxt('ltus_enormed.csv', X)


8 changes: 8 additions & 0 deletions GP/DKL/dknet/__init__.py
@@ -0,0 +1,8 @@
from __future__ import absolute_import

from . import layers
from . import models
from . import optimizers
from . import utils
from . import loss
from .models import NNRegressor
16 changes: 16 additions & 0 deletions GP/DKL/dknet/layers/__init__.py
@@ -0,0 +1,16 @@
from __future__ import absolute_import
from . import layer
from . import activation
from . import convolutional
from . import dense
from . import reshape
from . import pooling
from . import dropout


from .pooling import MaxPool2D,AveragePool2D
from .dense import Dense,RNNCell,CovMat,Parametrize,Scale
from .convolutional import Conv2D
from .activation import Activation
from .reshape import Flatten
from .dropout import Dropout
60 changes: 60 additions & 0 deletions GP/DKL/dknet/layers/activation.py
@@ -0,0 +1,60 @@
import numpy
from numpy import unravel_index
from .layer import Layer
def relu(x,dtype=numpy.float64):
tmp=(x>=0)
return x*tmp,1*tmp

def sigmoid(x,dtype=numpy.float64):
a=1.0/(numpy.exp(-x)+1.0)
return a, a*(1-a)

def linear(x,dtype=numpy.float64):
return x,1.0#numpy.ones_like(x,dtype=dtype)

def tanh(x,dtype=numpy.float64):
a=numpy.tanh(x)
return a, 1.0-a**2

def lrelu(x,dtype=numpy.float64):
y=(x>=0)*1.0+(x<0)*0.01
return y*x,y

def softplus(x,dtype=numpy.float64):
tmp=numpy.exp(x)
return numpy.log(tmp+1.0), tmp/(1.0+tmp)

def softmax(x,dtype=numpy.float64):
s=numpy.exp(x)
s=s/numpy.sum(s,1)[:,numpy.newaxis]
return s,s*(1.0-s)

def rbf(x,dtype=numpy.float64):

s=numpy.exp(-0.5*numpy.sum(x**2,-1))
print(x.shape,s.shape)
return s, -x*s[:,:,numpy.newaxis]

class Activation(Layer):

dict={'linear':linear,'relu':relu,'sigmoid':sigmoid,'tanh':tanh,'softmax':softmax,'lrelu':lrelu,'softplus':softplus,'rbf':rbf}

def __init__(self,strr):

if strr in self.dict.keys():
self.afstr=strr
self.af=self.dict[strr]
else:
print("Error. Undefined activation function '" + str(strr)+"'. Using linear activation.")
print("Available activations: " + str(list(self.dict.keys())))
self.af=linear
self.afstr='linear'
self.trainable=False
def forward(self,X):
self.inp=X
self.a=self.af(X,dtype=self.dtype)
self.out=self.a[0]
return self.out
def backward(self,err):
return self.a[1]*err

52 changes: 52 additions & 0 deletions GP/DKL/dknet/layers/convolutional.py
@@ -0,0 +1,52 @@
import numpy
from numpy import unravel_index
from .activation import Activation
from .layer import Layer

class Conv2D(Layer):
def __init__(self,n_out,kernel_size,activation=None):
self.n_out=n_out
self.activation=activation
self.kernel_size=kernel_size
self.trainable=True
def initialize_ws(self):
self.W=numpy.random.randn(self.kernel_size[0],self.kernel_size[1],self.n_inp,self.n_out).astype(dtype=self.dtype)*numpy.sqrt(1.0/(self.n_inp*numpy.prod(self.kernel_size)))
self.b=numpy.zeros((1,self.n_out),dtype=self.dtype)
self.dW=numpy.zeros_like(self.W,dtype=self.dtype)
self.db=numpy.zeros_like(self.b,dtype=self.dtype)
assert(self.W.shape[0]%2!=0) #Odd filter size pls
assert(self.W.shape[1]%2!=0) #Odd fiter size pls
def forward(self,X):
self.inp=X

hpad,wpad=int(self.W.shape[0]/2),int(self.W.shape[1]/2)
X2=numpy.zeros((X.shape[0],X.shape[1]+2*hpad,X.shape[2]+2*wpad,X.shape[3]),dtype=self.dtype)
X2[:,hpad:X2.shape[1]-hpad,wpad:X2.shape[2]-wpad,:]=numpy.copy(X)
A=numpy.zeros((X.shape[0],X.shape[1],X.shape[2],self.n_out),dtype=self.dtype)
M,N=X.shape[1],X.shape[2]
for i in range(0,M):
for j in range(0,N):
A[:,i,j,:]=numpy.sum(X2[:,hpad+i-hpad:hpad+i+hpad+1,wpad+j-wpad:wpad+j+wpad+1,:][:,:,:,:,numpy.newaxis]*self.W[numpy.newaxis,:,:,:,:],axis=(1,2,3))
A+=self.b[0,:]

self.out=A
return self.out

def backward(self,err):

X=self.inp
hpad,wpad=int(self.W.shape[0]/2),int(self.W.shape[1]/2)
X2=numpy.zeros((X.shape[0],X.shape[1]+2*hpad,X.shape[2]+2*wpad,X.shape[3]),dtype=self.dtype)
X2[:,hpad:X2.shape[1]-hpad,wpad:X2.shape[2]-wpad,:]=numpy.copy(X)

tmpdW=numpy.zeros_like(self.dW,dtype=self.dtype)
dodi=numpy.zeros_like(X2,dtype=self.dtype)
M,N=X.shape[1],X.shape[2]
for i in range(0,M):
for j in range(0,N):
tmpdW+=numpy.sum(err[:,i,j,:][:,numpy.newaxis,numpy.newaxis,numpy.newaxis,:]*X2[:,i:i+2*hpad+1,j:j+2*wpad+1,:][:,:,:,:,numpy.newaxis],0)
dodi[:,i:i+2*hpad+1,j:j+2*wpad+1,:]+=numpy.sum(err[:,i,j,:][:,numpy.newaxis,numpy.newaxis,numpy.newaxis,:]*self.W[numpy.newaxis,:,:,:,:],-1)
self.dW=tmpdW
self.db[0,:]=numpy.sum(err,(0,1,2))

return dodi[:,hpad:dodi.shape[1]-hpad,wpad:dodi.shape[2]-wpad,:]
