diff --git a/hmc/__init__.py b/hmc/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/hmc/hmc.py b/hmc/hmc.py new file mode 100644 index 0000000..52c4b65 --- /dev/null +++ b/hmc/hmc.py @@ -0,0 +1,259 @@ +#-*- coding:utf-8 -*- + +import numpy +from theano import function ,shared +from theano import tensor as TT +import theano + +sharedX=lambda X, name : \ + shared(numpy.asarray(X,dtype=theano.config.floatX),name=name) + +#定义动能函数 +#input: +# vel: matrix的符号变量,每行为速度矢量 +#output: +# vector的符号矢量 第i个元素是第i个速度的动能 +def kinetic_energy(vel): + return 0.5*(vel**2).sum(axis=1) + +#给定位置和速度,返回hamilton函数(动能与势能之和) +#input: +# pos:matrix的符号变量,每行为位置矢量 +# vel:matrix的符号变量,每行为速度矢量 +# energy_fn:python函数 给定位置信息后,输出势能 +#output: +# theano vector 其中第i个元素表示第i个位置和第i个速度的hanmilton函数 +def hamiltonian(pos,vel,energy_fn): + return kinetic_energy(vel)+energy_fn(pos) + +#进行metropolis_hastings接受拒绝计算 +#input: +# energy_prev: theano vector t时刻的能量函数 +# engergy_next: theano vector t+1时刻的能量函数 +# s_rng: RandomStreams 用于生成随机数的stream对象 +#output: +# 返回为真,则是接受, +def metropolis_hastings_accept(energy_prev,energy_next,s_rng): + ediff=energy_next-energy_prev + return (TT.exp(ediff)-s_rng.uniform(size=energy_prev.shape))>=0 + +#执行'n_steps'步使用hamilton动态函数更新后,返回最终的(位置。速度)信息。 +#input: +# initial_pos: theano matrix 初始位置 +# initial_vel: thenao matrix 初始速度 +# stepsize: theano scalar 步长 +# n_steps: theano scalar 总步数 +# energy_fn: python function 计算势能的函数 +#output: +# rval1: thenao matrix 最终位置 +# rval2: theano matrix 最终速度 +def simulate_dynamics(initial_pos,initial_vel,stepsize,n_steps,energy_fn): + #一步hamilton动态函数更新 + #input: + # pos: theano matrix t时刻的位置信息 + # vel: theano matrix t时刻的速度信息 + # step: thenao scalar 步长 + #output: + # rval1: [matrix,matrix] 新的位置pos(t+stepsize) 新的速度vel(t+stepsize) + # rval2: dictionary 用于Scan操作的updates + def leapfrog(pos,vel,step): + #根据pos(t)和vel(t-stepsize/2)计算vel(t+stepsize/2) + dE_pos=TT.grad(energy_fn(pos).sum(),pos) + new_vel=vel-step*dE_pos + new_pos=pos+step*new_vel + return [new_pos,new_vel],{} + + #计算t+stepsize/2时刻的速度 + initial_energy=energy_fn(initial_pos) + dE_dpos=TT.grad(initial_energy.sum(),initial_pos) + vel_half_step=initial_vel-0.5*stepsize*dE_dpos + #计算t+stepsize时刻的位置 + pos_full_step=initial_pos+stepsize*vel_half_step + + #进行leapfrog更新:使用scan操作 + #vel(t+(m-1/2)*stepsize)和pos(t+m*stepsize) 其中m为[2,n_step] + (all_pos,all_vel),scan_updates=theano.scan(leapfrog, + outputs_info=[dict(initial=pos_full_step), + dict(initial=vel_half_step), + ], + non_sequences=[stepsize], + n_steps=n_steps-1) + final_pos=all_pos[-1] + final_vel=all_vel[-1] + #Scan函数返回更新字典,Scan函数从RandomStream中采样 + #在编译Theano函数时,使用更新字典,从而避免每次调用函数时生成随机数。 + #然后在本程序中,有意识的忽略“scan_updates”,因为我们知道它是空的 + assert not scan_updates + + #scan返回的最后的速度值 vel((t +# (n_steps - 1 / 2) * stepsize)) + #此时,再多进行一次操作,返回vel(t + n_steps * stepsize) + energy=energy_fn(final_pos) + final_vel=final_vel-0.5*stepsize*TT.grad(energy.sum(),final_pos) + + #返回最终的位置和速度 + return final_pos,final_vel + + +#本函数执行一步HMC采样。首先,由单变量高斯分布对速度采样,然后使用Hamilton动态函数 +# 执行'n_steps'蛙跳更新,并使用Metropolis-Hastings执行接受拒绝检验。 +#input: +# s_rng: thenao.shared.random.stream 生成随机速度 +# positions: theano.shared.matrix 位置矩阵,每行为位置矢量 +# energy_fn: python函数 计算给定位置的势能 +# stepsize: theano.shared.scalar 'n_steps'步HMC仿真的步长 +# n_steps: int 仿真步数 +#output: +# rval1:bool 结果为真,则接受move,否则拒绝move +# rval2: thenao.matrix 每行为新的位置 +def hmc_move(s_rng,positions,energy_fn,stepsize,n_steps): + #速度的随机采样 + initial_vel=s_rng.normal(size=positions.shape) + #执行Hamilton动态函数仿真 + final_pos,final_vel=simulate_dynamics(initial_pos=positions, + initial_vel=initial_vel, + stepsize=stepsize, + n_steps=n_steps, + energy_fn=energy_fn) + #基于联合分布计算接受/拒绝 + accept=metropolis_hastings_accept(energy_prev=hamiltonian(positions,initial_vel,energy_fn), + energy_next=hamiltonian(positions,initial_vel,energy_fn), + s_rng=s_rng) + + return accept,final_pos + +#函数执行'n_step'步HMC采样(hmc_move函数)。本函数生成了用于'simulate'函数的更新字典, +#更新包括:位置(如果move被接受),stepsize(计算给定目标的接受率),平均接受率(计算moving平均) +#input: +# positions: theano.matrix 每行为旧位置 +# stepsize: thenao.scalar 当前步长 +# avg_acceptance_rate: theano.scalar 当前平均接受率 +# final_pos: theano.matrix 每行为新位置 +# accept: theano.scalar bool型,HMCmove是否被接受 +# target_acceptance_rate: float 目标接受率 +# stepsize_inc: float 步长增长率 +# stepsize_dec: float 步长下降率 +# stepsize_min: float 最小步长 +# stepsize_max: float 最大步长 +# avg_acceptance_slowness: float 指数平均moving的平均接受率 +# (1-avg_acceptance_slowness)是最新观测值的权重 +def hmc_updates(positions,stepsize,avg_acceptance_rate,final_pos,accept, + target_acceptance_rate,stepsize_inc,stepsize_dec, + stepsize_min,stepsize_max,avg_acceptance_slowness): + + ################### + ######位置更新###### + ################### + #将'accept'由scalar扩展为tensor,与final_pos有相同维度,dimshuffle不太懂 + accept_matrix=accept.dimshuffle(0,*(('x',) * (final_pos.ndim - 1))) + #如果accept为True,则更新'final_pos',否则保持不变 + new_positions=TT.switch(accept_matrix,final_pos,positions) + + ################### + ###步长更新######### + ################### + #如果接受率太低,或样本噪点太多,那么减小步长;如果接受率太高,或样本过于保守, + #那么增大步长。 + _new_stepsize=TT.switch(avg_acceptance_rate>target_acceptance_rate, + stepsize*stepsize_inc, + stepsize*stepsize_dec) + #保持stepsize在[stepsize_min,stepsize_max]区间内 + new_stepsize=TT.clip(_new_stepsize,stepsize_min,stepsize_max) + ################### + ##接受率更新######## + ################### + #执行指数平均moving + mean_dtype=theano.scalar.upcast(accept.dtype,avg_acceptance_rate.dtype) + new_acceptance_rate=TT.add( + avg_acceptance_slowness*avg_acceptance_rate, + (1-avg_acceptance_slowness)*accept.mean(dtype=mean_dtype)) + + return [(positions,new_positions), + (stepsize,new_stepsize), + (avg_acceptance_rate,new_acceptance_rate)] + +#混合蒙特卡洛采样的封装函数。该函数建立符号图执行HMC仿真(使用'hmc_move'和'hmc_updates')。 +#该图在'simulate'函数中编译,'simulate'函数执行仿真,其中更新值要求共享变量。 +#用户通过'draw'函数中先进蒙特卡洛法采样,函数中依次调用‘simulate’和 ‘get_position’函数返回当前采样值 +#超参数与Marc'Aurelio'编写的'train_mcRBM.py'文件中超参数相同(代码见Marc'Aurelio'的个人主页)。 +class HMC_sampler(object): + def __init__(self,**kwargs): + self.__dict__.update(kwargs) + + #input: + # shared_positions:theano.tensor.matrix 保存很多粒子初始位置的一维矩阵 + # energy_fn: 调用energy_fn(positions)函数,返回能量矢量,长度为batchsize + # 能量矢量之和必须可微(使用theano.tensor.grad),用于HMC采样 + @classmethod + def new_from_shared_positions(cls,shared_positions,energy_fn, + initial_stepsize=0.01,target_acceptance_rate=0.9,n_steps=20, + stepsize_dec=0.88, + stepsize_min=0.001, + stepsize_max=0.25, + stepsize_inc=1.02, + avg_acceptance_slowness=0.9, #用于生成avg,如果值为1.0,则不移动 + seed=12345): + batchsize=shared_positions.shape[0] + #定义符号变量 + stepsize=sharedX(initial_stepsize,'hmc_stepsize') + avg_acceptance_rate=sharedX(target_acceptance_rate,'avg_acceptance_rate') + s_rng=TT.shared_randomstreams.RandomStreams(seed) + + #定义'n_step'步HMC仿真 + accept,final_pos=hmc_move( + s_rng, + shared_positions, + energy_fn, + stepsize, + n_steps) + + #定义更新字典,用于每次'simulate'函数 + simulate_updates=hmc_updates( + shared_positions, + stepsize, + avg_acceptance_rate, + final_pos=final_pos, + accept=accept, + stepsize_min=stepsize_min, + stepsize_max=stepsize_max, + stepsize_inc=stepsize_inc, + stepsize_dec=stepsize_dec, + target_acceptance_rate=target_acceptance_rate, + avg_acceptance_rate=avg_acceptance_rate) + + #编译函数 + simulate=function([],[],updates=simulate_updates) + + #创建包含以下属性的HMC_sampler对象: + return cls( + positions=shared_positions, + stepsize=stepsize, + stepsize_min=stepsize_min, + stepsize_max=stepsize_max, + avg_acceptance_rate=avg_acceptance_rate, + target_acceptance_rate=target_acceptance_rate, + s_rng=s_rng, + _updates=simulate_updates, + simulte=simulate) + + #执行'n_steps'HMC仿真后,返回新的位置 + #input: + # kwargs: dictionary 使用'get_values()'函数传递共享变量(self.positions) + # 例如,如果不想复制共享变量,则设置'borrow=False' + #output: + # rval: numpy.matrix 矩阵维度与'iniital_position'相同 + def draw(self,**kwargs): + self.simulate() + return self.positions.get_value(borrow=False) + + + + + + + + + + + + + diff --git a/hmc/hmc.pyc b/hmc/hmc.pyc new file mode 100644 index 0000000..c44877c Binary files /dev/null and b/hmc/hmc.pyc differ diff --git a/hmc/test_hmc.py b/hmc/test_hmc.py new file mode 100644 index 0000000..fbd182b --- /dev/null +++ b/hmc/test_hmc.py @@ -0,0 +1,65 @@ +#-*- coding:utf-8 -*- + +import numpy +from scipy import linalg +import theano + +from hmc import HMC_sampler + +def sampler_on_nd_gaussian(sampler_cls,burnin,n_samples,dim=10): + batchsize=3 + rng=numpy.random.RandomState(123) + + #定义高斯的期望和协方差 + mu=numpy.array(rng.rand(dim)*10,dtype=theano.config.floatX) + cov=numpy.array(rng.rand(dim,dim),dtype=theano.config.floatX) + cov=(cov+cov.T)/2 + cov[numpy.arange(dim),numpy.arange(dim)]=1.0 + cov_inv=linalg.inv(cov) #cov的逆矩阵 + + #定义多变量高斯函数的能量函数 + def gaussian_energy(x): + return 0.5*(theano.tensor.dot((x-mu),cov_inv)*(x-mu)).sum(axis=1) + + #定义位置的共享变量 + positon=rng.randn(batchsize,dim).astype(theano.config.floatX) + positon=theano.shared(positon) + + #创建HMC样本 + sampler=sampler_cls(positon,gaussian_energy, + initial_stepsize=1e-3,stepsize_max=0.5) + + #开始HMC仿真 + garbage=[sampler.draw() for r in xrange(burnin)] + #'n_samples':返回3D张量 [n_samples,batchsize,dim] + _samples=numpy.asarray([sampler.draw() for r in xrange(n_samples)]) + #将样本展开,[n_samples*batchsize,dim] + samples=_samples.T.reshape(dim,-1).T + + print '************ TARGET VALUES **************' + print 'target mean: ',mu + print 'target cov:\n ',cov + + print '****** EMPIRICAL MEAN/COV USING HMC ******' + print 'empirical mean: ', samples.mean(axis=0) + print 'empirical_cov:\n', numpy.cov(samples.T) + + print '****** HMC INTERNALS ******' + print 'final stepsize', sampler.stepsize.get_value() + print 'final acceptance_rate', sampler.avg_acceptance_rate.get_value() + + return sampler + + +def test_hmc(): + sampler = sampler_on_nd_gaussian(HMC_sampler.new_from_shared_positions, + burnin=1000, n_samples=1000, dim=5) + assert abs(sampler.avg_acceptance_rate.get_value() - + sampler.target_acceptance_rate) < .1 + assert sampler.stepsize.get_value() >= sampler.stepsize_min + assert sampler.stepsize.get_value() <= sampler.stepsize_max + + + + + diff --git a/rbm.py b/rbm.py index bcc41a4..4686380 100644 --- a/rbm.py +++ b/rbm.py @@ -183,6 +183,49 @@ def gibbs_vhv(self,v0_sample): return [pre_sigmoid_h1,h1_mean,h1_sample, pre_sigmoid_v1,v1_mean,v1_sample] + + + + + + # def get_reconstruction_cost(self, updates, pre_sigmoid_nv): + # """Approximation to the reconstruction error + # + # Note that this function requires the pre-sigmoid activation as + # input. To understand why this is so you need to understand a + # bit about how Theano works. Whenever you compile a Theano + # function, the computational graph that you pass as input gets + # optimized for speed and stability. This is done by changing + # several parts of the subgraphs with others. One such + # optimization expresses terms of the form log(sigmoid(x)) in + # terms of softplus. We need this optimization for the + # cross-entropy since sigmoid of numbers larger than 30. (or + # even less then that) turn to 1. and numbers smaller than + # -30. turn to 0 which in terms will force theano to compute + # log(0) and therefore we will get either -inf or NaN as + # cost. If the value is expressed in terms of softplus we do not + # get this undesirable behaviour. This optimization usually + # works fine, but here we have a special case. The sigmoid is + # applied inside the scan op, while the log is + # outside. Therefore Theano will only see log(scan(..)) instead + # of log(sigmoid(..)) and will not apply the wanted + # optimization. We can not go and replace the sigmoid in scan + # with something else also, because this only needs to be done + # on the last step. Therefore the easiest and more efficient way + # is to get also the pre-sigmoid activation as an output of + # scan, and apply both the log and sigmoid outside scan such + # that Theano can catch and optimize the expression. + # + # """ + # + # cross_entropy = T.mean( + # T.sum(self.input * T.log(T.nnet.sigmoid(pre_sigmoid_nv)) + + # (1 - self.input) * T.log(1 - T.nnet.sigmoid(pre_sigmoid_nv)), + # axis=1)) + # + # return cross_entropy + + def get_cost_updates(self,lr=0.1,persistent=None,k=1): """ @@ -271,8 +314,15 @@ def get_reconstruction_cost(self,updates,pre_sigmoid_nv): 也不会进行优化。我们找不到替代scan中sigmoid的方法,因为只需要在最后一步执行。最简单有效 的办法是输出未sigmoid的值,在scan之外同时应用log和sigmoid。 """ - cross_entropy=T.mean(T.sum(self.input*T.log(T.nnet.sigmoid(pre_sigmoid_nv))+ - (1-self.input)*T.log(1-T.nnet.sigmoid(pre_sigmoid_nv))),axis=1) + # cross_entropy=T.mean( + # T.sum(self.input*T.log(T.nnet.sigmoid(pre_sigmoid_nv))+ + # (1-self.input)*T.log(1-T.nnet.sigmoid(pre_sigmoid_nv))), + # axis=1) + + cross_entropy = T.mean( + T.sum(self.input * T.log(T.nnet.sigmoid(pre_sigmoid_nv)) + + (1 - self.input) * T.log(1 - T.nnet.sigmoid(pre_sigmoid_nv)), + axis=1)) return cross_entropy