Skip to content

Commit

Permalink
fixed the lower bound; still a range bug that crops up occasionally
Browse files Browse the repository at this point in the history
  • Loading branch information
tonymugen committed Aug 7, 2020
1 parent b108eb8 commit ea7979a
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 24 deletions.
4 changes: 2 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

vbFit <- function(yVec, d, nPop, alphaPr, nuPr, tauPr, ppRatio) {
.Call(`_MuGaMix_vbFit`, yVec, d, nPop, alphaPr, nuPr, tauPr, ppRatio)
vbFit <- function(yVec, d, nPop, alphaPr, tauPr, ppRatio) {
.Call(`_MuGaMix_vbFit`, yVec, d, nPop, alphaPr, tauPr, ppRatio)
}

testLpostNR <- function(yVec, d, Npop, theta, P, ind, limit, incr) {
Expand Down
9 changes: 4 additions & 5 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,18 @@
using namespace Rcpp;

// vbFit
Rcpp::List vbFit(const std::vector<double>& yVec, const int32_t& d, const int32_t& nPop, const double& alphaPr, const double& nuPr, const double& tauPr, const double& ppRatio);
RcppExport SEXP _MuGaMix_vbFit(SEXP yVecSEXP, SEXP dSEXP, SEXP nPopSEXP, SEXP alphaPrSEXP, SEXP nuPrSEXP, SEXP tauPrSEXP, SEXP ppRatioSEXP) {
Rcpp::List vbFit(const std::vector<double>& yVec, const int32_t& d, const int32_t& nPop, const double& alphaPr, const double& tauPr, const double& ppRatio);
RcppExport SEXP _MuGaMix_vbFit(SEXP yVecSEXP, SEXP dSEXP, SEXP nPopSEXP, SEXP alphaPrSEXP, SEXP tauPrSEXP, SEXP ppRatioSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const std::vector<double>& >::type yVec(yVecSEXP);
Rcpp::traits::input_parameter< const int32_t& >::type d(dSEXP);
Rcpp::traits::input_parameter< const int32_t& >::type nPop(nPopSEXP);
Rcpp::traits::input_parameter< const double& >::type alphaPr(alphaPrSEXP);
Rcpp::traits::input_parameter< const double& >::type nuPr(nuPrSEXP);
Rcpp::traits::input_parameter< const double& >::type tauPr(tauPrSEXP);
Rcpp::traits::input_parameter< const double& >::type ppRatio(ppRatioSEXP);
rcpp_result_gen = Rcpp::wrap(vbFit(yVec, d, nPop, alphaPr, nuPr, tauPr, ppRatio));
rcpp_result_gen = Rcpp::wrap(vbFit(yVec, d, nPop, alphaPr, tauPr, ppRatio));
return rcpp_result_gen;
END_RCPP
}
Expand Down Expand Up @@ -307,7 +306,7 @@ END_RCPP
}

static const R_CallMethodDef CallEntries[] = {
{"_MuGaMix_vbFit", (DL_FUNC) &_MuGaMix_vbFit, 7},
{"_MuGaMix_vbFit", (DL_FUNC) &_MuGaMix_vbFit, 6},
{"_MuGaMix_testLpostNR", (DL_FUNC) &_MuGaMix_testLpostNR, 8},
{"_MuGaMix_testLpostP", (DL_FUNC) &_MuGaMix_testLpostP, 8},
{"_MuGaMix_testLpostLocNR", (DL_FUNC) &_MuGaMix_testLpostLocNR, 8},
Expand Down
4 changes: 2 additions & 2 deletions src/functions4R.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
#include "gmmvb.hpp"

//[[Rcpp::export(name="vbFit")]]
Rcpp::List vbFit(const std::vector<double> &yVec, const int32_t &d, const int32_t &nPop, const double &alphaPr, const double &nuPr, const double &tauPr, const double &ppRatio){
Rcpp::List vbFit(const std::vector<double> &yVec, const int32_t &d, const int32_t &nPop, const double &alphaPr, const double &tauPr, const double &ppRatio){
if (nPop <= 1) {
Rcpp::stop("Number of populations must be greater than 1");
}
Expand All @@ -56,7 +56,7 @@ Rcpp::List vbFit(const std::vector<double> &yVec, const int32_t &d, const int32_
std::vector<double> r;
std::vector<double> lBound;
try {
BayesicSpace::GmmVB vbModel(&yVec, ppRatio, nuPr, tauPr, alphaPr, static_cast<size_t>(nPop), static_cast<size_t>(d), &vPopMn, &vSm, &r, &Nm);
BayesicSpace::GmmVB vbModel(&yVec, ppRatio, tauPr, alphaPr, static_cast<size_t>(nPop), static_cast<size_t>(d), &vPopMn, &vSm, &r, &Nm);
vbModel.fitModel(lBound);
} catch (std::string problem) {
Rcpp::stop(problem);
Expand Down
37 changes: 24 additions & 13 deletions src/gmmvb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
*/

#include <algorithm>
#include <math.h>
#include <vector>
#include <string>
#include <cmath>
Expand All @@ -45,7 +46,7 @@ using std::numeric_limits;

const double GmmVB::lnMaxDbl_ = log( numeric_limits<double>::max() );

GmmVB::GmmVB(const vector<double> *yVec, const double &lambda0, const double &nu0, const double &tau0, const double alpha0, const size_t &nPop, const size_t &d, vector<double> *vPopMn, vector<double> *vSm, vector<double> *resp, vector<double> *Nm) : yVec_{yVec}, Nm_{Nm}, lambda0_{lambda0}, nu0_{nu0}, tau0_{tau0}, alpha0_{alpha0}, d_{static_cast<double>(d)}, nu0p2_{nu0 + 2.0}, nu0p1_{nu0 + 1.0}, dln2_{static_cast<double>(d)*0.6931471806}, maxIt_{200}, stoppingDiff_{1e-4} {
GmmVB::GmmVB(const vector<double> *yVec, const double &lambda0, const double &tau0, const double alpha0, const size_t &nPop, const size_t &d, vector<double> *vPopMn, vector<double> *vSm, vector<double> *resp, vector<double> *Nm) : yVec_{yVec}, Nm_{Nm}, lambda0_{lambda0}, nu0_{static_cast<double>(d)}, tau0_{tau0}, alpha0_{alpha0}, d_{static_cast<double>(d)}, nu0p2_{static_cast<double>(d) + 2.0}, nu0p1_{static_cast<double>(d) + 1.0}, dln2_{static_cast<double>(d)*0.6931471806}, maxIt_{200}, stoppingDiff_{1e-4} {
#ifndef PKG_DEBUG_OFF
if (yVec->size()%d) {
throw string("ERROR: Y dimensions not compatible with the number of traits supplied in the GmmVB constructor");
Expand Down Expand Up @@ -140,13 +141,25 @@ void GmmVB::fitModel(vector<double> &lowerBound) {
}
// scale the outputs as necessary
for (size_t m = 0; m < M_.getNrows(); m++) { // crossproduct into covariance
S_[m] /= (*Nm_)[m];
if ( (*Nm_)[m] > numeric_limits<double>::epsilon() ) {
S_[m] /= (*Nm_)[m];
}
}
double dm = 2.0;
for (size_t m = 2; m <= M_.getNrows(); m++) { // add ln K! as recommended in Bishop, page 484
lowerBound.back() += log(dm);
dm += 1.0;
}
// add the constant
const double n = static_cast<double>( Y_.getNrows() );
const double nPop = static_cast<double>( M_.getNrows() );
double sumLnGam = 0.0;
double k = 1.0;
for (size_t kk = 0; kk < Y_.getNcols(); kk++) {
sumLnGam += nuc_.lnGamma( 0.5*(nu0p1_ - k) );
k += 1.0;
}
lowerBound.back() += nuc_.lnGamma(nPop*alpha0_) - nPop*nuc_.lnGamma(alpha0_) - nuc_.lnGamma(nPop*alpha0_ + n) - nPop*sumLnGam + 0.5*d_*(nPop*(nu0_ + nu0_*log(tau0_) + log(lambda0_) + 2.6931471806) - n*0.144729886);
}

void GmmVB::eStep_(){
Expand Down Expand Up @@ -288,11 +301,14 @@ double GmmVB::getLowerBound_(){
const size_t N = Y_.getNrows();
double lwrBound = 0.0;
for (size_t m = 0; m < M_.getNrows(); m++) {
sumDiGam_[m] = 0.0;
sumDiGam_[m] = 0.0;
double sumLnGam = 0.0;
const double nu0p2Nm = nu0p2_ + (*Nm_)[m];
double k = 1.0;
for (size_t kk = 0; kk < d; kk++) {
sumDiGam_[m] += nuc_.digamma( (nu0p2Nm - k)/2.0 );
const double arg = (nu0p2Nm - k)/2.0;
sumDiGam_[m] += nuc_.digamma(arg);
sumLnGam += nuc_.lnGamma(arg);
k += 1.0;
}
// add the log-pseudo-determinant
Expand All @@ -306,14 +322,14 @@ double GmmVB::getLowerBound_(){
lnDet_[m] += log(l);
}
}
const double psiElmt = 0.5*(sumDiGam_[m] + dln2_ + lnDet_[m]);
const double psiElmt = 0.5*(sumDiGam_[m] + dln2_ + (nu0p2Nm + 1.0)*lnDet_[m]);
// add the matrix trace
double matTr = 0.0;
vector<double> vSS(d*d, 0.0);
MatrixView SS(&vSS, 0, d, d);
SigM_[m].symm('l', 'l', 1.0, S_[m], 0.0, SS);
for (size_t kk = 0; kk < d; kk++) {
matTr += SS.getElem(kk, kk);
matTr += SS.getElem(kk, kk) + tau0_*SigM_[m].getElem(kk, kk);
}
// a_m crossproduct
vector<double> aS(d, 0.0);
Expand All @@ -329,12 +345,7 @@ double GmmVB::getLowerBound_(){
const double nu0p1Nm = nu0p1_ + (*Nm_)[m];
const double lmNmSm = lambda0_ + (*Nm_)[m];
// the big first sum (in curly brackets in the model document), multiplied by N_m
const double bigSum = (*Nm_)[m]*( psiElmt + nu0p1Nm*(lambda0_*(0.5*lambda0_ - (*Nm_)[m])*aSaT/(lmNmSm*lmNmSm) - 0.5*matTr) );
// Sig_0 product trace
double sg0trace = 0.0;
for (size_t kk = 0; kk < d; kk++) {
sg0trace += tau0_*SigM_[m].getElem(kk, kk);
}
const double bigSum = psiElmt + nu0p1Nm*( (*Nm_)[m]*lambda0_*(0.5*lambda0_ - (*Nm_)[m])*aSaT/(lmNmSm*lmNmSm) - 0.5*matTr );
// r_jm ln(r_jm) sum
double rLnr = 0.0;
for (size_t i = 0; i < N; i++) {
Expand All @@ -344,7 +355,7 @@ double GmmVB::getLowerBound_(){
}
}
// put it all together (+= because we are summing across populations)
lwrBound += bigSum - 0.5*nu0p1Nm*sg0trace - rLnr + nuc_.lnGamma(alpha0_ + (*Nm_)[m]) - 0.5*d_*log(lmNmSm);
lwrBound += bigSum - rLnr + nuc_.lnGamma(alpha0_ + (*Nm_)[m]) - 0.5*d_*log(lmNmSm) + sumLnGam;
}
return lwrBound;
}
Expand Down
3 changes: 1 addition & 2 deletions src/gmmvb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ namespace BayesicSpace {
*
* \param[in] yVec pointer to vectorized data matrix
* \param[in] lambda0 prior precision scale factor
* \param[in] nu0 prior precision degrees of freedom
* \param[in] tau0 prior precision
* \param[in] alpha0 prior population size
* \param[in] nPop number of populations
Expand All @@ -64,7 +63,7 @@ namespace BayesicSpace {
* \param[in, out] resp pointer to vectorized matrix responsibilities
* \param[in, out] Nm pointer to vector of effective population sizes
*/
GmmVB(const vector<double> *yVec, const double &lambda0, const double &nu0, const double &tau0, const double alpha0, const size_t &nPop, const size_t &d, vector<double> *vPopMn, vector<double> *vSm, vector<double> *resp, vector<double> *Nm);
GmmVB(const vector<double> *yVec, const double &lambda0, const double &tau0, const double alpha0, const size_t &nPop, const size_t &d, vector<double> *vPopMn, vector<double> *vSm, vector<double> *resp, vector<double> *Nm);
/** \brief Destructor */
~GmmVB(){ yVec_ = nullptr; Nm_ = nullptr; };

Expand Down

0 comments on commit ea7979a

Please sign in to comment.