Skip to content

Commit

Permalink
started on variational Bayes
Browse files Browse the repository at this point in the history
  • Loading branch information
tonymugen committed Jul 29, 2020
1 parent 763221a commit ab9c3c5
Show file tree
Hide file tree
Showing 5 changed files with 357 additions and 5 deletions.
25 changes: 24 additions & 1 deletion src/functions4R.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,36 @@
#include <vector>
#include <cmath>
#include <algorithm>

#include <string>
#include <limits>

#include <Rcpp.h>
#include <Rcpp/Named.h>
#include <Rcpp/vector/instantiation.h>
#include <Rcpp/exceptions/cpp11/exceptions.h>
#include <Rcpp/utils/tinyformat.h>

#include "mumimo.hpp"
#include "gmmvb.hpp"

//[[Rcpp::export(name="vbFit")]]
Rcpp::List vbFit(const std::vector<double> &yVec, const int32_t &d, const int32_t &nPop, const double &alphaPr, const double &nuPr, const double &tauPr, const double &ppRatio){
if (nPop <= 1) {
Rcpp::stop("Number of populations must be greater than 1");
}
if (d <= 0) {
Rcpp::stop("Number of traits must be non-negative");
}
std::vector<double> theta;
std::vector<double> r;
std::vector<double> lBound;
try {
BayesicSpace::GmmVB(&yVec, ppRatio, nuPr, tauPr, alphaPr, static_cast<size_t>(nPop), static_cast<size_t>(d), &theta, &r);
} catch (std::string problem) {
Rcpp::stop(problem);
}
return Rcpp::List::create(Rcpp::Named("theta", theta), Rcpp::Named("responsibilities", r), Rcpp::Named("lowerBound", lBound));
}

//[[Rcpp::export(name="testLpostNR")]]
Rcpp::List testLpostNR(const std::vector<double> &yVec, const int32_t &d, const int32_t &Npop, std::vector<double> &theta, const std::vector<double> &P, const int32_t &ind, const double &limit, const double &incr){
Expand Down
168 changes: 168 additions & 0 deletions src/gmmvb.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
/*
* Copyright (c) 2020 Anthony J. Greenberg
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
* IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/

/// Variational inference of Gaussian mixture models
/** \file
* \author Anthony J. Greenberg
* \copyright Copyright (c) 2020 Anthony J. Greenberg
* \version 1.0
*
* Implementation of variational Bayes inference of Gaussian mixture models.
*
*/

#include <vector>
#include <string>

#include "gmmvb.hpp"
#include "matrixView.hpp"
#include "utilities.hpp"
#include "index.hpp"

using namespace BayesicSpace;
using std::vector;
using std::string;


GmmVB::GmmVB(const vector<double> *yVec, const double &lambda0, const double &nu0, const double &tau0, const double alpha0, const size_t &nPop, const size_t &d, vector<double> *theta, vector<double> *resp) : lambda0_{lambda0}, nu0_{nu0}, tau0_{tau0}, alpha0_{alpha0}, d_{static_cast<double>(d)}, nu0p2_{nu0 + 2.0}, nu0p1_{nu0 + 1.0}, nu0mdm1_{nu0 - d_ - 1.0}, maxIt_{100}, stoppingDiff_{1e-3} {
#ifndef PKG_DEBUG_OFF
if (yVec->size()%d) {
throw string("ERROR: Y dimensions not compatible with the number of traits supplied in the GmmVB constructor");
}
#endif
const size_t n = yVec->size()/d;
const size_t dSq = d*d;

// Set up correct dimensions
const size_t thetaDim = nPop*(d + dSq);
if (theta->size() != thetaDim) {
theta->clear();
theta->resize(thetaDim, 0.0);
}
const size_t rDim = nPop*n;
if (resp->size() != rDim) {
resp->clear();
resp->resize(rDim, 0.0);
}

// Set up the matrix views
Y_ = MatrixViewConst(yVec, 0, n, d);
R_ = MatrixView(resp, 0, n, nPop);
M_ = MatrixView(theta, 0, nPop, d);

size_t sigInd = nPop*d;
for (size_t m = 0; m < nPop; m++) {
Sinv_.push_back( MatrixView(theta, sigInd, d, d) );
sigInd += dSq;
}

// Initialize values with k-means
Index popInd(nPop);
kMeans_(Y_, nPop, 50, popInd, M_);
for (size_t m = 0; m < nPop; m++) {
for (size_t iRow = 0; iRow < n; iRow++) {
if (popInd.groupID(iRow) == m){
R_.setElem(iRow, m, 0.99);
} else {
R_.setElem( iRow, m, 0.01/static_cast<double>(nPop - 1) );
}
}
}

}

double GmmVB::rowDistance_(const MatrixViewConst &m1, const size_t &row1, const MatrixViewConst &m2, const size_t &row2){
#ifndef PKG_DEBUG_OFF
if ( m1.getNcols() != m2.getNcols() ) {
throw string("ERROR: m1 and m2 matrices must have the same number of columns in GmmVB::rowDist_()");
}
if ( row1+1 > m1.getNrows() ) {
throw string("ERROR: row1 index out of bounds in GmmVB::rowDist_()");
}
if ( row2+1 > m2.getNrows() ) {
throw string("ERROR: row2 index out of bounds in GmmVB::rowDist_()");
}
#endif
double dist = 0.0;
for (size_t jCol = 0; jCol < m1.getNcols(); jCol++) {
double diff = m1.getElem(row1, jCol) - m2.getElem(row2, jCol);
dist += diff*diff;
}
return sqrt(dist);
}

void GmmVB::kMeans_(const MatrixViewConst &X, const size_t &Kclust, const uint32_t &maxIt, Index &x2m, MatrixView &M){
#ifndef PKG_DEBUG_OFF
if (M.getNrows() != Kclust) {
throw string("ERROR: Matrix of means must have one row per cluster in GmmVB::kMeans_()");
}
if ( X.getNcols() != M.getNcols() ) {
throw string("ERROR: Matrix of observations must have the same number of columns as the matrix of means in GmmVB::kMeans_()");
}
if ( M.getNrows() != x2m.groupNumber() ) {
throw string("ERROR: observation to cluster index must be the same number of groups as the number of populations in GmmVB::kMeans_()");
}
#endif
// initialize M with a random pick of X rows (the MacQueen 1967 method)
size_t curXind = 0;
size_t curMind = 0;
double N = static_cast<double>( X.getNrows() ); // # of remaining rows
double n = static_cast<double>(Kclust); // # of clusters to be picked
while( curMind < M.getNrows() ){
curXind += rng_.vitter(n, N);
for (size_t jCol = 0; jCol < X.getNcols(); jCol++) {
M.setElem( curMind, jCol, X.getElem(curXind, jCol) );
}
n = n - 1.0;
N = N - static_cast<double>(curXind) + 1.0;
curMind++;
}
// Iterate the k-means algorithm
vector<size_t> sPrevious; // previous cluster assignment vector
vector<size_t> sNew(X.getNrows(), 0); // new cluster assignment vector
for (uint32_t i = 0; i < maxIt; i++) {
// save the previous S vector
sPrevious = sNew;
// assign cluster IDs according to minimal distance
for (size_t iRow = 0; iRow < X.getNrows(); iRow++) {
sNew[iRow] = 0;
double dist = rowDistance_(X, iRow, M, 0);
for (size_t iCl = 1; iCl < M.getNrows(); iCl++) {
double curDist = rowDistance_(X, iRow, M, iCl);
if (dist > curDist) {
sNew[iRow] = iCl;
dist = curDist;
}
}
}
x2m.update(sNew);
// recalculate cluster means
X.colMeans(x2m, M);
// calculate the magnitude of cluster assignment change
double nDiff = 0.0;
for (size_t i = 0; i < sNew.size(); i++) {
if (sNew[i] != sPrevious[i] ) {
nDiff++;
}
}
if ( ( nDiff/static_cast<double>( X.getNrows() ) ) <= 0.1 ) { // fewer than 10% of assignments changed
break;
}
}
}
161 changes: 161 additions & 0 deletions src/gmmvb.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
/*
* Copyright (c) 2020 Anthony J. Greenberg
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
* IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/

/// Variational inference of Gaussian mixture models
/** \file
* \author Anthony J. Greenberg
* \copyright Copyright (c) 2020 Anthony J. Greenberg
* \version 1.0
*
* Class definition and interface documentation for variational Bayes inference of Gaussian mixture models.
*
*/
#ifndef gmmvb_hpp
#define gmmvb_hpp

#include <vector>

#include "matrixView.hpp"
#include "utilities.hpp"
#include "random.hpp"
#include "index.hpp"

using std::vector;

namespace BayesicSpace {

/** \brief Variational Bayes class
*
* Implements Gaussian mixture model fit using variational Bayes
*/
class GmmVB {
public:
/** \brief Default constructor */
GmmVB() : lambda0_{0.0}, nu0_{0.0}, tau0_{0.0}, alpha0_{0.0}, d_{0.0}, nu0p2_{0.0}, nu0p1_{0.0}, nu0mdm1_{0.0}, maxIt_{0}, stoppingDiff_{0.0} {};
/** \brief Constructor
*
* The vectorized matrices must be in the column major format (as in R and FORTRAN). The `theta` vector must have the vectorized matrix of population means first, then each full population concentration matrix.
*
* \param[in] yVec pointer to vectorized data matrix
* \param[in] lambda0 prior precision scale factor
* \param[in] nu0 prior precision degrees of freedom
* \param[in] tau0 prior precision
* \param[in] alpha0 prior population size
* \param[in] nPop number of populations
* \param[in] d number of traits
* \param[in] theta pointer to vector of model parameters
* \param[in] resp pointer to vectorized matrix responsibilities
*/
GmmVB(const vector<double> *yVec, const double &lambda0, const double &nu0, const double &tau0, const double alpha0, const size_t &nPop, const size_t &d, vector<double> *theta, vector<double> *resp);
/** \brief Destructor */
~GmmVB(){};

/** \brief Copy constructor (deleted) */
GmmVB(const GmmVB &in) = delete;
/** \brief Copy assignment (deleted) */
GmmVB& operator=(const GmmVB &in) = delete;
/** \brief Move constructor
*
* \param[in] in object to move
*/
GmmVB(GmmVB &&in);
/** \brief Move assignment operator
*
* \param[in] in object to be moved
* \return target object
*/
GmmVB& operator=(GmmVB &&in);

/** \brief Fit model
*
* Fits the model, returning the lower bound values for each step.
*
* \param[out] lowerBound vector of lower bounds
*/
void fitModel(vector<double> &lowerBound) const;
private:
/** \brief Matrix view of the data */
MatrixViewConst Y_;
/** \brief Populaiton means matrix view */
MatrixView M_;
/** \brief Vector of inverse covariance matrix views */
vector<MatrixView> Sinv_;
/** \brief Matrix view of responsibilities */
MatrixView R_;

// constants
/** \brief Prior covariance scale factor */
const double lambda0_;
/** \brief Prior precision degrees of freedom */
const double nu0_;
/** \brief Prior precision */
const double tau0_;
/** \brief Prior population size */
const double alpha0_;
/** \brief Double version of the trait number */
const double d_;
/** \brief nu_0 + 2 */
const double nu0p2_;
/** \brief nu_0 + 1 */
const double nu0p1_;
/** \brief nu_0 - d - 1 */
const double nu0mdm1_;
/** \brief Maximum number of iterations */
const uint16_t maxIt_;
/** \brief Stopping criterion */
const double stoppingDiff_;

// Utilities
/** \brief Numerical utilities */
NumerUtil nuc_;
/** \brief Random numbers */
RanDraw rng_;

// Private functions
/** \brief The E-step */
void eStep_() const;
/** \brief The M-step
*
* \return variational lower bound
*/
double mStep_() const;
/** \brief Euclidean distance between matrix rows
*
* \param[in] m1 first matrix
* \param[in] row1 index of the first matrix row
* \param[in] m2 second matrix
* \param[in] row2 index of the second matrix row
* \return euclidean distance between the rows
*/
double rowDistance_(const MatrixViewConst &m1, const size_t &row1, const MatrixViewConst &m2, const size_t &row2);
/** \brief K-means clustering
*
* Performs k-means clustering on a matrix of values. Each row of the input matrix is an item with observed values in columns.
*
* \param[in] X matrix of observations to be clustered
* \param[in] Kclust number of clusters
* \param[in] maxIt maximum number of iterations
* \param[out] x2m `Index` relating clusters to values
* \param[out] M matrix of cluster means (clusters in rows)
*/
void kMeans_(const MatrixViewConst &X, const size_t &Kclust, const uint32_t &maxIt, Index &x2m, MatrixView &M);
};
}

#endif /* gmmvb_hpp */
4 changes: 2 additions & 2 deletions src/mumimo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/

/// Multiplicative mixture models
/// Gaussian mixture models
/** \file
* \author Anthony J. Greenberg
* \copyright Copyright (c) 2019 Anthony J. Greenberg
* \version 1.0
*
* Class implementation to generate Markov chains for inference from multiplicative Gaussian mixture models. Dual-averaging NUTS and Metropolis samplers for parameters groups are included within a Gibbs sampler.
* Class implementation to generate Markov chains for inference of Gaussian mixture models. Dual-averaging NUTS and Metropolis samplers for parameters groups are included within a Gibbs sampler.
*
*/

Expand Down
Loading

0 comments on commit ab9c3c5

Please sign in to comment.