finished adding missing data matrix functions

tonymugen · Sep 25, 2020 · f9817ab · f9817ab
1 parent 0083bd3
commit f9817ab
Show file tree

Hide file tree

Showing 5 changed files with 210 additions and 28 deletions.
diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -1,8 +1,8 @@
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
-selectTraits <- function(yVec, pVec, d, nPops, pi) {
-    .Call(`_MuGaMix_selectFeatures`, yVec, pVec, d, nPops, pi)
+tstMiss <- function(inVec, nRow, nCol) {
+    .Call(`_MuGaMix_tstMiss`, inVec, nRow, nCol)
 }
 
 testLpostNR <- function(yVec, d, Npop, theta, P, ind, limit, incr) {

diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
@@ -5,18 +5,16 @@
 
 using namespace Rcpp;
 
-// selectFeatures
-Rcpp::List selectFeatures(const std::vector<double>& yVec, const std::vector<double>& pVec, const int32_t& d, const int32_t& nPops, const double& pi);
-RcppExport SEXP _MuGaMix_selectFeatures(SEXP yVecSEXP, SEXP pVecSEXP, SEXP dSEXP, SEXP nPopsSEXP, SEXP piSEXP) {
+// tstMiss
+Rcpp::List tstMiss(std::vector<double>& inVec, const int32_t& nRow, const int32_t& nCol);
+RcppExport SEXP _MuGaMix_tstMiss(SEXP inVecSEXP, SEXP nRowSEXP, SEXP nColSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< const std::vector<double>& >::type yVec(yVecSEXP);
-    Rcpp::traits::input_parameter< const std::vector<double>& >::type pVec(pVecSEXP);
-    Rcpp::traits::input_parameter< const int32_t& >::type d(dSEXP);
-    Rcpp::traits::input_parameter< const int32_t& >::type nPops(nPopsSEXP);
-    Rcpp::traits::input_parameter< const double& >::type pi(piSEXP);
-    rcpp_result_gen = Rcpp::wrap(selectFeatures(yVec, pVec, d, nPops, pi));
+    Rcpp::traits::input_parameter< std::vector<double>& >::type inVec(inVecSEXP);
+    Rcpp::traits::input_parameter< const int32_t& >::type nRow(nRowSEXP);
+    Rcpp::traits::input_parameter< const int32_t& >::type nCol(nColSEXP);
+    rcpp_result_gen = Rcpp::wrap(tstMiss(inVec, nRow, nCol));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -322,7 +320,7 @@ END_RCPP
 }
 
 static const R_CallMethodDef CallEntries[] = {
-    {"_MuGaMix_selectFeatures", (DL_FUNC) &_MuGaMix_selectFeatures, 5},
+    {"_MuGaMix_tstMiss", (DL_FUNC) &_MuGaMix_tstMiss, 3},
     {"_MuGaMix_testLpostNR", (DL_FUNC) &_MuGaMix_testLpostNR, 8},
     {"_MuGaMix_testLpostP", (DL_FUNC) &_MuGaMix_testLpostP, 8},
     {"_MuGaMix_testLpostLocNR", (DL_FUNC) &_MuGaMix_testLpostLocNR, 8},

diff --git a/src/functions4R.cpp b/src/functions4R.cpp
@@ -27,7 +27,6 @@
  *
  */
 
-#include <cstddef>
 #include <vector>
 #include <cmath>
 #include <algorithm>
@@ -37,11 +36,18 @@
 #include <Rcpp.h>
 
 #include "Rcpp/Named.h"
-#include "Rcpp/utils/tinyformat.h"
 #include "matrixView.hpp"
 #include "mumimo.hpp"
 #include "gmmvb.hpp"
 
+//[[Rcpp::export(name="tstMiss")]]
+Rcpp::List tstMiss(std::vector<double> &inVec, const int32_t &nRow, const int32_t &nCol){
+	BayesicSpace::MatrixView test(&inVec, 0, nRow, nCol);
+	std::vector<double> out;
+	test.rowSumsMiss(out);
+	return Rcpp::List::create(Rcpp::Named("out", out));
+}
+
 //[[Rcpp::export(name="testLpostNR")]]
 Rcpp::List testLpostNR(const std::vector<double> &yVec, const int32_t &d, const int32_t &Npop, std::vector<double> &theta, const std::vector<double> &P, const int32_t &ind, const double &limit, const double &incr){
 	BayesicSpace::MumiNR test(&yVec, &P, d, Npop, 1e-8, 2.5, 1e-8);

diff --git a/src/matrixView.cpp b/src/matrixView.cpp
@@ -1533,13 +1533,13 @@ void MatrixView::rowSums(const Index &ind, MatrixView &out) const{
 void MatrixView::rowSumsMiss(const Index &ind, MatrixView &out) const{
 #ifndef PKG_DEBUG_OFF
 	if (ind.size() != Ncol_) {
-		throw string("ERROR: Factor length not the same as number of columns in calling matrix in MatrixView::rowSums(const Index &, MatrixView &)");
+		throw string("ERROR: Factor length not the same as number of columns in calling matrix in MatrixView::rowSumsMiss(const Index &, MatrixView &)");
 	}
 	if ( (Nrow_ == 0) || (Ncol_ == 0) ) {
-		throw string("ERROR: one of the dimensions is zero MatrixView::rowSums(const Index &, MatrixView &)");
+		throw string("ERROR: one of the dimensions is zero MatrixView::rowSumsMiss(const Index &, MatrixView &)");
 	}
 	if ( (ind.groupNumber() != out.Ncol_) || (Nrow_ != out.Ncol_) ) {
-		throw string("ERROR: Index group number does not equal output column number in MatrixView::rowSums(const Index &, MatrixView &)");
+		throw string("ERROR: Index group number does not equal output column number in MatrixView::rowSumsMiss(const Index &, MatrixView &)");
 	}
 #endif
 	fill(out.data_->data()+out.idx_, out.data_->data() + out.idx_ + (out.Ncol_ * out.Nrow_), 0.0);
@@ -1617,13 +1617,13 @@ void MatrixView::rowMeans(const Index &ind, MatrixView &out) const{
 void MatrixView::rowMeansMiss(const Index &ind, MatrixView &out) const{
 #ifndef PKG_DEBUG_OFF
 	if (ind.size() != Ncol_) {
-		throw string("ERROR: Factor length not the same as number of columns in calling matrix in MatrixView::rowMeans(const Index &, MatrixView &)");
+		throw string("ERROR: Factor length not the same as number of columns in calling matrix in MatrixView::rowMeansMiss(const Index &, MatrixView &)");
 	}
 	if ( (Nrow_ == 0) || (Ncol_ == 0) ) {
-		throw string("ERROR: one of the dimensions is zero MatrixView::rowMeans(const Index &, MatrixView &)");
+		throw string("ERROR: one of the dimensions is zero MatrixView::rowMeansMiss(const Index &, MatrixView &)");
 	}
 	if ( (ind.groupNumber() != out.Ncol_) || (Nrow_ != out.Ncol_) ) {
-		throw string("ERROR: Index group number does not equal output column number in MatrixView::rowMeans(const Index &, MatrixView &)");
+		throw string("ERROR: Index group number does not equal output column number in MatrixView::rowMeansMiss(const Index &, MatrixView &)");
 	}
 #endif
 	fill(out.data_->data()+out.idx_, out.data_->data() + out.idx_ + (out.Ncol_ * out.Nrow_), 0.0);
@@ -1724,13 +1724,13 @@ void MatrixView::colSums(const Index &ind, MatrixView &out) const{
 void MatrixView::colSumsMiss(const Index &ind, MatrixView &out) const{
 #ifndef PKG_DEBUG_OFF
 	if (ind.size() != Nrow_) {
-		throw string("ERROR: Wrong total length of Index in MatrixView::colSums(const Index &, MatrixView &)");
+		throw string("ERROR: Wrong total length of Index in MatrixView::colSumsMiss(const Index &, MatrixView &)");
 	}
 	if ( (Nrow_ == 0) || (Ncol_ == 0) ) {
-		throw string("ERROR: one of the dimensions is zero MatrixView::colSums(const Index &, MatrixView &)");
+		throw string("ERROR: one of the dimensions is zero MatrixView::colSumsMiss(const Index &, MatrixView &)");
 	}
 	if ( (ind.groupNumber() != out.Nrow_) || (Ncol_ != out.Ncol_) ) {
-		throw string("ERROR: incorrect Index group number in MatrixView::colSums(const Index &, MatrixView &)");
+		throw string("ERROR: incorrect Index group number in MatrixView::colSumsMiss(const Index &, MatrixView &)");
 	}
 #endif
 
@@ -1807,13 +1807,13 @@ void MatrixView::colMeans(const Index &ind, MatrixView &out) const{
 void MatrixView::colMeansMiss(const Index &ind, MatrixView &out) const{
 #ifndef PKG_DEBUG_OFF
 	if (ind.size() != Nrow_) {
-		throw string("ERROR: Wrong total length of Index in MatrixView::colMeans(const Index &, MatrixView &)");
+		throw string("ERROR: Wrong total length of Index in MatrixView::colMeansMiss(const Index &, MatrixView &)");
 	}
 	if ( (Nrow_ == 0) || (Ncol_ == 0) ) {
-		throw string("ERROR: one of the dimensions is zero MatrixView::colMeans(const Index &, MatrixView &)");
+		throw string("ERROR: one of the dimensions is zero MatrixView::colMeansMiss(const Index &, MatrixView &)");
 	}
 	if ( (ind.groupNumber() != out.Nrow_) || (Ncol_ != out.Ncol_) ) {
-		throw string("ERROR: incorrect Index group number in MatrixView::colMeans(const Index &, MatrixView &)");
+		throw string("ERROR: incorrect Index group number in MatrixView::colMeansMiss(const Index &, MatrixView &)");
 	}
 #endif
 
@@ -2865,6 +2865,21 @@ void MatrixViewConst::rowSums(vector<double> &sums) const{
 		}
 	}
 }
+void MatrixViewConst::rowSumsMiss(vector<double> &sums) const{
+	if (sums.size() < Nrow_) {
+		sums.resize(Nrow_);
+	}
+	for (size_t iRow = 0; iRow < Nrow_; iRow++) {
+		sums[iRow] = 0.0; // in case something was in the vector passed to the function and resize did not erase it
+		for (size_t jCol = 0; jCol < Ncol_; jCol++) {
+			// not necessarily mumerically stable. Revisit later
+			const size_t ind = idx_ + Nrow_ * jCol + iRow;
+			if ( !isnan(data_->data()[ind]) ) {
+				sums[iRow] += data_->data()[ind];
+			}
+		}
+	}
+}
 void MatrixViewConst::rowSums(const Index &ind, MatrixView &out) const{
 #ifndef PKG_DEBUG_OFF
 	if (ind.size() != Ncol_) {
@@ -2889,6 +2904,34 @@ void MatrixViewConst::rowSums(const Index &ind, MatrixView &out) const{
 		}
 	}
 
+}
+void MatrixViewConst::rowSumsMiss(const Index &ind, MatrixView &out) const{
+#ifndef PKG_DEBUG_OFF
+	if (ind.size() != Ncol_) {
+		throw string("ERROR: Factor length not the same as number of columns in calling matrix in MatrixViewConst::rowSumsMiss(const Index &, MatrixView &)");
+	}
+	if ( (Nrow_ == 0) || (Ncol_ == 0) ) {
+		throw string("ERROR: one of the dimensions is zero MatrixViewConst::rowSumsMiss(const Index &, MatrixView &)");
+	}
+	if ( (ind.groupNumber() != out.Ncol_) || (Nrow_ != out.Ncol_) ) {
+		throw string("ERROR: Index group number does not equal output column number in MatrixViewConst::rowSumsMiss(const Index &, MatrixView &)");
+	}
+#endif
+	fill(out.data_->data()+out.idx_, out.data_->data() + out.idx_ + (out.Ncol_ * out.Nrow_), 0.0);
+
+	for (size_t newCol = 0; newCol < ind.groupNumber(); newCol++) {
+		// going through all the Index elements that correspond to the new column of out
+		for (auto &f : ind[newCol]) {
+			// summing the row elements of the current matrix with the group of columns
+			for (size_t iRow = 0; iRow < Nrow_; iRow++) {
+				const size_t addInd = idx_ + Nrow_ * f + iRow;
+				if ( !isnan(data_->data()[addInd]) ) {
+					out.data_->data()[out.idx_ + ind.groupNumber() * newCol + iRow] += data_->data()[addInd];
+				}
+			}
+		}
+	}
+
 }
 void MatrixViewConst::rowMeans(vector<double> &means) const{
 	if (means.size() < Nrow_) {
@@ -2902,6 +2945,23 @@ void MatrixViewConst::rowMeans(vector<double> &means) const{
 		}
 	}
 }
+void MatrixViewConst::rowMeansMiss(vector<double> &means) const{
+	if (means.size() < Nrow_) {
+		means.resize(Nrow_);
+	}
+	for (size_t iRow = 0; iRow < Nrow_; iRow++) {
+		means[iRow] = 0.0; // in case something was in the vector passed to the function and resize did not erase it
+		size_t jPres = 0;
+		for (size_t jCol = 0; jCol < Ncol_; jCol++) {
+			// numerically stable recursive mean calculation. GSL does it this way.
+			const size_t ind = idx_ + Nrow_ * jCol + iRow;
+			if ( !isnan(data_->data()[ind]) ){
+				means[iRow] += (data_->data()[ind] - means[iRow]) / static_cast<double>(jPres + 1);
+				jPres++;
+			}
+		}
+	}
+}
 void MatrixViewConst::rowMeans(const Index &ind, MatrixView &out) const{
 #ifndef PKG_DEBUG_OFF
 	if (ind.size() != Ncol_) {
@@ -2930,6 +2990,36 @@ void MatrixViewConst::rowMeans(const Index &ind, MatrixView &out) const{
 	}
 
 }
+void MatrixViewConst::rowMeansMiss(const Index &ind, MatrixView &out) const{
+#ifndef PKG_DEBUG_OFF
+	if (ind.size() != Ncol_) {
+		throw string("ERROR: Factor length not the same as number of columns in calling matrix in MatrixViewConst::rowMeansMiss(const Index &, MatrixView &)");
+	}
+	if ( (Nrow_ == 0) || (Ncol_ == 0) ) {
+		throw string("ERROR: one of the dimensions is zero MatrixViewConst::rowMeansMiss(const Index &, MatrixView &)");
+	}
+	if ( (ind.groupNumber() != out.Ncol_) || (Nrow_ != out.Ncol_) ) {
+		throw string("ERROR: Index group number does not equal output column number in MatrixViewConst::rowMeansMiss(const Index &, MatrixView &)");
+	}
+#endif
+	fill(out.data_->data()+out.idx_, out.data_->data() + out.idx_ + (out.Ncol_ * out.Nrow_), 0.0);
+
+	for (size_t newCol = 0; newCol < ind.groupNumber(); newCol++) {
+		// going through all the Index elements that correspond to the new column of out
+		vector<double> denom(Nrow_, 1.0);
+		for (auto &f : ind[newCol]) {
+			// summing the row elements of the current matrix with the group of columns
+			for (size_t iRow = 0; iRow < Nrow_; iRow++) {
+				const size_t curInd = out.idx_ + ind.groupNumber() * newCol + iRow;
+				const size_t datInd = idx_ + Nrow_ * f + iRow;
+				if ( !isnan(data_->data()[datInd]) ) {
+					out.data_->data()[curInd] += (data_->data()[datInd] - out.data_->data()[curInd]) / denom[iRow];
+					denom[iRow] += 1.0;
+				}
+			}
+		}
+	}
+}
 
 void MatrixViewConst::colExpand(const Index &ind, MatrixView &out) const{
 #ifndef PKG_DEBUG_OFF
@@ -2967,6 +3057,21 @@ void MatrixViewConst::colSums(vector<double> &sums) const{
 		}
 	}
 }
+void MatrixViewConst::colSumsMiss(vector<double> &sums) const{
+	if (sums.size() < Ncol_) {
+		sums.resize(Ncol_);
+	}
+	for (size_t jCol = 0; jCol < Ncol_; jCol++) {
+		sums[jCol] = 0.0; // in case something was in the vector passed to the function and resize did not erase it
+		for (size_t iRow = 0; iRow < Nrow_; iRow++) {
+			// not necessarily numerically stable. Revisit later
+			const size_t ind = idx_ + Nrow_ * jCol + iRow;
+			if ( !isnan(data_->data()[ind]) ) {
+				sums[jCol] += data_->data()[ind];
+			}
+		}
+	}
+}
 void MatrixViewConst::colSums(const Index &ind, MatrixView &out) const{
 #ifndef PKG_DEBUG_OFF
 	if (ind.size() != Nrow_) {
@@ -2993,6 +3098,34 @@ void MatrixViewConst::colSums(const Index &ind, MatrixView &out) const{
 	}
 
 }
+void MatrixViewConst::colSumsMiss(const Index &ind, MatrixView &out) const{
+#ifndef PKG_DEBUG_OFF
+	if (ind.size() != Nrow_) {
+		throw string("ERROR: Wrong total length of Index in MatrixViewConst::colSumsMiss(const Index &, MatrixView &)");
+	}
+	if ( (Nrow_ == 0) || (Ncol_ == 0) ) {
+		throw string("ERROR: one of the dimensions is zero MatrixViewConst::colSumsMiss(const Index &, MatrixView &)");
+	}
+	if ( (ind.groupNumber() != out.Nrow_) || (Ncol_ != out.Ncol_) ) {
+		throw string("ERROR: incorrect Index group number in MatrixViewConst::colSumsMiss(const Index &, MatrixView &)");
+	}
+#endif
+
+	fill(out.data_->data()+out.idx_, out.data_->data() + out.idx_ + (out.Ncol_ * out.Nrow_), 0.0);
+
+	for (size_t newRow = 0; newRow < ind.groupNumber(); newRow++) {
+		// going through all the rows of Z that correspond to the new row of M
+		for (auto &f : ind[newRow]) {
+			// summing the rows of M within the group defined by rows of Z
+			for (size_t jCol = 0; jCol < Ncol_; jCol++) {
+				const size_t curInd = idx_ + Nrow_ * jCol + f;
+				if ( !isnan(data_->data()[curInd]) ){
+					out.data_->data()[out.idx_ + ind.groupNumber() * jCol + newRow] += data_->data()[curInd];
+				}
+			}
+		}
+	}
+}
 void MatrixViewConst::colMeans(vector<double> &means) const{
 	if (means.size() < Ncol_) {
 		means.resize(Ncol_);
@@ -3005,6 +3138,23 @@ void MatrixViewConst::colMeans(vector<double> &means) const{
 		}
 	}
 }
+void MatrixViewConst::colMeansMiss(vector<double> &means) const{
+	if (means.size() < Ncol_) {
+		means.resize(Ncol_);
+	}
+	for (size_t jCol = 0; jCol < Ncol_; jCol++) {
+		means[jCol] = 0.0; // in case something was in the vector passed to the function and resize did not erase it
+		size_t iPres = 0;
+		for (size_t iRow = 0; iRow < Nrow_; iRow++) {
+			// numerically stable recursive mean calculation. GSL does it this way.
+			const size_t curInd  = idx_ + Nrow_ * jCol + iRow;
+			if ( !isnan(data_->data()[curInd]) ) {
+				means[jCol] += (data_->data()[curInd] - means[jCol]) / static_cast<double>(iPres + 1);
+				iPres++;
+			}
+		}
+	}
+}
 void MatrixViewConst::colMeans(const Index &ind, MatrixView &out) const{
 #ifndef PKG_DEBUG_OFF
 	if (ind.size() != Nrow_) {
@@ -3032,4 +3182,33 @@ void MatrixViewConst::colMeans(const Index &ind, MatrixView &out) const{
 	}
 
 }
+void MatrixViewConst::colMeansMiss(const Index &ind, MatrixView &out) const{
+#ifndef PKG_DEBUG_OFF
+	if (ind.size() != Nrow_) {
+		throw string("ERROR: Wrong total length of Index in MatrixViewConst::colMeansMiss(const Index &, MatrixView &)");
+	}
+	if ( (Nrow_ == 0) || (Ncol_ == 0) ) {
+		throw string("ERROR: one of the dimensions is zero MatrixViewConst::colMeansMiss(const Index &, MatrixView &)");
+	}
+	if ( (ind.groupNumber() != out.Nrow_) || (Ncol_ != out.Ncol_) ) {
+		throw string("ERROR: incorrect Index group number in MatrixViewConst::colMeansMiss(const Index &, MatrixView &)");
+	}
+#endif
+
+	fill(out.data_->data()+out.idx_, out.data_->data() + out.idx_ + (out.Ncol_ * out.Nrow_), 0.0);
+
+	for (size_t newRow = 0; newRow < ind.groupNumber(); newRow++) {
+		vector<double> denom(Ncol_, 1.0);
+		for (auto &f : ind[newRow]) {
+			for (size_t jCol = 0; jCol < Ncol_; jCol++) {
+				const size_t mnInd = out.idx_ + ind.groupNumber() * jCol + newRow;
+				const size_t dtInd = idx_ + Nrow_ * jCol + f;
+				if ( !isnan(data_->data()[dtInd]) ) {
+					out.data_->data()[mnInd] += (data_->data()[dtInd] - out.data_->data()[mnInd]) / denom[jCol];
+					denom[jCol] += 1.0;
+				}
+			}
+		}
+	}
+}
 
diff --git a/src/matrixView.hpp b/src/matrixView.hpp
@@ -1278,6 +1278,5 @@ namespace BayesicSpace {
 		size_t Ncol_;
 	};
 }
-
-
 #endif /* matrixView_hpp */
+
-Original file line number
+Diff line change
@@ Expand Up / @@ -1278,6 +1278,5 @@ namespace BayesicSpace { @@
     		size_t Ncol_;
     	};
     }
     #endif /* matrixView_hpp */