From a9d3e1416ebce7a8e2128b5bf43677df4f0b1bae Mon Sep 17 00:00:00 2001 From: dimalvovs Date: Thu, 26 Sep 2024 18:18:29 -0400 Subject: [PATCH] geneMatchR() could not handle big data - revert it --- R/projectR.R | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/R/projectR.R b/R/projectR.R index 38b87e2..3e8b9eb 100644 --- a/R/projectR.R +++ b/R/projectR.R @@ -86,18 +86,6 @@ setMethod("projectR",signature(data="dgCMatrix",loadings="matrix"),function( loadings<-loadings[,NP] } - #match genes in data sets - if(is.null(dataNames)){ - dataNames <- rownames(data) - } - if(is.null(loadingsNames)){ - loadingsNames <- rownames(loadings) - } - - dataM<-geneMatchR(data1=data, data2=loadings, data1Names=dataNames, data2Names=loadingsNames, merge=FALSE) - print(paste(as.character(dim(dataM[[2]])[1]),'row names matched between data and loadings')) - print(paste('Updated dimension of data:',as.character(paste(dim(dataM[[2]]), collapse = ' ')))) - print("dgCMatrix detected, projecting in chunks.") #columns of dgcMatrix are LHS for stats::lm, and columns of loadings are the #dense RHS (predictors). sometimes dgcMatrix is too big to fit RAM, so we @@ -113,11 +101,13 @@ setMethod("projectR",signature(data="dgCMatrix",loadings="matrix"),function( }) } #discard print statements projectR generates each time a chunk is called - invisible(capture.output( - projectionList <- lapply(chop(dataM[[2]]), function(i) { - projectR(as.matrix(dataM[[2]][,i]), dataM[[1]], full=full) + w <- invisible(capture.output( + projectionList <- lapply(chop(data), function(i) { + projectR(as.matrix(data[,i]), loadings, full=full) }) )) + #since chopping by columns, it's enough to print matching rows only once + print(w[1]) if(full==TRUE) { if(length(projectionList)==1) {#if only one chunk