Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

39 errors in projectr method for sparse matrix data #42

Merged
merged 13 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: projectR
Type: Package
Title: Functions for the projection of weights from PCA, CoGAPS, NMF,
correlation, and clustering
Version: 1.19.2
Version: 1.23.1
Author: Gaurav Sharma, Charles Shin, Jared Slosberg, Loyal Goff, Genevieve Stein-O'Brien
Authors@R: c(
person("Gaurav", "Sharma", role = c("aut")),
Expand All @@ -15,6 +15,7 @@ Authors@R: c(
Description: Functions for the projection of data into the spaces defined by PCA, CoGAPS, NMF, correlation, and clustering.
License: GPL (==2)
Imports:
SingleCellExperiment,
methods,
cluster,
stats,
Expand Down Expand Up @@ -50,7 +51,7 @@ Suggests:
SeuratObject
LazyData: TRUE
LazyDataCompression: gzip
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Encoding: UTF-8
VignetteBuilder: knitr
biocViews: FunctionalPrediction, GeneRegulation, BiologicalQuestion, Software
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ exportClasses(correlateR)
exportClasses(rotatoR)
import(MatrixModels)
import(RColorBrewer)
import(SingleCellExperiment)
import(cluster)
import(dplyr)
import(fgsea)
Expand Down
93 changes: 40 additions & 53 deletions R/projectR.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ setOldClass("prcomp")
#' @param NP vector of integers indicating which columns of loadings object to use. The default of NP=NA will use entire matrix.
#' @param full logical indicating whether to return the full model solution. By default only the new pattern object is returned.
#' @param model Optional arguements to choose method for projection
#' @param family VGAM family function for model fitting (default: "gaussianff")
#' @param bootstrapPval logical to indicate whether to generate p-values using bootstrap, not available for prcomp and rotatoR objects
#' @param bootIter number of bootstrap iterations, default = 1000
#' @rdname projectR-methods
Expand All @@ -21,13 +20,12 @@ setMethod("projectR",signature(data="matrix",loadings="matrix"),function(
loadingsNames = NULL, # a vector with names of loadings rows
NP=NA, # vector of integers indicating which columns of loadings object to use. The default of NP=NA will use entire matrix.
full=FALSE, # logical indicating whether to return the full model solution. By default only the new pattern object is returned.
family="gaussianff", # VGAM family function (default: "gaussianff")
bootstrapPval=FALSE, # logical to indicate whether to generate p-values using bootstrap
bootIter=1e3 # No of bootstrap iterations
){

ifelse(!is.na(NP),loadings<-loadings[,NP],loadings<-loadings)
#if(!is.na(NP)){loadings<-loadings[,NP]} was giving warning with subset of patterns

#match genes in data sets
if(is.null(dataNames)){
dataNames <- rownames(data)
Expand All @@ -45,16 +43,8 @@ setMethod("projectR",signature(data="matrix",loadings="matrix"),function(
projectionPatterns <- t(projection$coefficients)
projection.ts<-t(projection$coefficients/projection$stdev.unscaled/projection$sigma)

#projection<-vglm(dataM$data2 ~ 0 + dataM$data1,family=family)
#projectionPatterns<-coefvlm(projection,matrix.out=TRUE)

#For VGAM
#pval.matrix<-matrix(2*pnorm(-abs(summary(projection)@coef3[,3])),nrow=5,byrow=TRUE)

#For limma
pval.matrix<-2*pnorm(-abs(projection.ts))
#colnames(pval.matrix)<-colnames(projectionPatterns)
#rownames(pval.matrix)<-rownames(projectionPatterns)

if(bootstrapPval){
boots <- lapply(1:bootIter,function(x){
Expand All @@ -66,7 +56,6 @@ setMethod("projectR",signature(data="matrix",loadings="matrix"),function(
}

if(full & bootstrapPval){
#projectionFit <- list('projection'=projectionPatterns, 'fit'=projection,'pval'=pval.matrix)
projectionFit <- list('projection'=projectionPatterns, 'pval'=pval.matrix, 'bootstrapPval' = bootPval)
return(projectionFit)
} else if(full){
Expand All @@ -82,60 +71,61 @@ setMethod("projectR",signature(data="matrix",loadings="matrix"),function(
#' @param NP vector of integers indicating which columns of loadings object to use. The default of NP=NA will use entire matrix.
#' @param full logical indicating whether to return the full model solution. By default only the new pattern object is returned.
#' @param model Optional arguements to choose method for projection
#' @param family VGAM family function for model fitting (default: "gaussianff")
#' @rdname projectR-methods
#' @aliases projectR
setMethod("projectR",signature(data="dgCMatrix",loadings="matrix"),function(
data, # a dataset to be projected onto
loadings, # a matrix of continous values to be projected with unique rownames
dataNames = NULL, # a vector with names of data rows
loadingsNames = NULL, # a vector with names of loadings rows
NP=NA, # vector of integers indicating which columns of loadings object to use. The default of NP=NA will use entire matrix.
full=FALSE, # logical indicating whether to return the full model solution. By default only the new pattern object is returned.
family="gaussianff" # VGAM family function (default: "gaussianff")
NP=NULL, # vector of integers indicating which columns of loadings object to use. The default of NP=NA will use entire matrix.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

update comment to match new default

Copy link

@drbergman drbergman Nov 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also, it's noteworthy to me that the default for NP is NA above and NULL here. Not sure if it's desired. just noting it

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yup, because is.na(NP) was never going to work if NP is specified:

> NP <- c(1,3,5)
> if(!is.na(NP)) {print("it's a bug")}
Error in if (!is.na(NP)) { : the condition has length > 1

is.null works fine:

> is.null(NP)
[1] FALSE

see line 85 too - I changed the check from is.NA to is.null too

full=FALSE # logical indicating whether to return the full model solution. By default only the new pattern object is returned.
){

ifelse(!is.na(NP),loadings<-loadings[,NP],loadings<-loadings)
#if(!is.na(NP)){loadings<-loadings[,NP]} was giving warning with subset of patterns
#match genes in data sets
if(is.null(dataNames)){
dataNames <- rownames(data)
if(!is.null(NP)) {
loadings<-loadings[,NP]
}
if(is.null(loadingsNames)){
loadingsNames <- rownames(loadings)
}
dataM<-geneMatchR(data1=data, data2=loadings, data1Names=dataNames, data2Names=loadingsNames, merge=FALSE)
print(paste(as.character(dim(dataM[[2]])[1]),'row names matched between data and loadings'))
print(paste('Updated dimension of data:',as.character(paste(dim(dataM[[2]]), collapse = ' '))))
# do projection
Design <- model.matrix(~0 + dataM[[1]])
colnames(Design) <- colnames(dataM[[1]])
projection <- MatrixModels:::lm.fit.sparse(t(dataM[[2]]),dataM[[1]])
projectionPatterns <- t(projection$coefficients)
projection.ts<-t(projection$coefficients/projection$stdev.unscaled/projection$sigma)

#projection<-vglm(dataM$data2 ~ 0 + dataM$data1,family=family)
#projectionPatterns<-coefvlm(projection,matrix.out=TRUE)

#For VGAM
#pval.matrix<-matrix(2*pnorm(-abs(summary(projection)@coef3[,3])),nrow=5,byrow=TRUE)

#For limma
pval.matrix<-2*pnorm(-abs(projection.ts))
#colnames(pval.matrix)<-colnames(projectionPatterns)
#rownames(pval.matrix)<-rownames(projectionPatterns)
print("dgCMatrix detected, projecting in chunks.")
#columns of dgcMatrix are LHS for stats::lm, and columns of loadings are the
#dense RHS (predictors). sometimes dgcMatrix is too big to fit RAM, so we
#just fit chunks of lm models as supported by stats::lm/limma::lmFit

chop <- function(sparsematrix) {
coln <- ncol(sparsematrix)
bins <- seq(1, coln, by = 1000)
lapply(seq_along(bins), function(i) {
start <- bins[i]
end <- ifelse(i < length(bins), bins[i + 1] - 1, coln)
return(start:end)
})
}
#discard print statements projectR generates each time a chunk is called
w <- invisible(capture.output(
projectionList <- lapply(chop(data), function(i) {
projectR(as.matrix(data[,i]), loadings, full=full)
})
))
#since chopping by columns, it's enough to print matching rows only once
print(w[1])

if(full==TRUE){
#projectionFit <- list('projection'=projectionPatterns, 'fit'=projection,'pval'=pval.matrix)
projectionFit <- list('projection'=projectionPatterns, 'pval'=pval.matrix)
return(projectionFit)
if(full==TRUE) {
if(length(projectionList)==1) {#if only one chunk
res <- projectionList[[1]]
} else {
projectionFit <- lapply(projectionList, function(x) do.call(cbind, x))
res <- projectionFit
}
} else {
res <- do.call(cbind, projectionList)
}
else{return(projectionPatterns)}
return(res)
})


#######################################################################################################################################
#' @import limma
#' @import SingleCellExperiment
#' @importFrom NMF fcnnls
#' @examples
#' library("CoGAPS")
Expand All @@ -153,7 +143,6 @@ setMethod("projectR",signature(data="matrix",loadings="LinearEmbeddingMatrix"),f
NP=NA, # vector of integers indicating which columns of loadings object to use. The default of NP=NA will use entire matrix.
full=FALSE, # logical indicating whether to return the full model solution. By default only the new pattern object is returned.
model=NA, # optional arguements to choose method for projection
family="gaussianff", # VGAM family function (default: "gaussianff")
bootstrapPval=FALSE, # logical to indicate whether to generate p-values using bootstrap
bootIter=1e3 # No of bootstrap iterations
){
Expand Down Expand Up @@ -256,10 +245,8 @@ setMethod("projectR",signature(data="matrix",loadings="rotatoR"),function(
Eigenvalues<-eigen(cov(projectionPatterns))$values
PercentVariance<-round(Eigenvalues/sum(Eigenvalues) * 100, digits = 2)

#PercentVariance<-apply(projectionPatterns,2, function(x) 100*var(x)/sum(apply(p2P,2,var)))

projectionFit <- list(t(projectionPatterns), PercentVariance)
return(projectionFit)
projectionFit <- list(t(projectionPatterns), PercentVariance)
return(projectionFit)
}
else{return(t(projectionPatterns))}

Expand Down
9 changes: 2 additions & 7 deletions man/projectR-methods.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions tests/testthat/test_projectR.R
Original file line number Diff line number Diff line change
Expand Up @@ -200,4 +200,28 @@ test_that("results are correctly formatted for P value mode",{

})

test_that("projection works on sparse data matrix", {
dense <- as.matrix(p.RNAseq6l3c3t)
sparse <- as(p.RNAseq6l3c3t, "sparseMatrix")
loadings <- CR.RNAseq6l3c3t@featureLoadings

expect_true("dgCMatrix" %in% class(sparse))
expect_no_error(projectR(sparse, loadings))

pdense <- projectR(dense, loadings)
psparse <- projectR(sparse, loadings)
expect_identical(pdense, psparse)
})

test_that("projection works on sparse data matrix with full=TRUE", {
dense <- as.matrix(p.RNAseq6l3c3t)
sparse <- as(p.RNAseq6l3c3t, "sparseMatrix")
loadings <- CR.RNAseq6l3c3t@featureLoadings

expect_true("dgCMatrix" %in% class(sparse))
expect_no_error(projectR(sparse, loadings))

pdense <- projectR(dense, loadings, full=TRUE)
psparse <- projectR(sparse, loadings, full=TRUE)
expect_identical(pdense, psparse)
})
Loading