Skip to content

Commit

Permalink
Merge branch 'release/v0.0.3'
Browse files Browse the repository at this point in the history
  • Loading branch information
drisso committed May 3, 2016
2 parents 621ff1b + 180125b commit ac57693
Show file tree
Hide file tree
Showing 38 changed files with 1,277 additions and 1,167 deletions.
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
.Rproj.user
.Rhistory
.RData
vignettes/figure
*.Rproj
vignettes/R_cache
vignettes/R_figure
old_scripts
33 changes: 28 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,16 +1,39 @@
Package: scone
Version: 0.0.1
Version: 0.0.3
Title: Single Cell Overview of Normalized Expression data
Description: SCONE.
Description: scone is a package to compare and rank the performance of different normalization schemes in real single-cell RNA-seq datasets.
Authors@R: c(person("Michael", "Cole", email = "mbeloc@gmail.com",
role = c("aut", "cre", "cph")),
person("Davide", "Risso", email = "risso.davide@gmail.com",
role = c("aut")))
Author: Michael Cole [aut, cre, cph] and Davide Risso [aut]
Maintainer: Michael Cole <mbeloc@gmail.com>
Imports: BiocParallel, DESeq, EDASeq, MASS, RUVSeq, aroma.light, class,
diptest, edgeR, fpc, gplots, limma, matrixStats, mixtools, scde
Date: 02-06-2016
Date: 2016-02-14
License: Artistic-2.0
Depends:
R (>= 3.1)
Imports:
BiocParallel,
clusterCells,
DESeq,
EDASeq,
MASS,
RUVSeq,
aroma.light,
class,
diptest,
edgeR,
fpc,
gplots,
limma,
matrixStats,
mixtools,
scde
Suggests:
knitr,
rmarkdown,
testthat
VignetteBuilder: knitr
LazyLoad: yes
BugReports: https://github.com/epurdom/clusterExperiment/issues
RoxygenNote: 5.0.1
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@ export(FQ_FN_POS)
export(TMM_FN)
export(UQ_FN)
export(UQ_FN_POS)
export(biplot_colored)
export(estimate_ziber)
export(estimate_zinb)
export(factor_sample_filter)
export(impute_ziber_simp)
export(impute_zinb)
export(lm_adjust)
export(make_design)
Expand All @@ -23,9 +26,11 @@ importFrom(MASS,glm.nb)
importFrom(RUVSeq,RUVg)
importFrom(aroma.light,normalizeQuantileRank.matrix)
importFrom(class,knn)
importFrom(clusterCells,subsampleClustering)
importFrom(diptest,dip.test)
importFrom(edgeR,calcNormFactors)
importFrom(fpc,pamk)
importFrom(grDevices,colorRampPalette)
importFrom(limma,lmFit)
importFrom(matrixStats,rowMedians)
importFrom(mixtools,normalmixEM)
Expand Down
10 changes: 4 additions & 6 deletions R/SCONE_DEFAULTS.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ FQ_FN_POS = function(ei){
quant_mat = NULL
# Re-ordered Data Matrix
x_mat = NULL
print("Sorting Matrix...")
# For each sample:
for (i in 1:dim(ei)[2]){
# Sort data and replace zeroes with NA
Expand All @@ -79,8 +78,6 @@ FQ_FN_POS = function(ei){

# Vector form of quantile index matrix
quant_out = as.numeric(as.vector(quant_mat))
print("Complete.")
print("Spline Interpolation...")
# Interpolation Matrix (Values of all quantiles)
inter_mat = rep(0,length(quant_out))
ob_counts = rep(0,length(quant_out)) # Number of observations for averaging
Expand All @@ -95,8 +92,7 @@ FQ_FN_POS = function(ei){
inter[is.na(inter)] = 0
inter_mat = inter_mat + inter
}
print("Complete.")


# Average over the interpolated values from all samples
inter_mean = inter_mat/ob_counts

Expand All @@ -106,7 +102,9 @@ FQ_FN_POS = function(ei){
for (i in 1:dim(ei)[2]){
eo[,i] = rev(eo[,i])[order(order(ei[,i]))]
}
print("Finished!")

rownames(eo) = rownames(ei)
colnames(eo) = colnames(ei)
return(eo)
}

Expand Down
49 changes: 49 additions & 0 deletions R/biplot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
## When porting the package to S4, we can make this a biplot method

#' Another implementation of the biplot function
#'
#' Function to plot a biplot with no point labels and with points color-coded
#' according to a certain quantitative variable, for instance the rank of the
#' normalization performance or the expression of a certain gene.
#'
#' This function implements the biplot only for \code{\link[stats]{prcomp}}
#' objects. Eventually, we will turn this into an S4 method.
#'
#' @param x the result of a call to \code{\link[stats]{prcomp}}.
#' @param y the rank value that should be used to color the points.
#' @param choices which principal components to plot. Only 2D plots are
#' possible for now. Default to first two PCs.
#' @param expand numeric value used to adjust the spread of the arrows relative
#' to the points.
#' @param ... passed to plot.
#'
#' @importFrom grDevices colorRampPalette
#' @export
#'
#' @examples
#' mat <- matrix(rnorm(1000), ncol=10)
#' colnames(mat) <- paste("X", 1:ncol(mat), sep="")
#'
#' pc <- prcomp(mat)
#'
#' biplot_colored(pc, rank(pc$x[,1]))
#'
biplot_colored <- function(x, y, choices=1:2, expand=1, ...) {

lam <- x$sdev[choices]
n <- NROW(x$x)
lam <- lam * sqrt(n)

xx <- t(t(x$x[, choices])/lam)
yy <- t(t(x$rotation[, choices]) * lam)

ratio <- max(range(yy)/range(xx))/expand

cols <- rev(colorRampPalette(c("black","navyblue","mediumblue","dodgerblue3","aquamarine4","green4","yellowgreen","yellow"))(length(y)))[y]
plot(xx, pch=19, col=cols, ...)

labs <- rownames(yy)

text(yy/ratio, labels=labs, col=2)
arrows(0, 0, yy[, 1] * 0.8/ratio, yy[, 2] * 0.8/ratio, col = 2, length = 0.1)
}
48 changes: 48 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#' Positive and negative control genes
#'
#' Sets of positive and negative control genes, useful for input in
#' \code{\link{scone}}.
#'
#' These gene sets can be used as negative or positive controls, either for RUV
#' normalization or for evaluation and ranking of the normalization strategies.
#'
#' @details The datasets are in the form of \code{data.frame}, with at least one
#' column containing the gene symbols and optionally a second column
#' containing additional information (such as cortical layer or cell cycle
#' phase).
#'
#' @details Note that the gene symbols follow the mouse conventions (i.e.,
#' capitalized) or the human conventions (i.e, all upper-case), based on the
#' original pubblication. One can use the \code{\link[base]{toupper}},
#' \code{\link[base]{tolower}}, and \code{\link[tools]{toTitleCase}} functions
#' to convert the symbols.
#'
#' @details The genes in \code{cortical_markers} are from Figure 3 of Molyneaux
#' et al. (2007). The genes in \code{housekeeping} are from Eisenberg and
#' Levanon (2003) and in \code{housekeeping_revised} are from Eisenberg and
#' Levanon (2013). The genes in \code{cellcycle_genes} are adapted from
#' Kowalczyk et al. (2015).
#'
#' @references Molyneaux, B.J., Arlotta, P., Menezes, J.R. and Macklis, J.D..
#' Neuronal subtype specification in the cerebral cortex. Nature Reviews
#' Neuroscience, 2007, 8(6):427-437.
#' @references Eisenberg E, Levanon EY. Human housekeeping genes are compact.
#' Trends in Genetics, 2003, 19(7):362-5.
#' @references Eisenberg E, Levanon EY. Human housekeeping genes, revisited.
#' Trends in Genetics, 2013, 29(10):569-74.
#' @references Kowalczyk, M.S., Tirosh, I., Heckl, D., Rao, T.N., Dixit, A.,
#' Haas, B.J., Schneider, R.K., Wagers, A.J., Ebert, B.L. and Regev, A.
#' Single-cell RNA-seq reveals changes in cell cycle and differentiation
#' programs upon aging of hematopoietic stem cells. Genome research, 2015.
#'
#' @name control_genes
#'
#' @docType data
#' @aliases cortical_markers housekeeping housekeeping_revised cellcycle_genes
#'
#' @examples
#' data(housekeeping)
#' data(housekeeping_revised)
#' data(cellcycle_genes)
#' data(cortical_markers)
NULL
54 changes: 27 additions & 27 deletions R/helper.R
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
#' Internal Function
#'
#'
#' This function is used internally in scone to parse the variables used to generate the design matrices.
#'
#'
#' @param pars character. A vector of parameters corresponding to a row of params.
#' @param bio factor. The biological factor of interest.
#' @param batch factor. The known batch effects.
#' @param ruv_factors list. A list containing the factors of unwanted variation.
#' @param qc matrix. The principal components of the QC metrics.
#'
#'
#' @return A list with the variables to be passed to make_design.
parse_row <- function(pars, bio, batch, ruv_factors, qc) {
sc_name <- paste(pars[1:2], collapse="_")

W <- out_bio <- out_batch <- NULL

if(pars[3]!="no_uv") {
parsed <- strsplit(as.character(pars[3]), "=")[[1]]
if(grepl("ruv", parsed[1])) {
Expand All @@ -22,34 +22,34 @@ parse_row <- function(pars, bio, batch, ruv_factors, qc) {
W <- qc[,1:as.numeric(parsed[2])]
}
}

if(pars[4]=="bio") {
out_bio <- bio
}

if(pars[5]=="batch") {
out_batch <- batch
}

return(list(sc_name=sc_name, W=W, bio=out_bio, batch=out_batch))
}

#' Function to make a design matrix
#'
#'
#' This function is useful to create a design matrix, when the covariates are two (possibly nested) factors
#' and one or more continuous variables.
#'
#'
#' @details If nested=TRUE a nested design is used, i.e., the batch variable is assumed to be nested within
#' the bio variable. Here, nested means that each batch is made of observations from only one level of bio,
#' while each level of bio may contain multiple batches.
#'
#'
#' @export
#'
#'
#' @param bio factor. The biological factor of interest.
#' @param batch factor. The known batch effects.
#' @param W numeric. Either a vector or matrix containing one or more continuous covariates (e.g. RUV factors).
#' @param nested logical. Whether or not to consider a nested design (see details).
#'
#'
#' @return The design matrix.
make_design <- function(bio, batch, W, nested=FALSE) {
if(nested & (is.null(bio) | is.null(batch))) {
Expand All @@ -65,7 +65,7 @@ make_design <- function(bio, batch, W, nested=FALSE) {
stop("batch must be a factor.")
}
}

f <- "~ 1"
if(!is.null(bio)) {
f <- paste(f, "bio", sep="+")
Expand All @@ -76,12 +76,12 @@ make_design <- function(bio, batch, W, nested=FALSE) {
if(!is.null(W)) {
f <- paste(f, "W", sep="+")
}

if(is.null(bio) & is.null(batch) & is.null(W)) {
return(NULL)
} else if (!is.null(bio) & !is.null(batch) & nested) {
n_vec <- tapply(batch, bio, function(x) nlevels(droplevels(x)))

mat = matrix(0,nrow = sum(n_vec),ncol = sum(n_vec - 1))
xi = 1
yi = 1
Expand All @@ -96,25 +96,25 @@ make_design <- function(bio, batch, W, nested=FALSE) {
xi = xi + 1
}
}
return(model.matrix(as.formula(f), contrasts=list(bio=contr.sum, batch=mat)))

return(model.matrix(as.formula(f), contrasts=list(bio=contr.sum, batch=mat)))
} else {
return(model.matrix(as.formula(f)))
return(model.matrix(as.formula(f)))
}
}

#' Function to perform linear batch effect correction
#'
#'
#' Given a matrix with log expression values and a design matrix, this function fits a linear model
#' and removes the effects of the batch factor as well as of the linear variables encoded in W.
#'
#'
#' @details The function assumes that the columns of the design matrix corresponding to the variable
#' for which expression needs to be adjusted, start with either the word "batch" or the letter "W" (case sensitive).
#' Any other covariate (including the intercept) is kept.
#'
#'
#' @importFrom limma lmFit
#' @export
#'
#'
#' @param log_expr matrix. The log gene expression (genes in row, samples in columns).
#' @param design_mat matrix. The design matrix (usually the result of make_design).
#' @param batch factor. A factor with the batch information.
Expand All @@ -125,13 +125,13 @@ lm_adjust <- function(log_expr, design_mat, batch=NULL, weights=NULL) {

uvind <- grep("^W", colnames(design_mat))
bind <- grep("^batch", colnames(design_mat))

if(length(uvind)) {
uv_term <- t(design_mat[,uvind] %*% t(lm_object$coefficients[,uvind]))
} else {
uv_term <- 0
}

if(length(bind)) {
if(is.character(attr(design_mat,"contrasts")$batch)) {
contr <- get(attr(design_mat,"contrasts")$batch)(nlevels(batch))
Expand All @@ -142,6 +142,6 @@ lm_adjust <- function(log_expr, design_mat, batch=NULL, weights=NULL) {
} else {
batch_term <- 0
}
log_norm <- log_expr - batch_term - uv_term

return(log_expr - batch_term - uv_term)
}
Loading

0 comments on commit ac57693

Please sign in to comment.