Skip to content

Commit

Permalink
Merge branch 'release/0.0.6'
Browse files Browse the repository at this point in the history
  • Loading branch information
drisso committed Jul 22, 2016
2 parents ee405e9 + 1ca23e3 commit c7b195b
Show file tree
Hide file tree
Showing 26 changed files with 901 additions and 408 deletions.
6 changes: 5 additions & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,12 @@ README[.]md
^[.]devel
^[.]test
^[.]check
.Rhistory
R/.Rhistory

#----------------------------
# Temp scripts
#----------------------------
^old_scripts/*
^old_scripts/*
^.*\.Rproj$
^\.Rproj\.user$
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ vignettes/figure
*.Rproj
vignettes/R_cache
vignettes/R_figure
old_scripts
old_scripts
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ language: r
cache: packages

# R versions to be tested on
r:
r:
- bioc-release
- bioc-devel

Expand Down
27 changes: 16 additions & 11 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,39 +1,44 @@
Package: scone
Version: 0.0.5
Version: 0.0.6
Title: Single Cell Overview of Normalized Expression data
Description: scone is a package to compare and rank the performance of different normalization schemes in real single-cell RNA-seq datasets.
Authors@R: c(person("Michael", "Cole", email = "mbeloc@gmail.com",
role = c("aut", "cre", "cph")),
person("Davide", "Risso", email = "risso.davide@gmail.com",
role = c("aut")))
Author: Michael Cole [aut, cre, cph], Davide Risso [aut]
role = c("aut", "cph")))
Author: Michael Cole [aut, cre, cph], Davide Risso [aut, cph]
Maintainer: Michael Cole <mbeloc@gmail.com>
Date: 2016-05-12
Date: 2016-07-22
License: Artistic-2.0
Depends:
R (>= 3.3)
Imports:
aroma.light,
BiocParallel,
class,
cluster,
DESeq,
EDASeq,
MASS,
RUVSeq,
aroma.light,
class,
diptest,
EDASeq,
edgeR,
fpc,
gplots,
grDevices,
limma,
MASS,
matrixStats,
mixtools,
grDevices
grDevices,
boot,
shiny,
miniUI,
rhdf5,
RUVSeq
Suggests:
knitr,
rmarkdown,
testthat
VignetteBuilder: knitr
LazyLoad: yes
BugReports: https://github.com/epurdom/clusterExperiment/issues
BugReports: https://github.com/YosefLab/scone/issues
RoxygenNote: 5.0.1
23 changes: 22 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,13 @@ export(TMM_FN)
export(UQ_FN)
export(UQ_FN_POS)
export(biplot_colored)
export(biplot_interactive)
export(estimate_ziber)
export(estimate_zinb)
export(factor_sample_filter)
export(impute_ziber_simp)
export(get_normalized)
export(impute_expectation)
export(impute_null)
export(impute_zinb)
export(lm_adjust)
export(make_design)
Expand All @@ -26,6 +29,8 @@ importFrom(EDASeq,betweenLaneNormalization)
importFrom(MASS,glm.nb)
importFrom(RUVSeq,RUVg)
importFrom(aroma.light,normalizeQuantileRank.matrix)
importFrom(boot,inv.logit)
importFrom(boot,logit)
importFrom(class,knn)
importFrom(cluster,silhouette)
importFrom(diptest,dip.test)
Expand All @@ -43,7 +48,23 @@ importFrom(limma,lmFit)
importFrom(matrixStats,colIQRs)
importFrom(matrixStats,colMedians)
importFrom(matrixStats,rowMedians)
importFrom(miniUI,gadgetTitleBar)
importFrom(miniUI,miniContentPanel)
importFrom(miniUI,miniPage)
importFrom(mixtools,normalmixEM)
importFrom(rhdf5,h5createFile)
importFrom(rhdf5,h5ls)
importFrom(rhdf5,h5read)
importFrom(rhdf5,h5write)
importFrom(rhdf5,h5write.default)
importFrom(shiny,brushedPoints)
importFrom(shiny,observeEvent)
importFrom(shiny,plotOutput)
importFrom(shiny,renderPlot)
importFrom(shiny,renderText)
importFrom(shiny,runGadget)
importFrom(shiny,stopApp)
importFrom(shiny,verbatimTextOutput)
importFrom(stats,approx)
importFrom(stats,as.formula)
importFrom(stats,binomial)
Expand Down
72 changes: 72 additions & 0 deletions R/biplot_interactive.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#' Interactive biplot
#'
#' This is a wrapper around \code{\link{biplot_colored}}, which creates a shiny
#' gadget to allow the user to select specific points in the graph.
#'
#' @details Since this is based on the shiny gadget feature, it will not work in
#' static documents, such as vignettes or markdown / knitr documents.
#' See \code{biplot_colored} for more details on the internals.
#'
#' @param data a data.frame containing the data to be plotted.
#' @param scores a numeric vector used to color the points.
#'
#' @importFrom miniUI gadgetTitleBar miniContentPanel miniPage gadgetTitleBar
#' @importFrom shiny plotOutput renderPlot observeEvent brushedPoints runGadget verbatimTextOutput stopApp renderText
#'
#' @export
#'
#' @examples
#' \dontrun{
#' mat <- matrix(rnorm(1000), ncol=10)
#' colnames(mat) <- paste("X", 1:ncol(mat), sep="")
#'
#' biplot_interactive(mat, mat[,1])
#' }
biplot_interactive <- function(data, scores, ...) {

data <- as.data.frame(data)
scores <- as.numeric(scores)

ui <- miniPage(
gadgetTitleBar("Drag to select points"),
miniContentPanel(
# The brush="brush" argument means we can listen for
# brush events on the plot using input$brush.
plotOutput("plot1", height = "80%", brush = "plot_brush"),
verbatimTextOutput("info")
)
)

server <- function(input, output, session) {

# Compute PCA
pc_obj <- prcomp(data, center = TRUE, scale = FALSE)
bp_obj <- biplot_colored(pc_obj, y = scores)

# Render the plot
output$plot1 <- renderPlot({
# Biplot
biplot_colored(pc_obj, y = scores, ...)
})

data_out <- cbind(data, bp_obj)

output$info <- renderText({
xy_range_str <- function(e) {
if(is.null(e)) return("NULL\n")
idx <- which(bp_obj[,1] >= e$xmin & bp_obj[,1] <= e$xmax &
bp_obj[,2] >= e$ymin & bp_obj[,2] <= e$ymax)
paste0(rownames(data)[idx], collapse = "\n")
}
xy_range_str(input$plot_brush)
})

# Handle the Done button being pressed.
observeEvent(input$done, {
# Return the brushed points. See ?shiny::brushedPoints.
stopApp(brushedPoints(data_out, input$plot_brush, xvar="PC1", yvar="PC2"))
})
}

runGadget(ui, server)
}
93 changes: 55 additions & 38 deletions R/sample_filtering.R
Original file line number Diff line number Diff line change
@@ -1,32 +1,53 @@
#' Fit Logistic Regression Model of FNR against set of positive control (ubiquitously expressed) genes
#'
#' @details logit(Probability of False Negative) ~ a + b*(mean log10p1 expression) .
#' @details logit(Probability of False Negative) ~ a + b*(median log-expression) .
#'
#' @param expr matrix The data matrix in transcript-proportional units (genes in rows, cells in columns).
#' @param pos_controls A logical vector indexing positive control genes that will be used to compute false-negative rate characteristics.
#' User must provide at least 2 positive control genes.
#' @param fn_tresh Inclusive threshold for negative detection. Default 0.01.
#' fn_tresh must be non-negative.
#'
#' @return A list of logistic regression coefficients corresponding to glm fits in each sample. If a fit did not converge, the result reported is NA.
#' @return A matrix of logistic regression coefficients corresponding to glm fits in each sample (a and b in columns 1 and 2 respectively). If the a & b fit does not converge, b is set to zero and only a is estimated.
#'
#' @importFrom boot logit
#' @importFrom matrixStats rowMedians
#'
simple_FNR_params = function(expr, pos_controls, fn_tresh = 0.01){

# Mean log10p1 expression
mu_obs = rowMeans(log10(expr[pos_controls,]+1))

# Drop-outs
drop_outs = 0 + (expr[pos_controls,] <= fn_tresh)
stopifnot(!any(is.na(pos_controls)))

if (sum(pos_controls) < 2){
stop("User must provide at least 2 positive control genes")
}

if (fn_tresh < 0){
stop("fn_tresh must be non-negative")
}

pos_expr = expr[pos_controls,] # Selecting positive-control genes
is_drop = pos_expr <= fn_tresh # Identify false negatives
pos_expr[is_drop] = NA # Set false negatives to NA
drop_outs = 0 + is_drop # Numeric drop-out state
drop_rate = colMeans(drop_outs) # Total drop-out rate per sample

# Median log-expression in positive observations
mu_obs = log(rowMedians(pos_expr,na.rm = TRUE))
if(any(is.na(mu_obs))){
stop("Median log-expression in positive observations NA for some positive control gene/s")
}

# Logistic Regression Model of FNR
ref.glms = list()
for (si in 1:dim(drop_outs)[2]){
logistic_coef = matrix(0,ncol(drop_outs),2)
for (si in seq_len(ncol(drop_outs))){
fit = suppressWarnings(glm(cbind(drop_outs[,si],1 - drop_outs[,si]) ~ mu_obs,family=binomial(logit)))
if(fit$converged){
ref.glms[[si]] = fit$coefficients
logistic_coef[si,] = fit$coefficients
} else {
ref.glms[[si]] = NA
logistic_coef[si,1] = logit(drop_rate[si])
}
}
return(ref.glms)
return(logistic_coef)
}

#' metric-based sample filtering: function to filter single-cell RNA-Seq libraries.
Expand All @@ -51,7 +72,7 @@ simple_FNR_params = function(expr, pos_controls, fn_tresh = 0.01){
#' If NULL, filtered_fnr will be returned NA.
#' @param scale. logical. Will expression be scaled by total expression for FNR computation? Default = FALSE
#' @param glen Gene lengths for gene-length normalization (normalized data used in FNR computation).
#' @param AUC_range An array of two values, representing range over which FNR AUC will be computed (log10(expr_units + 1)). Default c(0,6)
#' @param AUC_range An array of two values, representing range over which FNR AUC will be computed (log(expr_units)). Default c(0,15)
#' @param zcut A numeric value determining threshold Z-score for sd, mad, and mixture sub-criteria. Default 1.
#' If NULL, only hard threshold sub-criteria will be applied.
#' @param mixture A logical value determining whether mixture modeling sub-criterion will be applied per primary criterion (metric).
Expand All @@ -61,11 +82,11 @@ simple_FNR_params = function(expr, pos_controls, fn_tresh = 0.01){
#' @param hard_nreads numeric. Hard (lower bound on) nreads threshold. Default 25000.
#' @param hard_ralign numeric. Hard (lower bound on) ralign threshold. Default 15.
#' @param hard_breadth numeric. Hard (lower bound on) breadth threshold. Default 0.2.
#' @param hard_fnr numeric. Hard (upper bound on) fnr threshold. Default 3.
#' @param suff_nreads numeric. If not null, serves as an upper bound on nreads threshold.
#' @param suff_ralign numeric. If not null, serves as an upper bound on ralign threshold. Default 65.
#' @param suff_breadth numeric. If not null, serves as an upper bound on breadth threshold. Default 0.8.
#' @param suff_fnr numeric. If not null, serves as an lower bound on fnr threshold.
#' @param hard_auc numeric. Hard (upper bound on) fnr auc threshold. Default 10.
#' @param suff_nreads numeric. If not null, serves as an overriding upper bound on nreads threshold.
#' @param suff_ralign numeric. If not null, serves as an overriding upper bound on ralign threshold.
#' @param suff_breadth numeric. If not null, serves as an overriding upper bound on breadth threshold.
#' @param suff_auc numeric. If not null, serves as an overriding lower bound on fnr auc threshold.
#' @param plot logical. Should a plot be produced?
#' @param hist_breaks hist() breaks argument. Ignored if `plot=FALSE`.
#'
Expand All @@ -79,15 +100,16 @@ simple_FNR_params = function(expr, pos_controls, fn_tresh = 0.01){
#'
#'@importFrom mixtools normalmixEM
#'@importFrom diptest dip.test
#'@importFrom boot inv.logit
#'@export
#'
#'
metric_sample_filter = function(expr, nreads = colSums(expr), ralign = NULL,
gene_filter = NULL, pos_controls = NULL,scale. = FALSE,glen = NULL,
AUC_range = c(0,6), zcut = 1,
AUC_range = c(0,15), zcut = 1,
mixture = TRUE, dip_thresh = 0.05,
hard_nreads = 25000, hard_ralign = 15, hard_breadth = 0.2, hard_fnr = 3,
suff_nreads = NULL, suff_ralign = 65, suff_breadth = 0.8, suff_fnr = NULL,
hard_nreads = 25000, hard_ralign = 15, hard_breadth = 0.2, hard_auc = 10,
suff_nreads = NULL, suff_ralign = NULL, suff_breadth = NULL, suff_auc = NULL,
plot = FALSE, hist_breaks = 10){

criterion_count = 0
Expand Down Expand Up @@ -210,17 +232,17 @@ metric_sample_filter = function(expr, nreads = colSums(expr), ralign = NULL,
}

# Compute FNR AUC
ref.glms = simple_FNR_params(expr = nexpr, pos_controls = pos_controls)
logistic_coef = simple_FNR_params(expr = nexpr, pos_controls = pos_controls)
AUC = NULL
for (si in 1:dim(expr)[2]){
if(!any(is.na(ref.glms[[si]]))){
AUC[si] = log(exp(ref.glms[[si]][1] + ref.glms[[si]][2] * AUC_range[2]) + 1)/ref.glms[[si]][2] - log(exp(ref.glms[[si]][1] + ref.glms[[si]][2] * AUC_range[1]) + 1)/ref.glms[[si]][2]
if(logistic_coef[si,2] != 0){
AUC[si] = log(exp(logistic_coef[si,1] + logistic_coef[si,2] * AUC_range[2]) + 1)/logistic_coef[si,2] - log(exp(logistic_coef[si,1] + logistic_coef[si,2] * AUC_range[1]) + 1)/logistic_coef[si,2]
} else {
stop("glm fit did not converge")
AUC[si] = inv.logit(logistic_coef[si,1])*(AUC_range[2] - AUC_range[1])
}
}

AUC_CUTOFF = hard_fnr
AUC_CUTOFF = hard_auc

if (!is.null(zcut)){

Expand All @@ -239,8 +261,8 @@ metric_sample_filter = function(expr, nreads = colSums(expr), ralign = NULL,
}
}

if(!is.null(suff_fnr)){
AUC_CUTOFF = max(AUC_CUTOFF,suff_fnr)
if(!is.null(suff_auc)){
AUC_CUTOFF = max(AUC_CUTOFF,suff_auc)
}
}
filtered_fnr = AUC > AUC_CUTOFF
Expand All @@ -254,7 +276,8 @@ metric_sample_filter = function(expr, nreads = colSums(expr), ralign = NULL,

is_bad = rep(FALSE,dim(expr)[2])

par(mfcol = c(criterion_count,2))
op <- par(mfcol = c(criterion_count,2))
on.exit(par(op))

if(!is.null(nreads)){
is_bad = filtered_nreads
Expand Down Expand Up @@ -304,7 +327,7 @@ metric_sample_filter = function(expr, nreads = colSums(expr), ralign = NULL,
}else{
hist(AUC, main = paste0("auc: Thresh = ",signif(AUC_CUTOFF,3)," , Rm = ",sum(filtered_fnr)," , Tot_Rm = ",sum(is_bad)), xlab = "FNR AUC", breaks = hist_breaks)
}
abline(v = hard_fnr, col = "yellow", lty = 1)
abline(v = hard_auc, col = "yellow", lty = 1)
abline(v = AUC_CUTOFF, col = "blue", lty = 2)
}

Expand All @@ -320,13 +343,6 @@ metric_sample_filter = function(expr, nreads = colSums(expr), ralign = NULL,
if(!is.null(pos_controls)){
hist(AUC[!is_bad], main = paste0("auc: Tot = ",sum(!is_bad)), xlab = "FNR AUC", breaks = hist_breaks)
}

# v = rbind(filtered_nreads,filtered_ralign,filtered_breadth,filtered_fnr)
# rownames(v) = c("nreads","ralign","breadth","fnr")
# v = na.omit(v)
# m = v %*% t(v)
#
# barplot(m, beside = TRUE, legend.text = TRUE)
}

return(list(filtered_nreads = filtered_nreads,
Expand Down Expand Up @@ -417,8 +433,9 @@ factor_sample_filter = function(expr, qual, gene_filter = NULL, max_exp_pcs = 5,
num_qual_pcs = which(csum > min_qual_variance)[1]

if(plot){
op <- par(mfrow = c(2,1))
on.exit(par(op))
for (i in 1:num_qual_pcs){
par(mfrow = c(2,1))
hist(qpc$x[,i],breaks = hist_breaks, main = paste0("Distribution of Quality PC ",i), xlab = paste0("Qual PC",i))
barplot(abs(qpc$rotation[,i]),col = c("red","green")[1 + (qpc$rotation[,i] > 0)], cex.names = .25,horiz = T, las=1, main = "Loadings")
}
Expand Down
Loading

0 comments on commit c7b195b

Please sign in to comment.