Skip to content

Commit

Permalink
Merge pull request #49 from fmicompbio/add-errorstats
Browse files Browse the repository at this point in the history
Add errorstats, various other small fixes
  • Loading branch information
csoneson authored Mar 26, 2024
2 parents 95615c4 + f35427e commit 421eb49
Show file tree
Hide file tree
Showing 9 changed files with 52 additions and 16 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: mutscan
Title: Preprocessing and Analysis of Deep Mutational Scanning Data
Version: 0.3.0
Version: 0.3.1
Authors@R:
c(person(given = "Charlotte",
family = "Soneson",
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ importFrom(edgeR,predFC)
importFrom(edgeR,scaleOffset)
importFrom(edgeR,topTags)
importFrom(ggplot2,aes)
importFrom(ggplot2,after_stat)
importFrom(ggplot2,annotate)
importFrom(ggplot2,coord_cartesian)
importFrom(ggplot2,element_blank)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# mutscan 0.3.1

* Adapt summarizeExperiment to include errorStatistics in metadata(se)
* Modify plotPairs to ignore NAs rather than give an error when calculating correlations

# mutscan 0.3.0

* Collapsing of variable sequences is no longer supported by digestFastqs. Use collapseMutantsBySimilarity() instead
Expand Down
14 changes: 10 additions & 4 deletions R/digestFastqs.R
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,8 @@
#' package = "mutscan"),
#' elementsForward = "SUCV", elementLengthsForward = c(1, 10, 18, 96),
#' constantForward = "AACCGGAGGAGGGAGCTG",
#' wildTypeForward = c(FOS = "ACTGATACACTCCAAGCGGAGACAGACCAACTAGAAGATGAGAAGTCTGCTTTGCAGACCGAGATTGCCAACCTGCTGAAGGAGAAGGAAAAACTA"),
#' wildTypeForward = c(FOS = paste0("ACTGATACACTCCAAGCGGAGACAGACCAACTAGAAGATGAGAAGTC",
#' "TGCTTTGCAGACCGAGATTGCCAACCTGCTGAAGGAGAAGGAAAAACTA")),
#' nbrMutatedCodonsMaxForward = 1
#' )
#' ## Table with read counts and mutant information
Expand All @@ -288,7 +289,8 @@
#' elementsReverse = "SUCVS", elementLengthsReverse = c(1, 7, 17, 96, -1),
#' constantForward = "AACCGGAGGAGGGAGCTG",
#' constantReverse = "GAGTTCATCCTGGCAGC",
#' wildTypeForward = c(FOS = "ACTGATACACTCCAAGCGGAGACAGACCAACTAGAAGATGAGAAGTCTGCTTTGCAGACCGAGATTGCCAACCTGCTGAAGGAGAAGGAAAAACTA"),
#' wildTypeForward = c(FOS = paste0("ACTGATACACTCCAAGCGGAGACAGACCAACTAGAAGATGAGAAGTC",
#' "TGCTTTGCAGACCGAGATTGCCAACCTGCTGAAGGAGAAGGAAAAACTA")),
#' nbrMutatedCodonsMaxForward = 1
#' )
#' ## Table with read counts and mutant information
Expand All @@ -315,8 +317,10 @@
#' elementsReverse = "SUCV", elementLengthsReverse = c(1, 8, 20, 96),
#' constantForward = "AACCGGAGGAGGGAGCTG",
#' constantReverse = "GAAAAAGGAAGCTGGAGAGA",
#' wildTypeForward = c(FOS = "ACTGATACACTCCAAGCGGAGACAGACCAACTAGAAGATGAGAAGTCTGCTTTGCAGACCGAGATTGCCAACCTGCTGAAGGAGAAGGAAAAACTA"),
#' wildTypeReverse = c(JUN = "ATCGCCCGGCTGGAGGAAAAAGTGAAAACCTTGAAAGCTCAGAACTCGGAGCTGGCGTCCACGGCCAACATGCTCAGGGAACAGGTGGCACAGCTT"),
#' wildTypeForward = c(FOS = paste0("ACTGATACACTCCAAGCGGAGACAGACCAACTAGAAGATGAGAAGTC",
#' "TGCTTTGCAGACCGAGATTGCCAACCTGCTGAAGGAGAAGGAAAAACTA")),
#' wildTypeReverse = c(JUN = paste0("ATCGCCCGGCTGGAGGAAAAAGTGAAAACCTTGAAAGCTCAGAACTC",
#' "GGAGCTGGCGTCCACGGCCAACATGCTCAGGGAACAGGTGGCACAGCTT")),
#' nbrMutatedCodonsMaxForward = 1,
#' nbrMutatedCodonsMaxReverse = 1
#' )
Expand Down Expand Up @@ -626,6 +630,7 @@ digestFastqs <- function(fastqForward, fastqReverse = NULL,

CposFwd <- gregexpr(pattern = "C", elementsForward)[[1]]
if (nchar(constantForward[1]) > 0 &&
sum(CposFwd) != -1 &&
sum(elementLengthsForward[CposFwd]) != nchar(constantForward[1])) {
stop("The sum of the constant sequence lengths in elementsForward (",
sum(elementLengthsForward[CposFwd]),
Expand All @@ -634,6 +639,7 @@ digestFastqs <- function(fastqForward, fastqReverse = NULL,
}
CposRev <- gregexpr(pattern = "C", elementsReverse)[[1]]
if (nchar(constantReverse[1]) > 0 &&
sum(CposRev) != -1 &&
sum(elementLengthsReverse[CposRev]) != nchar(constantReverse[1])) {
stop("The sum of the constant sequence lengths in elementsReverse (",
sum(elementLengthsReverse[CposRev]),
Expand Down
8 changes: 5 additions & 3 deletions R/plotPairs.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#' @importFrom ggplot2 ggplot annotate theme_void ylim stat_density2d
#' scale_fill_continuous geom_point theme_bw theme element_blank aes
#' geom_histogram scale_x_continuous scale_y_continuous geom_abline
#' after_stat
#' @importFrom stats cor
#' @importFrom SummarizedExperiment assayNames assay
#' @importFrom grDevices hcl.colors rgb colorRamp
Expand Down Expand Up @@ -102,7 +103,8 @@ plotPairs <- function(se, selAssay = "counts", doLog = TRUE, pseudocount = 1,
yData <- GGally::eval_data_col(data, mapping$y)

## Calculate correlation
mainCor <- stats::cor(xData, yData, method = corMethod)
mainCor <- stats::cor(xData, yData, method = corMethod,
use = "pairwise.complete.obs")
transfCor <- (abs(mainCor) - min(corRange))/(max(corRange) - min(corRange))
## Protect against numerical imprecision leading to values outside
## the allowed range
Expand Down Expand Up @@ -141,7 +143,7 @@ plotPairs <- function(se, selAssay = "counts", doLog = TRUE, pseudocount = 1,
## Define function to create smoothscatter-like plot (for use with ggpairs)
smoothscat <- function(data, mapping, ...) {
ggplot2::ggplot(data = data, mapping = mapping) +
ggplot2::stat_density2d(ggplot2::aes(fill = (.data$..density..)^0.25), geom = "tile",
ggplot2::stat_density2d(ggplot2::aes(fill = ggplot2::after_stat(.data$density)^0.25), geom = "tile",
contour = FALSE, n = 200) +
ggplot2::scale_fill_continuous(low = "white", high = "darkgreen") +
ggtheme +
Expand Down Expand Up @@ -204,7 +206,7 @@ plotPairs <- function(se, selAssay = "counts", doLog = TRUE, pseudocount = 1,
substr(corMethod, 2, nchar(corMethod)), " correlation")

## Calculate correlations and get range
allCors <- stats::cor(mat, method = corMethod)
allCors <- stats::cor(mat, method = corMethod, use = "pairwise.complete.obs")
if (!is.null(corrColorRange)) {
corRange <- corrColorRange
} else {
Expand Down
7 changes: 5 additions & 2 deletions R/summarizeExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@
#' package = "mutscan"),
#' elementsForward = "SUCV", elementLengthsForward = c(1, 10, 18, 96),
#' constantForward = "AACCGGAGGAGGGAGCTG",
#' wildTypeForward = c(FOS = "ACTGATACACTCCAAGCGGAGACAGACCAACTAGAAGATGAGAAGTCTGCTTTGCAGACCGAGATTGCCAACCTGCTGAAGGAGAAGGAAAAACTA"),
#' wildTypeForward = c(FOS = paste0("ACTGATACACTCCAAGCGGAGACAGACCAACTAGAAGATGAGAAGTC",
#' "TGCTTTGCAGACCGAGATTGCCAACCTGCTGAAGGAGAAGGAAAAACTA")),
#' nbrMutatedCodonsMaxForward = 1
#' )
#' ## Output sample
Expand All @@ -67,7 +68,8 @@
#' package = "mutscan"),
#' elementsForward = "SUCV", elementLengthsForward = c(1, 10, 18, 96),
#' constantForward = "AACCGGAGGAGGGAGCTG",
#' wildTypeForward = c(FOS = "ACTGATACACTCCAAGCGGAGACAGACCAACTAGAAGATGAGAAGTCTGCTTTGCAGACCGAGATTGCCAACCTGCTGAAGGAGAAGGAAAAACTA"),
#' wildTypeForward = c(FOS = paste0("ACTGATACACTCCAAGCGGAGACAGACCAACTAGAAGATGAGAAGTC",
#' "TGCTTTGCAGACCGAGATTGCCAACCTGCTGAAGGAGAAGGAAAAACTA")),
#' nbrMutatedCodonsMaxForward = 1
#' )
#' ## Combine
Expand Down Expand Up @@ -199,6 +201,7 @@ summarizeExperiment <- function(x, coldata, countType = "umis") {
colData = coldata[match(allSamples, coldata$Name), , drop = FALSE],
rowData = allSequences,
metadata = list(parameters = lapply(allSamples, function(w) x[[w]]$parameters),
errorStatistics = lapply(allSamples, function(w) x[[w]]$errorStatistics),
countType = countType,
mutNameDelimiter = mutnamedel)
)
Expand Down
12 changes: 8 additions & 4 deletions man/digestFastqs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions man/summarizeExperiment.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions tests/testthat/test_summarizeExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ test_that("summarizeExperiment works as expected with reads output", {

expect_equal(S4Vectors::metadata(se)$parameters[["sample1"]], out1$parameters)
expect_equal(S4Vectors::metadata(se)$parameters[["sample2"]], out2$parameters)
expect_equal(S4Vectors::metadata(se)$errorStatistics[["sample1"]], out1$errorStatistics)
expect_equal(S4Vectors::metadata(se)$errorStatistics[["sample2"]], out2$errorStatistics)

expect_true(all(c("mutantName", "sequence", "nbrMutBases",
"minNbrMutBases", "maxNbrMutBases",
Expand Down Expand Up @@ -198,6 +200,8 @@ test_that("summarizeExperiment works as expected with umis output", {

expect_equal(S4Vectors::metadata(se)$parameters[["sample1"]], out1$parameters)
expect_equal(S4Vectors::metadata(se)$parameters[["sample2"]], out2$parameters)
expect_equal(S4Vectors::metadata(se)$errorStatistics[["sample1"]], out1$errorStatistics)
expect_equal(S4Vectors::metadata(se)$errorStatistics[["sample2"]], out2$errorStatistics)
})

test_that("summarizeExperiment orders samples equally in count matrix/colData", {
Expand All @@ -223,8 +227,13 @@ test_that("summarizeExperiment recognizes the presence of UMI counts correctly",
})

test_that("summarizeExperiment works as expected when collapsing to WT", {
## Also test that errorStatistics is empty if there is no constant sequence
Ldef1$collapseToWTForward <- TRUE
Ldef2$collapseToWTForward <- TRUE
Ldef1$elementsForward = "SUSV"
Ldef1$elementsReverse = "SUSV"
Ldef2$elementsForward = "SUSV"
Ldef2$elementsReverse = "SUSV"

out1 <- do.call(digestFastqs, Ldef1)
out2 <- do.call(digestFastqs, Ldef2)
Expand Down Expand Up @@ -259,6 +268,10 @@ test_that("summarizeExperiment works as expected when collapsing to WT", {

expect_equal(S4Vectors::metadata(se)$parameters[["sample1"]], out1$parameters)
expect_equal(S4Vectors::metadata(se)$parameters[["sample2"]], out2$parameters)
expect_equal(S4Vectors::metadata(se)$errorStatistics[["sample1"]], out1$errorStatistics)
expect_equal(S4Vectors::metadata(se)$errorStatistics[["sample2"]], out2$errorStatistics)
expect_equal(sum(S4Vectors::metadata(se)$errorStatistics[["sample1"]][, c("nbrMatchForward", "nbrMismatchForward", "nbrMatchReverse", "nbrMismatchReverse")]), 0)
expect_equal(sum(S4Vectors::metadata(se)$errorStatistics[["sample2"]][, c("nbrMatchForward", "nbrMismatchForward", "nbrMatchReverse", "nbrMismatchReverse")]), 0)

expect_true(all(c("mutantName", "sequence", "nbrMutBases",
"minNbrMutBases", "maxNbrMutBases",
Expand Down

0 comments on commit 421eb49

Please sign in to comment.