From ab6b4f11d2a503160b0a3d36625cbb128af08694 Mon Sep 17 00:00:00 2001 From: mgrasshoff Date: Wed, 20 Sep 2023 20:31:16 +0200 Subject: [PATCH 01/11] Add loading function for VCF files. --- DESCRIPTION | 2 +- R/LoadingVCF_typewise.R | 225 ++++++++++++++++++++++++++++++++++++++++ README.md | 2 +- 3 files changed, 227 insertions(+), 2 deletions(-) create mode 100644 R/LoadingVCF_typewise.R diff --git a/DESCRIPTION b/DESCRIPTION index 3b8532d..6d088de 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: sigurd Type: Package Title: Single cell Genotyping Using RNA Data -Version: 0.2.23 +Version: 0.2.24 Authors@R: c( person(given = "Martin", family = "Grasshoff", diff --git a/R/LoadingVCF_typewise.R b/R/LoadingVCF_typewise.R new file mode 100644 index 0000000..f7551fa --- /dev/null +++ b/R/LoadingVCF_typewise.R @@ -0,0 +1,225 @@ +#'LoadingVCF_typewise +#'@description +#'We load a cellwise pileup result from a VCF file. +#' +#'@import Matrix SummarizedExperiment VariantAnnotation +#'@param samples_path Path to the input folder. Must include a barcodes file. +#'@param samples_file Path to the csv file with the samples to be loaded. +#'@param vcf_path Path to the VCF file with the variants. +#'@param snp_path Path to the SNP file used for VarTrix (SNV.loci.txt). +#'@param patient The patient you want to load. +#'@param type_use The type of input. Has to be one of: scRNAseq_Somatic, Amplicon_Somatic, scRNAseq_MT, Amplicon_MT. +#'@param min_reads The minimum number of reads we want. Otherwise we treat this as a NoCall. Default = NULL. +#'@param min_cells The minimum number of cells for a variant. Otherwise, we will remove a variant. Default = 2. +#'@export +LoadingVCF_typewise <- function(samples_file, samples_path = NULL, barcodes_path = NULL, snp_path = NULL, vcf_path, patient, sample = NULL, type_use = "scRNAseq_Somatic", min_reads = NULL, min_cells = 2, yield_size = 10000){ + if(all(!is.null(samples_path), !is.null(barcodes_path), !is.null(sample), !is.null(snp_path))){ + print(paste0("Loading the data for sample ", sample, ".")) + samples_file <- data.frame(patient = patient, sample = sample, input_folder = samples_path, cells = barcodes_path) + samples <- samples_file$sample + } else{ + print(paste0("Loading the data for patient ", patient, ".")) + print("We read in the samples file.") + samples_file <- read.csv(samples_file, stringsAsFactors = FALSE) + + + print("We subset to the patient of interest.") + samples_file <- samples_file[grep("vcf", samples_file$source, ignore.case = TRUE),] + samples_file <- samples_file[samples_file$patient == patient,] + samples_file <- samples_file[samples_file$type == type_use,] + + + print("We get the different samples.") + samples <- samples_file$sample + } + + + # A function to convert the heterozyguous/homozyguous information from the VCF to the consensus information from VarTrix. + # We define this function here, since we will not use it anywhere else. + # Maybe move it to a separate function. + char_to_numeric <- function(char_value) { + if(char_value == "1/1") return(2) + if(char_value %in% c("1/0", "0/1")) return(2) + if(char_value == "0/0") return(1) + return(0) + } + + + print("We read in the cell barcodes output by CellRanger as a list.") + barcodes <- lapply(samples_file$cells, read.table) + names(barcodes) <- samples + + + print("We read in the vcf file.") + vcf <- readVcf(vcf_path) + vcf_info <- info(vcf) + + + print("We load the VCF file.") + reads_matrix_total <- c() # The total number of reads + coverage_matrix_total <- c() # The alternative reads + ref_matrix_total <- c() # The reference reads + consensus_matrix_total <- c() + for(i in 1:length(samples)){ + print(paste0("Loading sample ", i, " of ", nrow(samples_file))) + input_folder_use <- samples_file$input_folder[i] + sample_use <- samples_file$sample[i] + + # The cell barcodes and variants. + cellbarcodes_use <- barcodes[[sample_use]] + + # We load the VCF file. + vcf_data <- paste0(input_folder_use, sample_use, "/cellSNP.cells.sorted.vcf.gz") + depth_to_add <- VariantAnnotation::readGeno(vcf_data, "DP") + depth_to_add[is.na(depth_to_add)] <- 0 + rownames(depth_to_add) <- make.names(rownames(depth_to_add)) + colnames(depth_to_add) <- paste0(sample_use, "_", colnames(depth_to_add)) + depth_to_add <- as(depth_to_add, "sparseMatrix") + reads_matrix_total <- cbind(reads_matrix_total, depth_to_add) + + alts_to_add <- readGeno(vcf_data, "AD") + alts_to_add[is.na(alts_to_add)] <- 0 + rownames(alts_to_add) <- make.names(rownames(alts_to_add)) + colnames(alts_to_add) <- paste0(sample_use, "_", colnames(alts_to_add)) + alts_to_add <- as(alts_to_add, "sparseMatrix") + coverage_matrix_total <- cbind(coverage_matrix_total, alts_to_add) + + consensus_to_add <- readGeno(vcf_data, "GT") + consensus_to_add <- matrix(sapply(consensus_to_add, char_to_numeric), nrow = nrow(consensus_to_add), dimnames = list(make.names(rownames(consensus_to_add)), paste0(sample_use, "_", colnames(consensus_to_add)))) + consensus_to_add <- as(consensus_to_add, "sparseMatrix") + consensus_matrix_total <- cbind(consensus_matrix_total, consensus_to_add) + } + ref_matrix_total <- reads_matrix_total - coverage_matrix_total + rm(consensus_to_add, alts_to_add, depth_to_add) + + + print("We generate more accessible names.") + if(all(c("GENE", "AA", "CDS") %in% colnames(vcf_info))){ + new_names <- paste0(vcf_info$GENE, "_", vcf_info$AA, "_", vcf_info$CDS) + names(new_names) <- make.names(paste0(as.character(rep(seqnames(vcf)@values, seqnames(vcf)@lengths)), ".", start(vcf), "_", as.character(ref(vcf)), ".", as.character(unlist(alt(vcf))))) + new_names <- new_names[rownames(ref_matrix_total)] + } else{ + new_names <- rownames(vcf_info) + new_names <- gsub(":|\\/|\\?", "_", new_names) + names(new_names) <- make.names(paste0(as.character(rep(seqnames(vcf)@values, seqnames(vcf)@lengths)), ".", start(vcf), "_", as.character(ref(vcf)), ".", as.character(unlist(alt(vcf))))) + new_names <- new_names[rownames(ref_matrix_total)] + } + + + if(!is.null(min_reads)){ + print(paste0("We set read values below the threshold of ", min_reads, " to 0.")) + print("We then generate the consensus matrix again.") + ref_matrix_total@x[ref_matrix_total@x < min_reads] <- 0 + coverage_matrix_total@x[coverage_matrix_total@x < min_reads] <- 0 + + reference_construction <- ref_matrix_total + reference_construction@x[reference_construction@x > 0] <- 1 + + coverage_construction <- coverage_matrix_total + coverage_construction@x[coverage_construction@x > 0] <- 2 + + consensus_matrix_total <- reference_construction + coverage_construction + rm(reference_construction, coverage_construction) + } + + + # We check if number of rows of the matrices are the same as the length of the new names. + if(all(nrow(coverage_matrix_total) != length(new_names))){ + input_rows <- nrow(coverage_matrix_total) + new_names_length <- length(new_names) + stop(paste0("Error: you have ", input_rows, " variants in you matrix and ", new_names_length, " actual variant names.")) + } + + rownames(coverage_matrix_total) <- new_names + rownames(ref_matrix_total) <- new_names + rownames(consensus_matrix_total) <- new_names + + + print(paste0("We remove variants, that are not detected in at least ", min_cells, " cells.")) + keep_variants <- rowSums(consensus_matrix_total >= 1) + keep_variants <- keep_variants >= min_cells + # If we only have one cell or one variant, we loose the matrix. + cell_ids <- colnames(consensus_matrix_total) + variant_names <- names(keep_variants[keep_variants]) + consensus_matrix_total <- consensus_matrix_total[keep_variants, ] + consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + colnames(consensus_matrix_total) <- cell_ids + rownames(consensus_matrix_total) <- variant_names + consensus_matrix_total <- as(consensus_matrix_total, "dgCMatrix") + coverage_matrix_total <- coverage_matrix_total[keep_variants, ] + coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + colnames(coverage_matrix_total) <- cell_ids + rownames(coverage_matrix_total) <- variant_names + coverage_matrix_total <- as(coverage_matrix_total, "dgCMatrix") + ref_matrix_total <- ref_matrix_total[keep_variants, ] + ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + colnames(ref_matrix_total) <- cell_ids + rownames(ref_matrix_total) <- variant_names + ref_matrix_total <- as(ref_matrix_total, "dgCMatrix") + + + print("We remove cells that are always NoCall.") + consensus_test <- consensus_matrix_total > 0 + keep_cells <- colSums(consensus_test) > 0 + # If we only have one cell or one variant, we loose the matrix. + cell_ids <- names(keep_cells[keep_cells]) + variant_names <- rownames(consensus_matrix_total) + consensus_matrix_total <- consensus_matrix_total[, keep_cells] + consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + colnames(consensus_matrix_total) <- cell_ids + rownames(consensus_matrix_total) <- variant_names + consensus_matrix_total <- as(consensus_matrix_total, "dgCMatrix") + coverage_matrix_total <- coverage_matrix_total[, keep_cells] + coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + colnames(coverage_matrix_total) <- cell_ids + rownames(coverage_matrix_total) <- variant_names + coverage_matrix_total <- as(coverage_matrix_total, "dgCMatrix") + ref_matrix_total <- ref_matrix_total[, keep_cells] + ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + colnames(ref_matrix_total) <- cell_ids + rownames(ref_matrix_total) <- variant_names + ref_matrix_total <- as(ref_matrix_total, "dgCMatrix") + + + print(paste0(type_use, " Variants: ", nrow(consensus_matrix_total))) + print(paste0(type_use, " Cells: ", ncol(consensus_matrix_total))) + + rm(consensus_test, keep_variants, keep_cells) + gc() + + + print("We transform the sparse matrices to matrices, so we can calculate the fraction.") + coverage_matrix_total <- as.matrix(coverage_matrix_total) + ref_matrix_total <- as.matrix(ref_matrix_total) + consensus_matrix_total <- as.matrix(consensus_matrix_total) + reads_total <- coverage_matrix_total + ref_matrix_total + fraction_total <- coverage_matrix_total / reads_total + fraction_total[is.na(fraction_total)] <- 0 + gc() + + + # We check if the matrices are empty (0 cells, 0 variants). Then we simply return NULL. + dim_test <- dim(reads_total) + if(any(dim_test == 0)){ + print(paste0("The filtering left ", dim_test[1], " variants and ", dim_test[2], "cells.")) + print("Returning NULL.") + return(NULL) + } else { + print("We generate a SummarizedExperiment object containing the fraction and the consensus matrices.") + # We want an assay for the Consensus information and for the fraction. + # As meta data we add a data frame showing the cell id, the associated patient and the sample. + coverage_depth_per_cell <- colMeans(reads_total) + coverage_depth_per_variant <- rowMeans(reads_total) + meta_data <- data.frame(Cell = colnames(consensus_matrix_total), Type = type_use, AverageCoverage = coverage_depth_per_cell) + rownames(meta_data) <- meta_data$Cell + meta_row <- data.frame(VariantName = rownames(consensus_matrix_total), Depth = coverage_depth_per_variant) + rownames(meta_row) <- meta_row$VariantName + #se_merged <- SummarizedExperiment(assays = list(consensus = as(consensus_matrix_total, "dgCMatrix"), fraction = as(fraction_total, "dgCMatrix"), coverage = as(reads_total, "dgCMatrix")), + # colData = meta_data) + se_merged <- SummarizedExperiment(assays = list(consensus = as(consensus_matrix_total, "CsparseMatrix"), fraction = as(fraction_total, "CsparseMatrix"), coverage = as(reads_total, "CsparseMatrix"), + alts = as(coverage_matrix_total, "CsparseMatrix"), refs = as(ref_matrix_total, "CsparseMatrix")), + colData = meta_data, rowData = meta_row) + return(se_merged) + } +} + diff --git a/README.md b/README.md index 29b2305..5b41d5f 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ The mutation data was obtained from the Sanger Institute Catalogue Of Somatic Mu ``` -# Current Features v0.2.23 +# Current Features v0.2.24 - Loading data from VarTrix and MAEGATK. - Transforming the data to be compatible for joint analysis. From d936c4b03c0782dfb5f378bb14b093efc68cf35a Mon Sep 17 00:00:00 2001 From: mgrasshoff Date: Wed, 20 Sep 2023 20:46:37 +0200 Subject: [PATCH 02/11] Updated description. --- NAMESPACE | 1 + R/LoadingVCF_typewise.R | 5 ++--- man/LoadingVCF_typewise.Rd | 40 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 man/LoadingVCF_typewise.Rd diff --git a/NAMESPACE b/NAMESPACE index d71eb89..cebd313 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ export(GetCellInfoPerVariant) export(GetVariantInfo) export(HeatmapVoi) export(LoadingMAEGATK_typewise) +export(LoadingVCF_typewise) export(LoadingVarTrix_typewise) export(Merging_SE_list) export(RowWiseSplit) diff --git a/R/LoadingVCF_typewise.R b/R/LoadingVCF_typewise.R index f7551fa..4d3cd37 100644 --- a/R/LoadingVCF_typewise.R +++ b/R/LoadingVCF_typewise.R @@ -6,14 +6,13 @@ #'@param samples_path Path to the input folder. Must include a barcodes file. #'@param samples_file Path to the csv file with the samples to be loaded. #'@param vcf_path Path to the VCF file with the variants. -#'@param snp_path Path to the SNP file used for VarTrix (SNV.loci.txt). #'@param patient The patient you want to load. #'@param type_use The type of input. Has to be one of: scRNAseq_Somatic, Amplicon_Somatic, scRNAseq_MT, Amplicon_MT. #'@param min_reads The minimum number of reads we want. Otherwise we treat this as a NoCall. Default = NULL. #'@param min_cells The minimum number of cells for a variant. Otherwise, we will remove a variant. Default = 2. #'@export -LoadingVCF_typewise <- function(samples_file, samples_path = NULL, barcodes_path = NULL, snp_path = NULL, vcf_path, patient, sample = NULL, type_use = "scRNAseq_Somatic", min_reads = NULL, min_cells = 2, yield_size = 10000){ - if(all(!is.null(samples_path), !is.null(barcodes_path), !is.null(sample), !is.null(snp_path))){ +LoadingVCF_typewise <- function(samples_file, samples_path = NULL, barcodes_path = NULL, vcf_path, patient, sample = NULL, type_use = "scRNAseq_Somatic", min_reads = NULL, min_cells = 2){ + if(all(!is.null(samples_path), !is.null(barcodes_path), !is.null(sample))){ print(paste0("Loading the data for sample ", sample, ".")) samples_file <- data.frame(patient = patient, sample = sample, input_folder = samples_path, cells = barcodes_path) samples <- samples_file$sample diff --git a/man/LoadingVCF_typewise.Rd b/man/LoadingVCF_typewise.Rd new file mode 100644 index 0000000..8a2e1eb --- /dev/null +++ b/man/LoadingVCF_typewise.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/LoadingVCF_typewise.R +\name{LoadingVCF_typewise} +\alias{LoadingVCF_typewise} +\title{LoadingVCF_typewise} +\usage{ +LoadingVCF_typewise( + samples_file, + samples_path = NULL, + barcodes_path = NULL, + snp_path = NULL, + vcf_path, + patient, + sample = NULL, + type_use = "scRNAseq_Somatic", + min_reads = NULL, + min_cells = 2, + yield_size = 10000 +) +} +\arguments{ +\item{samples_file}{Path to the csv file with the samples to be loaded.} + +\item{samples_path}{Path to the input folder. Must include a barcodes file.} + +\item{snp_path}{Path to the SNP file used for VarTrix (SNV.loci.txt).} + +\item{vcf_path}{Path to the VCF file with the variants.} + +\item{patient}{The patient you want to load.} + +\item{type_use}{The type of input. Has to be one of: scRNAseq_Somatic, Amplicon_Somatic, scRNAseq_MT, Amplicon_MT.} + +\item{min_reads}{The minimum number of reads we want. Otherwise we treat this as a NoCall. Default = NULL.} + +\item{min_cells}{The minimum number of cells for a variant. Otherwise, we will remove a variant. Default = 2.} +} +\description{ +We load a cellwise pileup result from a VCF file. +} From 3274fca3674394386382ffb5ccc73e08f2597211 Mon Sep 17 00:00:00 2001 From: mgrasshoff Date: Mon, 25 Sep 2023 11:46:32 +0200 Subject: [PATCH 03/11] Fixed error in VariantQuantil_Thresholding.R --- DESCRIPTION | 2 +- R/VariantQuantileThresholding.R | 40 ++++++++++++++++++++------------- README.md | 2 +- man/LoadingVCF_typewise.Rd | 6 +---- 4 files changed, 28 insertions(+), 22 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6d088de..1d72594 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: sigurd Type: Package Title: Single cell Genotyping Using RNA Data -Version: 0.2.24 +Version: 0.2.25 Authors@R: c( person(given = "Martin", family = "Grasshoff", diff --git a/R/VariantQuantileThresholding.R b/R/VariantQuantileThresholding.R index 16b1dfd..48985af 100755 --- a/R/VariantQuantileThresholding.R +++ b/R/VariantQuantileThresholding.R @@ -23,6 +23,9 @@ VariantQuantileThresholding <- function(SE, min_coverage = 2, quantiles = c(0.1, print("Get the mean allele frequency and coverage.") mean_af <- rowMeans(assays(SE)[["fraction"]], na.rm = TRUE) mean_cov <- rowMeans(assays(SE)[["coverage"]], na.rm = TRUE) + if(all(is.null(min_quality), is.numeric(min_quality))) stop("Error: Your minimum quality is not either NULL or a numeric.") + if(all(is.null(mean_allele_frequency), is.numeric(mean_allele_frequency))) stop("Error: Your mean allele frequency is not either NULL or a numeric.") + if(all(!is.null(group_of_interest), !is.null(group1), !is.null(group2))){ if(!group_of_interest %in% colnames(colData(SE))) stop("Error: Your group_of_interest is not in the colData.") if(!group1 %in% colData(SE)[,group_of_interest]) stop("Error: Your group1 is not in the group_of_interest.") @@ -38,7 +41,13 @@ VariantQuantileThresholding <- function(SE, min_coverage = 2, quantiles = c(0.1, print("Get the quantiles of the VAFs of each variant.") quantiles <- lapply(quantiles, function(x) apply(assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) # vars <- do.call(cbind, c(list(mean_af), list(mean_cov), list(rowData(SE)$VariantQuality), quantiles)) - vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quality = rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + if(!is.null(min_quality)){ + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quality = rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + vars <- vars[is.na(vars$VariantQuality), ] + vars <- subset(vars, VariantQuality > min_quality) + } else{ + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + } vars <- vars[mean_af_group_check,] print("Thresholding using the quantile approach.") @@ -46,26 +55,25 @@ VariantQuantileThresholding <- function(SE, min_coverage = 2, quantiles = c(0.1, if(length(thresholds) != 2) stop("Your thresholds are not of length 2.") #voi_ch <- subset(vars, vars[,1] > mean_allele_frequency & vars[,2] > min_coverage & vars[,4] < thresholds[1] & vars[,5] > thresholds[2]) voi_ch <- subset(vars, Mean_AF > mean_allele_frequency & Mean_Cov > min_coverage & Quantile1 < thresholds[1] & Quantile2 > thresholds[2]) - if(!is.null(min_quality)){ - voi_ch <- voi_ch[!is.na(voi_ch$Quality),] - voi_ch <- subset(voi_ch, Quality > min_quality) - } + + } else if(any(is.null(top_cells), is.null(top_VAF))){ print("Get the quantiles of the VAFs of each variant.") quantiles <- lapply(quantiles, function(x) apply(assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) - # vars <- do.call(cbind, c(list(mean_af), list(mean_cov), list(rowData(SE)$VariantQuality), quantiles)) - vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quality = rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + if(!is.null(min_quality)){ + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quality = rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + vars <- vars[is.na(vars$VariantQuality), ] + vars <- subset(vars, VariantQuality > min_quality) + } else{ + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + } print("Thresholding using the quantile approach.") if(length(quantiles) != 2) stop("Your quantiles are not of length 2.") if(length(thresholds) != 2) stop("Your thresholds are not of length 2.") #voi_ch <- subset(vars, vars[,1] > mean_allele_frequency & vars[,2] > min_coverage & vars[,4] < thresholds[1] & vars[,5] > thresholds[2]) voi_ch <- subset(vars, Mean_AF > mean_allele_frequency & Mean_Cov > min_coverage & Quantile1 < thresholds[1] & Quantile2 > thresholds[2]) - if(!is.null(min_quality)){ - voi_ch <- voi_ch[!is.na(voi_ch$Quality),] - voi_ch <- subset(voi_ch, Quality > min_quality) - } } else{ print("Get the quantile of the VAF of each variant.") if(length(quantiles) > 1) stop("You are providing more than 1 quantile. You should only provide 1.") @@ -75,12 +83,14 @@ VariantQuantileThresholding <- function(SE, min_coverage = 2, quantiles = c(0.1, top_cells_values <- assays(SE)[["fraction"]] top_cells_values <- top_cells_values > top_VAF top_cells_values <- rowSums(top_cells_values) - vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quality = rowData(SE)$VariantQuality, Quantile = quantiles, TopCells = top_cells_values) - voi_ch <- subset(vars, Mean_Cov > min_coverage & Quantile <= thresholds[1] & TopCells >= top_cells) if(!is.null(min_quality)){ - voi_ch <- voi_ch[!is.na(voi_ch$Quality),] - voi_ch <- subset(voi_ch, Quality > min_quality) + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quality = rowData(SE)$VariantQuality, Quantile = quantiles, TopCells = top_cells_values) + vars <- vars[is.na(vars$VariantQuality), ] + vars <- subset(vars, VariantQuality > min_quality) + } else{ + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quantile = quantiles, TopCells = top_cells_values) } + voi_ch <- subset(vars, Mean_Cov > min_coverage & Quantile <= thresholds[1] & TopCells >= top_cells) } voi_ch <- rownames(voi_ch) return(voi_ch) diff --git a/README.md b/README.md index 5b41d5f..e0c3a73 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ The mutation data was obtained from the Sanger Institute Catalogue Of Somatic Mu ``` -# Current Features v0.2.24 +# Current Features v0.2.25 - Loading data from VarTrix and MAEGATK. - Transforming the data to be compatible for joint analysis. diff --git a/man/LoadingVCF_typewise.Rd b/man/LoadingVCF_typewise.Rd index 8a2e1eb..54a22ab 100644 --- a/man/LoadingVCF_typewise.Rd +++ b/man/LoadingVCF_typewise.Rd @@ -8,14 +8,12 @@ LoadingVCF_typewise( samples_file, samples_path = NULL, barcodes_path = NULL, - snp_path = NULL, vcf_path, patient, sample = NULL, type_use = "scRNAseq_Somatic", min_reads = NULL, - min_cells = 2, - yield_size = 10000 + min_cells = 2 ) } \arguments{ @@ -23,8 +21,6 @@ LoadingVCF_typewise( \item{samples_path}{Path to the input folder. Must include a barcodes file.} -\item{snp_path}{Path to the SNP file used for VarTrix (SNV.loci.txt).} - \item{vcf_path}{Path to the VCF file with the variants.} \item{patient}{The patient you want to load.} From 8276e1149c1ac56fdd3bca8ea70fdaebe2bc5f76 Mon Sep 17 00:00:00 2001 From: mgrasshoff Date: Mon, 25 Sep 2023 13:59:17 +0200 Subject: [PATCH 04/11] Fixed error in Supplementing. --- DESCRIPTION | 2 +- R/AmpliconSupplementing.R | 32 +++++++++++++++++------------- R/VariantQuantileThresholding.R | 2 +- README.md | 2 +- man/VariantQuantileThresholding.Rd | 2 +- 5 files changed, 22 insertions(+), 18 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1d72594..757d60d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: sigurd Type: Package Title: Single cell Genotyping Using RNA Data -Version: 0.2.25 +Version: 0.2.26 Authors@R: c( person(given = "Martin", family = "Grasshoff", diff --git a/R/AmpliconSupplementing.R b/R/AmpliconSupplementing.R index 28eb7d2..26da9d0 100644 --- a/R/AmpliconSupplementing.R +++ b/R/AmpliconSupplementing.R @@ -23,20 +23,24 @@ AmpliconSupplementing <- function(scRNAseq, amplicon){ new_row_data <- merge(rowData(scRNAseq), rowData(amplicon), by = "VariantName", all.x = TRUE, all.y = TRUE, suffixes = c("scRNAseq", "Amplicon")) rownames(new_row_data) <- new_row_data$VariantName - # We add a VariantQuality column to the row data, showing the scRNAseq quality with the supplemented amplicon quality. - # We do the same for the concordance and the depth. - new_row_data$VariantQuality <- new_row_data$VariantQualityscRNAseq - amplicon_value <- rowData(amplicon)$VariantQuality - names(amplicon_value) <- rownames(amplicon) - amplicon_value <- na.omit(amplicon_value) - new_row_data[names(amplicon_value), "VariantQuality"] <- amplicon_value - - new_row_data$Concordance <- new_row_data$ConcordancescRNAseq - amplicon_value <- rowData(amplicon)$Concordance - names(amplicon_value) <- rownames(amplicon) - amplicon_value <- na.omit(amplicon_value) - new_row_data[names(amplicon_value), "Concordance"] <- amplicon_value - + if("VariantQualityscRNAseq" %in% colnames(new_row_data)){ + # We add a VariantQuality column to the row data, showing the scRNAseq quality with the supplemented amplicon quality. + # We do the same for the concordance and the depth. + new_row_data$VariantQuality <- new_row_data$VariantQualityscRNAseq + amplicon_value <- rowData(amplicon)$VariantQuality + names(amplicon_value) <- rownames(amplicon) + amplicon_value <- na.omit(amplicon_value) + new_row_data[names(amplicon_value), "VariantQuality"] <- amplicon_value + } + + if("ConcordancescRNAseq" %in% colnames(new_row_data)){ + new_row_data$Concordance <- new_row_data$ConcordancescRNAseq + amplicon_value <- rowData(amplicon)$Concordance + names(amplicon_value) <- rownames(amplicon) + amplicon_value <- na.omit(amplicon_value) + new_row_data[names(amplicon_value), "Concordance"] <- amplicon_value + } + new_row_data$Depth <- new_row_data$DepthscRNAseq amplicon_value <- rowData(amplicon)$Depth names(amplicon_value) <- rownames(amplicon) diff --git a/R/VariantQuantileThresholding.R b/R/VariantQuantileThresholding.R index 48985af..b555f67 100755 --- a/R/VariantQuantileThresholding.R +++ b/R/VariantQuantileThresholding.R @@ -18,7 +18,7 @@ #'@param group2 The second group of interest. #'@param group_factor How much higher has the mean allele frequency to be in group 1 when compared to group 2? #'@export -VariantQuantileThresholding <- function(SE, min_coverage = 2, quantiles = c(0.1, 0.9), thresholds = c(0.1, 0.9), top_cells = NULL, top_VAF = NULL, min_quality = 30, mean_allele_frequency = 0, +VariantQuantileThresholding <- function(SE, min_coverage = 2, quantiles = c(0.1, 0.9), thresholds = c(0.1, 0.9), top_cells = NULL, top_VAF = NULL, min_quality = NULL, mean_allele_frequency = 0, group_of_interest = NULL, group1 = NULL, group2 = NULL, group_factor = NULL){ print("Get the mean allele frequency and coverage.") mean_af <- rowMeans(assays(SE)[["fraction"]], na.rm = TRUE) diff --git a/README.md b/README.md index e0c3a73..5afd388 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ The mutation data was obtained from the Sanger Institute Catalogue Of Somatic Mu ``` -# Current Features v0.2.25 +# Current Features v0.2.26 - Loading data from VarTrix and MAEGATK. - Transforming the data to be compatible for joint analysis. diff --git a/man/VariantQuantileThresholding.Rd b/man/VariantQuantileThresholding.Rd index 5d3d435..953da42 100644 --- a/man/VariantQuantileThresholding.Rd +++ b/man/VariantQuantileThresholding.Rd @@ -11,7 +11,7 @@ VariantQuantileThresholding( thresholds = c(0.1, 0.9), top_cells = NULL, top_VAF = NULL, - min_quality = 30, + min_quality = NULL, mean_allele_frequency = 0, group_of_interest = NULL, group1 = NULL, From 6d7d4c73f1de193e3457ebed66116ca66b90ab51 Mon Sep 17 00:00:00 2001 From: mgrasshoff Date: Thu, 28 Sep 2023 13:43:45 +0200 Subject: [PATCH 05/11] Fixed error in filtering. --- DESCRIPTION | 2 +- R/AmpliconSupplementing.R | 4 ++-- R/Filtering.R | 6 ++++-- README.md | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 757d60d..741c06e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: sigurd Type: Package Title: Single cell Genotyping Using RNA Data -Version: 0.2.26 +Version: 0.2.27 Authors@R: c( person(given = "Martin", family = "Grasshoff", diff --git a/R/AmpliconSupplementing.R b/R/AmpliconSupplementing.R index 26da9d0..cd3b936 100644 --- a/R/AmpliconSupplementing.R +++ b/R/AmpliconSupplementing.R @@ -10,8 +10,8 @@ AmpliconSupplementing <- function(scRNAseq, amplicon){ # We supplement the scRNAseq data with the amplicon data. print("We get the new meta data.") - new_meta_data <- merge(colData(scRNAseq), colData(amplicon), by = "Cell", all.x = TRUE, all.y = TRUE, - suffixes = c("scRNAseq", "Amplicon")) + new_meta_data <- S4Vectors::merge(colData(scRNAseq), colData(amplicon), by = "Cell", all.x = TRUE, all.y = TRUE, + suffixes = c("scRNAseq", "Amplicon")) rownames(new_meta_data) <- new_meta_data$Cell # We add an AverageCoverage column to the new meta data. new_meta_data$AverageCoverage <- new_meta_data$AverageCoveragescRNAseq diff --git a/R/Filtering.R b/R/Filtering.R index 24b1c09..67e286f 100644 --- a/R/Filtering.R +++ b/R/Filtering.R @@ -50,10 +50,12 @@ Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold = # If the fraction_threshold is 0, we skip the thresholding. 0 and NULL would be the same. # We might want to use a fraction_threshold of 0 to use the same variable to create file paths later. + if(!is.null(fraction_threshold)){ if(fraction_threshold == 0){ - fraction_threshold <- NULL + fraction_threshold <- NULL + } } - + if(!is.null(fraction_threshold)){ if(any(fraction_threshold >= 1, fraction_threshold <= 0)){ stop("Your fraction threshold is not 0 < x < 1.") diff --git a/README.md b/README.md index 5afd388..7998f60 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ The mutation data was obtained from the Sanger Institute Catalogue Of Somatic Mu ``` -# Current Features v0.2.26 +# Current Features v0.2.27 - Loading data from VarTrix and MAEGATK. - Transforming the data to be compatible for joint analysis. From 3beaeff3add40387691d22bbbb8bb396f166fff6 Mon Sep 17 00:00:00 2001 From: mgrasshoff Date: Thu, 28 Sep 2023 16:56:19 +0200 Subject: [PATCH 06/11] Uploading fixes. --- DESCRIPTION | 2 +- R/VariantQuantileThresholding.R | 4 ++-- README.md | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 741c06e..5a77fff 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: sigurd Type: Package Title: Single cell Genotyping Using RNA Data -Version: 0.2.27 +Version: 0.2.28 Authors@R: c( person(given = "Martin", family = "Grasshoff", diff --git a/R/VariantQuantileThresholding.R b/R/VariantQuantileThresholding.R index b555f67..a9e3b97 100755 --- a/R/VariantQuantileThresholding.R +++ b/R/VariantQuantileThresholding.R @@ -42,7 +42,7 @@ VariantQuantileThresholding <- function(SE, min_coverage = 2, quantiles = c(0.1, quantiles <- lapply(quantiles, function(x) apply(assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) # vars <- do.call(cbind, c(list(mean_af), list(mean_cov), list(rowData(SE)$VariantQuality), quantiles)) if(!is.null(min_quality)){ - vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quality = rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, VariantQuality = rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) vars <- vars[is.na(vars$VariantQuality), ] vars <- subset(vars, VariantQuality > min_quality) } else{ @@ -62,7 +62,7 @@ VariantQuantileThresholding <- function(SE, min_coverage = 2, quantiles = c(0.1, quantiles <- lapply(quantiles, function(x) apply(assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) if(!is.null(min_quality)){ - vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quality = rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, VariantQuality = rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) vars <- vars[is.na(vars$VariantQuality), ] vars <- subset(vars, VariantQuality > min_quality) } else{ diff --git a/README.md b/README.md index 7998f60..5b54b47 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ The mutation data was obtained from the Sanger Institute Catalogue Of Somatic Mu ``` -# Current Features v0.2.27 +# Current Features v0.2.28 - Loading data from VarTrix and MAEGATK. - Transforming the data to be compatible for joint analysis. From c81760e73630924961143c304c6f1f7e43b28b30 Mon Sep 17 00:00:00 2001 From: mgrasshoff Date: Wed, 1 Nov 2023 15:32:50 +0100 Subject: [PATCH 07/11] Declared used functions and added some tests. --- DESCRIPTION | 5 +- NAMESPACE | 7 +- R/AmpliconSupplementing.R | 43 ++--- R/CalculateAltReads.R | 10 +- R/CalculateConsensus.R | 4 +- R/CalculateCorrelationPValue.R | 2 +- R/CalculateCoverage.R | 4 +- R/CalculateFisherTestPValue.R | 2 +- R/CalculateQuality.R | 6 +- R/CalculateStrandCorrelation.R | 66 +++---- R/CombineSEobjects.R | 33 ++-- R/Filtering.R | 35 ++-- R/GetCellInfoPerVariant.R | 11 +- R/GetVariantInfo.R | 4 +- R/HeatmapVOI.R | 30 +-- R/LoadingMAEGATK_typewise.R | 96 ++-------- R/LoadingVCF_typewise.R | 50 ++--- R/LoadingVarTrix_typewise.R | 181 +++++------------- R/RowWiseSplit.R | 4 +- R/SetVariantInfo.R | 2 +- R/VariantBurden.R | 4 +- R/VariantCloneSizeThresholding.R | 24 +-- R/VariantCorrelationHeatmap.R | 26 +-- R/VariantFisherTestHeatmap.R | 30 +-- R/VariantQuantileThresholding.R | 37 ++-- R/VariantWiseCorrelation.R | 6 +- R/VariantWiseFisherTest.R | 6 +- R/char_to_numeric.R | 12 ++ R/combine_NAMES.R | 3 +- R/combine_SparseMatrix.R | 4 +- R/computeAFMutMatrix.R | 41 +--- R/getAltMatrix.R | 4 +- R/getMutMatrix.R | 25 +++ R/getReadMatrix.R | 4 +- R/getRefMatrix.R | 4 +- R/get_consensus.R | 6 +- R/ggsci_pal.R | 5 +- R/sdiv.R | 4 +- README.md | 2 +- docs/404.html | 2 +- docs/articles/SiGURD.html | 39 ++-- docs/articles/index.html | 2 +- docs/authors.html | 6 +- docs/deps/bootstrap-5.2.2/bootstrap.min.css | 4 +- docs/index.html | 4 +- docs/pkgdown.yml | 2 +- docs/reference/AmpliconSupplementing.html | 12 +- docs/reference/CalculateAlleleFrequency.html | 2 +- docs/reference/CalculateAltReads.html | 2 +- docs/reference/CalculateConsensus.html | 20 +- .../reference/CalculateCorrelationPValue.html | 2 +- docs/reference/CalculateCoverage.html | 2 +- docs/reference/CalculateFisherTestPValue.html | 2 +- docs/reference/CalculateQuality.html | 8 +- .../reference/CalculateStrandCorrelation.html | 2 +- docs/reference/CombineSEobjects.html | 2 +- docs/reference/Filtering.html | 10 +- docs/reference/GetCellInfoPerVariant.html | 2 +- docs/reference/GetVariantInfo.html | 95 +++++++++ docs/reference/HeatmapVoi.html | 10 +- docs/reference/LoadingMAEGATK_typewise.html | 2 +- docs/reference/LoadingVCF_typewise.html | 114 +++++++++++ docs/reference/LoadingVarTrix_ori.html | 2 +- docs/reference/LoadingVarTrix_typewise.html | 2 +- docs/reference/Merging_SE_list.html | 2 +- docs/reference/RowWiseSplit.html | 2 +- docs/reference/SeparatingMatrixToList.html | 2 +- docs/reference/SetVariantInfo.html | 95 +++++++++ docs/reference/VariantBurden.html | 2 +- .../VariantCloneSizeThresholding.html | 2 +- docs/reference/VariantCorrelationHeatmap.html | 12 +- docs/reference/VariantFisherTestHeatmap.html | 12 +- .../VariantQuantileThresholding.html | 29 ++- docs/reference/VariantWiseCorrelation.html | 14 +- docs/reference/VariantWiseFisherTest.html | 14 +- docs/reference/char_to_numeric.html | 83 ++++++++ docs/reference/combine_NAMES.html | 2 +- docs/reference/combine_SparseMatrix.html | 2 +- docs/reference/computeAFMutMatrix.html | 15 +- docs/reference/getAltMatrix.html | 2 +- docs/reference/getMutMatrix.html | 95 +++++++++ docs/reference/getReadMatrix.html | 2 +- docs/reference/getRefMatrix.html | 2 +- docs/reference/get_consensus.html | 12 +- docs/reference/ggsci_pal.html | 2 +- docs/reference/index.html | 31 ++- docs/reference/load_object.html | 2 +- docs/reference/save_object.html | 2 +- docs/reference/sdiv.html | 2 +- docs/search.json | 2 +- docs/sitemap.xml | 15 ++ inst/extdata/.~lock.Input_Example_local.csv# | 1 + inst/extdata/Input_Example_local.csv | 14 +- man/CalculateQuality.Rd | 6 +- man/VariantCorrelationHeatmap.Rd | 2 + man/VariantFisherTestHeatmap.Rd | 2 + man/VariantWiseCorrelation.Rd | 2 + man/VariantWiseFisherTest.Rd | 2 + man/char_to_numeric.Rd | 15 ++ man/computeAFMutMatrix.Rd | 3 +- man/getMutMatrix.Rd | 21 ++ man/get_consensus.Rd | 2 + tests/testthat.R | 12 ++ tests/testthat/test-char_to_numeric.R | 7 + tests/testthat/test-combine_NAMES.R | 6 + tests/testthat/test-ggsci_pal.R | 5 + vignettes/SiGURD.Rmd | 16 +- 107 files changed, 1145 insertions(+), 620 deletions(-) create mode 100644 R/char_to_numeric.R create mode 100644 R/getMutMatrix.R create mode 100644 docs/reference/GetVariantInfo.html create mode 100644 docs/reference/LoadingVCF_typewise.html create mode 100644 docs/reference/SetVariantInfo.html create mode 100644 docs/reference/char_to_numeric.html create mode 100644 docs/reference/getMutMatrix.html create mode 100644 inst/extdata/.~lock.Input_Example_local.csv# create mode 100644 man/char_to_numeric.Rd create mode 100644 man/getMutMatrix.Rd create mode 100644 tests/testthat.R create mode 100644 tests/testthat/test-char_to_numeric.R create mode 100644 tests/testthat/test-combine_NAMES.R create mode 100644 tests/testthat/test-ggsci_pal.R diff --git a/DESCRIPTION b/DESCRIPTION index 5a77fff..5eb263f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: sigurd Type: Package Title: Single cell Genotyping Using RNA Data -Version: 0.2.28 +Version: 0.2.29 Authors@R: c( person(given = "Martin", family = "Grasshoff", @@ -38,3 +38,6 @@ Imports: tidyverse, VariantAnnotation RoxygenNote: 7.2.3 +Suggests: + testthat (>= 3.0.0) +Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index cebd313..07501ee 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,10 +28,12 @@ export(VariantFisherTestHeatmap) export(VariantQuantileThresholding) export(VariantWiseCorrelation) export(VariantWiseFisherTest) +export(char_to_numeric) export(combine_NAMES) export(combine_SparseMatrix) export(computeAFMutMatrix) export(getAltMatrix) +export(getMutMatrix) export(getReadMatrix) export(getRefMatrix) export(get_consensus) @@ -52,14 +54,13 @@ import(circlize) import(data.table) import(dplyr) import(fastmatch) -import(ggplot2) import(ggsci) import(glue) import(grid) import(parallel) -import(rcompanion) import(scales) import(stats) import(tibble) -import(tidyr) import(tidyverse) +importFrom(BiocGenerics,start) +importFrom(GenomeInfoDb,seqnames) diff --git a/R/AmpliconSupplementing.R b/R/AmpliconSupplementing.R index cd3b936..eb07f8b 100644 --- a/R/AmpliconSupplementing.R +++ b/R/AmpliconSupplementing.R @@ -2,15 +2,14 @@ #' #'@description #'We replace the values from an scRNAseq experiment with values we have from an amplicon experiment. -#' -#'@import archive Matrix SummarizedExperiment VariantAnnotation +#'@import Matrix SummarizedExperiment #'@param scRNAseq The SummarizedExperiment object containing the scRNAseq data. #'@param amplicon The SummarizedExperiment object containing the amplicon data. #'@export AmpliconSupplementing <- function(scRNAseq, amplicon){ # We supplement the scRNAseq data with the amplicon data. print("We get the new meta data.") - new_meta_data <- S4Vectors::merge(colData(scRNAseq), colData(amplicon), by = "Cell", all.x = TRUE, all.y = TRUE, + new_meta_data <- S4Vectors::merge(SummarizedExperiment::colData(scRNAseq), SummarizedExperiment::colData(amplicon), by = "Cell", all.x = TRUE, all.y = TRUE, suffixes = c("scRNAseq", "Amplicon")) rownames(new_meta_data) <- new_meta_data$Cell # We add an AverageCoverage column to the new meta data. @@ -20,14 +19,14 @@ AmpliconSupplementing <- function(scRNAseq, amplicon){ amplicon_value <- na.omit(amplicon_value) new_meta_data[names(amplicon_value), "AverageCoverage"] <- amplicon_value - new_row_data <- merge(rowData(scRNAseq), rowData(amplicon), by = "VariantName", all.x = TRUE, all.y = TRUE, + new_row_data <- merge(SummarizedExperiment::rowData(scRNAseq), SummarizedExperiment::rowData(amplicon), by = "VariantName", all.x = TRUE, all.y = TRUE, suffixes = c("scRNAseq", "Amplicon")) rownames(new_row_data) <- new_row_data$VariantName if("VariantQualityscRNAseq" %in% colnames(new_row_data)){ # We add a VariantQuality column to the row data, showing the scRNAseq quality with the supplemented amplicon quality. # We do the same for the concordance and the depth. new_row_data$VariantQuality <- new_row_data$VariantQualityscRNAseq - amplicon_value <- rowData(amplicon)$VariantQuality + amplicon_value <- SummarizedExperiment::rowData(amplicon)$VariantQuality names(amplicon_value) <- rownames(amplicon) amplicon_value <- na.omit(amplicon_value) new_row_data[names(amplicon_value), "VariantQuality"] <- amplicon_value @@ -35,14 +34,14 @@ AmpliconSupplementing <- function(scRNAseq, amplicon){ if("ConcordancescRNAseq" %in% colnames(new_row_data)){ new_row_data$Concordance <- new_row_data$ConcordancescRNAseq - amplicon_value <- rowData(amplicon)$Concordance + amplicon_value <- SummarizedExperiment::rowData(amplicon)$Concordance names(amplicon_value) <- rownames(amplicon) amplicon_value <- na.omit(amplicon_value) new_row_data[names(amplicon_value), "Concordance"] <- amplicon_value } new_row_data$Depth <- new_row_data$DepthscRNAseq - amplicon_value <- rowData(amplicon)$Depth + amplicon_value <- SummarizedExperiment::rowData(amplicon)$Depth names(amplicon_value) <- rownames(amplicon) amplicon_value <- na.omit(amplicon_value) new_row_data[names(amplicon_value), "Depth"] <- amplicon_value @@ -64,25 +63,25 @@ AmpliconSupplementing <- function(scRNAseq, amplicon){ refs <- consensus print("We fill the output matrices.") - consensus[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(assays(scRNAseq)$consensus) - fraction[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(assays(scRNAseq)$fraction) - reads[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(assays(scRNAseq)$coverage) - alts[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(assays(scRNAseq)$alts) - refs[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(assays(scRNAseq)$refs) + consensus[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(SummarizedExperiment::assays(scRNAseq)$consensus) + fraction[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(SummarizedExperiment::assays(scRNAseq)$fraction) + reads[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(SummarizedExperiment::assays(scRNAseq)$coverage) + alts[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(SummarizedExperiment::assays(scRNAseq)$alts) + refs[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(SummarizedExperiment::assays(scRNAseq)$refs) print("We add the the amplicon information.") - consensus[rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$consensus) - fraction[rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$fraction) - reads[rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$coverage) - alts[rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$alts) - refs[rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$refs) + consensus[rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$consensus) + fraction[rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$fraction) + reads[rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$coverage) + alts[rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$alts) + refs[rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$refs) #print("We add the the amplicon information.") - #assays(scRNAseq)[["consensus"]][rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$consensus) - #assays(scRNAseq)[["fraction"]][rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$fraction) - #assays(scRNAseq)[["coverage"]][rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$coverage) + #SummarizedExperiment::assays(scRNAseq)[["consensus"]][rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$consensus) + #SummarizedExperiment::assays(scRNAseq)[["fraction"]][rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$fraction) + #SummarizedExperiment::assays(scRNAseq)[["coverage"]][rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$coverage) - se <- SummarizedExperiment(assays = list(consensus = as(consensus, "dgCMatrix"), fraction = as(fraction, "dgCMatrix"), coverage = as(reads, "dgCMatrix"), alts = as(alts, "dgCMatrix"), refs = as(refs, "dgCMatrix")), - colData = new_meta_data, rowData = new_row_data) + se <- SummarizedExperiment::SummarizedExperiment(assays = list(consensus = as(consensus, "dgCMatrix"), fraction = as(fraction, "dgCMatrix"), coverage = as(reads, "dgCMatrix"), alts = as(alts, "dgCMatrix"), refs = as(refs, "dgCMatrix")), + colData = new_meta_data, rowData = new_row_data) return(se) } diff --git a/R/CalculateAltReads.R b/R/CalculateAltReads.R index 0b0c165..de86fe6 100644 --- a/R/CalculateAltReads.R +++ b/R/CalculateAltReads.R @@ -6,20 +6,20 @@ #'@param chromosome_prefix List of matrices for the alternative reads. #'@export CalculateAltReads <- function(SE, chromosome_prefix = "chrM"){ - ref_allele <- as.character(rowRanges(SE)$refAllele) - reads_A <- assays(SE)[["A_counts_fw"]] + assays(SE)[["A_counts_rev"]] + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE)$refAllele) + reads_A <- SummarizedExperiment::assays(SE)[["A_counts_fw"]] + SummarizedExperiment::assays(SE)[["A_counts_rev"]] rownames(reads_A) <- paste0(chromosome_prefix, "_", 1:nrow(reads_A), "_", ref_allele, "_A") reads_A <- reads_A[ref_allele != "A",] - reads_C <- assays(SE)[["C_counts_fw"]] + assays(SE)[["C_counts_rev"]] + reads_C <- SummarizedExperiment::assays(SE)[["C_counts_fw"]] + SummarizedExperiment::assays(SE)[["C_counts_rev"]] rownames(reads_C) <- paste0(chromosome_prefix, "_", 1:nrow(reads_C), "_", ref_allele, "_C") reads_C <- reads_C[ref_allele != "C",] - reads_G <- assays(SE)[["G_counts_fw"]] + assays(SE)[["G_counts_rev"]] + reads_G <- SummarizedExperiment::assays(SE)[["G_counts_fw"]] + SummarizedExperiment::assays(SE)[["G_counts_rev"]] rownames(reads_G) <- paste0(chromosome_prefix, "_", 1:nrow(reads_G), "_", ref_allele, "_G") reads_G <- reads_G[ref_allele != "G",] - reads_T <- assays(SE)[["T_counts_fw"]] + assays(SE)[["T_counts_rev"]] + reads_T <- SummarizedExperiment::assays(SE)[["T_counts_fw"]] + SummarizedExperiment::assays(SE)[["T_counts_rev"]] rownames(reads_T) <- paste0(chromosome_prefix, "_", 1:nrow(reads_T), "_", ref_allele, "_T") reads_T <- reads_T[ref_allele != "T",] diff --git a/R/CalculateConsensus.R b/R/CalculateConsensus.R index 84172fb..622eaed 100644 --- a/R/CalculateConsensus.R +++ b/R/CalculateConsensus.R @@ -1,5 +1,5 @@ #'We calculate the consensus information from the MAEGATK results. -#'@import dplyr MatrixGenerics SummarizedExperiment +#'@import MatrixGenerics SummarizedExperiment #'@param SE SummarizedExperiment object. #'@param chromosome_prefix The chromosome name used as a prefix. #'@export @@ -11,7 +11,7 @@ CalculateConsensus <- function(SE, chromosome_prefix = "chrM"){ print("We get the read information per position.") letter <- c("A", "C", "G", "T") - ref_allele <- as.character(rowRanges(SE)$refAllele) + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE)$refAllele) reads <- lapply(letter, getReadMatrix, SE = SE, chromosome_prefix = chromosome_prefix) # Since we have always the same 4 bases, we get all possible combinations by assigning numeric values. # A = 8, C = 4, G = 2, T = 1. diff --git a/R/CalculateCorrelationPValue.R b/R/CalculateCorrelationPValue.R index 4f63056..830c3eb 100644 --- a/R/CalculateCorrelationPValue.R +++ b/R/CalculateCorrelationPValue.R @@ -27,7 +27,7 @@ CalculateCorrelationPValue <- function(variant_values, other_mutation, all_varia result <- c(NA,NA,NA,NA,NA,NA) return(result) } else if(length(variant_values) > 2){ - result <- cor.test(variant_values, other_variant_values) + result <- stats::cor.test(variant_values, other_variant_values) cells_som_alt <- sum(variant_values == 1) cells_som_ref <- sum(variant_values == 0) cells_MT_alt <- sum(other_variant_values == 1) diff --git a/R/CalculateCoverage.R b/R/CalculateCoverage.R index 9ef7f5a..4701587 100644 --- a/R/CalculateCoverage.R +++ b/R/CalculateCoverage.R @@ -6,8 +6,8 @@ #'@param chromosome_prefix List of matrices for the alternative reads. #'@export CalculateCoverage <- function(SE, chromosome_prefix = "chrM"){ - ref_allele <- as.character(rowRanges(SE)$refAllele) - coverage <- assays(SE)[["coverage"]] + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE)$refAllele) + coverage <- SummarizedExperiment::assays(SE)[["coverage"]] rownames(coverage) <- paste0(chromosome_prefix, "_", 1:nrow(coverage), "_", ref_allele, "_A") coverage_A <- coverage[ref_allele != "A",] diff --git a/R/CalculateFisherTestPValue.R b/R/CalculateFisherTestPValue.R index 3c7652e..442d8b2 100644 --- a/R/CalculateFisherTestPValue.R +++ b/R/CalculateFisherTestPValue.R @@ -39,7 +39,7 @@ CalculateFisherTestPValue <- function(variant_values, other_mutation, all_varian count_matrix[2,1] <- sum(variant_values == 1 & other_variant_values == 0) count_matrix[1,2] <- sum(variant_values == 0 & other_variant_values == 1) count_matrix[2,2] <- sum(variant_values == 0 & other_variant_values == 0) - result <- fisher.test(x = count_matrix) + result <- stats::fisher.test(x = count_matrix) result <- c(result$p.value, result$estimate, count_matrix[1,1], count_matrix[2,1], count_matrix[1,2], count_matrix[2,2]) } else{ #print("We do not have more than 2 cells for the somatic variant.") diff --git a/R/CalculateQuality.R b/R/CalculateQuality.R index 5467d23..0daf005 100644 --- a/R/CalculateQuality.R +++ b/R/CalculateQuality.R @@ -5,11 +5,11 @@ #'@param SE SummarizedExperiment object. #'@param chromosome_prefix List of matrices for the alternative reads. #'@export -CalculateQuality <- function(SE, variants = rownames(reads_alt), chromosome_prefix = "chrM"){ +CalculateQuality <- function(SE, variants, chromosome_prefix = "chrM"){ variants <- gsub(paste0(chromosome_prefix, "_"), "", variants) qualities <- lapply(c("A", "T", "C", "G"), function(x){ - fwrev <- cbind(assays(SE)[[paste0(x, "_counts_fw")]], assays(SE)[[paste0(x, "_counts_rev")]]) - qualities_fwrev <- cbind(assays(SE)[[paste0(x, "_qual_fw")]], assays(SE)[[paste0(x, "_qual_rev")]]) + fwrev <- cbind(SummarizedExperiment::assays(SE)[[paste0(x, "_counts_fw")]], SummarizedExperiment::assays(SE)[[paste0(x, "_counts_rev")]]) + qualities_fwrev <- cbind(SummarizedExperiment::assays(SE)[[paste0(x, "_qual_fw")]], SummarizedExperiment::assays(SE)[[paste0(x, "_qual_rev")]]) variants_use <- strsplit(variants, "") variants_use <- sapply(variants_use, tail, n = 1) variants_use <- variants_use == x diff --git a/R/CalculateStrandCorrelation.R b/R/CalculateStrandCorrelation.R index 1731609..ae3d434 100644 --- a/R/CalculateStrandCorrelation.R +++ b/R/CalculateStrandCorrelation.R @@ -6,74 +6,74 @@ #'@param chromosome_prefix List of matrices for the alternative reads. #'@export CalculateStrandCorrelation <- function(SE, chromosome_prefix = "chrM"){ - ref_allele <- as.character(rowRanges(SE)$refAllele) + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE)$refAllele) variants_A <- paste0(chromosome_prefix, "_", 1:length(ref_allele), "_", ref_allele, "_A") variants_A <- variants_A[ref_allele != "A"] - reads_A_fw <- assays(SE)[["A_counts_fw"]] - reads_A_rev <- assays(SE)[["A_counts_rev"]] + reads_A_fw <- SummarizedExperiment::assays(SE)[["A_counts_fw"]] + reads_A_rev <- SummarizedExperiment::assays(SE)[["A_counts_rev"]] rownames(reads_A_fw) <- paste0(chromosome_prefix, "_", 1:nrow(reads_A_fw), "_", ref_allele, "_A") rownames(reads_A_rev) <- paste0(chromosome_prefix, "_", 1:nrow(reads_A_rev), "_", ref_allele, "_A") reads_A_fw <- reads_A_fw[ref_allele != "A",] reads_A_rev <- reads_A_rev[ref_allele != "A",] - dt <- merge(data.table(summary(reads_A_fw)), - data.table(summary(reads_A_rev)), + dt <- merge(data.table::data.table(summary(reads_A_fw)), + data.table::data.table(summary(reads_A_rev)), by.x = c("i", "j"), by.y = c("i", "j"), all = TRUE)[x.x >0 | x.y >0] - dt <- data.table(variant = variants_A[dt[[1]]], - cell_id = dt[[2]], - fw = dt[[3]], rev = dt[[4]]) - cor_result_A <- dt[, .(cor = suppressWarnings(cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] + dt <- data.table::data.table(variant = variants_A[dt[[1]]], + cell_id = dt[[2]], + fw = dt[[3]], rev = dt[[4]]) + cor_result_A <- dt[, .(cor = suppressWarnings(stats::cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] variants_C <- paste0(chromosome_prefix, "_", 1:length(ref_allele), "_", ref_allele, "_C") variants_C <- variants_C[ref_allele != "C"] - reads_C_fw <- assays(SE)[["C_counts_fw"]] - reads_C_rev <- assays(SE)[["C_counts_rev"]] + reads_C_fw <- SummarizedExperiment::assays(SE)[["C_counts_fw"]] + reads_C_rev <- SummarizedExperiment::assays(SE)[["C_counts_rev"]] rownames(reads_C_fw) <- paste0(chromosome_prefix, "_", 1:nrow(reads_C_fw), "_", ref_allele, "_C") rownames(reads_C_rev) <- paste0(chromosome_prefix, "_", 1:nrow(reads_C_rev), "_", ref_allele, "_C") reads_C_fw <- reads_C_fw[ref_allele != "C",] reads_C_rev <- reads_C_rev[ref_allele != "C",] - dt <- merge(data.table(summary(reads_C_fw)), - data.table(summary(reads_C_rev)), + dt <- merge(data.table::data.table(summary(reads_C_fw)), + data.table::data.table(summary(reads_C_rev)), by.x = c("i", "j"), by.y = c("i", "j"), all = TRUE)[x.x >0 | x.y >0] - dt <- data.table(variant = variants_C[dt[[1]]], - cell_id = dt[[2]], - fw = dt[[3]], rev = dt[[4]]) - cor_result_C <- dt[, .(cor = suppressWarnings(cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] + dt <- data.table::data.table(variant = variants_C[dt[[1]]], + cell_id = dt[[2]], + fw = dt[[3]], rev = dt[[4]]) + cor_result_C <- dt[, .(cor = suppressWarnings(stats::cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] variants_G <- paste0(chromosome_prefix, "_", 1:length(ref_allele), "_", ref_allele, "_G") variants_G <- variants_G[ref_allele != "G"] - reads_G_fw <- assays(SE)[["G_counts_fw"]] - reads_G_rev <- assays(SE)[["G_counts_rev"]] + reads_G_fw <- SummarizedExperiment::assays(SE)[["G_counts_fw"]] + reads_G_rev <- SummarizedExperiment::assays(SE)[["G_counts_rev"]] rownames(reads_G_fw) <- paste0(chromosome_prefix, "_", 1:nrow(reads_G_fw), "_", ref_allele, "_G") rownames(reads_G_rev) <- paste0(chromosome_prefix, "_", 1:nrow(reads_G_rev), "_", ref_allele, "_G") reads_G_fw <- reads_G_fw[ref_allele != "G",] reads_G_rev <- reads_G_rev[ref_allele != "G",] - dt <- merge(data.table(summary(reads_G_fw)), - data.table(summary(reads_G_rev)), + dt <- merge(data.table::data.table(summary(reads_G_fw)), + data.table::data.table(summary(reads_G_rev)), by.x = c("i", "j"), by.y = c("i", "j"), all = TRUE)[x.x >0 | x.y >0] - dt <- data.table(variant = variants_G[dt[[1]]], - cell_id = dt[[2]], - fw = dt[[3]], rev = dt[[4]]) - cor_result_G <- dt[, .(cor = suppressWarnings(cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] + dt <- data.table::data.table(variant = variants_G[dt[[1]]], + cell_id = dt[[2]], + fw = dt[[3]], rev = dt[[4]]) + cor_result_G <- dt[, .(cor = suppressWarnings(stats::cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] variants_T <- paste0(chromosome_prefix, "_", 1:length(ref_allele), "_", ref_allele, "_T") variants_T <- variants_T[ref_allele != "T"] - reads_T_fw <- assays(SE)[["T_counts_fw"]] - reads_T_rev <- assays(SE)[["T_counts_rev"]] + reads_T_fw <- SummarizedExperiment::assays(SE)[["T_counts_fw"]] + reads_T_rev <- SummarizedExperiment::assays(SE)[["T_counts_rev"]] rownames(reads_T_fw) <- paste0(chromosome_prefix, "_", 1:nrow(reads_T_fw), "_", ref_allele, "_T") rownames(reads_T_rev) <- paste0(chromosome_prefix, "_", 1:nrow(reads_T_rev), "_", ref_allele, "_T") reads_T_fw <- reads_T_fw[ref_allele != "T",] reads_T_rev <- reads_T_rev[ref_allele != "T",] - dt <- merge(data.table(summary(reads_T_fw)), - data.table(summary(reads_T_rev)), + dt <- merge(data.table::data.table(summary(reads_T_fw)), + data.table::data.table(summary(reads_T_rev)), by.x = c("i", "j"), by.y = c("i", "j"), all = TRUE)[x.x >0 | x.y >0] - dt <- data.table(variant = variants_T[dt[[1]]], - cell_id = dt[[2]], - fw = dt[[3]], rev = dt[[4]]) - cor_result_T <- dt[, .(cor = suppressWarnings(cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] + dt <- data.table::data.table(variant = variants_T[dt[[1]]], + cell_id = dt[[2]], + fw = dt[[3]], rev = dt[[4]]) + cor_result_T <- dt[, .(cor = suppressWarnings(stats::cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] cor_results <- rbind(cor_result_A, cor_result_C, cor_result_G, cor_result_T) diff --git a/R/CombineSEobjects.R b/R/CombineSEobjects.R index a5de795..b57a600 100644 --- a/R/CombineSEobjects.R +++ b/R/CombineSEobjects.R @@ -8,21 +8,21 @@ #'@export CombineSEobjects <- function(se_somatic, se_MT, suffixes = c("_somatic", "_MT")){ # We check if the assays are equally named. - assay_names_somatic <- names(assays(se_somatic)) - assay_names_MT <- names(assays(se_MT)) + assay_names_somatic <- names(SummarizedExperiment::assays(se_somatic)) + assay_names_MT <- names(SummarizedExperiment::assays(se_MT)) if(!all(assay_names_somatic == assay_names_MT)){ stop("Your assays are not equally named or ordered.") } features <- combine_NAMES(names(se_somatic), names(se_MT)) cells <- combine_NAMES(colnames(se_somatic), colnames(se_MT)) - meta_data_somatic <- colData(se_somatic) - meta_data_MT <- colData(se_MT) + meta_data_somatic <- SummarizedExperiment::colData(se_somatic) + meta_data_MT <- SummarizedExperiment::colData(se_MT) meta_data <- merge(meta_data_somatic, meta_data_MT, by = "Cell", all = TRUE, suffixes = suffixes) meta_data <- meta_data[match(cells, meta_data$Cell),] - meta_row_somatic <- rowData(se_somatic) - meta_row_MT <- rowData(se_MT) + meta_row_somatic <- SummarizedExperiment::rowData(se_somatic) + meta_row_MT <- SummarizedExperiment::rowData(se_MT) if(ncol(meta_row_somatic) > 0 & ncol(meta_row_MT) > 0){ meta_row <- merge(meta_row_somatic, meta_row_MT, by = "VariantName", all = TRUE, suffixes = suffixes) meta_row <- meta_row[match(features, meta_row$VariantName),] @@ -31,7 +31,7 @@ CombineSEobjects <- function(se_somatic, se_MT, suffixes = c("_somatic", "_MT")) meta_row_somatic <- matrix(NA, nrow = nrow(meta_row_somatic), ncol = ncol(meta_row_MT)) rownames(meta_row_somatic) <- rownames(se_somatic) colnames(meta_row_somatic) <- colnames(meta_row_MT) - meta_row_somatic <- DataFrame(meta_row_somatic) + meta_row_somatic <- S4Vectors::DataFrame(meta_row_somatic) meta_row_somatic$VariantName <- rownames(meta_row_somatic) meta_row <- merge(meta_row_somatic, meta_row_MT, by = "VariantName", all = TRUE, suffixes = suffixes) meta_row <- meta_row[match(features, meta_row$VariantName),] @@ -39,31 +39,20 @@ CombineSEobjects <- function(se_somatic, se_MT, suffixes = c("_somatic", "_MT")) meta_row_MT <- matrix(NA, nrow = nrow(meta_row_MT), ncol = ncol(meta_row_somatic)) rownames(meta_row_MT) <- rownames(se_MT) colnames(meta_row_MT) <- colnames(meta_row_somatic) - meta_row_MT <- DataFrame(meta_row_MT) + meta_row_MT <- S4Vectors::DataFrame(meta_row_MT) meta_row_MT$VariantName <- rownames(meta_row_MT) meta_row <- merge(meta_row_somatic, meta_row_MT, by = "VariantName", all = TRUE, suffixes = suffixes) meta_row <- meta_row[match(features, meta_row$VariantName),] } - #assays_somatic <- assays(se_somatic) - #assays_somatic <- lapply(assays_somatic, as.matrix) - #assays_MT <- assays(se_MT) - #assays_MT <- lapply(assays_MT, as.matrix) - #assays_combined <- S4Vectors::mendoapply(BiocGenerics::combine, assays_somatic, assays_MT) - #assays_combined[[1]] <- as(assays_combined[[1]], "dgCMatrix") - #assays_combined[[2]] <- as(assays_combined[[2]], "dgCMatrix") - #assays_combined[[3]] <- as(assays_combined[[3]], "dgCMatrix") - #assays_combined[["consensus"]]@x[is.na(assays_combined[["consensus"]]@x)] <- 0 - #assays_combined[["fraction"]]@x[is.na(assays_combined[["fraction"]]@x)] <- 0 - #assays_combined[["coverage"]]@x[is.na(assays_combined[["coverage"]]@x)] <- 0 assays_combined <- lapply(assay_names_somatic, function(x){ - result <- combine_SparseMatrix(assays(se_somatic)[[x]], assays(se_MT)[[x]]) + result <- combine_SparseMatrix(SummarizedExperiment::assays(se_somatic)[[x]], SummarizedExperiment::assays(se_MT)[[x]]) }) names(assays_combined) <- assay_names_somatic - se_combined <- SummarizedExperiment(assays = assays_combined, - colData = meta_data, rowData = meta_row) + + se_combined <- SummarizedExperiment::SummarizedExperiment(assays = assays_combined, colData = meta_data, rowData = meta_row) return(se_combined) } diff --git a/R/Filtering.R b/R/Filtering.R index 67e286f..8f21242 100644 --- a/R/Filtering.R +++ b/R/Filtering.R @@ -56,6 +56,7 @@ Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold = } } + if(!is.null(fraction_threshold)){ if(any(fraction_threshold >= 1, fraction_threshold <= 0)){ stop("Your fraction threshold is not 0 < x < 1.") @@ -65,8 +66,8 @@ Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold = print(paste0("We do not set fractions between ", fraction_threshold, " and 1 to 1.")) print("This way, we retain the heterozygous information.") # Filtering using sparse matrices. - consensus_matrix <- assays(se)$consensus - fraction_matrix <- assays(se)$fraction + consensus_matrix <- SummarizedExperiment::assays(se)$consensus + fraction_matrix <- SummarizedExperiment::assays(se)$fraction position_matrix <- summary(fraction_matrix) position_matrix <- subset(position_matrix, x > 0 & x < fraction_threshold) # If no elements fall between 0 and the fraction_threshold, we do not have to change the matrices. @@ -74,8 +75,8 @@ Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold = ij <- as.matrix(position_matrix[, 1:2]) consensus_matrix[ij] <- reject_value_numeric fraction_matrix[ij] <- 0 - assays(se)$consensus <- consensus_matrix - assays(se)$fraction <- fraction_matrix + SummarizedExperiment::assays(se)$consensus <- consensus_matrix + SummarizedExperiment::assays(se)$fraction <- fraction_matrix } } @@ -89,11 +90,11 @@ Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold = if(reject_value == "NoCall") print("We set Alts, Refs and Coverage to 0.") if(reject_value == "Reference") print("We set Alts to 0 and adjust the Coverage.") # Filtering using sparse matrices. - consensus_matrix <- assays(se)$consensus - fraction_matrix <- assays(se)$fraction - coverage_matrix <- assays(se)$coverage - alts_matrix <- assays(se)$alts - refs_matrix <- assays(se)$refs + consensus_matrix <- SummarizedExperiment::assays(se)$consensus + fraction_matrix <- SummarizedExperiment::assays(se)$fraction + coverage_matrix <- SummarizedExperiment::assays(se)$coverage + alts_matrix <- SummarizedExperiment::assays(se)$alts + refs_matrix <- SummarizedExperiment::assays(se)$refs position_matrix <- summary(alts_matrix) position_matrix <- subset(position_matrix, x < alts_threshold) # If no elements fall between 0 and the alts_threshold, we do not have to change the matrices. @@ -111,28 +112,28 @@ Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold = refs_matrix[ij] <- 0 coverage_matrix[ij] <- 0 } - assays(se)$consensus <- consensus_matrix - assays(se)$fraction <- fraction_matrix - assays(se)$coverage <- coverage_matrix - assays(se)$alts <- alts_matrix - assays(se)$refs <- refs_matrix + SummarizedExperiment::assays(se)$consensus <- consensus_matrix + SummarizedExperiment::assays(se)$fraction <- fraction_matrix + SummarizedExperiment::assays(se)$coverage <- coverage_matrix + SummarizedExperiment::assays(se)$alts <- alts_matrix + SummarizedExperiment::assays(se)$refs <- refs_matrix } } print("We remove all the variants that are always NoCall.") - consensus_test <- assays(se)$consensus > 0 + consensus_test <- SummarizedExperiment::assays(se)$consensus > 0 keep_variants <- rowSums(consensus_test) > 0 se <- se[keep_variants,] print(paste0("We remove variants, that are not at least detected in ", min_cells_per_variant, " cells.")) - keep_variants <- rowSums(assays(se)$consensus >= 1) + keep_variants <- rowSums(SummarizedExperiment::assays(se)$consensus >= 1) keep_variants <- keep_variants >= min_cells_per_variant se <- se[keep_variants,] print(paste0("We remove all cells that are not >= 1 (Ref) for at least ", min_variants_per_cell, " variant.")) - consensus_test <- assays(se)$consensus >= 1 + consensus_test <- SummarizedExperiment::assays(se)$consensus >= 1 keep_cells <- colSums(consensus_test) > min_variants_per_cell se <- se[,keep_cells] return(se) diff --git a/R/GetCellInfoPerVariant.R b/R/GetCellInfoPerVariant.R index d3a423b..fc0d0e4 100644 --- a/R/GetCellInfoPerVariant.R +++ b/R/GetCellInfoPerVariant.R @@ -5,16 +5,15 @@ #'@export GetCellInfoPerVariant <- function(se, voi_ch){ print("Generate matrices with coverage, allele frequency and reference / variant reads") - cov_voi_mat <- assays(se)[["coverage"]][voi_ch,] - af_voi_mat <- assays(se)[["fraction"]][voi_ch,] + cov_voi_mat <- SummarizedExperiment::assays(se)[["coverage"]][voi_ch,] + af_voi_mat <- SummarizedExperiment::assays(se)[["fraction"]][voi_ch,] print("Add coverage and allele frequency info from variants of interest to cells_tib.") - cells_tib <- tibble(cell = colnames(se), - Mean_Cov = se$depth) + cells_tib <- tibble::tibble(cell = colnames(se), Mean_Cov = se$depth) for(voi in voi_ch){ cells_tib <- cells_tib %>% - left_join(as_tibble(assays(se)[["coverage"]][voi,], rownames = "cell"), by = "cell") %>% - left_join(as_tibble(assays(se)[["fraction"]][voi,], rownames = "cell"), by = "cell") + dplyr::left_join(tibble::as_tibble(SummarizedExperiment::assays(se)[["coverage"]][voi,], rownames = "cell"), by = "cell") %>% + dplyr::left_join(tibble::as_tibble(SummarizedExperiment::assays(se)[["fraction"]][voi,], rownames = "cell"), by = "cell") colnames(cells_tib) <- gsub("value.x", paste0("cov_", voi), colnames(cells_tib)) colnames(cells_tib) <- gsub("value.y", paste0("af_", voi), colnames(cells_tib)) } diff --git a/R/GetVariantInfo.R b/R/GetVariantInfo.R index 5b2a02f..63d0306 100644 --- a/R/GetVariantInfo.R +++ b/R/GetVariantInfo.R @@ -24,11 +24,11 @@ GetVariantInfo <- function(SE, information = "consensus", variants = NULL, cells stop(paste0("Only ", variants_check, " of ", length(variants), " are in the SE object.")) } # We check if the requested assay is actually present. - assay_check <- information %in% names(assays(SE)) + assay_check <- information %in% names(SummarizedExperiment::assays(SE)) if(!assay_check){ stop("The assay you wants is not present in the object.") } - res <- assays(SE)[[information]][variants, , drop = FALSE] + res <- SummarizedExperiment::assays(SE)[[information]][variants, , drop = FALSE] # We subset the result to only include the cells of interest. # We check if the cells vector is not NULL. if(!is.null(cells)){ diff --git a/R/HeatmapVOI.R b/R/HeatmapVOI.R index 0c79a1a..5e31c70 100644 --- a/R/HeatmapVOI.R +++ b/R/HeatmapVOI.R @@ -8,7 +8,7 @@ #'@export HeatmapVoi <- function(SE, voi, annotation_trait = NULL, column_title = NULL, remove_empty_cells = FALSE){ - fraction <- assays(SE)[["fraction"]][voi,] + fraction <- SummarizedExperiment::assays(SE)[["fraction"]][voi,] fraction[is.na(fraction)] <- 0 if(length(voi) == 1){ fraction <- t(as.matrix(fraction)) @@ -22,9 +22,9 @@ HeatmapVoi <- function(SE, voi, annotation_trait = NULL, column_title = NULL, re fraction <- fraction[,cell_check, drop = FALSE] } if(!is.null(annotation_trait)){ - colours_use <- scales::hue_pal(length(unique(colData(SE)[,annotation_trait]))) - names(colours_use) <- unique(colData(SE)[,annotation_trait]) - ha <- ComplexHeatmap::columnAnnotation(annotation_trait = colData(SE)[,annotation_trait], + colours_use <- scales::hue_pal(length(unique(SummarizedExperiment::colData(SE)[,annotation_trait]))) + names(colours_use) <- unique(SummarizedExperiment::colData(SE)[,annotation_trait]) + ha <- ComplexHeatmap::columnAnnotation(annotation_trait = SummarizedExperiment::colData(SE)[,annotation_trait], col = list(annotation_trait = colours_use)) } else if(is.null(annotation_trait)){ ha <- NULL @@ -35,16 +35,16 @@ HeatmapVoi <- function(SE, voi, annotation_trait = NULL, column_title = NULL, re column_title <- "Cells" } - heatmap_voi <- Heatmap(fraction, - column_title_gp = grid::gpar(fontsize = 20, fontface = "bold"), - row_title_gp = grid::gpar(fontsize = 20, fontface = "bold"), - row_names_gp = grid::gpar(fontsize = 20, fontface = "bold"), - col = circlize::colorRamp2(seq(0, round(max(fraction, na.rm = TRUE)), length.out = 9), - c("#FCFCFC","#FFEDB0","#FFDF5F","#FEC510","#FA8E24","#F14C2B","#DA2828","#BE2222","#A31D1D")), - show_row_names = T, show_column_names = F, cluster_columns = T, clustering_method_columns = "complete", cluster_rows = F, name = "VAF", - heatmap_legend_param = list(border = "#000000", grid_height = unit(10, "mm")), - bottom_annotation = ha, border = T, use_raster = T, - column_title = column_title, - row_title = "Variants") + heatmap_voi <- ComplexHeatmap::Heatmap(fraction, + column_title_gp = grid::gpar(fontsize = 20, fontface = "bold"), + row_title_gp = grid::gpar(fontsize = 20, fontface = "bold"), + row_names_gp = grid::gpar(fontsize = 20, fontface = "bold"), + col = circlize::colorRamp2(seq(0, round(max(fraction, na.rm = TRUE)), length.out = 9), + c("#FCFCFC","#FFEDB0","#FFDF5F","#FEC510","#FA8E24","#F14C2B","#DA2828","#BE2222","#A31D1D")), + show_row_names = T, show_column_names = F, cluster_columns = T, clustering_method_columns = "complete", cluster_rows = F, name = "VAF", + heatmap_legend_param = list(border = "#000000", grid_height = unit(10, "mm")), + bottom_annotation = ha, border = T, use_raster = T, + column_title = column_title, + row_title = "Variants") return(heatmap_voi) } diff --git a/R/LoadingMAEGATK_typewise.R b/R/LoadingMAEGATK_typewise.R index 7071597..a17c55b 100755 --- a/R/LoadingMAEGATK_typewise.R +++ b/R/LoadingMAEGATK_typewise.R @@ -42,13 +42,16 @@ LoadingMAEGATK_typewise <- function(samples_file, samples_path = NULL, patient, se_ls <- list() for(i in 1:nrow(samples_file)){ print(paste0("Loading sample ", i, " of ", nrow(samples_file))) - input_folder_use <- samples_file$input_folder[i] + input_file_use <- samples_file$input_path[i] sample_use <- samples_file$sample[i] + # We check if the file exists. + if(!file.exists(input_file_use)){ + stop(paste0("Error: the file ", input_file_use, " does not exist.")) + } + # We get the final output file for either mgatk or maegatk. - final_output_file <- list.files(paste0(input_folder_use, sample_use, "/final/"), full.names = TRUE) - final_output_file <- grep(paste0("maegtk.rds|maegatk.rds|mgatk.rds|", sample_use, ".rds"), final_output_file, value = TRUE) - se_ls[[sample_use]] <- load_object(final_output_file) + se_ls[[sample_use]] <- load_object(input_file_use) colnames(se_ls[[sample_use]]) <- paste0(sample_use, "_", colnames(se_ls[[sample_use]])) barcodes_use <- paste0(sample_use, "_", barcodes[[sample_use]][,1]) barcodes_use <- barcodes_use[barcodes_use %in% colnames(se_ls[[sample_use]])] @@ -63,7 +66,7 @@ LoadingMAEGATK_typewise <- function(samples_file, samples_path = NULL, patient, print("We get the allele frequency.") - fraction <- computeAFMutMatrix(se_merged, chromosome_prefix = chromosome_prefix) + fraction <- computeAFMutMatrix(SE = se_merged, chromosome_prefix = chromosome_prefix) print("We get the coverage information.") @@ -100,80 +103,23 @@ LoadingMAEGATK_typewise <- function(samples_file, samples_path = NULL, patient, print(paste0("We remove variants, which are not covered in at least ", min_cells, " cells .")) keep_variants <- rowSums(consensus >= 1) keep_variants <- keep_variants >= min_cells - # If we only have one cell or one variant, we loose the matrix. - cell_ids <- colnames(consensus) - variant_names <- names(keep_variants[keep_variants]) - # consensus <- consensus[keep_variants,] - # coverage <- coverage[keep_variants,] - # fraction <- fraction[keep_variants,] - # concordance <- concordance[keep_variants] - # reads_alt <- reads_alt[keep_variants,] - # reads_ref <- reads_ref[keep_variants,] - consensus <- consensus[keep_variants,] - consensus <- suppressWarnings(matrix(consensus, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(consensus) <- cell_ids - rownames(consensus) <- variant_names - consensus <- as(consensus, "dgCMatrix") - coverage <- coverage[keep_variants,] - coverage <- suppressWarnings(matrix(coverage, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(coverage) <- cell_ids - rownames(coverage) <- variant_names - coverage <- as(coverage, "dgCMatrix") - fraction <- fraction[keep_variants,] - fraction <- suppressWarnings(matrix(fraction, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(fraction) <- cell_ids - rownames(fraction) <- variant_names - fraction <- as(fraction, "dgCMatrix") - concordance <- concordance[keep_variants] + consensus <- consensus[keep_variants, , drop = FALSE] + coverage <- coverage[keep_variants, , drop = FALSE] + fraction <- fraction[keep_variants, , drop = FALSE] + concordance <- concordance[keep_variants] variant_quality <- variant_quality[keep_variants] - reads_alt <- reads_alt[keep_variants,] - reads_alt <- suppressWarnings(matrix(reads_alt, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(reads_alt) <- cell_ids - rownames(reads_alt) <- variant_names - reads_alt <- as(reads_alt, "dgCMatrix") - reads_ref <- reads_ref[keep_variants,] - reads_ref <- suppressWarnings(matrix(reads_ref, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(reads_ref) <- cell_ids - rownames(reads_ref) <- variant_names - reads_ref <- as(reads_ref, "dgCMatrix") + reads_alt <- reads_alt[keep_variants, , drop = FALSE] + reads_ref <- reads_ref[keep_variants, , drop = FALSE] print("We remove cells that are always NoCall.") consensus_test <- consensus > 0 keep_cells <- colSums(consensus_test) > 0 - # If we only have one cell or one variant, we loose the matrix. - cell_ids <- colnames(consensus) - variant_names <- names(keep_variants[keep_variants]) - # consensus <- consensus[,keep_cells] - # coverage <- coverage[,keep_cells] - # fraction <- fraction[,keep_cells] - # reads_alt <- reads_alt[,keep_cells] - # reads_ref <- reads_ref[,keep_cells] - consensus <- consensus[,keep_cells] - consensus <- suppressWarnings(matrix(consensus, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(consensus) <- cell_ids - rownames(consensus) <- variant_names - consensus <- as(consensus, "dgCMatrix") - coverage <- coverage[,keep_cells] - coverage <- suppressWarnings(matrix(coverage, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(coverage) <- cell_ids - rownames(coverage) <- variant_names - coverage <- as(coverage, "dgCMatrix") - fraction <- fraction[,keep_cells] - fraction <- suppressWarnings(matrix(fraction, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(fraction) <- cell_ids - rownames(fraction) <- variant_names - fraction <- as(fraction, "dgCMatrix") - reads_alt <- reads_alt[,keep_cells] - reads_alt <- suppressWarnings(matrix(reads_alt, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(reads_alt) <- cell_ids - rownames(reads_alt) <- variant_names - reads_alt <- as(reads_alt, "dgCMatrix") - reads_ref <- reads_ref[,keep_cells] - reads_ref <- suppressWarnings(matrix(reads_ref, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(reads_ref) <- cell_ids - rownames(reads_ref) <- variant_names - reads_ref <- as(reads_ref, "dgCMatrix") + consensus <- consensus[, keep_cells, drop = FALSE] + coverage <- coverage[, keep_cells, drop = FALSE] + fraction <- fraction[, keep_cells, drop = FALSE] + reads_alt <- reads_alt[, keep_cells, drop = FALSE] + reads_ref <- reads_ref[, keep_cells, drop = FALSE] # We check if the matrices are empty (0 cells, 0 variants). Then we simply return NULL. @@ -199,8 +145,8 @@ LoadingMAEGATK_typewise <- function(samples_file, samples_path = NULL, patient, rownames(meta_data_col) <- meta_data_col$Cell meta_data_row <- data.frame(VariantName = rownames(consensus), Concordance = concordance, VariantQuality = variant_quality, Depth = coverage_depth_per_variant) rownames(meta_data_row) <- meta_data_row$VariantName - se_output <- SummarizedExperiment(assays = list(consensus = consensus, fraction = fraction, coverage = coverage, alts = reads_alt, refs = reads_ref), - colData = meta_data_col, rowData = meta_data_row) + se_output <- SummarizedExperiment::SummarizedExperiment(assays = list(consensus = consensus, fraction = fraction, coverage = coverage, alts = reads_alt, refs = reads_ref), + colData = meta_data_col, rowData = meta_data_row) return(se_output) } } diff --git a/R/LoadingVCF_typewise.R b/R/LoadingVCF_typewise.R index 4d3cd37..0db7987 100644 --- a/R/LoadingVCF_typewise.R +++ b/R/LoadingVCF_typewise.R @@ -3,6 +3,8 @@ #'We load a cellwise pileup result from a VCF file. #' #'@import Matrix SummarizedExperiment VariantAnnotation +#'@importFrom GenomeInfoDb seqnames +#'@importFrom BiocGenerics start #'@param samples_path Path to the input folder. Must include a barcodes file. #'@param samples_file Path to the csv file with the samples to be loaded. #'@param vcf_path Path to the VCF file with the variants. @@ -33,25 +35,14 @@ LoadingVCF_typewise <- function(samples_file, samples_path = NULL, barcodes_path } - # A function to convert the heterozyguous/homozyguous information from the VCF to the consensus information from VarTrix. - # We define this function here, since we will not use it anywhere else. - # Maybe move it to a separate function. - char_to_numeric <- function(char_value) { - if(char_value == "1/1") return(2) - if(char_value %in% c("1/0", "0/1")) return(2) - if(char_value == "0/0") return(1) - return(0) - } - - print("We read in the cell barcodes output by CellRanger as a list.") barcodes <- lapply(samples_file$cells, read.table) names(barcodes) <- samples print("We read in the vcf file.") - vcf <- readVcf(vcf_path) - vcf_info <- info(vcf) + vcf <- VariantAnnotation::readVcf(vcf_path) + vcf_info <- VariantAnnotation::info(vcf) print("We load the VCF file.") @@ -61,14 +52,15 @@ LoadingVCF_typewise <- function(samples_file, samples_path = NULL, barcodes_path consensus_matrix_total <- c() for(i in 1:length(samples)){ print(paste0("Loading sample ", i, " of ", nrow(samples_file))) - input_folder_use <- samples_file$input_folder[i] + input_folder_use <- samples_file$input_path[i] sample_use <- samples_file$sample[i] # The cell barcodes and variants. cellbarcodes_use <- barcodes[[sample_use]] # We load the VCF file. - vcf_data <- paste0(input_folder_use, sample_use, "/cellSNP.cells.sorted.vcf.gz") + # vcf_data <- paste0(input_folder_use, sample_use, "/cellSNP.cells.sorted.vcf.gz") + vcf_data <- paste0(input_folder_use) depth_to_add <- VariantAnnotation::readGeno(vcf_data, "DP") depth_to_add[is.na(depth_to_add)] <- 0 rownames(depth_to_add) <- make.names(rownames(depth_to_add)) @@ -76,14 +68,14 @@ LoadingVCF_typewise <- function(samples_file, samples_path = NULL, barcodes_path depth_to_add <- as(depth_to_add, "sparseMatrix") reads_matrix_total <- cbind(reads_matrix_total, depth_to_add) - alts_to_add <- readGeno(vcf_data, "AD") + alts_to_add <- VariantAnnotation::readGeno(vcf_data, "AD") alts_to_add[is.na(alts_to_add)] <- 0 rownames(alts_to_add) <- make.names(rownames(alts_to_add)) colnames(alts_to_add) <- paste0(sample_use, "_", colnames(alts_to_add)) alts_to_add <- as(alts_to_add, "sparseMatrix") coverage_matrix_total <- cbind(coverage_matrix_total, alts_to_add) - consensus_to_add <- readGeno(vcf_data, "GT") + consensus_to_add <- VariantAnnotation::readGeno(vcf_data, "GT") consensus_to_add <- matrix(sapply(consensus_to_add, char_to_numeric), nrow = nrow(consensus_to_add), dimnames = list(make.names(rownames(consensus_to_add)), paste0(sample_use, "_", colnames(consensus_to_add)))) consensus_to_add <- as(consensus_to_add, "sparseMatrix") consensus_matrix_total <- cbind(consensus_matrix_total, consensus_to_add) @@ -95,12 +87,12 @@ LoadingVCF_typewise <- function(samples_file, samples_path = NULL, barcodes_path print("We generate more accessible names.") if(all(c("GENE", "AA", "CDS") %in% colnames(vcf_info))){ new_names <- paste0(vcf_info$GENE, "_", vcf_info$AA, "_", vcf_info$CDS) - names(new_names) <- make.names(paste0(as.character(rep(seqnames(vcf)@values, seqnames(vcf)@lengths)), ".", start(vcf), "_", as.character(ref(vcf)), ".", as.character(unlist(alt(vcf))))) + names(new_names) <- make.names(paste0(as.character(rep(GenomeInfoDb::seqnames(vcf)@values, GenomeInfoDb::seqnames(vcf)@lengths)), ".", BiocGenerics::start(vcf), "_", as.character(VariantAnnotation::ref(vcf)), ".", as.character(unlist(VariantAnnotation::alt(vcf))))) new_names <- new_names[rownames(ref_matrix_total)] } else{ new_names <- rownames(vcf_info) new_names <- gsub(":|\\/|\\?", "_", new_names) - names(new_names) <- make.names(paste0(as.character(rep(seqnames(vcf)@values, seqnames(vcf)@lengths)), ".", start(vcf), "_", as.character(ref(vcf)), ".", as.character(unlist(alt(vcf))))) + names(new_names) <- make.names(paste0(as.character(rep(GenomeInfoDb::seqnames(vcf)@values, GenomeInfoDb::seqnames(vcf)@lengths)), ".", BiocGenerics::start(vcf), "_", as.character(VariantAnnotation::ref(vcf)), ".", as.character(unlist(VariantAnnotation::alt(vcf))))) new_names <- new_names[rownames(ref_matrix_total)] } @@ -188,11 +180,11 @@ LoadingVCF_typewise <- function(samples_file, samples_path = NULL, barcodes_path print("We transform the sparse matrices to matrices, so we can calculate the fraction.") - coverage_matrix_total <- as.matrix(coverage_matrix_total) - ref_matrix_total <- as.matrix(ref_matrix_total) - consensus_matrix_total <- as.matrix(consensus_matrix_total) - reads_total <- coverage_matrix_total + ref_matrix_total - fraction_total <- coverage_matrix_total / reads_total + coverage_matrix_total <- as.matrix(coverage_matrix_total) + ref_matrix_total <- as.matrix(ref_matrix_total) + consensus_matrix_total <- as.matrix(consensus_matrix_total) + reads_total <- coverage_matrix_total + ref_matrix_total + fraction_total <- coverage_matrix_total / reads_total fraction_total[is.na(fraction_total)] <- 0 gc() @@ -213,11 +205,11 @@ LoadingVCF_typewise <- function(samples_file, samples_path = NULL, barcodes_path rownames(meta_data) <- meta_data$Cell meta_row <- data.frame(VariantName = rownames(consensus_matrix_total), Depth = coverage_depth_per_variant) rownames(meta_row) <- meta_row$VariantName - #se_merged <- SummarizedExperiment(assays = list(consensus = as(consensus_matrix_total, "dgCMatrix"), fraction = as(fraction_total, "dgCMatrix"), coverage = as(reads_total, "dgCMatrix")), - # colData = meta_data) - se_merged <- SummarizedExperiment(assays = list(consensus = as(consensus_matrix_total, "CsparseMatrix"), fraction = as(fraction_total, "CsparseMatrix"), coverage = as(reads_total, "CsparseMatrix"), - alts = as(coverage_matrix_total, "CsparseMatrix"), refs = as(ref_matrix_total, "CsparseMatrix")), - colData = meta_data, rowData = meta_row) + #se_merged <- SummarizedExperiment::SummarizedExperiment(assays = list(consensus = as(consensus_matrix_total, "dgCMatrix"), fraction = as(fraction_total, "dgCMatrix"), coverage = as(reads_total, "dgCMatrix")), + # colData = meta_data) + se_merged <- SummarizedExperiment::SummarizedExperiment(assays = list(consensus = as(consensus_matrix_total, "CsparseMatrix"), fraction = as(fraction_total, "CsparseMatrix"), coverage = as(reads_total, "CsparseMatrix"), + alts = as(coverage_matrix_total, "CsparseMatrix"), refs = as(ref_matrix_total, "CsparseMatrix")), + colData = meta_data, rowData = meta_row) return(se_merged) } } diff --git a/R/LoadingVarTrix_typewise.R b/R/LoadingVarTrix_typewise.R index 8b4da86..d1fb577 100644 --- a/R/LoadingVarTrix_typewise.R +++ b/R/LoadingVarTrix_typewise.R @@ -21,28 +21,18 @@ LoadingVarTrix_typewise <- function(samples_file, samples_path = NULL, barcodes_path = NULL, snp_path = NULL, vcf_path, patient, sample = NULL, type_use = "scRNAseq_Somatic", min_reads = NULL, min_cells = 2){ if(all(!is.null(samples_path), !is.null(barcodes_path), !is.null(sample), !is.null(snp_path))){ print(paste0("Loading the data for sample ", sample, ".")) - #samples <- list.files(samples_path) - #samples <- grep(patient, samples, value = TRUE) - - #barcodes_files <- list.files(path = samples_path, pattern = "barcodes") - #barcodes_files <- unlist(lapply(paste0(samples_path, samples, "/"), list.files, pattern = "barcodes", full.names = TRUE)) - - - #samples_file <- data.frame(patient = patient, sample = samples, input_folder = samples_path, cells = barcodes_files) - samples_file <- data.frame(patient = patient, sample = sample, input_folder = samples_path, cells = barcodes_path) + samples_file <- data.frame(patient = patient, sample = sample, input_path = samples_path, cells = barcodes_path) samples <- samples_file$sample } else{ print(paste0("Loading the data for patient ", patient, ".")) print("We read in the samples file.") samples_file <- read.csv(samples_file, stringsAsFactors = FALSE) - - + print("We subset to the patient of interest.") samples_file <- samples_file[grep("vartrix", samples_file$source, ignore.case = TRUE),] samples_file <- samples_file[samples_file$patient == patient,] samples_file <- samples_file[samples_file$type == type_use,] - - + print("We get the different samples.") samples <- samples_file$sample } @@ -52,7 +42,7 @@ LoadingVarTrix_typewise <- function(samples_file, samples_path = NULL, barcodes_ if(!is.null(snp_path)){ path_snps <- snp_path } else{ - path_snps <- paste0(samples_file$input_folder, "/SNV.loci.txt") + path_snps <- paste0(samples_file$input_path, "/SNV.loci.txt") } @@ -67,8 +57,8 @@ LoadingVarTrix_typewise <- function(samples_file, samples_path = NULL, barcodes_ print("We read in the vcf file.") - vcf <- readVcf(vcf_path) - vcf_info <- info(vcf) + vcf <- VariantAnnotation::readVcf(vcf_path) + vcf_info <- VariantAnnotation::info(vcf) print("We generate more accessible names.") @@ -86,7 +76,7 @@ LoadingVarTrix_typewise <- function(samples_file, samples_path = NULL, barcodes_ consensus_matrices <- list() for(i in 1:nrow(samples_file)){ print(paste0("Loading sample ", i, " of ", nrow(samples_file))) - input_folder_use <- samples_file$input_folder[i] + input_folder_use <- samples_file$input_path[i] sample_use <- samples_file$sample[i] # The cell barcodes and variants. @@ -153,96 +143,47 @@ LoadingVarTrix_typewise <- function(samples_file, samples_path = NULL, barcodes_ keep_variants <- rowSums(consensus_matrix_total >= 1) keep_variants <- keep_variants >= min_cells # If we only have one cell or one variant, we loose the matrix. - cell_ids <- colnames(consensus_matrix_total) - variant_names <- names(keep_variants[keep_variants]) - consensus_matrix_total <- consensus_matrix_total[keep_variants, ] - consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) - colnames(consensus_matrix_total) <- cell_ids - rownames(consensus_matrix_total) <- variant_names - consensus_matrix_total <- as(consensus_matrix_total, "dgCMatrix") - coverage_matrix_total <- coverage_matrix_total[keep_variants, ] - coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) - colnames(coverage_matrix_total) <- cell_ids - rownames(coverage_matrix_total) <- variant_names - coverage_matrix_total <- as(coverage_matrix_total, "dgCMatrix") - ref_matrix_total <- ref_matrix_total[keep_variants, ] - ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) - colnames(ref_matrix_total) <- cell_ids - rownames(ref_matrix_total) <- variant_names - ref_matrix_total <- as(ref_matrix_total, "dgCMatrix") - -# if(sum(keep_variants) > 1){ -# consensus_matrix_total <- consensus_matrix_total[keep_variants,] -# coverage_matrix_total <- coverage_matrix_total[keep_variants,] -# ref_matrix_total <- ref_matrix_total[keep_variants,] -# } else if(sum(keep_variants) == 1){ -# cell_ids <- colnames(consensus_matrix_total) -# variant_names <- names(keep_variants[keep_variants]) -# consensus_matrix_total <- consensus_matrix_total[keep_variants,] -# consensus_matrix_total <- matrix(consensus_matrix_total, nrow = 1, ncol = length(consensus_matrix_total)) -# rownames(consensus_matrix_total) <- variant_names -# colnames(consensus_matrix_total) <- cell_ids -# consensus_matrix_total <- as(consensus_matrix_total, "dgCMatrix") -# coverage_matrix_total <- coverage_matrix_total[keep_variants,] -# coverage_matrix_total <- matrix(coverage_matrix_total, nrow = 1, ncol = length(coverage_matrix_total)) -# rownames(coverage_matrix_total) <- variant_names -# colnames(coverage_matrix_total) <- cell_ids -# coverage_matrix_total <- as(coverage_matrix_total, "dgCMatrix") -# ref_matrix_total <- ref_matrix_total[keep_variants,] -# ref_matrix_total <- matrix(ref_matrix_total, nrow = 1, ncol = length(ref_matrix_total)) -# rownames(ref_matrix_total) <- variant_names -# colnames(ref_matrix_total) <- cell_ids -# ref_matrix_total <- as(ref_matrix_total, "dgCMatrix") -# rm(cell_ids, variant_names) -# } + #cell_ids <- colnames(consensus_matrix_total) + #variant_names <- names(keep_variants[keep_variants]) + consensus_matrix_total <- consensus_matrix_total[keep_variants, , drop = FALSE] + #consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + #colnames(consensus_matrix_total) <- cell_ids + #rownames(consensus_matrix_total) <- variant_names + #consensus_matrix_total <- as(consensus_matrix_total, "dgCMatrix") + coverage_matrix_total <- coverage_matrix_total[keep_variants, , drop = FALSE] + #coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + #colnames(coverage_matrix_total) <- cell_ids + #rownames(coverage_matrix_total) <- variant_names + #coverage_matrix_total <- as(coverage_matrix_total, "dgCMatrix") + ref_matrix_total <- ref_matrix_total[keep_variants, , drop = FALSE] + #ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + #colnames(ref_matrix_total) <- cell_ids + #rownames(ref_matrix_total) <- variant_names + #ref_matrix_total <- as(ref_matrix_total, "dgCMatrix") print("We remove cells that are always NoCall.") consensus_test <- consensus_matrix_total > 0 keep_cells <- colSums(consensus_test) > 0 # If we only have one cell or one variant, we loose the matrix. - cell_ids <- names(keep_cells[keep_cells]) - variant_names <- rownames(consensus_matrix_total) - consensus_matrix_total <- consensus_matrix_total[, keep_cells] - consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) - colnames(consensus_matrix_total) <- cell_ids - rownames(consensus_matrix_total) <- variant_names - consensus_matrix_total <- as(consensus_matrix_total, "dgCMatrix") - coverage_matrix_total <- coverage_matrix_total[, keep_cells] - coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) - colnames(coverage_matrix_total) <- cell_ids - rownames(coverage_matrix_total) <- variant_names - coverage_matrix_total <- as(coverage_matrix_total, "dgCMatrix") - ref_matrix_total <- ref_matrix_total[, keep_cells] - ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) - colnames(ref_matrix_total) <- cell_ids - rownames(ref_matrix_total) <- variant_names - ref_matrix_total <- as(ref_matrix_total, "dgCMatrix") - -# if(sum(keep_cells) > 1){ -# consensus_matrix_total <- consensus_matrix_total[, keep_cells] -# coverage_matrix_total <- coverage_matrix_total[, keep_cells] -# ref_matrix_total <- ref_matrix_total[, keep_cells] -# } else if(sum(keep_cells) == 1){ -# cell_ids <- names(keep_cells[keep_cells]) -# variant_names <- rownames(consensus_matrix_total) -# consensus_matrix_total <- consensus_matrix_total[,keep_cells] -# consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = 1) -# rownames(consensus_matrix_total) <- variant_names -# colnames(consensus_matrix_total) <- cell_ids -# consensus_matrix_total <- as(consensus_matrix_total, "dgCMatrix") -# coverage_matrix_total <- coverage_matrix_total[,keep_cells] -# coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = 1) -# rownames(coverage_matrix_total) <- variant_names -# colnames(coverage_matrix_total) <- cell_ids -# coverage_matrix_total <- as(coverage_matrix_total, "dgCMatrix") -# ref_matrix_total <- ref_matrix_total[, keep_cells] -# ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = 1) -# rownames(ref_matrix_total) <- variant_names -# colnames(ref_matrix_total) <- cell_ids -# ref_matrix_total <- as(ref_matrix_total, "dgCMatrix") -# rm(cell_ids, variant_names) -# } + #cell_ids <- names(keep_cells[keep_cells]) + #variant_names <- rownames(consensus_matrix_total) + consensus_matrix_total <- consensus_matrix_total[, keep_cells, drop = FALSE] + #consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + #colnames(consensus_matrix_total) <- cell_ids + #rownames(consensus_matrix_total) <- variant_names + #consensus_matrix_total <- as(consensus_matrix_total, "dgCMatrix") + coverage_matrix_total <- coverage_matrix_total[, keep_cells, drop = FALSE] + #coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + #colnames(coverage_matrix_total) <- cell_ids + #rownames(coverage_matrix_total) <- variant_names + #coverage_matrix_total <- as(coverage_matrix_total, "dgCMatrix") + ref_matrix_total <- ref_matrix_total[, keep_cells, drop = FALSE] + #ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + #colnames(ref_matrix_total) <- cell_ids + #rownames(ref_matrix_total) <- variant_names + #ref_matrix_total <- as(ref_matrix_total, "dgCMatrix") + print(paste0(type_use, " Variants: ", nrow(consensus_matrix_total))) print(paste0(type_use, " Cells: ", ncol(consensus_matrix_total))) @@ -252,32 +193,12 @@ LoadingVarTrix_typewise <- function(samples_file, samples_path = NULL, barcodes_ print("We transform the sparse matrices to matrices, so we can calculate the fraction.") - # For test purposes - #coverage_matrix_total_ori <- coverage_matrix_total - #ref_matrix_total_ori <- ref_matrix_total - #consensus_matrix_total_ori <- consensus_matrix_total - #coverage_matrix_total <- coverage_matrix_total_ori - #ref_matrix_total <- ref_matrix_total_ori - #consensus_matrix_total <- consensus_matrix_total_ori - - #coverage_matrix_total <- coverage_matrix_total[1:5000,1:5000] - #ref_matrix_total <- ref_matrix_total[1:5000,1:5000] - #consensus_matrix_total <- coverage_matrix_total[1:5000,1:5000] - #colnames(coverage_matrix_total) <- make.names(colnames(coverage_matrix_total)) - #rownames(coverage_matrix_total) <- make.names(rownames(coverage_matrix_total)) - #colnames(ref_matrix_total) <- make.names(colnames(ref_matrix_total)) - #rownames(ref_matrix_total) <- make.names(rownames(ref_matrix_total)) - #colnames(consensus_matrix_total) <- make.names(colnames(consensus_matrix_total)) - #rownames(consensus_matrix_total) <- make.names(rownames(consensus_matrix_total)) - coverage_matrix_total <- as.matrix(coverage_matrix_total) - ref_matrix_total <- as.matrix(ref_matrix_total) - consensus_matrix_total <- as.matrix(consensus_matrix_total) - reads_total <- coverage_matrix_total + ref_matrix_total - fraction_total <- coverage_matrix_total / reads_total + coverage_matrix_total <- as.matrix(coverage_matrix_total) + ref_matrix_total <- as.matrix(ref_matrix_total) + consensus_matrix_total <- as.matrix(consensus_matrix_total) + reads_total <- coverage_matrix_total + ref_matrix_total + fraction_total <- coverage_matrix_total / reads_total fraction_total[is.na(fraction_total)] <- 0 - #fraction_total <- sdiv(X = coverage_matrix_total, Y = reads_total, - # names = dimnames(coverage_matrix_total)) - #rm(coverage_matrix_total, ref_matrix_total) gc() @@ -297,11 +218,9 @@ LoadingVarTrix_typewise <- function(samples_file, samples_path = NULL, barcodes_ rownames(meta_data) <- meta_data$Cell meta_row <- data.frame(VariantName = rownames(consensus_matrix_total), Depth = coverage_depth_per_variant) rownames(meta_row) <- meta_row$VariantName - #se_merged <- SummarizedExperiment(assays = list(consensus = as(consensus_matrix_total, "dgCMatrix"), fraction = as(fraction_total, "dgCMatrix"), coverage = as(reads_total, "dgCMatrix")), - # colData = meta_data) - se_merged <- SummarizedExperiment(assays = list(consensus = as(consensus_matrix_total, "CsparseMatrix"), fraction = as(fraction_total, "CsparseMatrix"), coverage = as(reads_total, "CsparseMatrix"), - alts = as(coverage_matrix_total, "CsparseMatrix"), refs = as(ref_matrix_total, "CsparseMatrix")), - colData = meta_data, rowData = meta_row) + se_merged <- SummarizedExperiment::SummarizedExperiment(assays = list(consensus = as(consensus_matrix_total, "CsparseMatrix"), fraction = as(fraction_total, "CsparseMatrix"), coverage = as(reads_total, "CsparseMatrix"), + alts = as(coverage_matrix_total, "CsparseMatrix"), refs = as(ref_matrix_total, "CsparseMatrix")), + colData = meta_data, rowData = meta_row) return(se_merged) } } diff --git a/R/RowWiseSplit.R b/R/RowWiseSplit.R index bac1734..1fb5b39 100644 --- a/R/RowWiseSplit.R +++ b/R/RowWiseSplit.R @@ -9,8 +9,8 @@ #'@param remove_nocalls Do you want to remove NoCall cells? #'@export RowWiseSplit <- function(se, n_cores = 1, remove_nocalls = TRUE){ - consensus <- assays(se)$consensus - consensus_list <- mclapply(rownames(se), SeparatingMatrixToList, total_matrix = consensus, remove_nocalls = remove_nocalls, mc.cores = n_cores) + consensus <- SummarizedExperiment::assays(se)$consensus + consensus_list <- parallel::mclapply(rownames(se), SeparatingMatrixToList, total_matrix = consensus, remove_nocalls = remove_nocalls, mc.cores = n_cores) names(consensus_list) <- rownames(se) return(consensus_list) } diff --git a/R/SetVariantInfo.R b/R/SetVariantInfo.R index 584ea06..0763850 100644 --- a/R/SetVariantInfo.R +++ b/R/SetVariantInfo.R @@ -28,7 +28,7 @@ SetVariantInfo <- function(SE, seurat_object, information = "consensus", variant if(!assay_check){ stop("The assay you wants is not present in the object.") } - res <- t(assays(SE)[[information]][variants, , drop = FALSE]) + res <- t(SummarizedExperiment::assays(SE)[[information]][variants, , drop = FALSE]) # We check if all the cells are actually in the Seurat object. # If not, we only add the information for the ones present. # We execute an error function if there are zero cells present. diff --git a/R/VariantBurden.R b/R/VariantBurden.R index 208dab1..5fbc46e 100644 --- a/R/VariantBurden.R +++ b/R/VariantBurden.R @@ -6,7 +6,7 @@ #'@param se SummarizedExperiment object #'@export VariantBurden <- function(se){ - burden <- colSums(assays(se)[["fraction"]]) - colData(se)[,"Burden"] <- burden + burden <- colSums(SummarizedExperiment::assays(se)[["fraction"]]) + SummarizedExperiment::colData(se)[,"Burden"] <- burden return(se) } diff --git a/R/VariantCloneSizeThresholding.R b/R/VariantCloneSizeThresholding.R index f1a4c24..759710d 100644 --- a/R/VariantCloneSizeThresholding.R +++ b/R/VariantCloneSizeThresholding.R @@ -2,7 +2,7 @@ #'@description #'We get variants of interest using a clone size thresholding. #'Source: https://github.com/petervangalen/MAESTER-2021 -#'@import dplyr Matrix SummarizedExperiment tidyverse +#'@import Matrix SummarizedExperiment tidyverse #'@param se SummarizedExperiment object. #'@param min_coverage Minimum coverage a variant needs to have. #'@param fraction_negative_cells The fraction of negative cells needed. @@ -10,33 +10,23 @@ #'@param vaf_threshold Variant Allele Threshold. Cells above this threshold are considered mutated. #'@export VariantCloneSizeThresholding <- function(se, min_coverage = 2, fraction_negative_cells = 0.9, min_clone_size = 10, vaf_threshold = 0.5){ - # This function is adapted from the Peter van Galen. print("Get the mean allele frequency and coverage.") - mean_af <- rowMeans(assays(se)[["fraction"]], na.rm = TRUE) - mean_cov <- rowMeans(assays(se)[["coverage"]], na.rm = TRUE) + mean_af <- rowMeans(SummarizedExperiment::assays(se)[["fraction"]], na.rm = TRUE) + mean_cov <- rowMeans(SummarizedExperiment::assays(se)[["coverage"]], na.rm = TRUE) print("Collect all information in a tibble") - #vars_tib <- as_tibble(do.call(cbind, c(list(mean_af), list(mean_cov))), rownames = "var") vars <- do.call(cbind, c(list(mean_af), list(mean_cov))) - #colnames(vars_tib)[2] <- "mean_af" - #colnames(vars_tib)[3] <- "mean_cov" colnames(vars) <- c("mean_af", "mean_cov") print("We add the number of cells that exceed the VAF thresholds.") - #vars_tib <- vars_tib %>% - # mutate(n0 = apply(assays(se)[["fraction"]], 1, function(x) sum(x > vaf_threshold, na.rm = TRUE))) %>% - # mutate(VAF_threshold = apply(assays(se)[["fraction"]], 1, function(x) sum(x > vaf_threshold, na.rm = TRUE))) - n0 <- apply(assays(se)[["fraction"]], 1, function(x) sum(x > vaf_threshold, na.rm = TRUE)) - VAF_threshold <- apply(assays(se)[["fraction"]], 1, function(x) sum(x > vaf_threshold, na.rm = TRUE)) + n0 <- apply(SummarizedExperiment::assays(se)[["fraction"]], 1, function(x) sum(x > vaf_threshold, na.rm = TRUE)) + VAF_threshold <- apply(SummarizedExperiment::assays(se)[["fraction"]], 1, function(x) sum(x > vaf_threshold, na.rm = TRUE)) vars <- cbind(vars, n0, VAF_threshold) print("Thresholding using the clone size approach.") - #voi_ch <- subset(vars_tib, mean_cov > min_coverage & - # n0 > ceiling(fraction_negative_cells * ncol(se)) & - # VAF_threshold > min_clone_size)$var voi_ch <- subset(vars, mean_cov > min_coverage & - n0 > ceiling(fraction_negative_cells * ncol(se)) & - VAF_threshold > min_clone_size) + n0 > ceiling(fraction_negative_cells * ncol(se)) & + VAF_threshold > min_clone_size) voi_ch <- rownames(voi_ch) return(voi_ch) } diff --git a/R/VariantCorrelationHeatmap.R b/R/VariantCorrelationHeatmap.R index 7892988..7dbbc27 100644 --- a/R/VariantCorrelationHeatmap.R +++ b/R/VariantCorrelationHeatmap.R @@ -1,7 +1,9 @@ #'VariantCorrelationHeatmap #'@description #'We generate a heatmap showing the correlation of somatic variants with the MT variants. -#'@import circlize ComplexHeatmap ggplot2 Matrix parallel rcompanion tidyr grid +#'Packages I want to remove. I cannot see where they are used. +#'ggplot2 parallel rcompanion tidyr +#'@import circlize ComplexHeatmap Matrix grid #'@param correlation_results Data.frame with the correlation results. #'@param output_path Path to the output folder. #'@param patient The patient for this heatmap. @@ -38,7 +40,7 @@ VariantCorrelationHeatmap <- function(correlation_results, output_path = NULL, p pvalue_max <- max(pvalue_max, 100) } correlation_results$P_adj_logged[correlation_results$P_adj_logged == Inf] <- pvalue_max - col_fun <- colorRamp2(c(0,pvalue_max), c("white", "red")) + col_fun <- circlize::colorRamp2(c(0,pvalue_max), c("white", "red")) print("We set insignificant P values to NA.") @@ -66,16 +68,16 @@ VariantCorrelationHeatmap <- function(correlation_results, output_path = NULL, p print("Since we can have no results left after the subsetting, we check if the P value matrix has values.") if(all(dim(p_values) > 0)){ print("Generating the actual heat map.") - p1 <- Heatmap(p_values, name = "-log10(P)", - column_title = paste0("Patient ", patient, "\nLogged adj. P values between the mutations"), - row_title = "", show_row_names = TRUE, show_column_names = TRUE, - col = col_fun, left_annotation = annotation_left, top_annotation = annotation_top, - column_title_gp = grid::gpar(fontsize = 40), row_title_gp = grid::gpar(fontsize = 40), - column_names_gp = grid::gpar(fontsize = 40), row_names_gp = grid::gpar(fontsize = 40), - column_names_rot = 45, - row_names_side = "left", - heatmap_legend_param = list(labels_gp = gpar(fontsize = 40), title_gp = gpar(fontsize = 40, fontface = "bold")), - cluster_columns = FALSE, cluster_rows = FALSE, use_raster = FALSE, show_row_dend = FALSE, show_column_dend = FALSE) + p1 <- ComplexHeatmap::Heatmap(p_values, name = "-log10(P)", + column_title = paste0("Patient ", patient, "\nLogged adj. P values between the mutations"), + row_title = "", show_row_names = TRUE, show_column_names = TRUE, + col = col_fun, left_annotation = annotation_left, top_annotation = annotation_top, + column_title_gp = grid::gpar(fontsize = 40), row_title_gp = grid::gpar(fontsize = 40), + column_names_gp = grid::gpar(fontsize = 40), row_names_gp = grid::gpar(fontsize = 40), + column_names_rot = 45, + row_names_side = "left", + heatmap_legend_param = list(labels_gp = grid::gpar(fontsize = 40), title_gp = grid::gpar(fontsize = 40, fontface = "bold")), + cluster_columns = FALSE, cluster_rows = FALSE, use_raster = FALSE, show_row_dend = FALSE, show_column_dend = FALSE) if(!is.null(output_path)){ diff --git a/R/VariantFisherTestHeatmap.R b/R/VariantFisherTestHeatmap.R index 3efe177..28b1450 100644 --- a/R/VariantFisherTestHeatmap.R +++ b/R/VariantFisherTestHeatmap.R @@ -1,7 +1,9 @@ #'VariantFisherTestHeatmap #'@description #'We generate a heatmap showing the Fisher test of somatic variants with the MT variants. -#'@import circlize ComplexHeatmap ggplot2 Matrix parallel rcompanion tidyr grid +#'Packages I want to remove. +#'ggplot2 parallel rcompanion tidyr +#'@import circlize ComplexHeatmap Matrix grid #'@param fisher_results Data.frame with the correlation results. #'@param patient The patient for this heatmap. #'@param min_alt_cells Minimum number of mutated cells needed, otherwise an association will not be plotted. @@ -12,13 +14,13 @@ VariantFisherTestHeatmap <- function(fisher_results, patient, min_alt_cells = 5, fisher_results <- subset(fisher_results, P_adj_logged > -log10(0.05)) fisher_results <- subset(fisher_results, Cells_Alt_1_2 >= min_alt_cells) fisher_results <- subset(fisher_results, OddsRatio > min_oddsratio) - - + + print("We get the unique variants.") somatic_uniques <- unique(fisher_results$Variant1) mt_uniques <- unique(fisher_results$Variant2) - - + + print("Getting the maximum P value.") pvalue_max <- as.numeric(na.omit(fisher_results$P_adj_logged)) if(length(pvalue_max) > 1){ @@ -33,7 +35,7 @@ VariantFisherTestHeatmap <- function(fisher_results, patient, min_alt_cells = 5, pvalue_max <- max(pvalue_max, 100) } fisher_results$P_adj_logged[fisher_results$P_adj_logged == Inf] <- pvalue_max - col_fun <- colorRamp2(c(0,pvalue_max), c("white", "red")) + col_fun <- circlize::colorRamp2(c(0,pvalue_max), c("white", "red")) print("We set insignificant P values to NA.") @@ -48,7 +50,7 @@ VariantFisherTestHeatmap <- function(fisher_results, patient, min_alt_cells = 5, fisher_results_subset <- subset(fisher_results, Variant1 == somatic_uniques[i]) p_values_use <- fisher_results_subset$P_adj_logged names(p_values_use) <- fisher_results_subset$Variant2 - p_values[somatic_uniques[i],names(p_values_use)] <- p_values_use + p_values[somatic_uniques[i], names(p_values_use)] <- p_values_use } @@ -61,13 +63,13 @@ VariantFisherTestHeatmap <- function(fisher_results, patient, min_alt_cells = 5, print("Since we can have no results left after the subsetting, we check if the P value matrix has values.") if(all(dim(p_values) > 0)){ print("Generating the actual heat map.") - p <- Heatmap(p_values, name = "-log10(P)", - column_title = paste0("Patient ", patient, "\nLogged adj. P values between the variants"), - row_title = "", show_row_names = TRUE, show_column_names = TRUE, - col = col_fun, left_annotation = annotation_left, top_annotation = annotation_top, - column_names_rot = 45, row_names_side = "left", - column_names_gp = grid::gpar(hjust = 1), - cluster_columns = FALSE, cluster_rows = FALSE, use_raster = FALSE, show_row_dend = FALSE, show_column_dend = FALSE) + p <- ComplexHeatmap::Heatmap(p_values, name = "-log10(P)", + column_title = paste0("Patient ", patient, "\nLogged adj. P values between the variants"), + row_title = "", show_row_names = TRUE, show_column_names = TRUE, + col = col_fun, left_annotation = annotation_left, top_annotation = annotation_top, + column_names_rot = 45, row_names_side = "left", + column_names_gp = grid::gpar(hjust = 1), + cluster_columns = FALSE, cluster_rows = FALSE, use_raster = FALSE, show_row_dend = FALSE, show_column_dend = FALSE) } return(p) } diff --git a/R/VariantQuantileThresholding.R b/R/VariantQuantileThresholding.R index a9e3b97..60a63ae 100755 --- a/R/VariantQuantileThresholding.R +++ b/R/VariantQuantileThresholding.R @@ -21,28 +21,27 @@ VariantQuantileThresholding <- function(SE, min_coverage = 2, quantiles = c(0.1, 0.9), thresholds = c(0.1, 0.9), top_cells = NULL, top_VAF = NULL, min_quality = NULL, mean_allele_frequency = 0, group_of_interest = NULL, group1 = NULL, group2 = NULL, group_factor = NULL){ print("Get the mean allele frequency and coverage.") - mean_af <- rowMeans(assays(SE)[["fraction"]], na.rm = TRUE) - mean_cov <- rowMeans(assays(SE)[["coverage"]], na.rm = TRUE) + mean_af <- rowMeans(SummarizedExperiment::assays(SE)[["fraction"]], na.rm = TRUE) + mean_cov <- rowMeans(SummarizedExperiment::assays(SE)[["coverage"]], na.rm = TRUE) if(all(is.null(min_quality), is.numeric(min_quality))) stop("Error: Your minimum quality is not either NULL or a numeric.") if(all(is.null(mean_allele_frequency), is.numeric(mean_allele_frequency))) stop("Error: Your mean allele frequency is not either NULL or a numeric.") if(all(!is.null(group_of_interest), !is.null(group1), !is.null(group2))){ - if(!group_of_interest %in% colnames(colData(SE))) stop("Error: Your group_of_interest is not in the colData.") - if(!group1 %in% colData(SE)[,group_of_interest]) stop("Error: Your group1 is not in the group_of_interest.") - if(!group2 %in% colData(SE)[,group_of_interest]) stop("Error: Your group2 is not in the group_of_interest.") - cells_group1 <- colData(SE)[,group_of_interest, drop = FALSE] + if(!group_of_interest %in% colnames(SummarizedExperiment::colData(SE))) stop("Error: Your group_of_interest is not in the colData.") + if(!group1 %in% SummarizedExperiment::colData(SE)[, group_of_interest]) stop("Error: Your group1 is not in the group_of_interest.") + if(!group2 %in% SummarizedExperiment::colData(SE)[, group_of_interest]) stop("Error: Your group2 is not in the group_of_interest.") + cells_group1 <- SummarizedExperiment::colData(SE)[, group_of_interest, drop = FALSE] cells_group1 <- cells_group1[cells_group1[, group_of_interest] == group1, , drop = FALSE] - cells_group2 <- colData(SE)[,group_of_interest, drop = FALSE] + cells_group2 <- SummarizedExperiment::colData(SE)[, group_of_interest, drop = FALSE] cells_group2 <- cells_group2[cells_group2[, group_of_interest] == group2, , drop = FALSE] - mean_af_group1 <- rowMeans(assays(SE)[["fraction"]][,rownames(cells_group1)], na.rm = TRUE) - mean_af_group2 <- rowMeans(assays(SE)[["fraction"]][,rownames(cells_group2)], na.rm = TRUE) + mean_af_group1 <- rowMeans(SummarizedExperiment::assays(SE)[["fraction"]][, rownames(cells_group1)], na.rm = TRUE) + mean_af_group2 <- rowMeans(SummarizedExperiment::assays(SE)[["fraction"]][, rownames(cells_group2)], na.rm = TRUE) mean_af_group_check <- mean_af_group1 > (group_factor * mean_af_group2) print("Get the quantiles of the VAFs of each variant.") - quantiles <- lapply(quantiles, function(x) apply(assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) - # vars <- do.call(cbind, c(list(mean_af), list(mean_cov), list(rowData(SE)$VariantQuality), quantiles)) + quantiles <- lapply(quantiles, function(x) apply(SummarizedExperiment::assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) if(!is.null(min_quality)){ - vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, VariantQuality = rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, VariantQuality = SummarizedExperiment::rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) vars <- vars[is.na(vars$VariantQuality), ] vars <- subset(vars, VariantQuality > min_quality) } else{ @@ -51,18 +50,17 @@ VariantQuantileThresholding <- function(SE, min_coverage = 2, quantiles = c(0.1, vars <- vars[mean_af_group_check,] print("Thresholding using the quantile approach.") - if(length(quantiles) != 2) stop("Your quantiles are not of length 2.") + if(length(quantiles) != 2) stop("Your quantiles are not of length 2.") if(length(thresholds) != 2) stop("Your thresholds are not of length 2.") - #voi_ch <- subset(vars, vars[,1] > mean_allele_frequency & vars[,2] > min_coverage & vars[,4] < thresholds[1] & vars[,5] > thresholds[2]) voi_ch <- subset(vars, Mean_AF > mean_allele_frequency & Mean_Cov > min_coverage & Quantile1 < thresholds[1] & Quantile2 > thresholds[2]) } else if(any(is.null(top_cells), is.null(top_VAF))){ print("Get the quantiles of the VAFs of each variant.") - quantiles <- lapply(quantiles, function(x) apply(assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) + quantiles <- lapply(quantiles, function(x) apply(SummarizedExperiment::assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) if(!is.null(min_quality)){ - vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, VariantQuality = rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, VariantQuality = SummarizedExperiment::rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) vars <- vars[is.na(vars$VariantQuality), ] vars <- subset(vars, VariantQuality > min_quality) } else{ @@ -72,19 +70,18 @@ VariantQuantileThresholding <- function(SE, min_coverage = 2, quantiles = c(0.1, print("Thresholding using the quantile approach.") if(length(quantiles) != 2) stop("Your quantiles are not of length 2.") if(length(thresholds) != 2) stop("Your thresholds are not of length 2.") - #voi_ch <- subset(vars, vars[,1] > mean_allele_frequency & vars[,2] > min_coverage & vars[,4] < thresholds[1] & vars[,5] > thresholds[2]) voi_ch <- subset(vars, Mean_AF > mean_allele_frequency & Mean_Cov > min_coverage & Quantile1 < thresholds[1] & Quantile2 > thresholds[2]) } else{ print("Get the quantile of the VAF of each variant.") if(length(quantiles) > 1) stop("You are providing more than 1 quantile. You should only provide 1.") if(length(thresholds) > 1) stop("You are providing more than 1 threshold. You should only provide 1.") - quantiles <- lapply(quantiles, function(x) apply(assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) + quantiles <- lapply(quantiles, function(x) apply(SummarizedExperiment::assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) quantiles <- quantiles[[1]] - top_cells_values <- assays(SE)[["fraction"]] + top_cells_values <- SummarizedExperiment::assays(SE)[["fraction"]] top_cells_values <- top_cells_values > top_VAF top_cells_values <- rowSums(top_cells_values) if(!is.null(min_quality)){ - vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quality = rowData(SE)$VariantQuality, Quantile = quantiles, TopCells = top_cells_values) + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quality = SummarizedExperiment::rowData(SE)$VariantQuality, Quantile = quantiles, TopCells = top_cells_values) vars <- vars[is.na(vars$VariantQuality), ] vars <- subset(vars, VariantQuality > min_quality) } else{ diff --git a/R/VariantWiseCorrelation.R b/R/VariantWiseCorrelation.R index 658a6a0..02564e5 100644 --- a/R/VariantWiseCorrelation.R +++ b/R/VariantWiseCorrelation.R @@ -2,7 +2,9 @@ #'@description #'We correlate the variants with each other using the Pearson correlation. #'This function calls CalculateCorrelationPValue to perform the actual correlation. -#'@import Matrix parallel SummarizedExperiment +#'Packages I want to remove. +#'SummarizedExperiment +#'@import Matrix parallel #'@param variants_list List of fraction values. #'@param n_cores Number of cores you want to use. Numeric. #'@param p_value_adjustment Method for P value adjustment. See p.adjust for details. @@ -22,7 +24,7 @@ VariantWiseCorrelation <- function(variants_list, n_cores = 1, p_value_adjustmen variants_values_use <- variants_list[[variant_use]] variants_list_use <- variants_list[names(variants_list) != variant_use] all_variants <- names(variants_list_use) - results <- mclapply(X = all_variants, CalculateCorrelationPValue, variant_values = variants_values_use, all_variants_list = variants_list_use, mc.cores = n_cores) + results <- parallel::mclapply(X = all_variants, CalculateCorrelationPValue, variant_values = variants_values_use, all_variants_list = variants_list_use, mc.cores = n_cores) results <- do.call("rbind", results) results <- data.frame(Variant1 = variant_use, Variant2 = all_variants, P = results[,1], Corr = results[,2], Cells_1_Alt = results[,3], Cells_1_Ref = results[,4], Cells_2_Alt = results[,5], Cells_2_Ref = results[,6]) diff --git a/R/VariantWiseFisherTest.R b/R/VariantWiseFisherTest.R index bcfe6cd..f048e53 100644 --- a/R/VariantWiseFisherTest.R +++ b/R/VariantWiseFisherTest.R @@ -2,7 +2,9 @@ #'@description #'We perform the Fisher test to determine which variants are associated. #'This function calls CalculateFisherTestPValue to perform the actual testing. -#'@import Matrix parallel SummarizedExperiment +#'Packages I want to remove. +#'SummarizedExperiment +#'@import Matrix parallel #'@param variants_list List of fraction values. #'@param n_cores Number of cores you want to use. Numeric. #'@param p_value_adjustment Method for P value adjustment. See p.adjust for details. @@ -22,7 +24,7 @@ VariantWiseFisherTest <- function(variants_list, n_cores = 1, p_value_adjustment variants_values_use <- variants_list[[variant_use]] variants_list_use <- variants_list[names(variants_list) != variant_use] all_variants <- names(variants_list_use) - results <- mclapply(X = all_variants, CalculateFisherTestPValue, variant_values = variants_values_use, all_variants_list = variants_list_use, mc.cores = n_cores) + results <- parallel::mclapply(X = all_variants, CalculateFisherTestPValue, variant_values = variants_values_use, all_variants_list = variants_list_use, mc.cores = n_cores) results <- do.call("rbind", results) results <- data.frame(Variant1 = variant_use, Variant2 = all_variants, P = results[,1], OddsRatio = results[,2], Cells_Alt_1_2 = results[,3], Cells_Alt_1_Ref_2 = results[,4], Cells_Alt_2_Ref_1 = results[,5], Cells_Ref_1_2 = results[,6]) diff --git a/R/char_to_numeric.R b/R/char_to_numeric.R new file mode 100644 index 0000000..e552561 --- /dev/null +++ b/R/char_to_numeric.R @@ -0,0 +1,12 @@ +#'char_to_numeric +#'@description +#'A function to convert the heterozyguous/homozyguous information from the VCF to the consensus information from VarTrix. +#'It is only used in LoadingVCF_typewise.R. +#'@param char_value +#'@export +char_to_numeric <- function(char_value) { + if(char_value == "1/1") return(2) + if(char_value %in% c("1/0", "0/1")) return(2) + if(char_value == "0/0") return(1) + return(0) +} diff --git a/R/combine_NAMES.R b/R/combine_NAMES.R index 46d8450..6f97c1c 100644 --- a/R/combine_NAMES.R +++ b/R/combine_NAMES.R @@ -6,5 +6,6 @@ #'@export combine_NAMES <- function(x, y) { shared_names <- intersect(x, y) - c(x, setdiff(y, shared_names)) + combined_names <- c(x, setdiff(y, shared_names)) + return(combined_names) } diff --git a/R/combine_SparseMatrix.R b/R/combine_SparseMatrix.R index f3ff079..d9ee009 100644 --- a/R/combine_SparseMatrix.R +++ b/R/combine_SparseMatrix.R @@ -41,8 +41,8 @@ combine_SparseMatrix <- function(matrix_1, matrix_2){ positions_2[,"j"] <- new_cols[positions_2[,"j"]] positions_combined <- rbind(positions_1, positions_2) - result <- sparseMatrix(i = positions_combined[,"i"], j = positions_combined[,"j"], x = positions_combined[,"x"], - dimnames = list(variants_unique, cells_unique), dims = c(length(variants_unique), length(cells_unique))) + result <- Matrix::sparseMatrix(i = positions_combined[,"i"], j = positions_combined[,"j"], x = positions_combined[,"x"], + dimnames = list(variants_unique, cells_unique), dims = c(length(variants_unique), length(cells_unique))) } diff --git a/R/computeAFMutMatrix.R b/R/computeAFMutMatrix.R index a4cf860..6be603e 100644 --- a/R/computeAFMutMatrix.R +++ b/R/computeAFMutMatrix.R @@ -6,49 +6,22 @@ #'See: https://gatk.broadinstitute.org/hc/en-us/articles/360035532252-Allele-Depth-AD-is-lower-than-expected #'and https://github.com/caleblareau/mgatk/issues/1 #'We simply set these values to 1, since that is the actual information we have in this case. -#'This issue can be solved on the MAEGATK/GATK side. #'@import SummarizedExperiment #'@param SE SummarizedExperiment object. +#'@param chromosome_prefix The prefix of the chromosome. #'@export computeAFMutMatrix <- function(SE, chromosome_prefix = "chrM"){ - cov <- assays(SE)[["coverage"]] + 0.000001 - ref_allele <- as.character(rowRanges(SE)$refAllele) + cov <- SummarizedExperiment::assays(SE)[["coverage"]] + 0.000001 + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE)$refAllele) - getMutMatrix <- function(letter){ - names_rows <- paste0(chromosome_prefix, "_", 1:nrow(cov), "_", toupper(ref_allele), "_", letter) - names_rows <- names_rows[toupper(ref_allele) != letter] - mat_fow <- assays(SE)[[paste0(letter, "_counts_fw")]] - mat_rev <- assays(SE)[[paste0(letter, "_counts_rev")]] - mat <- mat_fow + mat_rev - mat <- mat[toupper(ref_allele) != letter,] - cov_use <- cov[toupper(ref_allele) != letter,] - mat <- mat / cov_use - gc() - # We can get AF values greater than 1, which is due to uninformative reads. - # See: https://gatk.broadinstitute.org/hc/en-us/articles/360035532252-Allele-Depth-AD-is-lower-than-expected - # and https://github.com/caleblareau/mgatk/issues/1 - # We simply set these values to 1, since that is the actual information we have in this case. - # This issue can be solved on the MAEGATK/GATK side. - mat[mat > 1] <- 1 - rownames(mat) <- names_rows - #mat <- as(mat, "dgCMatrix") - mat <- as(mat, "CsparseMatrix") - return(mat) - } - - A_matrix <- getMutMatrix("A") - #A_matrix <- as.matrix(A_matrix) + A_matrix <- getMutMatrix(SE = SE, cov = cov, letter = "A", ref_allele = ref_allele, chromosome_prefix = chromosome_prefix) gc() - C_matrix <- getMutMatrix("C") - #C_matrix <- as.matrix(C_matrix) + C_matrix <- getMutMatrix(SE = SE, cov = cov, letter = "C", ref_allele = ref_allele, chromosome_prefix = chromosome_prefix) gc() - G_matrix <- getMutMatrix("G") - #G_matrix <- as.matrix(G_matrix) + G_matrix <- getMutMatrix(SE = SE, cov = cov, letter = "G", ref_allele = ref_allele, chromosome_prefix = chromosome_prefix) gc() - T_matrix <- getMutMatrix("T") - #T_matrix <- as.matrix(T_matrix) + T_matrix <- getMutMatrix(SE = SE, cov = cov, letter = "T", ref_allele = ref_allele, chromosome_prefix = chromosome_prefix) gc() result <- rbind(A_matrix, C_matrix, G_matrix, T_matrix) -# result <- as.matrix(result) return(result) } diff --git a/R/getAltMatrix.R b/R/getAltMatrix.R index 0979eee..d062607 100644 --- a/R/getAltMatrix.R +++ b/R/getAltMatrix.R @@ -9,8 +9,8 @@ #'@param chromosome_prefix The chromosome prefix used. #'@export getAltMatrix <- function(SE_object, letter, chromosome_prefix = "chrM"){ - ref_allele <- as.character(rowRanges(SE_object)$refAllele) - mat <- (assays(SE_object)[[paste0(letter, "_counts_fw")]] + assays(SE_object)[[paste0(letter, "_counts_rev")]]) + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE_object)$refAllele) + mat <- (SummarizedExperiment::assays(SE_object)[[paste0(letter, "_counts_fw")]] + SummarizedExperiment::assays(SE_object)[[paste0(letter, "_counts_rev")]]) rownames(mat) <- paste0(chromosome_prefix, "_", as.character(1:dim(mat)[1]), "_", toupper(ref_allele), ">", letter) mat <- mat[toupper(ref_allele) != letter,] return(mat) diff --git a/R/getMutMatrix.R b/R/getMutMatrix.R new file mode 100644 index 0000000..a550163 --- /dev/null +++ b/R/getMutMatrix.R @@ -0,0 +1,25 @@ +#'getMutMatrix +#'@description +#'This function gets the allele frequency for a specific allele. It is used in computeAFMutMatrix. +#'Source: https://github.com/petervangalen/MAESTER-2021 +#'@import SummarizedExperiment +#'@param SE SummarizedExperiment object. +#'@param cov The coverage matrix from MAEGATK/MGATK. +#'@param letter The base we are interested in. +#'@param ref_allele Vector of reference alleles. +#'@export +getMutMatrix <- function(SE, cov, letter, ref_allele, chromosome_prefix){ + names_rows <- paste0(chromosome_prefix, "_", 1:nrow(cov), "_", toupper(ref_allele), "_", letter) + names_rows <- names_rows[toupper(ref_allele) != letter] + mat_fow <- SummarizedExperiment::assays(SE)[[paste0(letter, "_counts_fw")]] + mat_rev <- SummarizedExperiment::assays(SE)[[paste0(letter, "_counts_rev")]] + mat <- mat_fow + mat_rev + mat <- mat[toupper(ref_allele) != letter,] + cov_use <- cov[toupper(ref_allele) != letter,] + mat <- mat / cov_use + gc() + mat[mat > 1] <- 1 + rownames(mat) <- names_rows + mat <- as(mat, "CsparseMatrix") + return(mat) +} diff --git a/R/getReadMatrix.R b/R/getReadMatrix.R index 6b5e693..e9e2f39 100644 --- a/R/getReadMatrix.R +++ b/R/getReadMatrix.R @@ -5,8 +5,8 @@ #'@param chromosome_prefix The chromosome name used as a prefix. #'@export getReadMatrix <- function(SE, letter, chromosome_prefix = "chrM"){ - ref_allele <- as.character(rowRanges(SE)$refAllele) - mat <- (assays(SE)[[paste0(letter, "_counts_fw")]] + assays(SE)[[paste0(letter, "_counts_rev")]]) + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE)$refAllele) + mat <- (SummarizedExperiment::assays(SE)[[paste0(letter, "_counts_fw")]] + SummarizedExperiment::assays(SE)[[paste0(letter, "_counts_rev")]]) rownames(mat) <- paste0(chromosome_prefix, "_", 1:nrow(mat), "_", toupper(ref_allele), "_", letter) return(mat) } diff --git a/R/getRefMatrix.R b/R/getRefMatrix.R index e0374d4..3ed3591 100644 --- a/R/getRefMatrix.R +++ b/R/getRefMatrix.R @@ -9,8 +9,8 @@ #'@param chromosome_prefix The chromosome prefix used. #'@export getRefMatrix <- function(SE_object, letter, chromosome_prefix = "chrM"){ - ref_allele <- as.character(rowRanges(SE_object)$refAllele) - mat <- (assays(SE_object)[[paste0(letter, "_counts_fw")]] + assays(SE_object)[[paste0(letter, "_counts_rev")]]) + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE_object)$refAllele) + mat <- (SummarizedExperiment::assays(SE_object)[[paste0(letter, "_counts_fw")]] + SummarizedExperiment::assays(SE_object)[[paste0(letter, "_counts_rev")]]) rownames(mat) <- paste0(chromosome_prefix, "_", as.character(1:dim(mat)[1]), "_", toupper(ref_allele), ">", letter) mat <- mat[toupper(ref_allele) %in% letter,] return(mat) diff --git a/R/get_consensus.R b/R/get_consensus.R index 1d5ebd2..78514c5 100644 --- a/R/get_consensus.R +++ b/R/get_consensus.R @@ -1,7 +1,9 @@ #'get_consensus #'@description #'We get the consensus information for a specific matrix. -#'@import dplyr MatrixGenerics SummarizedExperiment +#'I want to remove some packages if they are not needed. See below which package apperantly wasn't needed. +#'Package to remove: dplyr, SummarizedExperiment +#'@import MatrixGenerics #'@param letter The alternative base. #'@param ref_base The reference base. #'@param input_matrix Input matrix with the present reads numerically encoded. @@ -20,8 +22,6 @@ get_consensus <- function(alt_base, ref_base, input_matrix, chromosome_prefix = # Both is not accurate in this context. Therefore, we set these cases to 0 (NoCall). other_homo_values <- base_numeric[!base_numeric %in% base_numeric[c(alt_base, ref_base)]] - - #output_matrix <- input_matrix output_matrix <- matrix(0, nrow = nrow(input_matrix), ncol = ncol(input_matrix)) rownames(output_matrix) <- rownames(input_matrix) colnames(output_matrix) <- colnames(input_matrix) diff --git a/R/ggsci_pal.R b/R/ggsci_pal.R index 254506d..27208b1 100644 --- a/R/ggsci_pal.R +++ b/R/ggsci_pal.R @@ -14,8 +14,7 @@ #'ucscgb: 26 #'@export ggsci_pal <- function(option, ...){ - func_name = glue("pal_{option}") - func_call = glue('{func_name}(...)') - assertthat::assert_that(func_name %in% ls("package:ggsci")) + func_name = glue::glue("pal_{option}") + func_call = glue::glue('{func_name}(...)') return(eval(parse(text=func_call))) } diff --git a/R/sdiv.R b/R/sdiv.R index 8a8cbde..6da30a2 100644 --- a/R/sdiv.R +++ b/R/sdiv.R @@ -8,6 +8,6 @@ sdiv <- function(X, Y, names = dimnames(X)) { sX <- summary(X) sY <- summary(Y) sRes <- merge(sX, sY, by = c("i", "j")) - sparseMatrix(i = sRes[,1], j = sRes[,2], x = sRes[,3] / sRes[,4], - dimnames = names) + result <- Matrix::sparseMatrix(i = sRes[,1], j = sRes[,2], x = sRes[,3] / sRes[,4], dimnames = names) + return() } diff --git a/README.md b/README.md index 5b54b47..9184c20 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ The mutation data was obtained from the Sanger Institute Catalogue Of Somatic Mu ``` -# Current Features v0.2.28 +# Current Features v0.2.29 - Loading data from VarTrix and MAEGATK. - Transforming the data to be compatible for joint analysis. diff --git a/docs/404.html b/docs/404.html index 78aa6b2..54fd82b 100644 --- a/docs/404.html +++ b/docs/404.html @@ -24,7 +24,7 @@ sigurd - 0.2.15 + 0.2.29 + + + + + +
+
+
+ +
+

We get the genotyping information for a set of variants. +The function returns a matrix with the values from the specified assay.

+
+ +
+

Usage

+
GetVariantInfo(SE, information = "consensus", variants = NULL, cells = NULL)
+
+ +
+

Arguments

+
SE
+

SummarizedExperiment object.

+ + +
information
+

The assay with the desired information. Default: consensus

+ + +
variants
+

A vector of variants.

+ + +
cells
+

A vector of cell IDs. On default all cells are returned. Default: NULL

+ +
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/HeatmapVoi.html b/docs/reference/HeatmapVoi.html index cf703c0..10f5f3e 100644 --- a/docs/reference/HeatmapVoi.html +++ b/docs/reference/HeatmapVoi.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.29 + + + + + +
+
+
+ +
+

We load a cellwise pileup result from a VCF file.

+
+ +
+

Usage

+
LoadingVCF_typewise(
+  samples_file,
+  samples_path = NULL,
+  barcodes_path = NULL,
+  vcf_path,
+  patient,
+  sample = NULL,
+  type_use = "scRNAseq_Somatic",
+  min_reads = NULL,
+  min_cells = 2
+)
+
+ +
+

Arguments

+
samples_file
+

Path to the csv file with the samples to be loaded.

+ + +
samples_path
+

Path to the input folder. Must include a barcodes file.

+ + +
vcf_path
+

Path to the VCF file with the variants.

+ + +
patient
+

The patient you want to load.

+ + +
type_use
+

The type of input. Has to be one of: scRNAseq_Somatic, Amplicon_Somatic, scRNAseq_MT, Amplicon_MT.

+ + +
min_reads
+

The minimum number of reads we want. Otherwise we treat this as a NoCall. Default = NULL.

+ + +
min_cells
+

The minimum number of cells for a variant. Otherwise, we will remove a variant. Default = 2.

+ +
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/LoadingVarTrix_ori.html b/docs/reference/LoadingVarTrix_ori.html index 2f277bb..c1d0d08 100644 --- a/docs/reference/LoadingVarTrix_ori.html +++ b/docs/reference/LoadingVarTrix_ori.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.29 + + + + + +
+
+
+ +
+

We add the genotyping information for a set of variants to a Seurat object. +The function returns a matrix with the values from the specified assay.

+
+ +
+

Usage

+
SetVariantInfo(SE, seurat_object, information = "consensus", variants = NULL)
+
+ +
+

Arguments

+
SE
+

SummarizedExperiment object.

+ + +
seurat_object
+

The Seurat object.

+ + +
information
+

The assay with the desired information. Default: consensus

+ + +
variants
+

A vector of variants.

+ +
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/VariantBurden.html b/docs/reference/VariantBurden.html index a03b092..e7fed03 100644 --- a/docs/reference/VariantBurden.html +++ b/docs/reference/VariantBurden.html @@ -12,7 +12,7 @@ sigurd - 0.2.15 + 0.2.29
+ + + + + +
+
+
+ +
+

A function to convert the heterozyguous/homozyguous information from the VCF to the consensus information from VarTrix. +It is only used in LoadingVCF_typewise.R.

+
+ +
+

Usage

+
char_to_numeric(char_value)
+
+ +
+

Arguments

+
char_value
+
+ +
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/combine_NAMES.html b/docs/reference/combine_NAMES.html index f5b69b6..8717470 100644 --- a/docs/reference/combine_NAMES.html +++ b/docs/reference/combine_NAMES.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.29 + + + + + +
+
+
+ +
+

This function gets the allele frequency for a specific allele. It is used in computeAFMutMatrix. +Source: https://github.com/petervangalen/MAESTER-2021

+
+ +
+

Usage

+
getMutMatrix(SE, cov, letter, ref_allele, chromosome_prefix)
+
+ +
+

Arguments

+
SE
+

SummarizedExperiment object.

+ + +
cov
+

The coverage matrix from MAEGATK/MGATK.

+ + +
letter
+

The base we are interested in.

+ + +
ref_allele
+

Vector of reference alleles.

+ +
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/getReadMatrix.html b/docs/reference/getReadMatrix.html index ad4ca69..413e73f 100644 --- a/docs/reference/getReadMatrix.html +++ b/docs/reference/getReadMatrix.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.29