Skip to content

Commit

Permalink
Patch ups for initial version of Bioconductor.
Browse files Browse the repository at this point in the history
  • Loading branch information
AnthonyChristidis committed Sep 19, 2024
1 parent f994c72 commit a7493b8
Show file tree
Hide file tree
Showing 102 changed files with 1,377 additions and 1,376 deletions.
22 changes: 11 additions & 11 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
# Generated by roxygen2: do not edit by hand

S3method(plot,calculateAveragePairwiseCorrelation)
S3method(plot,calculateCellDistances)
S3method(plot,calculateCellSimilarityPCA)
S3method(plot,calculateDiscriminantSpace)
S3method(plot,calculateNearestNeighborProbabilities)
S3method(plot,calculateSIRSpace)
S3method(plot,compareCCA)
S3method(plot,comparePCA)
S3method(plot,comparePCASubspace)
S3method(plot,detectAnomaly)
S3method(plot,regressPC)
S3method(plot,calculateAveragePairwiseCorrelationObject)
S3method(plot,calculateCellDistancesObject)
S3method(plot,calculateCellSimilarityPCAObject)
S3method(plot,calculateDiscriminantSpaceObject)
S3method(plot,calculateNearestNeighborProbabilitiesObject)
S3method(plot,calculateSIRSpaceObject)
S3method(plot,compareCCAObject)
S3method(plot,comparePCAObject)
S3method(plot,comparePCASubspaceObject)
S3method(plot,detectAnomalyObject)
S3method(plot,regressPCObject)
export(boxplotPCA)
export(calculateAveragePairwiseCorrelation)
export(calculateCategorizationEntropy)
Expand Down
92 changes: 46 additions & 46 deletions R/boxplotPCA.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#' @title Plot Principal Components for Different Cell Types
#'
#' @description This function generates a \code{ggplot2} boxplot visualization of principal components (PCs) for different
#' @description This function generates a \code{ggplot2} boxplot visualization of principal components (PCs) for different
#' cell types across two datasets (query and reference).
#'
#' @details
#' The function \code{boxplotPCA} is designed to provide a visualization of principal component analysis (PCA) results. It projects
#' the query dataset onto the principal components obtained from the reference dataset. The results are then visualized
#' as boxplots, grouped by cell types and datasets (query and reference). This allows for a comparative analysis of the
#' distributions of the principal components across different cell types and datasets. The function internally calls \code{projectPCA}
#' to perform the PCA projection. It then reshapes the output data into a long format suitable for ggplot2 plotting.
#' The function \code{boxplotPCA} is designed to provide a visualization of principal component analysis (PCA) results. It projects
#' the query dataset onto the principal components obtained from the reference dataset. The results are then visualized
#' as boxplots, grouped by cell types and datasets (query and reference). This allows for a comparative analysis of the
#' distributions of the principal components across different cell types and datasets. The function internally calls \code{projectPCA}
#' to perform the PCA projection. It then reshapes the output data into a long format suitable for ggplot2 plotting.
#'
#' @param query_data A \code{\linkS4class{SingleCellExperiment}} object containing a numeric expression matrix for the query cells.
#' @param reference_data A \code{\linkS4class{SingleCellExperiment}} object containing a numeric expression matrix for the reference cells.
Expand All @@ -21,35 +21,35 @@
#' @return A ggplot object representing the boxplots of specified principal components for the given cell types and datasets.
#'
#' @export
#'
#'
#' @author Anthony Christidis, \email{anthony-alexander_christidis@hms.harvard.edu}
#'
#' @examples
#' # Load data
#' data("reference_data")
#' data("query_data")
#'
#'
#' # Plot the PC data
#' pc_plot <- boxplotPCA(query_data = query_data,
#' pc_plot <- boxplotPCA(query_data = query_data,
#' reference_data = reference_data,
#' cell_types = c("CD4", "CD8", "B_and_plasma", "Myeloid"),
#' query_cell_type_col = "SingleR_annotation",
#' ref_cell_type_col = "expert_annotation",
#' pc_subset = 1:3)
#' query_cell_type_col = "SingleR_annotation",
#' ref_cell_type_col = "expert_annotation",
#' pc_subset = 1:6)
#' pc_plot
#'
#'
#' @importFrom stats approxfun cancor density setNames
#' @importFrom utils combn
#'
#'
# Function to plot PC for different cell types
boxplotPCA <- function(query_data,
reference_data,
query_cell_type_col,
ref_cell_type_col,
boxplotPCA <- function(query_data,
reference_data,
query_cell_type_col,
ref_cell_type_col,
cell_types = NULL,
pc_subset = 1:5,
assay_name = "logcounts"){

# Check standard input arguments
argumentCheck(query_data = query_data,
reference_data = reference_data,
Expand All @@ -58,69 +58,69 @@ boxplotPCA <- function(query_data,
cell_types = cell_types,
pc_subset_ref = pc_subset,
assay_name = assay_name)

# Get common cell types if they are not specified by user
if(is.null(cell_types)){
cell_types <- na.omit(unique(c(reference_data[[ref_cell_type_col]],
query_data[[query_cell_type_col]])))
}

# Get the projected PCA data
pca_output <- projectPCA(query_data = query_data,
reference_data = reference_data,
query_cell_type_col = query_cell_type_col,
pca_output <- projectPCA(query_data = query_data,
reference_data = reference_data,
query_cell_type_col = query_cell_type_col,
ref_cell_type_col = ref_cell_type_col,
pc_subset = pc_subset,
assay_name = assay_name)

# Create the long format data frame manually
pca_output <- pca_output[!is.na(pca_output[["cell_type"]]),]
if(!is.null(cell_types)){
if(all(cell_types %in% pca_output[["cell_type"]])){
pca_output <- pca_output[which(pca_output[["cell_type"]] %in%
pca_output <- pca_output[which(pca_output[["cell_type"]] %in%
cell_types),]
} else{
stop("One or more of the specified \'cell_types\' are not available.")
}
}
pca_long <- data.frame(PC = rep(paste0("pc", pc_subset),
pca_long <- data.frame(PC = rep(paste0("pc", pc_subset),
each = nrow(pca_output)),
Value = unlist(c(pca_output[, pc_subset])),
dataset = rep(pca_output[["dataset"]],
dataset = rep(pca_output[["dataset"]],
length(pc_subset)),
cell_type = rep(pca_output[["cell_type"]],
cell_type = rep(pca_output[["cell_type"]],
length(pc_subset)))
pca_long[["PC"]] <- toupper(pca_long[["PC"]])

# Create a new variable representing the combination of cell type and dataset
pca_long[["cell_type_dataset"]] <- paste(pca_long[["dataset"]],
pca_long[["cell_type"]],
pca_long[["cell_type_dataset"]] <- paste(pca_long[["dataset"]],
pca_long[["cell_type"]],
sep = " ")

# Define the order of cell type and dataset combinations
order_combinations <- paste(rep(c("Reference", "Query"),
order_combinations <- paste(rep(c("Reference", "Query"),
length(unique(pca_long[["cell_type"]]))),
rep(sort(unique(pca_long[["cell_type"]])),
rep(sort(unique(pca_long[["cell_type"]])),
each = 2))

# Reorder the levels of cell type and dataset factor
pca_long[["cell_type_dataset"]] <- factor(pca_long[["cell_type_dataset"]],
pca_long[["cell_type_dataset"]] <- factor(pca_long[["cell_type_dataset"]],
levels = order_combinations)

# Define the colors for cell types
cell_type_colors <- generateColors(order_combinations,
cell_type_colors <- generateColors(order_combinations,
paired = TRUE)

# Create the ggplot
plot <- ggplot2::ggplot(pca_long, ggplot2::aes(
x = .data[["cell_type"]],
y = .data[["Value"]],
x = .data[["cell_type"]],
y = .data[["Value"]],
fill = .data[["cell_type_dataset"]])) +
ggplot2::geom_boxplot(alpha = 0.7, outlier.shape = NA, width = 0.7) +
ggplot2::geom_boxplot(alpha = 0.7, outlier.shape = NA, width = 0.7) +
ggplot2::facet_wrap(~ .data[["PC"]], scales = "free") +
ggplot2::scale_fill_manual(values = cell_type_colors,
name = "Cell Types") +
ggplot2::labs(x = "", y = "Value") +
ggplot2::scale_fill_manual(values = cell_type_colors,
name = "Cell Types") +
ggplot2::labs(x = "", y = "PCA Score") +
ggplot2::theme_bw() +
ggplot2::theme(
panel.grid.minor = ggplot2::element_blank(),
Expand Down
94 changes: 47 additions & 47 deletions R/calculateAveragePairwiseCorrelation.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#' @title Compute Average Pairwise Correlation between Cell Types
#'
#' @description
#' Computes the average pairwise correlations between specified cell types
#' Computes the average pairwise correlations between specified cell types
#' in single-cell gene expression data.
#'
#' @details
#' This function operates on \code{\linkS4class{SingleCellExperiment}} objects,
#' ideal for single-cell analysis workflows. It calculates pairwise correlations between query and
#' reference cells using a specified correlation method, then averages these correlations for each
#' cell type pair. This function aids in assessing the similarity between cells in reference and query datasets,
#'
#' @details
#' This function operates on \code{\linkS4class{SingleCellExperiment}} objects,
#' ideal for single-cell analysis workflows. It calculates pairwise correlations between query and
#' reference cells using a specified correlation method, then averages these correlations for each
#' cell type pair. This function aids in assessing the similarity between cells in reference and query datasets,
#' providing insights into the reliability of cell type annotations in single-cell gene expression data.
#'
#' @param query_data A \code{\linkS4class{SingleCellExperiment}} object containing a numeric expression matrix for the query cells.
Expand All @@ -21,49 +21,49 @@
#' @param correlation_method The correlation method to use for calculating pairwise correlations.
#' @param assay_name Name of the assay on which to perform computations. Default is "logcounts".
#'
#' @return A matrix containing the average pairwise correlation values.
#' Rows and columns are labeled with the cell types. Each element
#' in the matrix represents the average correlation between a pair
#' @return A matrix containing the average pairwise correlation values.
#' Rows and columns are labeled with the cell types. Each element
#' in the matrix represents the average correlation between a pair
#' of cell types.
#'
#' @seealso \code{\link{plot.calculateAveragePairwiseCorrelation}}
#'
#'
#' @seealso \code{\link{plot.calculateAveragePairwiseCorrelationObject}}
#'
#' @author Anthony Christidis, \email{anthony-alexander_christidis@hms.harvard.edu}
#'
#'
#' @examples
#' # Load data
#' data("reference_data")
#' data("query_data")
#'
#'
#' # Compute pairwise correlations
#' cor_matrix_avg <- calculateAveragePairwiseCorrelation(query_data = query_data,
#' reference_data = reference_data,
#' query_cell_type_col = "SingleR_annotation",
#' ref_cell_type_col = "expert_annotation",
#' cell_types = c("CD4", "CD8", "B_and_plasma"),
#' cor_matrix_avg <- calculateAveragePairwiseCorrelation(query_data = query_data,
#' reference_data = reference_data,
#' query_cell_type_col = "SingleR_annotation",
#' ref_cell_type_col = "expert_annotation",
#' cell_types = c("CD4", "CD8", "B_and_plasma"),
#' pc_subset = 1:10,
#' correlation_method = "spearman")
#'
#'
#' # Visualize correlation output
#' plot(cor_matrix_avg)
#'
#' @import SingleCellExperiment
#' @importFrom stats cor
#'
#'
#' @export
calculateAveragePairwiseCorrelation <- function(
query_data,
reference_data,
query_cell_type_col,
ref_cell_type_col,
cell_types = NULL,
query_data,
reference_data,
query_cell_type_col,
ref_cell_type_col,
cell_types = NULL,
pc_subset = 1:10,
correlation_method = c("spearman", "pearson"),
assay_name = "logcounts") {

# Match correlation method argument
correlation_method <- match.arg(correlation_method)

# Check standard input arguments
argumentCheck(query_data = query_data,
reference_data = reference_data,
Expand All @@ -72,61 +72,61 @@ calculateAveragePairwiseCorrelation <- function(
cell_types = cell_types,
pc_subset_ref = pc_subset,
assay_name = assay_name)

# Get common cell types if they are not specified by user
if(is.null(cell_types)){
cell_types <- na.omit(unique(c(reference_data[[ref_cell_type_col]],
query_data[[query_cell_type_col]])))
}

# Function to compute correlation between two cell types
.computeCorrelation <- function(type1, type2) {

if(!is.null(pc_subset)){
# Project query data onto PCA space of reference data
pca_output <- projectPCA(
query_data = query_data,
reference_data = reference_data,
query_data = query_data,
reference_data = reference_data,
query_cell_type_col = query_cell_type_col,
ref_cell_type_col = ref_cell_type_col,
pc_subset = pc_subset,
assay_name = assay_name)
ref_mat <- pca_output[which(
pca_output[["dataset"]] == "Reference" &
pca_output[["cell_type"]] == type2),
pca_output[["cell_type"]] == type2),
paste0("PC", pc_subset)]
query_mat <- pca_output[which(
pca_output[["dataset"]] == "Query" &
pca_output[["cell_type"]] == type1),
pca_output[["cell_type"]] == type1),
paste0("PC", pc_subset)]
} else{

# Subset query data to the specified cell type
query_subset <- query_data[, which(
query_data[[query_cell_type_col]] == type1), drop = FALSE]
ref_subset <- reference_data[, which(
reference_data[[ref_cell_type_col]] == type2), drop = FALSE]

query_mat <- t(as.matrix(assay(query_subset, assay_name)))
ref_mat <- t(as.matrix(assay(ref_subset, assay_name)))
}
cor_matrix <- cor(t(query_mat), t(ref_mat),

cor_matrix <- cor(t(query_mat), t(ref_mat),
method = correlation_method)
return(mean(cor_matrix))
}

# Use outer to compute pairwise correlations
cor_matrix_avg <- outer(cell_types, cell_types,
cor_matrix_avg <- outer(cell_types, cell_types,
Vectorize(.computeCorrelation))

# Assign cell type names to rows and columns
rownames(cor_matrix_avg) <- paste0("Query-", cell_types)
colnames(cor_matrix_avg) <- paste0("Ref-", cell_types)

# Update class of output
class(cor_matrix_avg) <- c(class(cor_matrix_avg),
"calculateAveragePairwiseCorrelation")
class(cor_matrix_avg) <- c(class(cor_matrix_avg),
"calculateAveragePairwiseCorrelationObject")

return(cor_matrix_avg)
}
Loading

0 comments on commit a7493b8

Please sign in to comment.