Patch ups for initial version of Bioconductor.

ccb-hms · Sep 19, 2024 · a7493b8
1 parent f994c72
commit a7493b8
Show file tree

Hide file tree

Showing 102 changed files with 1,377 additions and 1,376 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,16 +1,16 @@
 # Generated by roxygen2: do not edit by hand
 
-S3method(plot,calculateAveragePairwiseCorrelation)
-S3method(plot,calculateCellDistances)
-S3method(plot,calculateCellSimilarityPCA)
-S3method(plot,calculateDiscriminantSpace)
-S3method(plot,calculateNearestNeighborProbabilities)
-S3method(plot,calculateSIRSpace)
-S3method(plot,compareCCA)
-S3method(plot,comparePCA)
-S3method(plot,comparePCASubspace)
-S3method(plot,detectAnomaly)
-S3method(plot,regressPC)
+S3method(plot,calculateAveragePairwiseCorrelationObject)
+S3method(plot,calculateCellDistancesObject)
+S3method(plot,calculateCellSimilarityPCAObject)
+S3method(plot,calculateDiscriminantSpaceObject)
+S3method(plot,calculateNearestNeighborProbabilitiesObject)
+S3method(plot,calculateSIRSpaceObject)
+S3method(plot,compareCCAObject)
+S3method(plot,comparePCAObject)
+S3method(plot,comparePCASubspaceObject)
+S3method(plot,detectAnomalyObject)
+S3method(plot,regressPCObject)
 export(boxplotPCA)
 export(calculateAveragePairwiseCorrelation)
 export(calculateCategorizationEntropy)

diff --git a/R/boxplotPCA.R b/R/boxplotPCA.R
@@ -1,14 +1,14 @@
 #' @title Plot Principal Components for Different Cell Types
 #'
-#' @description This function generates a \code{ggplot2} boxplot visualization of principal components (PCs) for different 
+#' @description This function generates a \code{ggplot2} boxplot visualization of principal components (PCs) for different
 #' cell types across two datasets (query and reference).
 #'
 #' @details
-#' The function \code{boxplotPCA} is designed to provide a visualization of principal component analysis (PCA) results. It projects 
-#' the query dataset onto the principal components obtained from the reference dataset. The results are then visualized 
-#' as boxplots, grouped by cell types and datasets (query and reference). This allows for a comparative analysis of the 
-#' distributions of the principal components across different cell types and datasets. The function internally calls \code{projectPCA} 
-#' to perform the PCA projection. It then reshapes the output data into a long format suitable for ggplot2 plotting. 
+#' The function \code{boxplotPCA} is designed to provide a visualization of principal component analysis (PCA) results. It projects
+#' the query dataset onto the principal components obtained from the reference dataset. The results are then visualized
+#' as boxplots, grouped by cell types and datasets (query and reference). This allows for a comparative analysis of the
+#' distributions of the principal components across different cell types and datasets. The function internally calls \code{projectPCA}
+#' to perform the PCA projection. It then reshapes the output data into a long format suitable for ggplot2 plotting.
 #'
 #' @param query_data A \code{\linkS4class{SingleCellExperiment}} object containing numeric expression matrix for the query cells.
 #' @param reference_data A \code{\linkS4class{SingleCellExperiment}} object containing numeric expression matrix for the reference cells.
@@ -21,35 +21,35 @@
 #' @return A ggplot object representing the boxplots of specified principal components for the given cell types and datasets.
 #'
 #' @export
-#' 
+#'
 #' @author Anthony Christidis, \email{anthony-alexander_christidis@hms.harvard.edu}
 #'
 #' @examples
 #' # Load data
 #' data("reference_data")
 #' data("query_data")
-#' 
+#'
 #' # Plot the PC data
-#' pc_plot <- boxplotPCA(query_data = query_data, 
+#' pc_plot <- boxplotPCA(query_data = query_data,
 #'                       reference_data = reference_data,
 #'                       cell_types = c("CD4", "CD8", "B_and_plasma", "Myeloid"),
-#'                       query_cell_type_col = "SingleR_annotation", 
-#'                       ref_cell_type_col = "expert_annotation", 
-#'                       pc_subset = 1:3)
+#'                       query_cell_type_col = "SingleR_annotation",
+#'                       ref_cell_type_col = "expert_annotation",
+#'                       pc_subset = 1:6)
 #' pc_plot
-#' 
+#'
 #' @importFrom stats approxfun cancor density setNames
 #' @importFrom utils combn
-#'                          
+#'
 # Function to plot PC for different cell types
-boxplotPCA <- function(query_data, 
-                       reference_data, 
-                       query_cell_type_col, 
-                       ref_cell_type_col, 
+boxplotPCA <- function(query_data,
+                       reference_data,
+                       query_cell_type_col,
+                       ref_cell_type_col,
                        cell_types = NULL,
                        pc_subset = 1:5,
                        assay_name = "logcounts"){
-    
+
     # Check standard input arguments
     argumentCheck(query_data = query_data,
                   reference_data = reference_data,
@@ -58,69 +58,69 @@ boxplotPCA <- function(query_data,
                   cell_types = cell_types,
                   pc_subset_ref = pc_subset,
                   assay_name = assay_name)
-    
+
     # Get common cell types if they are not specified by user
     if(is.null(cell_types)){
         cell_types <- na.omit(unique(c(reference_data[[ref_cell_type_col]],
                                        query_data[[query_cell_type_col]])))
     }
-    
+
     # Get the projected PCA data
-    pca_output <- projectPCA(query_data = query_data, 
-                             reference_data = reference_data, 
-                             query_cell_type_col = query_cell_type_col, 
+    pca_output <- projectPCA(query_data = query_data,
+                             reference_data = reference_data,
+                             query_cell_type_col = query_cell_type_col,
                              ref_cell_type_col = ref_cell_type_col,
                              pc_subset = pc_subset,
                              assay_name = assay_name)
-    
+
     # Create the long format data frame manually
     pca_output <- pca_output[!is.na(pca_output[["cell_type"]]),]
     if(!is.null(cell_types)){
         if(all(cell_types %in% pca_output[["cell_type"]])){
-            pca_output <- pca_output[which(pca_output[["cell_type"]] %in% 
+            pca_output <- pca_output[which(pca_output[["cell_type"]] %in%
                                                cell_types),]
         } else{
             stop("One or more of the specified \'cell_types\' are not available.")
         }
     }
-    pca_long <- data.frame(PC = rep(paste0("pc", pc_subset), 
+    pca_long <- data.frame(PC = rep(paste0("pc", pc_subset),
                                     each = nrow(pca_output)),
                            Value = unlist(c(pca_output[, pc_subset])),
-                           dataset = rep(pca_output[["dataset"]], 
+                           dataset = rep(pca_output[["dataset"]],
                                          length(pc_subset)),
-                           cell_type = rep(pca_output[["cell_type"]], 
+                           cell_type = rep(pca_output[["cell_type"]],
                                            length(pc_subset)))
     pca_long[["PC"]] <- toupper(pca_long[["PC"]])
-    
+
     # Create a new variable representing the combination of cell type and dataset
-    pca_long[["cell_type_dataset"]] <- paste(pca_long[["dataset"]], 
-                                             pca_long[["cell_type"]], 
+    pca_long[["cell_type_dataset"]] <- paste(pca_long[["dataset"]],
+                                             pca_long[["cell_type"]],
                                              sep = " ")
-    
+
     # Define the order of cell type and dataset combinations
-    order_combinations <- paste(rep(c("Reference", "Query"), 
+    order_combinations <- paste(rep(c("Reference", "Query"),
                                     length(unique(pca_long[["cell_type"]]))),
-                                rep(sort(unique(pca_long[["cell_type"]])), 
+                                rep(sort(unique(pca_long[["cell_type"]])),
                                     each = 2))
-    
+
     # Reorder the levels of cell type and dataset factor
-    pca_long[["cell_type_dataset"]] <- factor(pca_long[["cell_type_dataset"]], 
+    pca_long[["cell_type_dataset"]] <- factor(pca_long[["cell_type_dataset"]],
                                               levels = order_combinations)
-    
+
     # Define the colors for cell types
-    cell_type_colors <- generateColors(order_combinations, 
+    cell_type_colors <- generateColors(order_combinations,
                                        paired = TRUE)
-    
+
     # Create the ggplot
     plot <- ggplot2::ggplot(pca_long, ggplot2::aes(
-        x = .data[["cell_type"]], 
-        y = .data[["Value"]], 
+        x = .data[["cell_type"]],
+        y = .data[["Value"]],
         fill = .data[["cell_type_dataset"]])) +
-        ggplot2::geom_boxplot(alpha = 0.7, outlier.shape = NA, width = 0.7) + 
+        ggplot2::geom_boxplot(alpha = 0.7, outlier.shape = NA, width = 0.7) +
         ggplot2::facet_wrap(~ .data[["PC"]], scales = "free") +
-        ggplot2::scale_fill_manual(values = cell_type_colors, 
-                                   name = "Cell Types") + 
-        ggplot2::labs(x = "", y = "Value") +  
+        ggplot2::scale_fill_manual(values = cell_type_colors,
+                                   name = "Cell Types") +
+        ggplot2::labs(x = "", y = "PCA Score") +
         ggplot2::theme_bw() +
         ggplot2::theme(
             panel.grid.minor = ggplot2::element_blank(),

diff --git a/R/calculateAveragePairwiseCorrelation.R b/R/calculateAveragePairwiseCorrelation.R
@@ -1,14 +1,14 @@
 #' @title Compute Average Pairwise Correlation between Cell Types
 #'
 #' @description
-#' Computes the average pairwise correlations between specified cell types 
+#' Computes the average pairwise correlations between specified cell types
 #' in single-cell gene expression data.
-#' 
-#' @details 
-#' This function operates on \code{\linkS4class{SingleCellExperiment}} objects, 
-#' ideal for single-cell analysis workflows. It calculates pairwise correlations between query and 
-#' reference cells using a specified correlation method, then averages these correlations for each 
-#' cell type pair. This function aids in assessing the similarity between cells in reference and query datasets, 
+#'
+#' @details
+#' This function operates on \code{\linkS4class{SingleCellExperiment}} objects,
+#' ideal for single-cell analysis workflows. It calculates pairwise correlations between query and
+#' reference cells using a specified correlation method, then averages these correlations for each
+#' cell type pair. This function aids in assessing the similarity between cells in reference and query datasets,
 #' providing insights into the reliability of cell type annotations in single-cell gene expression data.
 #'
 #' @param query_data A \code{\linkS4class{SingleCellExperiment}} object containing numeric expression matrix for the query cells.
@@ -21,49 +21,49 @@
 #' @param correlation_method The correlation method to use for calculating pairwise correlations.
 #' @param assay_name Name of the assay on which to perform computations. Default is "logcounts".
 #'
-#' @return A matrix containing the average pairwise correlation values. 
-#'         Rows and columns are labeled with the cell types. Each element 
-#'         in the matrix represents the average correlation between a pair 
+#' @return A matrix containing the average pairwise correlation values.
+#'         Rows and columns are labeled with the cell types. Each element
+#'         in the matrix represents the average correlation between a pair
 #'         of cell types.
-#'         
-#' @seealso \code{\link{plot.calculateAveragePairwiseCorrelation}}
-#' 
+#'
+#' @seealso \code{\link{plot.calculateAveragePairwiseCorrelationObject}}
+#'
 #' @author Anthony Christidis, \email{anthony-alexander_christidis@hms.harvard.edu}
-#' 
+#'
 #' @examples
 #' # Load data
 #' data("reference_data")
 #' data("query_data")
-#' 
+#'
 #' # Compute pairwise correlations
-#' cor_matrix_avg <- calculateAveragePairwiseCorrelation(query_data = query_data, 
-#'                                                       reference_data = reference_data, 
-#'                                                       query_cell_type_col = "SingleR_annotation", 
-#'                                                       ref_cell_type_col = "expert_annotation", 
-#'                                                       cell_types = c("CD4", "CD8", "B_and_plasma"), 
+#' cor_matrix_avg <- calculateAveragePairwiseCorrelation(query_data = query_data,
+#'                                                       reference_data = reference_data,
+#'                                                       query_cell_type_col = "SingleR_annotation",
+#'                                                       ref_cell_type_col = "expert_annotation",
+#'                                                       cell_types = c("CD4", "CD8", "B_and_plasma"),
 #'                                                       pc_subset = 1:10,
 #'                                                       correlation_method = "spearman")
-#' 
+#'
 #' # Visualize correlation output
 #' plot(cor_matrix_avg)
 #'
 #' @import SingleCellExperiment
 #' @importFrom stats cor
-#' 
+#'
 #' @export
 calculateAveragePairwiseCorrelation <- function(
-        query_data, 
-        reference_data, 
-        query_cell_type_col, 
-        ref_cell_type_col, 
-        cell_types = NULL, 
+        query_data,
+        reference_data,
+        query_cell_type_col,
+        ref_cell_type_col,
+        cell_types = NULL,
         pc_subset = 1:10,
         correlation_method = c("spearman", "pearson"),
         assay_name = "logcounts") {
-    
+
     # Match correlation method argument
     correlation_method <- match.arg(correlation_method)
-    
+
     # Check standard input arguments
     argumentCheck(query_data = query_data,
                   reference_data = reference_data,
@@ -72,61 +72,61 @@ calculateAveragePairwiseCorrelation <- function(
                   cell_types = cell_types,
                   pc_subset_ref = pc_subset,
                   assay_name = assay_name)
-    
+
     # Get common cell types if they are not specified by user
     if(is.null(cell_types)){
         cell_types <- na.omit(unique(c(reference_data[[ref_cell_type_col]],
                                        query_data[[query_cell_type_col]])))
     }
-    
+
     # Function to compute correlation between two cell types
     .computeCorrelation <- function(type1, type2) {
-        
+
         if(!is.null(pc_subset)){
             # Project query data onto PCA space of reference data
             pca_output <- projectPCA(
-                query_data = query_data, 
-                reference_data = reference_data, 
+                query_data = query_data,
+                reference_data = reference_data,
                 query_cell_type_col = query_cell_type_col,
                 ref_cell_type_col = ref_cell_type_col,
                 pc_subset = pc_subset,
                 assay_name = assay_name)
             ref_mat <- pca_output[which(
                 pca_output[["dataset"]] == "Reference" &
-                    pca_output[["cell_type"]] == type2), 
+                    pca_output[["cell_type"]] == type2),
                 paste0("PC", pc_subset)]
             query_mat <- pca_output[which(
                 pca_output[["dataset"]] == "Query" &
-                    pca_output[["cell_type"]] == type1), 
+                    pca_output[["cell_type"]] == type1),
                 paste0("PC", pc_subset)]
         } else{
-            
+
             # Subset query data to the specified cell type
             query_subset <- query_data[, which(
                 query_data[[query_cell_type_col]] == type1), drop = FALSE]
             ref_subset <- reference_data[, which(
                 reference_data[[ref_cell_type_col]] == type2), drop = FALSE]
-            
+
             query_mat <- t(as.matrix(assay(query_subset, assay_name)))
             ref_mat <- t(as.matrix(assay(ref_subset, assay_name)))
         }
-        
-        cor_matrix <- cor(t(query_mat), t(ref_mat), 
+
+        cor_matrix <- cor(t(query_mat), t(ref_mat),
                           method = correlation_method)
         return(mean(cor_matrix))
     }
-    
+
     # Use outer to compute pairwise correlations
-    cor_matrix_avg <- outer(cell_types, cell_types, 
+    cor_matrix_avg <- outer(cell_types, cell_types,
                             Vectorize(.computeCorrelation))
-    
+
     # Assign cell type names to rows and columns
     rownames(cor_matrix_avg) <- paste0("Query-", cell_types)
     colnames(cor_matrix_avg) <- paste0("Ref-", cell_types)
-    
+
     # Update class of output
-    class(cor_matrix_avg) <- c(class(cor_matrix_avg), 
-                               "calculateAveragePairwiseCorrelation")
-    
+    class(cor_matrix_avg) <- c(class(cor_matrix_avg),
+                               "calculateAveragePairwiseCorrelationObject")
+
     return(cor_matrix_avg)
 }