Merge pull request #40 from AnthonyChristidis/main

Fix minor issues in the function documentation.
ccb-hms · Jun 4, 2024 · 5078809 · 5078809
2 parents e877a0c + 75215aa
commit 5078809
Show file tree

Hide file tree

Showing 41 changed files with 495 additions and 1,462 deletions.
diff --git a/R/boxplotPCA.R b/R/boxplotPCA.R
@@ -72,7 +72,7 @@
 #'                       cell_types = c("CD4", "CD8", "B_and_plasma", "Myeloid"),
 #'                       query_cell_type_col = "labels", 
 #'                       ref_cell_type_col = "reclustered.broad", 
-#'                       pc_subset = c(1:5))
+#'                       pc_subset = c(1:6))
 #' pc_plot
 #' 
 #' 

diff --git a/R/calculateHVGOverlap.R b/R/calculateHVGOverlap.R
@@ -49,6 +49,7 @@
 #' 
 #' overlap_coefficient <- calculateHVGOverlap(reference_genes = ref_var, 
 #'                                           query_genes = query_var)
+#' overlap_coefficient
 #' 
 #' @export                                       
 calculateHVGOverlap <- function(reference_genes, query_genes) {

diff --git a/R/calculateHotellingPValue.R b/R/calculateHotellingPValue.R
@@ -66,7 +66,7 @@
 #' # Get the p-values from the test
 #' p_values <- calculateHotellingPValue(query_data_subset, ref_data_subset, 
 #'                                      n_components = 10, 
-#'                                      query_cell_type_col = "reclustered.broad", 
+#'                                      query_cell_type_col = "labels", 
 #'                                      ref_cell_type_col = "reclustered.broad",
 #'                                      pc_subset = c(1:10)) 
 #' round(p_values, 5)

diff --git a/R/calculateSampleDistancesSimilarity.R b/R/calculateSampleDistancesSimilarity.R
@@ -3,7 +3,6 @@
 #' @description 
 #' This function computes Bhattacharyya coefficients and Hellinger distances to quantify the similarity of density 
 #' distributions between query samples and reference data for each cell type.
-
 #'
 #' @details 
 #' This function first computes distance data using the \code{calculateSampleDistances} function, which calculates 
@@ -100,6 +99,7 @@
 #'                                                        query_cell_type_col = "labels", 
 #'                                                        ref_cell_type_col = "reclustered.broad",
 #'                                                        pc_subset = c(1:10))
+#' overlap_measures
 #' 
 #' 
 # Function to compute Bhattacharyya coefficients and Hellinger distances

diff --git a/R/calculateVarImpOverlap.R b/R/calculateVarImpOverlap.R
@@ -67,6 +67,7 @@
 #'                                     ref_cell_type_col = "reclustered.broad", 
 #'                                     n_tree = 500,
 #'                                     n_top = 20)
+#' rf_output
 #' 
 #' 
 # RF function to compare (between datasets) which genes are best at differentiating cell types from each 

diff --git a/R/histQCvsAnnotation.R b/R/histQCvsAnnotation.R
@@ -113,17 +113,19 @@ histQCvsAnnotation <- function(query_data,
 
   # Create histogram for QC stats
   qc_histogram <- ggplot2::ggplot(data, aes(x = QCStats)) +
-      ggplot2::geom_histogram(color = "black", fill = "white") +
+      ggplot2::geom_histogram(color = "black", fill = "#2E8B57", bins = 30) +
       ggplot2::xlab(paste(qc_col)) +
       ggplot2::ylab("Frequency") +
-      ggplot2::theme_bw()
+      ggplot2::theme_bw() +
+      ggplot2::theme(panel.grid.minor = ggplot2::element_blank())
 
   # Create histogram for scores
   scores_histogram <- ggplot2::ggplot(data, aes(x = Scores)) +
-      ggplot2::geom_histogram(color = "black", fill = "white") +
+      ggplot2::geom_histogram(color = "black", fill = "#4169E1", bins = 30) +
       ggplot2::xlab("Annotation Scores") +
       ggplot2::ylab("Frequency") +
-      ggplot2::theme_bw()
+      ggplot2::theme_bw() +
+      ggplot2::theme(panel.grid.minor = ggplot2::element_blank())
 
   # Return the list of plots
   return(gridExtra::grid.arrange(qc_histogram, scores_histogram, ncol = 2))

diff --git a/R/plot.calculateAveragePairwiseCorrelation.R b/R/plot.calculateAveragePairwiseCorrelation.R
@@ -18,73 +18,15 @@
 #'         
 #' @seealso \code{\link{calculateAveragePairwiseCorrelation}}
 #' 
-#' @examples
-#' library(scater)
-#' library(scran)
-#' library(scRNAseq)
-#' library(SingleR)
-#'
-#' # Load data
-#' sce <- HeOrganAtlasData(tissue = c("Marrow"), ensembl = FALSE)
-#'
-#' # Divide the data into reference and query datasets
-#' set.seed(100)
-#' indices <- sample(ncol(assay(sce)), size = floor(0.7 * ncol(assay(sce))), replace = FALSE)
-#' ref_data <- sce[, indices]
-#' query_data <- sce[, -indices]
-#'
-#' # log transform datasets
-#' ref_data <- logNormCounts(ref_data)
-#' query_data <- logNormCounts(query_data)
-#'
-#' # Get cell type scores using SingleR
-#' scores <- SingleR(query_data, ref_data, labels = ref_data$reclustered.broad)
-#'
-#' # Add labels to query object
-#' colData(query_data)$labels <- scores$labels
-#'
-#' # Compute Pairwise Correlations
-#' # Note: The selection of highly variable genes and desired cell types may vary 
-#' # based on user preference. 
-#' # The cell type annotation method used in this example is SingleR. 
-#' # User can use any other method for cell type annotation and provide 
-#' # the corresponding labels in the metadata.
-#'
-#' # Selecting highly variable genes
-#' ref_var <- getTopHVGs(ref_data, n = 2000)
-#' query_var <- getTopHVGs(query_data, n = 2000)
-#'
-#' # Intersect the gene symbols to obtain common genes
-#' common_genes <- intersect(ref_var, query_var)
-#'
-#' # Select desired cell types
-#' selected_cell_types <- c("CD4", "CD8", "B_and_plasma")
-#' ref_data_subset <- ref_data[common_genes, ref_data$reclustered.broad %in% selected_cell_types]
-#' query_data_subset <- query_data[common_genes, query_data$reclustered.broad %in% selected_cell_types]
-#' 
-#' # Run PCA on the reference data
-#' ref_data_subset <- runPCA(ref_data_subset)
-#'
-#' # Compute pairwise correlations
-#' cor_matrix_avg <- calculateAveragePairwiseCorrelation(query_data = query_data_subset, 
-#'                                                       reference_data = ref_data_subset, 
-#'                                                       n_components = 10,
-#'                                                       query_cell_type_col = "labels", 
-#'                                                       ref_cell_type_col = "reclustered.broad", 
-#'                                                       cell_types = selected_cell_types, 
-#'                                                       correlation_method = "spearman")
-#'
-#' # Visualize the results
-#' plot(cor_matrix_avg)
+#' @rdname calculateAveragePairwiseCorrelation
 #' 
-#'
 # Function to plot the output of the calculateAveragePairwiseCorrelation function
 plot.calculateAveragePairwiseCorrelation <- function(x, ...){
 
     # Convert matrix to dataframe
-    cor_df <- as.data.frame(as.table(cor_matrix_avg))
-    cor_df$Var1 <- factor(cor_df$Var1, levels = rownames(cor_matrix_avg))
-    cor_df$Var2 <- factor(cor_df$Var2, levels = rev(colnames(cor_matrix_avg)))
+    cor_df <- as.data.frame(as.table(x))
+    cor_df$Var1 <- factor(cor_df$Var1, levels = rownames(x))
+    cor_df$Var2 <- factor(cor_df$Var2, levels = rev(colnames(x)))
 
     # Create the heatmap with updated colors and improved aesthetics
     heatmap_plot <- ggplot2::ggplot(cor_df, ggplot2::aes(x = Var2, y = Var1)) +

diff --git a/R/plot.calculateSampleDistances.R b/R/plot.calculateSampleDistances.R
@@ -22,67 +22,9 @@
 #' @author Anthony Christidis, \email{anthony-alexander_christidis@hms.harvard.edu}
 #' 
 #' @seealso \code{\link{calculateSampleDistances}}
-#'
-#' @examples
-#' # Load required libraries
-#' library(scRNAseq)
-#' library(scuttle)
-#' library(SingleR)
-#' library(scran)
-#' library(scater)
-#'
-#' # Load data (replace with your data loading)
-#' sce <- HeOrganAtlasData(tissue = c("Marrow"), ensembl = FALSE)
-#' 
-#' # Divide the data into reference and query datasets
-#' set.seed(100)
-#' indices <- sample(ncol(assay(sce)), size = floor(0.7 * ncol(assay(sce))), replace = FALSE)
-#' ref_data <- sce[, indices]
-#' query_data <- sce[, -indices]
-#' 
-#' # log transform datasets
-#' ref_data <- scuttle::logNormCounts(ref_data)
-#' query_data <- scuttle::logNormCounts(query_data)
-#' 
-#' # Get cell type scores using SingleR (or any other cell type annotation method)
-#' scores <- SingleR::SingleR(query_data, ref_data, labels = ref_data$reclustered.broad)
-#' 
-#' # Add labels to query object
-#' colData(query_data)$labels <- scores$labels
 #' 
-#' # Selecting highly variable genes (can be customized by the user)
-#' ref_var <- scran::getTopHVGs(ref_data, n = 2000)
-#' query_var <- scran::getTopHVGs(query_data, n = 2000)
-#' 
-#' # Intersect the gene symbols to obtain common genes
-#' common_genes <- intersect(ref_var, query_var)
-#' ref_data_subset <- ref_data[common_genes, ]
-#' query_data_subset <- query_data[common_genes, ]
-#' 
-#' # Run PCA on the reference data
-#' ref_data_subset <- runPCA(ref_data_subset)
+#' @rdname calculateSampleDistances
 #'
-#' # Plot the PC data
-#' distance_data <- calculateSampleDistances(query_data_subset, ref_data_subset, 
-#'                                           n_components = 10, 
-#'                                           query_cell_type_col = "labels", 
-#'                                           ref_cell_type_col = "reclustered.broad",
-#'                                           pc_subset = c(1:10)) 
-#' 
-#' # Identify outliers for CD4
-#' cd4_anomalies <- detectAnomaly(ref_data_subset, query_data_subset, 
-#'                                query_cell_type_col = "labels", 
-#'                                ref_cell_type_col = "reclustered.broad",
-#'                                n_components = 10,
-#'                                n_tree = 500,
-#'                                anomaly_treshold = 0.5)$CD4
-#' cd4_top5_anomalies <- names(sort(cd4_anomalies$query_anomaly_scores, decreasing = TRUE)[1:6])
-#' 
-#' # Plot the densities of the distances
-#' plot(distance_data, ref_cell_type = "CD4", sample_names = cd4_top5_anomalies)
-#' plot(distance_data, ref_cell_type = "CD8", sample_names = cd4_top5_anomalies)
-#' 
-#'  
 # Function to plot density functions for the reference data and the specified sample
 plot.calculateSampleDistances <- function(x, ref_cell_type, sample_names, ...) {
 

diff --git a/R/plot.calculateSampleSimilarityPCA.R b/R/plot.calculateSampleSimilarityPCA.R
@@ -20,63 +20,9 @@
 #' @author Anthony Christidis, \email{anthony-alexander_christidis@hms.harvard.edu}
 #' 
 #' @seealso \code{\link{calculateSampleSimilarityPCA}}
-#'
-#' @examples
-#' # Load required libraries
-#' library(scRNAseq)
-#' library(scuttle)
-#' library(SingleR)
-#' library(scran)
-#' library(scater)
-#'
-#' # Load data (replace with your data loading)
-#' sce <- HeOrganAtlasData(tissue = c("Marrow"), ensembl = FALSE)
-#' 
-#' # Divide the data into reference and query datasets
-#' set.seed(100)
-#' indices <- sample(ncol(assay(sce)), size = floor(0.7 * ncol(assay(sce))), replace = FALSE)
-#' ref_data <- sce[, indices]
-#' query_data <- sce[, -indices]
-#' 
-#' # log transform datasets
-#' ref_data <- scuttle::logNormCounts(ref_data)
-#' query_data <- scuttle::logNormCounts(query_data)
-#' 
-#' # Get cell type scores using SingleR (or any other cell type annotation method)
-#' scores <- SingleR::SingleR(query_data, ref_data, labels = ref_data$reclustered.broad)
-#' 
-#' # Add labels to query object
-#' colData(query_data)$labels <- scores$labels
 #' 
-#' # Selecting highly variable genes (can be customized by the user)
-#' ref_var <- scran::getTopHVGs(ref_data, n = 2000)
-#' query_var <- scran::getTopHVGs(query_data, n = 2000)
-#' 
-#' # Intersect the gene symbols to obtain common genes
-#' common_genes <- intersect(ref_var, query_var)
-#' ref_data_subset <- ref_data[common_genes, ]
-#' query_data_subset <- query_data[common_genes, ]
-#'
-#' # Run PCA on the reference data (assumed to be prepared)
-#' ref_data_subset <- runPCA(ref_data_subset)
+#' @rdname calculateSampleSimilarityPCA
 #'
-#' # Store PCA anomaly data and plots
-#' anomaly_output <- detectAnomaly(reference_data = ref_data_subset, 
-#'                                 ref_cell_type_col = "reclustered.broad", 
-#'                                 n_components = 10,
-#'                                 n_tree = 500,
-#'                                 anomaly_treshold = 0.5) 
-#' top6_anomalies <- names(sort(anomaly_output$Combined$reference_anomaly_scores, 
-#'                              decreasing = TRUE)[1:6])
-#' 
-#' # Compute cosine similarity between anomalies and top PCs
-#' cosine_similarities <- calculateSampleSimilarityPCA(ref_data_subset, samples = top6_anomalies, 
-#'                                                     pc_subset = c(1:10), n_top_vars = 50)
-#' cosine_similarities
-#' 
-#' # Plot similarities
-#' plot(cosine_similarities, pc_subset = c(1:5))
-#' 
 # Function to plot cosine similarities between samples and PCs
 plot.calculateSampleSimilarityPCA <- function(x, pc_subset = c(1:5), ...){
 

diff --git a/R/plot.compareCCA.R b/R/plot.compareCCA.R
@@ -20,58 +20,7 @@
 #' 
 #' @seealso \code{\link{compareCCA}}
 #' 
-#' @examples
-#' # Load necessary library
-#' library(scRNAseq)
-#' library(scuttle)
-#' library(scran)
-#' library(SingleR)
-#' library(ggplot2)
-#' library(scater)
-#'
-#' # Load data
-#' sce <- HeOrganAtlasData(tissue = c("Marrow"), ensembl = FALSE)
-#' 
-#' # Divide the data into reference and query datasets
-#' set.seed(100)
-#' indices <- sample(ncol(assay(sce)), size = floor(0.7 * ncol(assay(sce))), replace = FALSE)
-#' ref_data <- sce[, indices]
-#' query_data <- sce[, -indices]
-#'
-#' # Log transform datasets
-#' ref_data <- logNormCounts(ref_data)
-#' query_data <- logNormCounts(query_data)
-#'
-#' # Get cell type scores using SingleR (or any other cell type annotation method)
-#' scores <- SingleR(query_data, ref_data, labels = ref_data$reclustered.broad)
-#'
-#' # Add labels to query object
-#' colData(query_data)$labels <- scores$labels
-#'
-#' # Selecting highly variable genes (can be customized by the user)
-#' ref_var <- getTopHVGs(ref_data, n = 500)
-#' query_var <- getTopHVGs(query_data, n = 500)
-#'
-#' # Intersect the gene symbols to obtain common genes
-#' common_genes <- intersect(ref_var, query_var)
-#' ref_data_subset <- ref_data[common_genes, ]
-#' query_data_subset <- query_data[common_genes, ]
-#'
-#' # Subset reference and query data for a specific cell type
-#' ref_data_subset <- ref_data_subset[, which(ref_data_subset$reclustered.broad == "CD8")]
-#' query_data_subset <- query_data_subset[, which(colData(query_data_subset)$labels == "CD8")]
-#'
-#' # Run PCA on the reference and query datasets
-#' ref_data_subset <- runPCA(ref_data_subset, ncomponents = 50)
-#' query_data_subset <- runPCA(query_data_subset, ncomponents = 50)
-#' 
-#' # Compare CCA
-#' cca_comparison <- compareCCA(query_data_subset, ref_data_subset, 
-#'                              pc_subset = c(1:5))
-#' 
-#' # Visualize output of CCA comparison
-#' plot(cca_comparison)
-#' 
+#' @rdname compareCCA
 #' 
 # Plot visualization of output from compareCCA function
 plot.compareCCA <- function(x, ...){