Skip to content

Commit

Permalink
Merge pull request #40 from AnthonyChristidis/main
Browse files Browse the repository at this point in the history
Fixing some minor issues in documentation of function.
  • Loading branch information
AnthonyChristidis authored Jun 4, 2024
2 parents e877a0c + 75215aa commit 5078809
Show file tree
Hide file tree
Showing 41 changed files with 495 additions and 1,462 deletions.
2 changes: 1 addition & 1 deletion R/boxplotPCA.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
#' cell_types = c("CD4", "CD8", "B_and_plasma", "Myeloid"),
#' query_cell_type_col = "labels",
#' ref_cell_type_col = "reclustered.broad",
#' pc_subset = c(1:5))
#' pc_subset = c(1:6))
#' pc_plot
#'
#'
Expand Down
1 change: 1 addition & 0 deletions R/calculateHVGOverlap.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#'
#' overlap_coefficient <- calculateHVGOverlap(reference_genes = ref_var,
#' query_genes = query_var)
#' overlap_coefficient
#'
#' @export
calculateHVGOverlap <- function(reference_genes, query_genes) {
Expand Down
2 changes: 1 addition & 1 deletion R/calculateHotellingPValue.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
#' # Get the p-values from the test
#' p_values <- calculateHotellingPValue(query_data_subset, ref_data_subset,
#' n_components = 10,
#' query_cell_type_col = "reclustered.broad",
#' query_cell_type_col = "labels",
#' ref_cell_type_col = "reclustered.broad",
#' pc_subset = c(1:10))
#' round(p_values, 5)
Expand Down
2 changes: 1 addition & 1 deletion R/calculateSampleDistancesSimilarity.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#' @description
#' This function computes Bhattacharyya coefficients and Hellinger distances to quantify the similarity of density
#' distributions between query samples and reference data for each cell type.

#'
#' @details
#' This function first computes distance data using the \code{calculateSampleDistances} function, which calculates
Expand Down Expand Up @@ -100,6 +99,7 @@
#' query_cell_type_col = "labels",
#' ref_cell_type_col = "reclustered.broad",
#' pc_subset = c(1:10))
#' overlap_measures
#'
#'
# Function to compute Bhattacharyya coefficients and Hellinger distances
Expand Down
1 change: 1 addition & 0 deletions R/calculateVarImpOverlap.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
#' ref_cell_type_col = "reclustered.broad",
#' n_tree = 500,
#' n_top = 20)
#' rf_output
#'
#'
# RF function to compare (between datasets) which genes are best at differentiating cell types from each
Expand Down
10 changes: 6 additions & 4 deletions R/histQCvsAnnotation.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,17 +113,19 @@ histQCvsAnnotation <- function(query_data,

# Create histogram for QC stats
qc_histogram <- ggplot2::ggplot(data, aes(x = QCStats)) +
ggplot2::geom_histogram(color = "black", fill = "white") +
ggplot2::geom_histogram(color = "black", fill = "#2E8B57", bins = 30) +
ggplot2::xlab(paste(qc_col)) +
ggplot2::ylab("Frequency") +
ggplot2::theme_bw()
ggplot2::theme_bw() +
ggplot2::theme(panel.grid.minor = ggplot2::element_blank())

# Create histogram for scores
scores_histogram <- ggplot2::ggplot(data, aes(x = Scores)) +
ggplot2::geom_histogram(color = "black", fill = "white") +
ggplot2::geom_histogram(color = "black", fill = "#4169E1", bins = 30) +
ggplot2::xlab("Annotation Scores") +
ggplot2::ylab("Frequency") +
ggplot2::theme_bw()
ggplot2::theme_bw() +
ggplot2::theme(panel.grid.minor = ggplot2::element_blank())

# Arrange the two histograms side by side and return the result
return(gridExtra::grid.arrange(qc_histogram, scores_histogram, ncol = 2))
Expand Down
66 changes: 4 additions & 62 deletions R/plot.calculateAveragePairwiseCorrelation.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,73 +18,15 @@
#'
#' @seealso \code{\link{calculateAveragePairwiseCorrelation}}
#'
#' @examples
#' library(scater)
#' library(scran)
#' library(scRNAseq)
#' library(SingleR)
#'
#' # Load data
#' sce <- HeOrganAtlasData(tissue = c("Marrow"), ensembl = FALSE)
#'
#' # Divide the data into reference and query datasets
#' set.seed(100)
#' indices <- sample(ncol(assay(sce)), size = floor(0.7 * ncol(assay(sce))), replace = FALSE)
#' ref_data <- sce[, indices]
#' query_data <- sce[, -indices]
#'
#' # log transform datasets
#' ref_data <- logNormCounts(ref_data)
#' query_data <- logNormCounts(query_data)
#'
#' # Get cell type scores using SingleR
#' scores <- SingleR(query_data, ref_data, labels = ref_data$reclustered.broad)
#'
#' # Add labels to query object
#' colData(query_data)$labels <- scores$labels
#'
#' # Compute Pairwise Correlations
#' # Note: The selection of highly variable genes and desired cell types may vary
#' # based on user preference.
#' # The cell type annotation method used in this example is SingleR.
#' # Users can use any other method for cell type annotation and provide
#' # the corresponding labels in the metadata.
#'
#' # Selecting highly variable genes
#' ref_var <- getTopHVGs(ref_data, n = 2000)
#' query_var <- getTopHVGs(query_data, n = 2000)
#'
#' # Intersect the gene symbols to obtain common genes
#' common_genes <- intersect(ref_var, query_var)
#'
#' # Select desired cell types
#' selected_cell_types <- c("CD4", "CD8", "B_and_plasma")
#' ref_data_subset <- ref_data[common_genes, ref_data$reclustered.broad %in% selected_cell_types]
#' query_data_subset <- query_data[common_genes, query_data$reclustered.broad %in% selected_cell_types]
#'
#' # Run PCA on the reference data
#' ref_data_subset <- runPCA(ref_data_subset)
#'
#' # Compute pairwise correlations
#' cor_matrix_avg <- calculateAveragePairwiseCorrelation(query_data = query_data_subset,
#' reference_data = ref_data_subset,
#' n_components = 10,
#' query_cell_type_col = "labels",
#' ref_cell_type_col = "reclustered.broad",
#' cell_types = selected_cell_types,
#' correlation_method = "spearman")
#'
#' # Visualize the results
#' plot(cor_matrix_avg)
#' @rdname calculateAveragePairwiseCorrelation
#'
#'
# Function to plot the output of the calculateAveragePairwiseCorrelation function
plot.calculateAveragePairwiseCorrelation <- function(x, ...){

# Convert matrix to dataframe
cor_df <- as.data.frame(as.table(cor_matrix_avg))
cor_df$Var1 <- factor(cor_df$Var1, levels = rownames(cor_matrix_avg))
cor_df$Var2 <- factor(cor_df$Var2, levels = rev(colnames(cor_matrix_avg)))
cor_df <- as.data.frame(as.table(x))
cor_df$Var1 <- factor(cor_df$Var1, levels = rownames(x))
cor_df$Var2 <- factor(cor_df$Var2, levels = rev(colnames(x)))

# Create the heatmap with updated colors and improved aesthetics
heatmap_plot <- ggplot2::ggplot(cor_df, ggplot2::aes(x = Var2, y = Var1)) +
Expand Down
60 changes: 1 addition & 59 deletions R/plot.calculateSampleDistances.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,67 +22,9 @@
#' @author Anthony Christidis, \email{anthony-alexander_christidis@hms.harvard.edu}
#'
#' @seealso \code{\link{calculateSampleDistances}}
#'
#' @examples
#' # Load required libraries
#' library(scRNAseq)
#' library(scuttle)
#' library(SingleR)
#' library(scran)
#' library(scater)
#'
#' # Load data (replace with your data loading)
#' sce <- HeOrganAtlasData(tissue = c("Marrow"), ensembl = FALSE)
#'
#' # Divide the data into reference and query datasets
#' set.seed(100)
#' indices <- sample(ncol(assay(sce)), size = floor(0.7 * ncol(assay(sce))), replace = FALSE)
#' ref_data <- sce[, indices]
#' query_data <- sce[, -indices]
#'
#' # log transform datasets
#' ref_data <- scuttle::logNormCounts(ref_data)
#' query_data <- scuttle::logNormCounts(query_data)
#'
#' # Get cell type scores using SingleR (or any other cell type annotation method)
#' scores <- SingleR::SingleR(query_data, ref_data, labels = ref_data$reclustered.broad)
#'
#' # Add labels to query object
#' colData(query_data)$labels <- scores$labels
#'
#' # Selecting highly variable genes (can be customized by the user)
#' ref_var <- scran::getTopHVGs(ref_data, n = 2000)
#' query_var <- scran::getTopHVGs(query_data, n = 2000)
#'
#' # Intersect the gene symbols to obtain common genes
#' common_genes <- intersect(ref_var, query_var)
#' ref_data_subset <- ref_data[common_genes, ]
#' query_data_subset <- query_data[common_genes, ]
#'
#' # Run PCA on the reference data
#' ref_data_subset <- runPCA(ref_data_subset)
#' @rdname calculateSampleDistances
#'
#' # Compute the sample distance data
#' distance_data <- calculateSampleDistances(query_data_subset, ref_data_subset,
#' n_components = 10,
#' query_cell_type_col = "labels",
#' ref_cell_type_col = "reclustered.broad",
#' pc_subset = c(1:10))
#'
#' # Identify outliers for CD4
#' cd4_anomalies <- detectAnomaly(ref_data_subset, query_data_subset,
#' query_cell_type_col = "labels",
#' ref_cell_type_col = "reclustered.broad",
#' n_components = 10,
#' n_tree = 500,
#' anomaly_treshold = 0.5)$CD4
#' cd4_top5_anomalies <- names(sort(cd4_anomalies$query_anomaly_scores, decreasing = TRUE)[1:6])
#'
#' # Plot the densities of the distances
#' plot(distance_data, ref_cell_type = "CD4", sample_names = cd4_top5_anomalies)
#' plot(distance_data, ref_cell_type = "CD8", sample_names = cd4_top5_anomalies)
#'
#'
# Function to plot density functions for the reference data and the specified sample
plot.calculateSampleDistances <- function(x, ref_cell_type, sample_names, ...) {

Expand Down
56 changes: 1 addition & 55 deletions R/plot.calculateSampleSimilarityPCA.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,63 +20,9 @@
#' @author Anthony Christidis, \email{anthony-alexander_christidis@hms.harvard.edu}
#'
#' @seealso \code{\link{calculateSampleSimilarityPCA}}
#'
#' @examples
#' # Load required libraries
#' library(scRNAseq)
#' library(scuttle)
#' library(SingleR)
#' library(scran)
#' library(scater)
#'
#' # Load data (replace with your data loading)
#' sce <- HeOrganAtlasData(tissue = c("Marrow"), ensembl = FALSE)
#'
#' # Divide the data into reference and query datasets
#' set.seed(100)
#' indices <- sample(ncol(assay(sce)), size = floor(0.7 * ncol(assay(sce))), replace = FALSE)
#' ref_data <- sce[, indices]
#' query_data <- sce[, -indices]
#'
#' # log transform datasets
#' ref_data <- scuttle::logNormCounts(ref_data)
#' query_data <- scuttle::logNormCounts(query_data)
#'
#' # Get cell type scores using SingleR (or any other cell type annotation method)
#' scores <- SingleR::SingleR(query_data, ref_data, labels = ref_data$reclustered.broad)
#'
#' # Add labels to query object
#' colData(query_data)$labels <- scores$labels
#'
#' # Selecting highly variable genes (can be customized by the user)
#' ref_var <- scran::getTopHVGs(ref_data, n = 2000)
#' query_var <- scran::getTopHVGs(query_data, n = 2000)
#'
#' # Intersect the gene symbols to obtain common genes
#' common_genes <- intersect(ref_var, query_var)
#' ref_data_subset <- ref_data[common_genes, ]
#' query_data_subset <- query_data[common_genes, ]
#'
#' # Run PCA on the reference data (assumed to be prepared)
#' ref_data_subset <- runPCA(ref_data_subset)
#' @rdname calculateSampleSimilarityPCA
#'
#' # Store PCA anomaly data and plots
#' anomaly_output <- detectAnomaly(reference_data = ref_data_subset,
#' ref_cell_type_col = "reclustered.broad",
#' n_components = 10,
#' n_tree = 500,
#' anomaly_treshold = 0.5)
#' top6_anomalies <- names(sort(anomaly_output$Combined$reference_anomaly_scores,
#' decreasing = TRUE)[1:6])
#'
#' # Compute cosine similarity between anomalies and top PCs
#' cosine_similarities <- calculateSampleSimilarityPCA(ref_data_subset, samples = top6_anomalies,
#' pc_subset = c(1:10), n_top_vars = 50)
#' cosine_similarities
#'
#' # Plot similarities
#' plot(cosine_similarities, pc_subset = c(1:5))
#'
# Function to plot cosine similarities between samples and PCs
plot.calculateSampleSimilarityPCA <- function(x, pc_subset = c(1:5), ...){

Expand Down
53 changes: 1 addition & 52 deletions R/plot.compareCCA.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,58 +20,7 @@
#'
#' @seealso \code{\link{compareCCA}}
#'
#' @examples
#' # Load necessary library
#' library(scRNAseq)
#' library(scuttle)
#' library(scran)
#' library(SingleR)
#' library(ggplot2)
#' library(scater)
#'
#' # Load data
#' sce <- HeOrganAtlasData(tissue = c("Marrow"), ensembl = FALSE)
#'
#' # Divide the data into reference and query datasets
#' set.seed(100)
#' indices <- sample(ncol(assay(sce)), size = floor(0.7 * ncol(assay(sce))), replace = FALSE)
#' ref_data <- sce[, indices]
#' query_data <- sce[, -indices]
#'
#' # Log transform datasets
#' ref_data <- logNormCounts(ref_data)
#' query_data <- logNormCounts(query_data)
#'
#' # Get cell type scores using SingleR (or any other cell type annotation method)
#' scores <- SingleR(query_data, ref_data, labels = ref_data$reclustered.broad)
#'
#' # Add labels to query object
#' colData(query_data)$labels <- scores$labels
#'
#' # Selecting highly variable genes (can be customized by the user)
#' ref_var <- getTopHVGs(ref_data, n = 500)
#' query_var <- getTopHVGs(query_data, n = 500)
#'
#' # Intersect the gene symbols to obtain common genes
#' common_genes <- intersect(ref_var, query_var)
#' ref_data_subset <- ref_data[common_genes, ]
#' query_data_subset <- query_data[common_genes, ]
#'
#' # Subset reference and query data for a specific cell type
#' ref_data_subset <- ref_data_subset[, which(ref_data_subset$reclustered.broad == "CD8")]
#' query_data_subset <- query_data_subset[, which(colData(query_data_subset)$labels == "CD8")]
#'
#' # Run PCA on the reference and query datasets
#' ref_data_subset <- runPCA(ref_data_subset, ncomponents = 50)
#' query_data_subset <- runPCA(query_data_subset, ncomponents = 50)
#'
#' # Compare CCA
#' cca_comparison <- compareCCA(query_data_subset, ref_data_subset,
#' pc_subset = c(1:5))
#'
#' # Visualize output of CCA comparison
#' plot(cca_comparison)
#'
#' @rdname compareCCA
#'
# Plot visualization of output from compareCCA function
plot.compareCCA <- function(x, ...){
Expand Down
Loading

0 comments on commit 5078809

Please sign in to comment.