Skip to content

Commit

Permalink
Merge pull request #73 from AnthonyChristidis/main
Browse files Browse the repository at this point in the history
Add option to specify assay.
  • Loading branch information
AnthonyChristidis authored Sep 7, 2024
2 parents c581546 + a92f82b commit 9045bcf
Show file tree
Hide file tree
Showing 46 changed files with 319 additions and 2,533 deletions.
27 changes: 20 additions & 7 deletions R/argumentCheck.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#' @param pc_subset_ref A numeric vector specifying the principal components to be used for the reference data. If `NULL`, no check is performed.
#' @param common_rotation_genes If TRUE, check the rotation matrices of the reference and query data and ensure they have the same genes.
#' Default is FALSE.
#' @param assay_name Name of the assay on which to perform computations. If `NULL`, no check is performed.
#'
#' @keywords internal
#'
Expand All @@ -54,32 +55,34 @@ argumentCheck <- function(query_data = NULL,
cell_names_ref = NULL,
pc_subset_query = NULL,
pc_subset_ref = NULL,
common_rotation_genes = FALSE) {
common_rotation_genes = FALSE,
assay_name = NULL) {

# Check if query_data is a SingleCellExperiment object
if (!is.null(query_data)) {

if (!is(query_data, "SingleCellExperiment")) {
stop("'query_data' must be a SingleCellExperiment object.")
}

if (!("logcounts" %in% SummarizedExperiment::assayNames(query_data))) {
stop("'query_data' does not contain 'logcounts' in its assays.")
}
if(!is.null(assay_name) && !(assay_name %in% SummarizedExperiment::assayNames(query_data)))
stop("\'query_data\' does not contain the specified assay.")
}

# Check if reference_data is a SingleCellExperiment object
if (!is.null(reference_data)) {

if (!is(reference_data, "SingleCellExperiment")) {
stop("'reference_data' must be a SingleCellExperiment object.")
}

if (!("logcounts" %in% SummarizedExperiment::assayNames(reference_data))) {
stop("'reference_data' does not contain 'logcounts' in its assays.")
}
if(!is.null(assay_name) && !(assay_name %in% SummarizedExperiment::assayNames(reference_data)))
stop("\'reference_data\' does not contain the specified assay.")
}

# Check if query_cell_type_col is a character string of length 1 and exists in query_data
if (!is.null(query_cell_type_col)) {

if (!is.null(query_data)) {
if (!is.character(query_cell_type_col) ||
length(query_cell_type_col) != 1) {
Expand All @@ -94,6 +97,7 @@ argumentCheck <- function(query_data = NULL,

# Check if ref_cell_type_col is a character string of length 1 and exists in reference_data
if (!is.null(ref_cell_type_col)) {

if (!is.null(reference_data)) {
if (!is.character(ref_cell_type_col) ||
length(ref_cell_type_col) != 1) {
Expand All @@ -108,6 +112,7 @@ argumentCheck <- function(query_data = NULL,

# Check if cell_types are available in the SingleCellExperiment object(s)
if (!is.null(cell_types)) {

if (!is.null(query_data)) {
if (!all(cell_types %in%
unique(query_data[[query_cell_type_col]]))) {
Expand All @@ -125,13 +130,15 @@ argumentCheck <- function(query_data = NULL,

# Check that the SingleCellExperiment object(s) have a unique cell type
if (isTRUE(unique_cell_type)) {

if (!is.null(query_data)) {
if (length(unique(query_data[[query_cell_type_col]])) > 1) {
stop("This function should be used when there is only one cell type in 'query_data'.")
}
}

if (!is.null(reference_data)) {

if (length(unique(reference_data[[ref_cell_type_col]])) > 1) {
stop("This function should be used when there is only one cell type in 'reference_data'.")
}
Expand All @@ -146,27 +153,31 @@ argumentCheck <- function(query_data = NULL,

# Check the number of cell types for plot function
if (plot_function == TRUE) {

if (length(unique(cell_types)) > 10) {
stop("The maximum number of cell types for plotting is 10.")
}
}

# Check that cell_names_query contains only valid cell names in query_data
if (!is.null(cell_names_query)) {

if (!all(cell_names_query %in% colnames(query_data))) {
stop("'cell_names' contains one or more cells that are not available in 'query_data'.")
}
}

# Check that cell_names_ref contains only valid cell names in reference_data
if (!is.null(cell_names_ref)) {

if (!all(cell_names_ref %in% colnames(reference_data))) {
stop("'cell_names' contains one or more cells that are not available in 'reference_data'.")
}
}

# Check PC subset for query_data
if (!is.null(pc_subset_query)) {

# Check if "PCA" is present in query's reduced dimensions
if (!"PCA" %in% names(reducedDims(query_data))) {
stop("'query_data' must have pre-computed PCA in 'reducedDims'.")
Expand All @@ -180,6 +191,7 @@ argumentCheck <- function(query_data = NULL,

# Check PC subset for reference_data
if (!is.null(pc_subset_ref)) {

# Check if "PCA" is present in reference's reduced dimensions
if (!"PCA" %in% names(reducedDims(reference_data))) {
stop("Reference data must have pre-computed PCA in 'reducedDims'.")
Expand All @@ -193,6 +205,7 @@ argumentCheck <- function(query_data = NULL,

# Check if the rotation matrices have the same genes in the same order
if (common_rotation_genes == TRUE) {

# Check if the rotation matrices have the same number of genes
if (ncol(attributes(reducedDim(query_data, "PCA"))[["rotation"]]) !=
ncol(attributes(reducedDim(reference_data, "PCA"))[["rotation"]])) {
Expand Down
10 changes: 7 additions & 3 deletions R/boxplotPCA.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#' @param ref_cell_type_col The column name in the \code{colData} of \code{reference_data} that identifies the cell types.
#' @param cell_types A character vector specifying the cell types to include in the plot. If NULL, all cell types are included.
#' @param pc_subset A numeric vector specifying which principal components to include in the plot. Default is PC1 to PC5.
#' @param assay_name Name of the assay on which to perform computations. Default is "logcounts".
#'
#' @return A ggplot object representing the boxplots of specified principal components for the given cell types and datasets.
#'
Expand Down Expand Up @@ -46,15 +47,17 @@ boxplotPCA <- function(query_data,
query_cell_type_col,
ref_cell_type_col,
cell_types = NULL,
pc_subset = 1:5){
pc_subset = 1:5,
assay_name = "logcounts"){

# Check standard input arguments
argumentCheck(query_data = query_data,
reference_data = reference_data,
query_cell_type_col = query_cell_type_col,
ref_cell_type_col = ref_cell_type_col,
cell_types = cell_types,
pc_subset_ref = pc_subset)
pc_subset_ref = pc_subset,
assay_name = assay_name)

# Get common cell types if they are not specified by user
if(is.null(cell_types)){
Expand All @@ -67,7 +70,8 @@ boxplotPCA <- function(query_data,
reference_data = reference_data,
query_cell_type_col = query_cell_type_col,
ref_cell_type_col = ref_cell_type_col,
pc_subset = pc_subset)
pc_subset = pc_subset,
assay_name = assay_name)

# Create the long format data frame manually
pca_output <- pca_output[!is.na(pca_output[["cell_type"]]),]
Expand Down
14 changes: 9 additions & 5 deletions R/calculateAveragePairwiseCorrelation.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#' @param pc_subset A numeric vector specifying which principal components to use in the analysis. Default is 1:10.
#' If set to \code{NULL} then no dimensionality reduction is performed and the assay data is used directly for computations.
#' @param correlation_method The correlation method to use for calculating pairwise correlations.
#' @param assay_name Name of the assay on which to perform computations. Default is "logcounts".
#'
#' @return A matrix containing the average pairwise correlation values.
#' Rows and columns are labeled with the cell types. Each element
Expand Down Expand Up @@ -57,7 +58,8 @@ calculateAveragePairwiseCorrelation <- function(
ref_cell_type_col,
cell_types = NULL,
pc_subset = 1:10,
correlation_method = c("spearman", "pearson")) {
correlation_method = c("spearman", "pearson"),
assay_name = "logcounts") {

# Match correlation method argument
correlation_method <- match.arg(correlation_method)
Expand All @@ -68,7 +70,8 @@ calculateAveragePairwiseCorrelation <- function(
query_cell_type_col = query_cell_type_col,
ref_cell_type_col = ref_cell_type_col,
cell_types = cell_types,
pc_subset_ref = pc_subset)
pc_subset_ref = pc_subset,
assay_name = assay_name)

# Get common cell types if they are not specified by user
if(is.null(cell_types)){
Expand All @@ -86,7 +89,8 @@ calculateAveragePairwiseCorrelation <- function(
reference_data = reference_data,
query_cell_type_col = query_cell_type_col,
ref_cell_type_col = ref_cell_type_col,
pc_subset = pc_subset)
pc_subset = pc_subset,
assay_name = assay_name)
ref_mat <- pca_output[which(
pca_output[["dataset"]] == "Reference" &
pca_output[["cell_type"]] == type2),
Expand All @@ -103,8 +107,8 @@ calculateAveragePairwiseCorrelation <- function(
ref_subset <- reference_data[, which(
reference_data[[ref_cell_type_col]] == type2), drop = FALSE]

query_mat <- t(as.matrix(assay(query_subset, "logcounts")))
ref_mat <- t(as.matrix(assay(ref_subset, "logcounts")))
query_mat <- t(as.matrix(assay(query_subset, assay_name)))
ref_mat <- t(as.matrix(assay(ref_subset, assay_name)))
}

cor_matrix <- cor(t(query_mat), t(ref_mat),
Expand Down
10 changes: 7 additions & 3 deletions R/calculateCellDistances.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#' that identifies the cell types.
#' @param cell_types A character vector specifying the cell types to include in the plot. If NULL, all cell types are included.
#' @param pc_subset A numeric vector specifying which principal components to include in the plot. Default is 1:5.
#' @param assay_name Name of the assay on which to perform computations. Default is "logcounts".
#'
#' @return A list containing distance data for each cell type. Each entry in the list contains:
#' \describe{
Expand Down Expand Up @@ -62,15 +63,17 @@ calculateCellDistances <- function(query_data,
query_cell_type_col,
ref_cell_type_col,
cell_types = NULL,
pc_subset = 1:5) {
pc_subset = 1:5,
assay_name = "logcounts") {

# Check standard input arguments
argumentCheck(query_data = query_data,
reference_data = reference_data,
query_cell_type_col = query_cell_type_col,
ref_cell_type_col = ref_cell_type_col,
cell_types = cell_types,
pc_subset_ref = pc_subset)
pc_subset_ref = pc_subset,
assay_name = assay_name)

# Get common cell types if they are not specified by user
if(is.null(cell_types)){
Expand All @@ -83,7 +86,8 @@ calculateCellDistances <- function(query_data,
reference_data = reference_data,
query_cell_type_col = query_cell_type_col,
ref_cell_type_col = ref_cell_type_col,
pc_subset = pc_subset)
pc_subset = pc_subset,
assay_name = assay_name)

# Create a list to store distance data for each cell type
distance_data <- vector("list", length = length(cell_types))
Expand Down
15 changes: 10 additions & 5 deletions R/calculateCellDistancesSimilarity.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
#' that identifies the cell types.
#' @param cell_names A character vector specifying the names of the query cells for which to compute distance measures.
#' @param pc_subset A numeric vector specifying which principal components to include in the plot. Default is 1:5.
#'
#' @param assay_name Name of the assay on which to perform computations. Default is "logcounts".
#'
#' @return A list containing distance data for each cell type. Each entry in the list contains:
#' \describe{
#' \item{ref_distances}{A vector of all pairwise distances within the reference subset for the cell type.}
Expand Down Expand Up @@ -73,15 +74,17 @@ calculateCellDistancesSimilarity <- function(query_data,
query_cell_type_col,
ref_cell_type_col,
cell_names,
pc_subset = 1:5) {
pc_subset = 1:5,
assay_name = "logcounts") {

# Check standard input arguments
argumentCheck(query_data = query_data,
reference_data = reference_data,
query_cell_type_col = query_cell_type_col,
ref_cell_type_col = ref_cell_type_col,
cell_names_query = cell_names,
pc_subset_ref = pc_subset)
pc_subset_ref = pc_subset,
assay_name = assay_name)

# Compute distance data
query_data_subset <- query_data[, cell_names, drop = FALSE]
Expand All @@ -90,7 +93,8 @@ calculateCellDistancesSimilarity <- function(query_data,
reference_data = reference_data,
query_cell_type_col = query_cell_type_col,
ref_cell_type_col = ref_cell_type_col,
pc_subset = pc_subset)
pc_subset = pc_subset,
assay_name = assay_name)

# Initialize empty lists to store results
bhattacharyya_list <- hellinger_list <-
Expand All @@ -114,7 +118,8 @@ calculateCellDistancesSimilarity <- function(query_data,
for (i in seq_len(length(cell_names))) {

# Extract distances from the current cell to reference cells
cell_distances <- distance_data[[cell_type]][["query_to_ref_distances"]][cell_names[i], , drop = FALSE]
cell_distances <-
distance_data[[cell_type]][["query_to_ref_distances"]][cell_names[i], , drop = FALSE]

# Compute density of cell distances
cell_density <- density(cell_distances)
Expand Down
9 changes: 6 additions & 3 deletions R/calculateCellSimilarityPCA.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#' @param cell_names A character vector specifying the cell names for which to compute the similarity.
#' @param pc_subset A numeric vector specifying the subset of principal components to consider. Default is 1:5.
#' @param n_top_vars An integer indicating the number of top loading variables to consider for each PC. Default is 50.
#' @param assay_name Name of the assay on which to perform computations. Default is "logcounts".
#'
#' @return A data frame containing cosine similarity values between cells for each selected principal component.
#'
Expand Down Expand Up @@ -53,12 +54,14 @@
calculateCellSimilarityPCA <- function(se_object,
cell_names,
pc_subset = 1:5,
n_top_vars = 50){
n_top_vars = 50,
assay_name = "logcounts"){

# Check standard input arguments
argumentCheck(query_data = se_object,
cell_names_query = cell_names,
pc_subset_query = pc_subset)
pc_subset_query = pc_subset,
assay_name = assay_name)

# Check if n_top_vars is a positive integer
if (!is.numeric(n_top_vars) || n_top_vars <= 0 ||
Expand Down Expand Up @@ -109,7 +112,7 @@ calculateCellSimilarityPCA <- function(se_object,

# Calculate similarities
assay_mat <- t(as.matrix(assay(se_object[, cell_names, drop = FALSE],
"logcounts")))
assay_name)))
similarities <- .computeCosineSimilarity(assay_mat, rotation_mat,
high_loading_vars)

Expand Down
10 changes: 7 additions & 3 deletions R/calculateCramerPValue.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#' @param query_cell_type_col The column name in the \code{colData} of \code{query_data} that identifies the cell types.
#' @param cell_types A character vector specifying the cell types to include in the plot. If NULL, all cell types are included.
#' @param pc_subset A numeric vector specifying which principal components to include in the plot. Default is PC1 to PC5.
#' @param assay_name Name of the assay on which to perform computations. Default is "logcounts".
#'
#' @return A named vector of p-values from the Cramer test for each cell type.
#'
Expand Down Expand Up @@ -49,15 +50,17 @@ calculateCramerPValue <- function(reference_data,
ref_cell_type_col,
query_cell_type_col = NULL,
cell_types = NULL,
pc_subset = 1:5) {
pc_subset = 1:5,
assay_name = "logcounts") {

# Check standard input arguments
argumentCheck(query_data = query_data,
reference_data = reference_data,
query_cell_type_col = query_cell_type_col,
ref_cell_type_col = ref_cell_type_col,
cell_types = cell_types,
pc_subset_ref = pc_subset)
pc_subset_ref = pc_subset,
assay_name = assay_name)

# Get common cell types if they are not specified by user
if(is.null(cell_types)){
Expand All @@ -76,7 +79,8 @@ calculateCramerPValue <- function(reference_data,
reference_data = reference_data,
query_cell_type_col = query_cell_type_col,
ref_cell_type_col = ref_cell_type_col,
pc_subset = pc_subset)
pc_subset = pc_subset,
assay_name = assay_name)
pca_output <- pca_output[pca_output[["cell_type"]] %in% cell_types,]

# Set data for Cramer test
Expand Down
Loading

0 comments on commit 9045bcf

Please sign in to comment.