Skip to content

Commit

Permalink
Git fix indentation and style for BiocCheck
Browse files Browse the repository at this point in the history
  • Loading branch information
nturaga committed Apr 14, 2024
1 parent 3f4adde commit 1b93ee8
Show file tree
Hide file tree
Showing 18 changed files with 211 additions and 179 deletions.
56 changes: 29 additions & 27 deletions R/calculateCategorizationEntropy.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#' -sum(p*log(p))} . If that input vector p is a uniform
#' distribution over the \code{length(p)} categories, the entropy
#' will be as high as possible.
#'
#'
#' @param X a matrix of category scores
#'
#' @param inverse_normal_transform if TRUE, apply
Expand All @@ -33,47 +33,49 @@
#' @param plot if TRUE, plot a histogram of the entropies
#'
#' @return A vector of entropy values for each column in X.
#'
#'
#' @examples
#' # Simulate 500 cells with scores on 4 possible cell types
#' X <- rnorm(500 * 4) |> matrix(nrow = 4)
#'
#' # Make the first category highly scored in the first 250 cells
#' X[1, 1:250] <- X[1, 1:250] + 5
#' X[1, 1:250] <- X[1, 1:250] + 5
#'
#' # The function will issue a message about softmaxing the scores,
#' # and the entropy histogram will be bimodal since we made half of
#' # the cells clearly category 1 while the other half are roughly
#' # even.
#' # entropy_scores <- calculateCategorizationEntropy(X)
#'
#'
#' @importFrom ggplot2 geom_histogram theme_bw
#'
#' @export
calculateCategorizationEntropy <-
calculateCategorizationEntropy <-
function(X,
inverse_normal_transform = FALSE,
plot = TRUE,
verbose = TRUE) {
verbose = TRUE)
{
if (inverse_normal_transform) {
# https://cran.r-project.org/web/packages/RNOmni/vignettes/RNOmni.html#inverse-normal-transformation
if (verbose) message("Applying global inverse normal transformation.")

# You can't do the INT column-wise (by cell) because it will
# set a constant "range" to the probabilities, eliminating the
# differences in confidence across methods we're trying to
# quantify.

# You can't do the INT row-wise (by cell-type) because even
# though different cell types exhibit different marginal
# distributions of scores (in SingleR at least), doing the
# transformation row-wise would eliminate any differences in
# which cell types are "hard to predict". You don't want a
# score of .5 for cytotoxic T cells (hard to predict type) to
# overwhelm a score of .62 from erythroid type 2 (easy to
# predict), even though the first would be extraordinary
# within its cell type and the latter unexceptional within its
# cell type.
## https://cran.r-project.org/web/packages/RNOmni/vignettes/RNOmni.html#inverse-normal-transformation
if (verbose)
message("Applying global inverse normal transformation.")

## You can't do the INT column-wise (by cell) because it will
## set a constant "range" to the probabilities, eliminating
## the differences in confidence across methods we're trying
## to quantify.

## You can't do the INT row-wise (by cell-type) because even
## though different cell types exhibit different marginal
## distributions of scores (in SingleR at least), doing the
## transformation row-wise would eliminate any differences in
## which cell types are "hard to predict". You don't want a
## score of .5 for cytotoxic T cells (hard to predict type) to
## overwhelm a score of .62 from erythroid type 2 (easy to
## predict), even though the first would be extraordinary
## within its cell type and the latter unexceptional within
## its cell type.

X <- inverse_normal_trans(X)
}
Expand All @@ -95,11 +97,11 @@ calculateCategorizationEntropy <-

X <- sweep(expX, MARGIN = 2, STATS = colSums(expX), FUN = "/")
}

ncat <- nrow(X)

max_ent <- calculate_entropy(rep(1 / ncat, ncat))

if (verbose) {
message(
"Max possible entropy given ", ncat, " categories: ",
Expand Down Expand Up @@ -130,7 +132,7 @@ calculateCategorizationEntropy <-
calculate_entropy <- function(p) {
    ## `p` is a single column of X: a probability vector that sums to 1.
    ## Zero entries are dropped before taking logs, since 0 * log(0)
    ## evaluates to NaN in R and would poison the sum.
    positive <- p[p != 0]

    -sum(positive * log(positive))
}

Expand Down
7 changes: 4 additions & 3 deletions R/calculateHVGOverlap.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,11 @@
#' )
#'
#' @export
calculateHVGOverlap <- function(reference_genes, query_genes) {

calculateHVGOverlap <-
function(reference_genes, query_genes)
{
## Sanity checks
## FIXME: Use BiocUtils
## FIXME: Use BiocBaseUtils
if (!is.vector(reference_genes) || !is.character(reference_genes)) {
stop("reference_genes must be a character vector.")
}
Expand Down
60 changes: 33 additions & 27 deletions R/calculatePairwiseDistancesAndPlotDensity.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@
#' ref_data <- logNormCounts(ref_data)
#' query_data <- logNormCounts(query_data)
#'
#' # Get cell type scores using SingleR (or any other cell type annotation method)
#' # Get cell type scores using SingleR (or any other cell type
#' # annotation method)
#' scores <- SingleR(query_data, ref_data, labels = ref_data$reclustered.broad)
#'
#' # Add labels to query object
Expand All @@ -84,14 +85,17 @@
#' ref_data_subset <- ref_data[common_genes, ]
#' query_data_subset <- query_data[common_genes, ]
#'
#' # Example usage of the function
#' calculatePairwiseDistancesAndPlotDensity(query_data = query_data_subset,
#' reference_data = ref_data_subset,
#' query_cell_type_col = "labels",
#' ref_cell_type_col = "reclustered.broad",
#' cell_type_query = "CD8",
#' cell_type_reference = "CD8",
#' distance_metric = "euclidean")
#' ## Example usage of the function
#'
#' calculatePairwiseDistancesAndPlotDensity(
#' query_data = query_data_subset,
#' reference_data = ref_data_subset,
#' query_cell_type_col = "labels",
#' ref_cell_type_col = "reclustered.broad",
#' cell_type_query = "CD8",
#' cell_type_reference = "CD8",
#' distance_metric = "euclidean"
#' )
#'
#' @importFrom ggplot2 ggplot
#' @importFrom rlang .data
Expand All @@ -109,7 +113,8 @@ calculatePairwiseDistancesAndPlotDensity <-
cell_type_query,
cell_type_reference,
distance_metric,
correlation_method = "pearson") {
correlation_method = c("pearson", "spearman"))
{
## Sanity checks

## Check if query_data is a SingleCellExperiment object
Expand All @@ -123,10 +128,12 @@ calculatePairwiseDistancesAndPlotDensity <-
}

## Subset query and reference data to the specified cell type
query_data_subset <- query_data[, !is.na(query_data[[query_cell_type_col]]) &
query_data[[query_cell_type_col]] == cell_type_query]
ref_data_subset <- reference_data[, !is.na(reference_data[[ref_cell_type_col]]) &
reference_data[[ref_cell_type_col]] == cell_type_reference]
query_data_subset <-
query_data[, !is.na(query_data[[query_cell_type_col]]) &
query_data[[query_cell_type_col]] == cell_type_query]
ref_data_subset <-
reference_data[, !is.na(reference_data[[ref_cell_type_col]]) &
reference_data[[ref_cell_type_col]] == cell_type_reference]

## Convert to matrix
query_mat <- t(as.matrix(assay(query_data_subset, "logcounts")))
Expand All @@ -136,29 +143,28 @@ calculatePairwiseDistancesAndPlotDensity <-
combined_mat <- rbind(query_mat, ref_mat)

## Calculate pairwise distances or correlations for all comparisons
correlation_method <- match.arg(correlation_method)
if (distance_metric == "correlation") {
if (correlation_method == "pearson") {
dist_matrix <- cor(t(combined_mat), method = "pearson")
} else if (correlation_method == "spearman") {
dist_matrix <- cor(t(combined_mat), method = "spearman")
} else {
stop("Invalid correlation method. Available options: 'pearson', 'spearman'")
}
dist_matrix <- cor(t(combined_mat), method = correlation_method)
} else {
dist_matrix <- dist(combined_mat, method = distance_metric)
}

## Convert dist_matrix to a square matrix
dist_matrix <- as.matrix(dist_matrix)

## Extract the distances or correlations for the different pairwise comparisons
## Extract the distances or correlations for the different pairwise
## comparisons
num_query_cells <- nrow(query_mat)
num_ref_cells <- nrow(ref_mat)
dist_query_query <- dist_matrix[seq_len(num_query_cells), seq_len(num_query_cells)]
dist_ref_ref <- dist_matrix[(num_query_cells + 1):(num_query_cells + num_ref_cells),
(num_query_cells + 1):(num_query_cells + num_ref_cells)]
dist_query_ref <- dist_matrix[seq_len(num_query_cells),
(num_query_cells + 1):(num_query_cells + num_ref_cells)]
dist_query_query <-
dist_matrix[seq_len(num_query_cells), seq_len(num_query_cells)]
dist_ref_ref <-
dist_matrix[(num_query_cells + 1):(num_query_cells + num_ref_cells),
(num_query_cells + 1):(num_query_cells + num_ref_cells)]
dist_query_ref <-
dist_matrix[seq_len(num_query_cells),
(num_query_cells + 1):(num_query_cells + num_ref_cells)]

## Create data frame for plotting
dist_df <- data.frame(
Expand Down
Loading

0 comments on commit 1b93ee8

Please sign in to comment.