Git fix indentation and style for BiocCheck

ccb-hms · Apr 14, 2024 · 1b93ee8 · 1b93ee8
1 parent 3f4adde
commit 1b93ee8
Show file tree

Hide file tree

Showing 18 changed files with 211 additions and 179 deletions.
diff --git a/R/calculateCategorizationEntropy.R b/R/calculateCategorizationEntropy.R
@@ -23,7 +23,7 @@
 #'     -sum(p*log(p))} . If that input vector p is a uniform
 #'     distribution over the \code{length(p)} categories, the entropy
 #'     will be as high as possible.
-#' 
+#'
 #' @param X a matrix of category scores
 #'
 #' @param inverse_normal_transform if TRUE, apply
@@ -33,47 +33,49 @@
 #' @param plot if TRUE, plot a histogram of the entropies
 #'
 #' @return A vector of entropy values for each column in X.
-#' 
+#'
 #' @examples
 #' # Simulate 500 cells with scores on 4 possible cell types
 #' X <- rnorm(500 * 4) |> matrix(nrow = 4)
 #'
 #' # Make the first category highly scored in the first 250 cells
-#' X[1, 1:250] <- X[1, 1:250] + 5 
+#' X[1, 1:250] <- X[1, 1:250] + 5
 #'
 #' # The function will issue a message about softmaxing the scores,
 #' # and the entropy histogram will be bimodal since we made half of
 #' # the cells clearly category 1 while the other half are roughly
 #' # even.
 #' # entropy_scores <- calculateCategorizationEntropy(X)
-#' 
+#'
 #' @importFrom ggplot2 geom_histogram theme_bw
 #'
 #' @export
-calculateCategorizationEntropy <-
+calculateCategorizationEntropy <- 
     function(X,
              inverse_normal_transform = FALSE,
              plot = TRUE,
-             verbose = TRUE) {    
+             verbose = TRUE) 
+{
     if (inverse_normal_transform) {
-        # https://cran.r-project.org/web/packages/RNOmni/vignettes/RNOmni.html#inverse-normal-transformation
-        if (verbose) message("Applying global inverse normal transformation.")
-
-        # You can't do the INT column-wise (by cell) because it will
-        # set a constant "range" to the probabilities, eliminating the
-        # differences in confidence across methods we're trying to
-        # quantify.
-
-        # You can't do the INT row-wise (by cell-type) because even
-        # though different cell types exhibit different marginal
-        # distributions of scores (in SingleR at least), doing the
-        # transformation row-wise would eliminate any differences in
-        # which cell types are "hard to predict".  You don't want a
-        # score of .5 for cytotoxic T cells (hard to predict type) to
-        # overwhelm a score of .62 from erythroid type 2 (easy to
-        # predict), even though the first would be extraordinary
-        # within its cell type and the latter unexceptional within its
-        # cell type.
+        ## https://cran.r-project.org/web/packages/RNOmni/vignettes/RNOmni.html#inverse-normal-transformation
+        if (verbose)
+            message("Applying global inverse normal transformation.")
+
+        ## You can't do the INT column-wise (by cell) because it will
+        ## set a constant "range" to the probabilities, eliminating
+        ## the differences in confidence across methods we're trying
+        ## to quantify.
+
+        ## You can't do the INT row-wise (by cell-type) because even
+        ## though different cell types exhibit different marginal
+        ## distributions of scores (in SingleR at least), doing the
+        ## transformation row-wise would eliminate any differences in
+        ## which cell types are "hard to predict".  You don't want a
+        ## score of .5 for cytotoxic T cells (hard to predict type) to
+        ## overwhelm a score of .62 from erythroid type 2 (easy to
+        ## predict), even though the first would be extraordinary
+        ## within its cell type and the latter unexceptional within
+        ## its cell type.
 
         X <- inverse_normal_trans(X)
     }
@@ -95,11 +97,11 @@ calculateCategorizationEntropy <-
 
         X <- sweep(expX, MARGIN = 2, STATS = colSums(expX), FUN = "/")
     }
-
+    
     ncat <- nrow(X)
 
     max_ent <- calculate_entropy(rep(1 / ncat, ncat))
-
+    
     if (verbose) {
         message(
             "Max possible entropy given ", ncat, " categories: ",
@@ -130,7 +132,7 @@ calculateCategorizationEntropy <-
 calculate_entropy <- function(p) {
     # p is one column of X, a vector of probabilities summing to 1.
     nonzeros <- p != 0
-    
+
     -sum(p[nonzeros] * log(p[nonzeros]))
 }
 

diff --git a/R/calculateHVGOverlap.R b/R/calculateHVGOverlap.R
@@ -65,10 +65,11 @@
 #' )
 #' 
 #' @export                                       
-calculateHVGOverlap <- function(reference_genes, query_genes) {
-
+calculateHVGOverlap <- 
+    function(reference_genes, query_genes) 
+{
     ## Sanity checks
-    ## FIXME: Use BiocUtils
+    ## FIXME: Use BiocBaseUtils
     if (!is.vector(reference_genes) || !is.character(reference_genes)) {
         stop("reference_genes must be a character vector.")
     }

diff --git a/R/calculatePairwiseDistancesAndPlotDensity.R b/R/calculatePairwiseDistancesAndPlotDensity.R
@@ -68,7 +68,8 @@
 #' ref_data <- logNormCounts(ref_data)
 #' query_data <- logNormCounts(query_data)
 #'
-#' # Get cell type scores using SingleR (or any other cell type annotation method)
+#' # Get cell type scores using SingleR (or any other cell type
+#' # annotation method)
 #' scores <- SingleR(query_data, ref_data, labels = ref_data$reclustered.broad)
 #'
 #' # Add labels to query object
@@ -84,14 +85,17 @@
 #' ref_data_subset <- ref_data[common_genes, ]
 #' query_data_subset <- query_data[common_genes, ]
 #'
-#' # Example usage of the function
-#' calculatePairwiseDistancesAndPlotDensity(query_data = query_data_subset, 
-#'                                          reference_data = ref_data_subset, 
-#'                                          query_cell_type_col = "labels", 
-#'                                          ref_cell_type_col = "reclustered.broad", 
-#'                                          cell_type_query = "CD8", 
-#'                                          cell_type_reference = "CD8", 
-#'                                          distance_metric = "euclidean")
+#' ## Example usage of the function
+#'
+#' calculatePairwiseDistancesAndPlotDensity(
+#'     query_data = query_data_subset, 
+#'     reference_data = ref_data_subset,
+#'     query_cell_type_col = "labels", 
+#'     ref_cell_type_col = "reclustered.broad", 
+#'     cell_type_query = "CD8", 
+#'     cell_type_reference = "CD8", 
+#'     distance_metric = "euclidean"
+#' )
 #' 
 #' @importFrom ggplot2 ggplot
 #' @importFrom rlang .data
@@ -109,7 +113,8 @@ calculatePairwiseDistancesAndPlotDensity <-
              cell_type_query, 
              cell_type_reference, 
              distance_metric, 
-             correlation_method = "pearson") {
+             correlation_method = c("pearson", "spearman"))
+{
     ## Sanity checks
 
     ## Check if query_data is a SingleCellExperiment object
@@ -123,10 +128,12 @@ calculatePairwiseDistancesAndPlotDensity <-
     }
 
     ## Subset query and reference data to the specified cell type
-    query_data_subset <- query_data[, !is.na(query_data[[query_cell_type_col]]) & 
-                                      query_data[[query_cell_type_col]] == cell_type_query]
-    ref_data_subset <- reference_data[, !is.na(reference_data[[ref_cell_type_col]]) & 
-                                        reference_data[[ref_cell_type_col]] == cell_type_reference]
+    query_data_subset <-
+        query_data[, !is.na(query_data[[query_cell_type_col]]) & 
+                     query_data[[query_cell_type_col]] == cell_type_query]
+    ref_data_subset <-
+        reference_data[, !is.na(reference_data[[ref_cell_type_col]]) & 
+                         reference_data[[ref_cell_type_col]] == cell_type_reference]
 
     ## Convert to matrix
     query_mat <- t(as.matrix(assay(query_data_subset, "logcounts")))
@@ -136,29 +143,28 @@ calculatePairwiseDistancesAndPlotDensity <-
     combined_mat <- rbind(query_mat, ref_mat)
 
     ## Calculate pairwise distances or correlations for all comparisons
+    correlation_method <- match.arg(correlation_method)
     if (distance_metric == "correlation") {
-        if (correlation_method == "pearson") {
-            dist_matrix <- cor(t(combined_mat), method = "pearson")
-        } else if (correlation_method == "spearman") {
-            dist_matrix <- cor(t(combined_mat), method = "spearman")
-        } else {
-            stop("Invalid correlation method. Available options: 'pearson', 'spearman'")
-        }
+        dist_matrix <- cor(t(combined_mat), method = correlation_method)
     } else {
         dist_matrix <- dist(combined_mat, method = distance_metric)
     }
 
     ## Convert dist_matrix to a square matrix
     dist_matrix <- as.matrix(dist_matrix)
 
-    ## Extract the distances or correlations for the different pairwise comparisons
+    ## Extract the distances or correlations for the different pairwise
+    ## comparisons
     num_query_cells <- nrow(query_mat)
     num_ref_cells <- nrow(ref_mat)
-    dist_query_query <- dist_matrix[seq_len(num_query_cells), seq_len(num_query_cells)]
-    dist_ref_ref <- dist_matrix[(num_query_cells + 1):(num_query_cells + num_ref_cells), 
-                              (num_query_cells + 1):(num_query_cells + num_ref_cells)]
-    dist_query_ref <- dist_matrix[seq_len(num_query_cells),
-                                  (num_query_cells + 1):(num_query_cells + num_ref_cells)]
+    dist_query_query <-
+        dist_matrix[seq_len(num_query_cells), seq_len(num_query_cells)]
+    dist_ref_ref <-
+        dist_matrix[(num_query_cells + 1):(num_query_cells + num_ref_cells), 
+                    (num_query_cells + 1):(num_query_cells + num_ref_cells)]
+    dist_query_ref <-
+        dist_matrix[seq_len(num_query_cells),
+                    (num_query_cells + 1):(num_query_cells + num_ref_cells)]
 
     ## Create data frame for plotting
     dist_df <- data.frame(