Skip to content

Commit

Permalink
run styler
Browse files Browse the repository at this point in the history
  • Loading branch information
kriemo committed Aug 23, 2024
1 parent 1562dba commit c90dc5f
Show file tree
Hide file tree
Showing 17 changed files with 959 additions and 842 deletions.
130 changes: 70 additions & 60 deletions R/cellbrowsers.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

#' Build reference atlases from external UCSC cellbrowsers
#'
#' @param cb_url URL of cellbrowser dataset (e.g. http://cells.ucsc.edu/?ds=cortex-dev).
Expand All @@ -11,72 +10,83 @@
#' @return reference matrix
#' @examples
#' \dontrun{
#'
#'
#' # many datasets hosted by UCSC have UMI counts in the expression matrix
#' # set if_log = FALSE if the expression matrix has not been natural log transformed
#'
#' get_ucsc_reference(cb_url = "https://cells.ucsc.edu/?ds=evocell+mus-musculus+marrow",
#' cluster_col = "Clusters", if_log = FALSE)
#'
#' get_ucsc_reference(cb_url = "http://cells.ucsc.edu/?ds=muscle-cell-atlas",
#' cluster_col = "cell_annotation",
#' if_log = FALSE)
#'
#' get_ucsc_reference(
#' cb_url = "https://cells.ucsc.edu/?ds=evocell+mus-musculus+marrow",
#' cluster_col = "Clusters", if_log = FALSE
#' )
#'
#' get_ucsc_reference(
#' cb_url = "http://cells.ucsc.edu/?ds=muscle-cell-atlas",
#' cluster_col = "cell_annotation",
#' if_log = FALSE
#' )
#' }
#' @export
get_ucsc_reference <- function(cb_url,
cluster_col,
...){
if(!requireNamespace("R.utils", quietly = TRUE)) {
stop("This function requires the R.utils package, please install\n",
"install.packages('R.utils')")
}

if(!requireNamespace("data.table", quietly = TRUE)) {
stop("This function requires the data.table package, please install\n",
"install.packages('data.table')")
}

url <- httr::parse_url(cb_url)
base_url <- url
ds <- url$query$ds

# ds can include sub-datasets with syntax, "dataset+subdataset+and-so-on"
# files are hosted at urls: dataset/subdataset/andsoon/..."
ds_split <- strsplit(ds, "+", fixed = TRUE)[[1]]
ds <- paste0(ds_split, collapse = "/")
base_url$query <- ""
...) {
if (!requireNamespace("R.utils", quietly = TRUE)) {
stop(
"This function requires the R.utils package, please install\n",
"install.packages('R.utils')"
)
}

if (!requireNamespace("data.table", quietly = TRUE)) {
stop(
"This function requires the data.table package, please install\n",
"install.packages('data.table')"
)
}

url <- httr::parse_url(cb_url)
base_url <- url
ds <- url$query$ds

mdata_url <- httr::modify_url(base_url,
path = file.path(ds, "meta.tsv"))
if(!httr::http_error(mdata_url)){
mdata <- data.table::fread(mdata_url, data.table = FALSE, sep = "\t")
} else {
stop("unable to find metadata at url: ", mdata_url)
}

mat_url <- httr::modify_url(base_url,
path = file.path(ds, "exprMatrix.tsv.gz"))
if(!httr::http_error(mat_url)){
mat <- data.table::fread(mat_url, data.table = FALSE, sep = "\t")
} else {
stop("unable to find matrix at url: ", mat_url)
}
# ds can include sub-datasets with syntax, "dataset+subdataset+and-so-on"
# files are hosted at urls: dataset/subdataset/andsoon/..."
ds_split <- strsplit(ds, "+", fixed = TRUE)[[1]]
ds <- paste0(ds_split, collapse = "/")
base_url$query <- ""

rownames(mat) <- mat[, 1]
mat[, 1] <- NULL
mat <- as.matrix(mat)

mm <- max(mat)

if(mm > 50) {
dots <- list(...)
if(!"if_log" %in% names(dots) || dots$if_log) {
warning("the data matrix has a maximum value of ", mm, "\n",
"the data are likely not log transformed,\n",
"please set the if_log argument for average clusters accordingly")
mdata_url <- httr::modify_url(base_url,
path = file.path(ds, "meta.tsv")
)
if (!httr::http_error(mdata_url)) {
mdata <- data.table::fread(mdata_url, data.table = FALSE, sep = "\t")
} else {
stop("unable to find metadata at url: ", mdata_url)
}
}

average_clusters(mat, mdata, cluster_col = cluster_col, ...)
}

mat_url <- httr::modify_url(base_url,
path = file.path(ds, "exprMatrix.tsv.gz")
)
if (!httr::http_error(mat_url)) {
mat <- data.table::fread(mat_url, data.table = FALSE, sep = "\t")
} else {
stop("unable to find matrix at url: ", mat_url)
}

rownames(mat) <- mat[, 1]
mat[, 1] <- NULL
mat <- as.matrix(mat)

mm <- max(mat)

if (mm > 50) {
dots <- list(...)
if (!"if_log" %in% names(dots) || dots$if_log) {
warning(
"the data matrix has a maximum value of ", mm, "\n",
"the data are likely not log transformed,\n",
"please set the if_log argument for average clusters accordingly"
)
}
}

average_clusters(mat, mdata, cluster_col = cluster_col, ...)
}
16 changes: 10 additions & 6 deletions R/common_dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
#'
#' cor_to_call(res)
#' @export
cor_to_call <- function(cor_mat,
cor_to_call <- function(
cor_mat,
metadata = NULL,
cluster_col = "cluster",
collapse_to_cluster = FALSE,
Expand Down Expand Up @@ -131,17 +132,18 @@ cor_to_call <- function(cor_mat,
#' cluster_col = "classified",
#' ref_mat = cbmc_ref
#' )
#'
#'
#' res2 <- cor_to_call(res, cluster_col = "classified")
#'
#'
#' call_to_metadata(
#' res = res2,
#' metadata = pbmc_meta,
#' cluster_col = "classified",
#' rename_prefix = "assigned"
#' )
#' @export
call_to_metadata <- function(res,
call_to_metadata <- function(
res,
metadata,
cluster_col,
per_cell = FALSE,
Expand Down Expand Up @@ -255,7 +257,8 @@ call_to_metadata <- function(res,
#' threshold = 0
#' )
#' @export
collapse_to_cluster <- function(res,
collapse_to_cluster <- function(
res,
metadata,
cluster_col,
threshold = 0) {
Expand Down Expand Up @@ -331,7 +334,8 @@ collapse_to_cluster <- function(res,
#'
#' cor_to_call_rank(res, threshold = "auto")
#' @export
cor_to_call_rank <- function(cor_mat,
cor_to_call_rank <- function(
cor_mat,
metadata = NULL,
cluster_col = "cluster",
collapse_to_cluster = FALSE,
Expand Down
112 changes: 58 additions & 54 deletions R/compare_genelist.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
#' mat <- binarize_expr(pbmc_avg)
#' mat[1:3, 1:3]
#' @export
binarize_expr <- function(mat,
binarize_expr <- function(
mat,
n = 1000,
cut = 0) {
expr_mat <- mat
Expand Down Expand Up @@ -57,7 +58,8 @@ binarize_expr <- function(mat,
#' @examples
#' matrixize_markers(pbmc_markers)
#' @export
matrixize_markers <- function(marker_df,
matrixize_markers <- function(
marker_df,
ranked = FALSE,
n = NULL,
step_weight = 1,
Expand Down Expand Up @@ -223,7 +225,8 @@ get_vargenes <- function(marker_mat) {
#' metric = "spearman"
#' )
#' @export
compare_lists <- function(bin_mat,
compare_lists <- function(
bin_mat,
marker_mat,
n = 30000,
metric = "hyper",
Expand All @@ -232,10 +235,10 @@ compare_lists <- function(bin_mat,
# check if matrix is binarized
if (is.list(marker_mat)) {
message("list of markers instead of matrix, only supports jaccard")
}
}
if ((length(unique(bin_mat[, 1])) > 2) & (metric != "gsea")) {
warning("non-binarized data, running spearman instead")
metric <- "spearman"
warning("non-binarized data, running spearman instead")
metric <- "spearman"
}

if (details_out) {
Expand All @@ -246,11 +249,11 @@ compare_lists <- function(bin_mat,
names(marker_mat),
function(y) {
marker_list <- unlist(marker_mat[[y]],
use.names = FALSE
use.names = FALSE
)
bin_temp <- bin_mat[, x][bin_mat[, x] == 1]
list_top <- names(bin_temp)

genes <- paste(intersect(list_top, marker_list), collapse = ",")
genes
}
Expand All @@ -259,7 +262,7 @@ compare_lists <- function(bin_mat,
}
)
}

if (metric == "hyper") {
out <- lapply(
colnames(bin_mat),
Expand Down Expand Up @@ -287,45 +290,45 @@ compare_lists <- function(bin_mat,
}
} else if (metric == "jaccard") {
if (is.list(marker_mat)) {
out <- lapply(
colnames(bin_mat),
function(x) {
per_col <- lapply(
names(marker_mat),
function(y) {
marker_list <- unlist(marker_mat[[y]],
use.names = FALSE
)
bin_temp <- bin_mat[, x][bin_mat[, x] == 1]
list_top <- names(bin_temp)

I <- length(intersect(list_top, marker_list))
I / (length(list_top) + length(marker_list) - I)
out <- lapply(
colnames(bin_mat),
function(x) {
per_col <- lapply(
names(marker_mat),
function(y) {
marker_list <- unlist(marker_mat[[y]],
use.names = FALSE
)
bin_temp <- bin_mat[, x][bin_mat[, x] == 1]
list_top <- names(bin_temp)

I <- length(intersect(list_top, marker_list))
I / (length(list_top) + length(marker_list) - I)
}
)
do.call(cbind, per_col)
}
)
do.call(cbind, per_col)
}
)
)
} else {
out <- lapply(
colnames(bin_mat),
function(x) {
per_col <- lapply(
colnames(marker_mat),
function(y) {
marker_list <- unlist(marker_mat[, y],
use.names = FALSE
)
bin_temp <- bin_mat[, x][bin_mat[, x] == 1]
list_top <- names(bin_temp)

I <- length(intersect(list_top, marker_list))
I / (length(list_top) + length(marker_list) - I)
out <- lapply(
colnames(bin_mat),
function(x) {
per_col <- lapply(
colnames(marker_mat),
function(y) {
marker_list <- unlist(marker_mat[, y],
use.names = FALSE
)
bin_temp <- bin_mat[, x][bin_mat[, x] == 1]
list_top <- names(bin_temp)

I <- length(intersect(list_top, marker_list))
I / (length(list_top) + length(marker_list) - I)
}
)
do.call(cbind, per_col)
}
)
do.call(cbind, per_col)
}
)
)
}
} else if (metric == "spearman") {
out <- lapply(
Expand Down Expand Up @@ -384,15 +387,14 @@ compare_lists <- function(bin_mat,

if (metric != "gsea") {
if (!is.list(marker_mat)) {
res <- do.call(rbind, out)
rownames(res) <- colnames(bin_mat)
colnames(res) <- colnames(marker_mat)
res <- do.call(rbind, out)
rownames(res) <- colnames(bin_mat)
colnames(res) <- colnames(marker_mat)
} else {
res <- do.call(rbind, out)
rownames(res) <- colnames(bin_mat)
colnames(res) <- names(marker_mat)
res <- do.call(rbind, out)
rownames(res) <- colnames(bin_mat)
colnames(res) <- names(marker_mat)
}

}

if (output_high) {
Expand All @@ -407,8 +409,10 @@ compare_lists <- function(bin_mat,
spe <- do.call(rbind, spe)
rownames(spe) <- colnames(bin_mat)
colnames(spe) <- names(marker_mat)
list(res = res,
details = spe)
list(
res = res,
details = spe
)
} else {
res
}
Expand Down
Loading

0 comments on commit c90dc5f

Please sign in to comment.