From 824d3c69d43d7cd6a19947aecd394d60d243701f Mon Sep 17 00:00:00 2001 From: Ryan Palaganas Date: Fri, 3 Nov 2023 16:59:13 -0400 Subject: [PATCH 01/33] updated projectionDriveR function --- projectionDriveRfun.R | 447 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 447 insertions(+) create mode 100644 projectionDriveRfun.R diff --git a/projectionDriveRfun.R b/projectionDriveRfun.R new file mode 100644 index 0000000..bff9a07 --- /dev/null +++ b/projectionDriveRfun.R @@ -0,0 +1,447 @@ +####################################################################################################################################### +#' bonferroniCorrectedDifferences +#' +#' Calculate the weighted and unweighted difference in means for each measurement between two groups. +#' @param group1 count matrix 1 +#' @param group2 count matrix 2 +#' @param diff_weights loadings to weight the differential expression between the groups +#' @param pvalue significance value to threshold at +#' @importFrom stats var +#' @importFrom ggrepel geom_label_repel +#' @import dplyr +bonferroniCorrectedDifferences <- function( + group1, + group2, + diff_weights = NULL, + pvalue) + + { + #if passed from projectionDrivers, cellgroup1 and cellgroup 1 will have the same rows (genes) + + if(!(dim(group1)[1] == dim(group2)[1])){ + stop("Rows of two cell group matrices are not identical") + } + + if(any(is.na(group1)) | any(is.na(group2))){ + stop("NA values in count matrices not allowed") + } + + ##Take means over all genes and calculate differences + group1_mean <- apply(group1, 1, mean) + group2_mean <- apply(group2, 1, mean) + mean_diff <- group1_mean - group2_mean #if this is log normalized counts the mean difference is actually log(group1/group2) + + + #if weights are provided, use them to weight the difference in means + if(!is.null(diff_weights)){ + + #check that genes exactly match between difference vector and weight vector + if(!(all(names(mean_diff) == names(diff_weights)))){ + stop("Names of loadings and counts do not match") + } + + mean_diff <- mean_diff * diff_weights + } + + + + ##Stats and corrections beginning here + #calculate confidence intervals + dimensionality <- length(mean_diff) #number of measurements (genes) + + n1_samples <- dim(group1)[2] #number of samples (cells) + n2_samples <- dim(group2)[2] + bon_correct <- pvalue / (2*dimensionality) #bonferroni correction + qval <- 1 - bon_correct + + tval <- qt(p = qval, df = n1_samples + n2_samples -2) #critical value + + #calculate p values + group1_var <- apply(group1, 1, var) #variance of genes across group 1 + group2_var <- apply(group2, 1, var) #variance of genes across group 2 + + #vartest <- group1_var / group2_var #test to see if variance across groups is equal, often not equal + + pooled <- ((n1_samples-1)*group1_var + (n2_samples-1)*group2_var) / (n1_samples+n2_samples-2) #pooled standard deviation + + #welch t test + deltaS <- sqrt((group1_var / n1_samples) + (group2_var / n2_samples)) #variance calculation + + welch_estimate <- round(mean_diff / deltaS, digits = 10) #welch t statistic, rounded to 10 digits to avoid infinite decimals + + df <- (((group1_var / n1_samples) + (group2_var / n2_samples))^2) / ((((group1_var / n1_samples)^2) / (n1_samples - 1)) + (((group2_var / n2_samples)^2) / (n2_samples - 1))) #Welch-Satterthwaite equation for degrees of freedom + + welch_pvalue <- 2*pt(-abs(welch_estimate), df=df) #calculate p value from estimate (tvalue) + + welch_p_value_boncorrected <- p.adjust(welch_pvalue, method = "bonferroni", n = dimensionality) #bonferroni correction + + # replace p values equal to zero with the smallest machine value possible + if (min(welch_p_value_boncorrected, na.rm=TRUE) <= .Machine$double.xmin) { + zp <- length(which(welch_p_value_boncorrected <= .Machine$double.xmin)) + warning(paste(zp,"P value(s) equal 0.", + "Converting any values less than", .Machine$double.xmin, "to minimum possible value..."), + call. = FALSE) + welch_p_value_boncorrected[welch_p_value_boncorrected <= .Machine$double.xmin] <- .Machine$double.xmin + } + + #establish dataframe to populate in the following for loop + plusminus = data.frame(low = rep(NA_integer_, dimensionality), + high = rep(NA_integer_, dimensionality), + estimate = rep(NA_integer_, dimensionality), + mean_diff = rep(NA_integer_, dimensionality), + welch_pvalue = rep(NA_integer_, dimensionality), + welch_p_value_boncorrected = rep(NA_integer_, dimensionality), + ref_mean = rep(NA_integer_, dimensionality), + test_mean = rep(NA_integer_, dimensionality), + gene = rep(NA_integer_, dimensionality)) + rownames(plusminus) <- names(mean_diff) + + + #for each gene, calculate confidence interval around mean + for(i in 1:dimensionality){ + + scale = tval * sqrt(pooled[i] * (1/n1_samples + 1/n2_samples)) + + + plusminus[i, "low"] <- mean_diff[i] - scale #low estimate + plusminus[i, "high"] <- mean_diff[i] + scale #high estimate + plusminus[i, "estimate"] <- welch_estimate[i] + plusminus[i, "mean_diff"] <- mean_diff[i] + plusminus[i, "welch_pvalue"] <- welch_pvalue[i] + plusminus[i, "welch_p_value_boncorrected"] <- welch_p_value_boncorrected[i] + plusminus[i, "ref_mean"] <- group2_mean[i] + plusminus[i, "test_mean"] <- group1_mean[i] + plusminus[i, "gene"] <- names(mean_diff[i]) + + + } + return(plusminus) +} + +####################################################################################################################################### +#' plotConfidenceIntervals +#' @param confidence_intervals #confidence_interval is a data.frame or matrix with two columns (low, high). Genes must be rownames +#' @param pattern_name name of pattern being plotted, if NULL, defaults to 'weights' +#' @param sort Sorts genes by increasing estimate order if true +#' @param genes vector of genes that can be used to subset data +#' @param weights if provided, will generate heatmap for pattern weights +#' @param weights_clip upper limit of pattern weights +#' @param weights_vis_norm method of transforming weights to percentiles, currently only supports 'none' and 'quantiles' +#' @param weighted specifies whether the confidence intervals in use are weighted by the pattern and labels plots accordingly +#' @importFrom viridis scale_fill_viridis +#' @importFrom scales squish +plotConfidenceIntervals <- function( + confidence_intervals, + interval_name = c("low","high"), + pattern_name = NULL, + sort = T, + genes = NULL, + weights = NULL, + weights_clip = 0.99, + weights_vis_norm = "none", + weighted = F){ + + if(weights_clip < 0 | weights_clip > 1){ + stop("weights_clip must be numeric between 0 and 1") + } + + if(!(weights_vis_norm %in% c("none","quantiles"))){ + stop("weights_vis_norm must be either 'none' or 'quantiles'") + } + + if(weighted == F){ + lab = "Unweighted" + } else{ + lab = "Weighted" + } + #gene names were stored as rownames, make sure high and low estimates are stored + confidence_intervals$gene_names <- rownames(confidence_intervals) + confidence_intervals$low <- confidence_intervals[,interval_name[1]] + confidence_intervals$high <- confidence_intervals[,interval_name[2]] + + n <- dim(confidence_intervals)[1] + confidence_intervals <- confidence_intervals %>% + mutate( + mid = (high+low)/2, #estimate, used for point position + positive = mid > 0) #upregulated, used for color scheme + + if(!is.null(genes)){ + #select genes provided and get them in that order + if(!(is.character(genes))){ stop("Genes must be provided as a character vector") } + n <- length(genes) + message(paste0("Selecting ", n, " features")) + confidence_intervals <- confidence_intervals[genes,] + + } + + if(sort){ + #order in increasing order on estimates, and create index variable + message("sorting genes in increasing order of estimates...") + confidence_intervals <- confidence_intervals %>% mutate(idx = dense_rank(mid)) %>% + arrange(mid) + + } else{ + #if not sorted, create index variable for current order + confidence_intervals <- confidence_intervals %>% mutate(idx = 1:n) + } + + #genereate point range plot + ci_plot <- ggplot(data = confidence_intervals, aes(y = idx, x = mid)) + geom_pointrange(aes(xmin = low, xmax = high, color = positive)) + + geom_point(aes(x = mid, y = idx), fill ="black",color = "black") + + theme_minimal() + + xlab("Difference in group means") + + ylab("Genes") + + geom_vline(xintercept = 0, color = "black", linetype = "dashed") + + theme(legend.position = "none") + + ggtitle(lab) + + #if provided, create heatmap for pattern weights + if(!is.null(weights)){ + + #check that weights are formatted as a named vector + if(!(is.numeric(weights))){ stop("Weights must be provided as a numeric vector") } + if(is.null(names(weights))){ stop("Weights must have names that match estimates")} + + #either use pattern_name, or if not provided, just label heatmap with "weights" + hm_name <- ifelse(is.null(pattern_name), "weights", pattern_name) + + #maintain established order from the pointrange plot + ordered_weights <- weights[rownames(confidence_intervals)] + + if(weights_vis_norm == "quantiles"){ + #transform to percentiles from 0 to 1 + ordered_weights <- trunc(rank(ordered_weights))/length(ordered_weights) + hm_name <- paste0(hm_name, " (quantiles)") #append quantile to plot name + } + + confidence_intervals$weights <- ordered_weights + + #generate heatmap + wt_heatmap <- ggplot(data = confidence_intervals) + + geom_tile(aes(x = 1, y = 1:n, fill = weights)) + + scale_fill_viridis(limits=c(0, quantile(ordered_weights,weights_clip )), + oob=squish, + name = hm_name) + + theme_void() + + } else{ wt_heatmap = NULL} #if weights aren't provided, return NULL + + return(list("ci_estimates_plot" = ci_plot, + "feature_order" = rownames(confidence_intervals), + "weights_heatmap" = wt_heatmap)) +} + + +####################################################################################################################################### +#' projectionDriveR +#' +#' Calculate the weighted difference in expression between two groups (group1 - group2) +#' +#' @importFrom cowplot plot_grid +#' @importFrom ggpubr ggarrange +#' @param cellgroup1 gene x cell count matrix for cell group 1 +#' @param cellgroup2 gene x cell count matrix for cell group 2 +#' @param loadings A matrix of continuous values defining the features +#' @param pattern_name column of loadings for which drivers will be calculated. +#' @param pvalue confidence level for the bonferroni confidence intervals. Default 1e-5 +#' @param loadingsNames a vector with names of loading rows. Defaults to rownames. +#' @param display boolean. Whether or not to plot and display confidence intervals +#' @param normalize_pattern Boolean. Whether or not to normalize pattern weights. +#' @return A list with weighted mean differences, mean differences, and differential genes that meet the provided signficance threshold. +#' @export +#' +#' +projectionDriveR<-function( + cellgroup1, #gene x cell count matrix for cell group 1 + cellgroup2, #gene x cell count matrix for cell group 2 + loadings, # a matrix of continous values to be projected with unique rownames + loadingsNames = NULL, # a vector with names of loadings rows + pattern_name, + pvalue = 1e-5, + display = TRUE, + normalize_pattern = TRUE +){ + + #Count matrices can be class matrix, data.frame, sparse.matrix, ... anything that is coercible by as.matrix() + + #check that alpha significance level is appropriate + if(pvalue <= 0 | pvalue >= 1){ + stop("pvalue must be numeric between 0 and 1") + } + + #Make sure provided pattern string is a character vector of length one + if(length(pattern_name) != 1 | !is.character(pattern_name)){ + stop("provided pattern_name must be a character vector of length one") + } + + #set loadings rownames if provided + if(!is.null(loadingsNames)){ + rownames(loadings) <- loadingsNames + } + + #pattern weights must be formatted as a matrix for normalization + if(pattern_name %in% colnames(loadings)){ + pattern <- loadings[,pattern_name, drop = F] #data.frame + pattern <- as.matrix(pattern) + } else { + stop(paste0("Provided pattern_name ",pattern_name, " is not a column in provided loadings")) + } + + #extract names of data objects + group1name <- deparse(substitute(cellgroup1)) + + group2name <- deparse(substitute(cellgroup2)) + + + #Filter the two count matrices and the pattern weights to include the intersection of their features + #shared rows in two data matrices + filtered_data <-geneMatchR(data1=cellgroup1, data2=cellgroup2, data1Names=NULL, data2Names=NULL, merge=FALSE) + print(paste(as.character(dim(filtered_data[[2]])[1]),'row names matched between datasets')) + + cellgroup1 <- filtered_data[[2]] #geneMatchR flips the indexes + cellgroup2 <- filtered_data[[1]] + + + #shared rows in data matrices and loadings + filtered_weights <- geneMatchR(data1 = cellgroup1, data2 = pattern, data1Names = NULL, data2Names = NULL, merge = F) + dimensionality_final <- dim(filtered_weights[[2]])[1] + + print(paste('Updated dimension of data:',as.character(paste(dimensionality_final, collapse = ' ')))) + + if(dimensionality_final == 0){ + stop("No features matched by rownames of count matrix and rownames of loadings") + } + + pattern_filtered <- filtered_weights[[1]] + + cellgroup1_filtered <- filtered_weights[[2]] + #do second filtering on other cell group so all genes are consistent + cellgroup2_filtered <- cellgroup2[rownames(cellgroup1_filtered),] + + + #normalize pattern weights + if(normalize_pattern){ + weight_norm <- norm(pattern_filtered) #square of sums of squares (sum for all positive values) + num_nonzero <- sum(pattern_filtered > 0) #number of nonzero weights + pattern_filtered <- pattern_filtered * num_nonzero / weight_norm + } + + #cast feature weights to a named vector + pattern_normalized_vec <- pattern_filtered[,1] + names(pattern_normalized_vec) <- rownames(pattern_filtered) + + + + + #weighted confidence intervals of differences in cluster means + weighted_drivers_bonferroni <- bonferroniCorrectedDifferences(group1 = cellgroup1_filtered, + group2 = cellgroup2_filtered, + diff_weights = pattern_normalized_vec, + pvalue = pvalue) + weighted_welch_sig <- rownames(weighted_drivers_bonferroni[which(weighted_drivers_bonferroni$welch_p_value_boncorrected <= pvalue),]) + + # Apply the t test function to unweighted expression matrices and call significance + n_tests <- nrow(cellgroup1_filtered) + cat("ntests are:", n_tests, "\n") + + + #unweighted confidence intervals of difference in cluster means + mean_bonferroni <- bonferroniCorrectedDifferences(group1 = cellgroup1_filtered, + group2 = cellgroup2_filtered, + diff_weights = NULL, + pvalue = pvalue) + welch_sig <- rownames(mean_bonferroni[which(mean_bonferroni$welch_p_value_boncorrected <= pvalue),]) + + #Determine which genes have significantly non-zero mean difference and weighted mean difference + #significant + weighted_sig_idx <- apply(weighted_drivers_bonferroni[,1:2], 1, function(interval){ + (interval[1] > 0 & interval[2] > 0) | (interval[1] < 0 & interval[2] < 0) + }) + + weighted_sig_genes <- weighted_drivers_bonferroni[weighted_sig_idx,] + + mean_sig_idx <- apply(mean_bonferroni[,1:2], 1, function(interval){ + (interval[1] > 0 & interval[2] > 0) | (interval[1] < 0 & interval[2] < 0) + }) + + unweighted_sig_genes <- mean_bonferroni[mean_sig_idx,] + + + #genes that are collectively either up or down + shared_genes <- base::intersect( + rownames(weighted_drivers_bonferroni)[weighted_sig_idx], + rownames(mean_bonferroni)[mean_sig_idx]) + cat("the length of shared genes are:", length(shared_genes), '\n') + + shared_genes2 <- base::intersect( + welch_sig, weighted_welch_sig) + + + if(length(shared_genes) == 0){ + #no genes were significant. Return info we have and skip plotting. + warning("No features (and weighted features) were significantly differentially used between the two groups") + return(list( + mean_ci = mean_bonferroni, + weighted_mean_ci = weighted_drivers_bonferroni, + significant_shared_genes = shared_genes, + plotted_ci = NULL, + weighted_sig_genes = rownames(weighted_sig_genes), + unweighted_sig_genes = rownames(unweighted_sig_genes), + reference_matrix = paste0(group2name), + test_matrix = paste0(group1name), + welch_sig = welch_sig, + weighted_welch_sig = weighted_welch_sig, + welch_significant_shared_genes = shared_genes2, + pvalue = pvalue)) + } + + + conf_intervals <- mean_bonferroni[shared_genes,] + sig_weights <- pattern_normalized_vec[shared_genes] + + weighted_conf_intervals <- weighted_drivers_bonferroni[shared_genes,] + + #create confidence interval plot (unweighted) + pl <- plotConfidenceIntervals(conf_intervals, + weights = sig_weights, + pattern_name = pattern_name, + weighted = F) + #weighted + pl_w <- plotConfidenceIntervals(weighted_conf_intervals, + weights = sig_weights, + pattern_name = pattern_name, + weighted = T) + plots <- list(unweighted = pl,weighted = pl_w) + if(display){ + #print confidence interval pointrange plot + pl1_u <- (cowplot::plot_grid(pl[["ci_estimates_plot"]], + pl[["weights_heatmap"]], + ncol = 2, + align = "h", + rel_widths = c(1,.3))) + print(pl1_u) + pl2_w <- (cowplot::plot_grid(pl_w[["ci_estimates_plot"]], + pl_w[["weights_heatmap"]], + ncol = 2, + align = "h", + rel_widths = c(1,.3))) + print(pl2_w) + plt <- ggpubr::ggarrange(pl1_u, pl2_w, common.legend = TRUE, legend = "bottom") + print(plt) + } + + return(list( + mean_ci = mean_bonferroni, + weighted_mean_ci = weighted_drivers_bonferroni, + significant_shared_genes = shared_genes, + plotted_ci = plots, + weighted_sig_genes = rownames(weighted_sig_genes), + unweighted_sig_genes = rownames(unweighted_sig_genes), + reference_matrix = paste0(group2name), + test_matrix = paste0(group1name), + welch_sig = welch_sig, + weighted_welch_sig = weighted_welch_sig, + welch_significant_shared_genes = shared_genes2, + pvalue = pvalue)) +} + From ae55d44415e2aa618e8716e271bea0ae2056b347 Mon Sep 17 00:00:00 2001 From: rpalaganas <105320146+rpalaganas@users.noreply.github.com> Date: Fri, 3 Nov 2023 17:05:36 -0400 Subject: [PATCH 02/33] moved projectionDriveRfun. location to the R subfolder --- projectionDriveRfun.R => R/projectionDriveRfun.R | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename projectionDriveRfun.R => R/projectionDriveRfun.R (100%) diff --git a/projectionDriveRfun.R b/R/projectionDriveRfun.R similarity index 100% rename from projectionDriveRfun.R rename to R/projectionDriveRfun.R From 5144edc982651425e7e319eb4550d30730760025 Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Tue, 12 Dec 2023 14:29:16 -0500 Subject: [PATCH 03/33] Updated test_projectR.R and projectionDriveR functions Added tests for projectionDriveR in test_projectR.R Removed redundant confidence interval (CI) plotting function from projectionDriveRfun.R Updated CI plotting function in plotting.R to include weighted vs unweighted CI plots Removed redundant projectionDriveR function --- R/plotting.R | 56 ++++---- R/projectionDriveR.R | 245 --------------------------------- R/projectionDriveRfun.R | 113 --------------- tests/testthat/test_projectR.R | 70 ++++++++++ 4 files changed, 100 insertions(+), 384 deletions(-) delete mode 100644 R/projectionDriveR.R diff --git a/R/plotting.R b/R/plotting.R index 28e02dc..fb62274 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -31,19 +31,21 @@ #' @param genes a vector with names of genes to include in plot. If sort=F, estimates will be plotted in this order. #' @param weights optional. weights of features to include as annotation. #' @param weights_clip optional. quantile of data to clip color scale for improved visualization. Default: 0.99 -#' @param weights_vis_norm Which processed version of weights to visualize as a heatmap. +#' @param weights_vis_norm Which processed version of weights to visualize as a heatmap. +#' @param weighted specifies whether the confidence intervals in use are weighted by the pattern and labels plots accordingly #' Options are "none" (which uses provided weights) or "quantiles". Default: none #' @return A list with pointrange estimates and, if requested, a heatmap of pattern weights. #' @export plotConfidenceIntervals <- function( - confidence_intervals, #confidence_interval is a data.frame or matrix with two columns (low, high). Genes must be rownames - interval_name = c("low","high"), - pattern_name = NULL, - sort = T, - genes = NULL, - weights = NULL, - weights_clip = 0.99, - weights_vis_norm = "none"){ + confidence_intervals, + interval_name = c("low","high"), + pattern_name = NULL, + sort = T, + genes = NULL, + weights = NULL, + weights_clip = 0.99, + weights_vis_norm = "none", + weighted = F){ if(weights_clip < 0 | weights_clip > 1){ stop("weights_clip must be numeric between 0 and 1") @@ -53,12 +55,16 @@ plotConfidenceIntervals <- function( stop("weights_vis_norm must be either 'none' or 'quantiles'") } + if(weighted == F){ + lab = "Unweighted" + } else{ + lab = "Weighted" + } #gene names were stored as rownames, make sure high and low estimates are stored confidence_intervals$gene_names <- rownames(confidence_intervals) confidence_intervals$low <- confidence_intervals[,interval_name[1]] confidence_intervals$high <- confidence_intervals[,interval_name[2]] - n <- dim(confidence_intervals)[1] confidence_intervals <- confidence_intervals %>% mutate( @@ -77,27 +83,24 @@ plotConfidenceIntervals <- function( if(sort){ #order in increasing order on estimates, and create index variable message("sorting genes in increasing order of estimates...") - confidence_intervals <- confidence_intervals %>% - mutate( - idx = dense_rank(mid)) %>% + confidence_intervals <- confidence_intervals %>% mutate(idx = dense_rank(mid)) %>% arrange(mid) - } else{ - #if not sorted, create index variable for current order - confidence_intervals <- confidence_intervals %>% - mutate(idx = 1:n) + } else{ + #if not sorted, create index variable for current order + confidence_intervals <- confidence_intervals %>% mutate(idx = 1:n) } #genereate point range plot ci_plot <- ggplot(data = confidence_intervals, aes(y = idx, x = mid)) + geom_pointrange(aes(xmin = low, xmax = high, color = positive)) + geom_point(aes(x = mid, y = idx), fill ="black",color = "black") + - theme_minimal() + - xlab("Difference in group means") + - ylab("Genes") + - geom_vline(xintercept = 0, color = "black", linetype = "dashed") + - theme(legend.position = "none") + - ggtitle(pattern_name) - + theme_minimal() + + xlab("Difference in group means") + + ylab("Genes") + + geom_vline(xintercept = 0, color = "black", linetype = "dashed") + + theme(legend.position = "none") + + ggtitle(lab) + #if provided, create heatmap for pattern weights if(!is.null(weights)){ @@ -125,11 +128,12 @@ plotConfidenceIntervals <- function( scale_fill_viridis(limits=c(0, quantile(ordered_weights,weights_clip )), oob=squish, name = hm_name) + - theme_void() + theme_void() } else{ wt_heatmap = NULL} #if weights aren't provided, return NULL return(list("ci_estimates_plot" = ci_plot, "feature_order" = rownames(confidence_intervals), "weights_heatmap" = wt_heatmap)) -} +} + diff --git a/R/projectionDriveR.R b/R/projectionDriveR.R deleted file mode 100644 index 30c37bb..0000000 --- a/R/projectionDriveR.R +++ /dev/null @@ -1,245 +0,0 @@ -####################################################################################################################################### -#' bonferroniCorrectedDifferences -#' -#' Calculate the (weighted) difference in means for each measurement between two groups. -#' @param group1 count matrix 1 -#' @param group2 count matrix 2 -#' @param diff_weights oadings to weight the differential expression between the groups -#' @param pvalue significance value to threshold at -#' -#' @importFrom stats var -#' @importFrom ggrepel geom_label_repel -bonferroniCorrectedDifferences <- function( - group1, #count matrix 1 - group2, #count matrix 2 - diff_weights = NULL, #loadings to weight the differential expression between the groups - pvalue) #signficance value to threshold at - { - - #if passed from projectionDrivers, cellgroup1 and cellgroup 1 will have the same rows (genes) - - if(!(dim(group1)[1] == dim(group2)[1])){ - stop("Rows of two cell group matrices are not identical") - } - - if(any(is.na(group1)) | any(is.na(group2))){ - stop("NA values in count matrices not allowed") - } - - ##Take means over all genes and calculate differences - group1_mean <- apply(group1, 1, mean) - group2_mean <- apply(group2, 1, mean) - mean_diff <- group1_mean - group2_mean - - - #if weights are provided, use them to weight the difference in means - if(!is.null(diff_weights)){ - - #check that genes exactly match between difference vector and weight vector - if(!(all(names(mean_diff) == names(diff_weights)))){ - stop("Names of loadings and counts do not match") - } - - mean_diff <- mean_diff * diff_weights - } - - - - ##Stats and corrections beginning here - dimensionality <- length(mean_diff) #number of measurements (genes) - - n1_samples <- dim(group1)[2] #number of samples (cells) - n2_samples <- dim(group2)[2] - bon_correct <- pvalue / (2*dimensionality) #bonferroni correction - qval <- 1 - bon_correct - - tval <- qt(p = qval, df = n1_samples + n2_samples -2) #critical value - - group1_var <- apply(group1, 1, var) - group2_var <- apply(group2, 1, var) - - pooled <- ((n1_samples-1)*group1_var + (n2_samples-1)*group2_var) / (n1_samples+n2_samples-2) - - #establish dataframe to populate in the following for loop - plusminus = data.frame(low = rep(NA_integer_, dimensionality), high = rep(NA_integer_, dimensionality)) - rownames(plusminus) <- names(mean_diff) - - - #for each gene, calculate confidence interval around mean - for(i in 1:dimensionality){ - - scale = tval * sqrt(pooled[i] * (1/n1_samples + 1/n2_samples)) - - plusminus[i, "low"] <- mean_diff[i] - scale #low estimate - plusminus[i, "high"] <- mean_diff[i] + scale #high estimate - - } - - - return(plusminus) -} - - - -####################################################################################################################################### -#' projectionDriveR -#' -#' Calculate the weighted difference in expression between two groups (group1 - group2) -#' -#' @importFrom cowplot plot_grid -#' @param cellgroup1 gene x cell count matrix for cell group 1 -#' @param cellgroup2 gene x cell count matrix for cell group 2 -#' @param loadings A matrix of continuous values defining the features -#' @param pattern_name column of loadings for which drivers will be calculated. -#' @param pvalue confidence level for the bonferroni confidence intervals. Default 1e-5 -#' @param loadingsNames a vector with names of loading rows. Defaults to rownames. -#' @param display boolean. Whether or not to plot and display confidence intervals -#' @param normalize_pattern Boolean. Whether or not to normalize pattern weights. -#' @return A list with weighted mean differences, mean differences, and differential genes that meet the provided signficance threshold. -#' @export -#' -#' -projectionDriveR<-function( - cellgroup1, #gene x cell count matrix for cell group 1 - cellgroup2, #gene x cell count matrix for cell group 2 - loadings, # a matrix of continous values to be projected with unique rownames - loadingsNames = NULL, # a vector with names of loadings rows - pattern_name, - pvalue = 1e-5, - display = TRUE, - normalize_pattern = TRUE -){ - - #Count matrices can be class matrix, data.frame, sparse.matrix, ... anything that is coercible by as.matrix() - - #TODO: assert rownames and colnames exist where needed, and that things are matrices (or can be cast to) - - - #check that alpha significance level is appropriate - if(pvalue <= 0 | pvalue >= 1){ - stop("pvalue must be numeric between 0 and 1") - } - - #Make sure provided pattern string is a character vector of length one - if(length(pattern_name) != 1 | !is.character(pattern_name)){ - stop("provided pattern_name must be a character vector of length one") - } - - #set loadings rownames if provided - if(!is.null(loadingsNames)){ - rownames(loadings) <- loadingsNames - } - - #pattern weights must be formatted as a matrix for normalization - if(pattern_name %in% colnames(loadings)){ - pattern <- loadings[,pattern_name, drop = F] #data.frame - pattern <- as.matrix(pattern) - } else { - stop(paste0("Provided pattern_name ",pattern_name, " is not a column in provided loadings")) - } - - #Filter the two count matrices and the pattern weights to include the intersection of their features - #shared rows in two data matrices - filtered_data <-geneMatchR(data1=cellgroup1, data2=cellgroup2, data1Names=NULL, data2Names=NULL, merge=FALSE) - print(paste(as.character(dim(filtered_data[[2]])[1]),'row names matched between datasets')) - - cellgroup1 <- filtered_data[[2]] #geneMatchR flips the indexes - cellgroup2 <- filtered_data[[1]] - - - #shared rows in data matrices and loadings - filtered_weights <- geneMatchR(data1 = cellgroup1, data2 = pattern, data1Names = NULL, data2Names = NULL, merge = F) - dimensionality_final <- dim(filtered_weights[[2]])[1] - - print(paste(as.character(dimensionality_final,'row names matched between data and loadings'))) - print(paste('Updated dimension of data:',as.character(paste(dimensionality_final, collapse = ' ')))) - - if(dimensionality_final == 0){ - stop("No features matched by rownames of count matrix and rownames of loadings") - } - - pattern_filtered <- filtered_weights[[1]] - - cellgroup1_filtered <- filtered_weights[[2]] - #do second filtering on other cell group so all genes are consistent - cellgroup2_filtered <- cellgroup2[rownames(cellgroup1_filtered),] - - - #normalize pattern weights - if(normalize_pattern){ - weight_norm <- norm(pattern_filtered) #square of sums of squares (sum for all positive values) - num_nonzero <- sum(pattern_filtered > 0) #number of nonzero weights - pattern_filtered <- pattern_filtered * num_nonzero / weight_norm - } - - #cast feature weights to a named vector - pattern_normalized_vec <- pattern_filtered[,1] - names(pattern_normalized_vec) <- rownames(pattern_filtered) - - - - - #weighted confidence intervals of differences in cluster means - weighted_drivers_bonferroni <- bonferroniCorrectedDifferences(group1 = cellgroup1_filtered, - group2 = cellgroup2_filtered, - diff_weights = pattern_normalized_vec, - pvalue = pvalue) - - #unweighted confidence intervals of difference in cluster means - mean_bonferroni <- bonferroniCorrectedDifferences(group1 = cellgroup1_filtered, - group2 = cellgroup2_filtered, - diff_weights = NULL, - pvalue = pvalue) - - #Determine which genes have significantly non-zero mean difference and weighted mean difference - #significant - weighted_sig_idx <- apply(weighted_drivers_bonferroni, 1, function(interval){ - (interval[1] > 0 & interval[2] > 0) | (interval[1] < 0 & interval[2] < 0) - }) - - mean_sig_idx <- apply(mean_bonferroni, 1, function(interval){ - (interval[1] > 0 & interval[2] > 0) | (interval[1] < 0 & interval[2] < 0) - }) - - #genes that are collectively either up or down - shared_genes <- base::intersect( - rownames(weighted_drivers_bonferroni)[weighted_sig_idx], - rownames(mean_bonferroni)[mean_sig_idx]) - - if(length(shared_genes) == 0){ - #no genes were significant. Return info we have and skip plotting. - warning("No features (and weighted features) were significantly differentially used between the two groups") - return(list( - mean_ci = mean_bonferroni, - weighted_mean_ci = weighted_drivers_bonferroni, - normalized_weights = pattern_normalized_vec, - significant_genes = shared_genes, - plotted_ci = NULL)) - } - - - conf_intervals <- mean_bonferroni[shared_genes,] - sig_weights <- pattern_normalized_vec[shared_genes] - - #create confidence interval plot - pl <- plotConfidenceIntervals(conf_intervals, - weights = sig_weights, - pattern_name = pattern_name) - - if(display){ - #print confidence interval pointrange plot - print(cowplot::plot_grid(pl[["ci_estimates_plot"]], - pl[["weights_heatmap"]], - ncol = 2, - align = "h", - rel_widths = c(1,.3))) - } - - return(list( - mean_ci = mean_bonferroni, - weighted_mean_ci = weighted_drivers_bonferroni, - normalized_weights = pattern_normalized_vec, - significant_genes = shared_genes, - plotted_ci = pl)) -} - diff --git a/R/projectionDriveRfun.R b/R/projectionDriveRfun.R index bff9a07..fa09bb9 100644 --- a/R/projectionDriveRfun.R +++ b/R/projectionDriveRfun.R @@ -118,119 +118,6 @@ bonferroniCorrectedDifferences <- function( return(plusminus) } -####################################################################################################################################### -#' plotConfidenceIntervals -#' @param confidence_intervals #confidence_interval is a data.frame or matrix with two columns (low, high). Genes must be rownames -#' @param pattern_name name of pattern being plotted, if NULL, defaults to 'weights' -#' @param sort Sorts genes by increasing estimate order if true -#' @param genes vector of genes that can be used to subset data -#' @param weights if provided, will generate heatmap for pattern weights -#' @param weights_clip upper limit of pattern weights -#' @param weights_vis_norm method of transforming weights to percentiles, currently only supports 'none' and 'quantiles' -#' @param weighted specifies whether the confidence intervals in use are weighted by the pattern and labels plots accordingly -#' @importFrom viridis scale_fill_viridis -#' @importFrom scales squish -plotConfidenceIntervals <- function( - confidence_intervals, - interval_name = c("low","high"), - pattern_name = NULL, - sort = T, - genes = NULL, - weights = NULL, - weights_clip = 0.99, - weights_vis_norm = "none", - weighted = F){ - - if(weights_clip < 0 | weights_clip > 1){ - stop("weights_clip must be numeric between 0 and 1") - } - - if(!(weights_vis_norm %in% c("none","quantiles"))){ - stop("weights_vis_norm must be either 'none' or 'quantiles'") - } - - if(weighted == F){ - lab = "Unweighted" - } else{ - lab = "Weighted" - } - #gene names were stored as rownames, make sure high and low estimates are stored - confidence_intervals$gene_names <- rownames(confidence_intervals) - confidence_intervals$low <- confidence_intervals[,interval_name[1]] - confidence_intervals$high <- confidence_intervals[,interval_name[2]] - - n <- dim(confidence_intervals)[1] - confidence_intervals <- confidence_intervals %>% - mutate( - mid = (high+low)/2, #estimate, used for point position - positive = mid > 0) #upregulated, used for color scheme - - if(!is.null(genes)){ - #select genes provided and get them in that order - if(!(is.character(genes))){ stop("Genes must be provided as a character vector") } - n <- length(genes) - message(paste0("Selecting ", n, " features")) - confidence_intervals <- confidence_intervals[genes,] - - } - - if(sort){ - #order in increasing order on estimates, and create index variable - message("sorting genes in increasing order of estimates...") - confidence_intervals <- confidence_intervals %>% mutate(idx = dense_rank(mid)) %>% - arrange(mid) - - } else{ - #if not sorted, create index variable for current order - confidence_intervals <- confidence_intervals %>% mutate(idx = 1:n) - } - - #genereate point range plot - ci_plot <- ggplot(data = confidence_intervals, aes(y = idx, x = mid)) + geom_pointrange(aes(xmin = low, xmax = high, color = positive)) + - geom_point(aes(x = mid, y = idx), fill ="black",color = "black") + - theme_minimal() + - xlab("Difference in group means") + - ylab("Genes") + - geom_vline(xintercept = 0, color = "black", linetype = "dashed") + - theme(legend.position = "none") + - ggtitle(lab) - - #if provided, create heatmap for pattern weights - if(!is.null(weights)){ - - #check that weights are formatted as a named vector - if(!(is.numeric(weights))){ stop("Weights must be provided as a numeric vector") } - if(is.null(names(weights))){ stop("Weights must have names that match estimates")} - - #either use pattern_name, or if not provided, just label heatmap with "weights" - hm_name <- ifelse(is.null(pattern_name), "weights", pattern_name) - - #maintain established order from the pointrange plot - ordered_weights <- weights[rownames(confidence_intervals)] - - if(weights_vis_norm == "quantiles"){ - #transform to percentiles from 0 to 1 - ordered_weights <- trunc(rank(ordered_weights))/length(ordered_weights) - hm_name <- paste0(hm_name, " (quantiles)") #append quantile to plot name - } - - confidence_intervals$weights <- ordered_weights - - #generate heatmap - wt_heatmap <- ggplot(data = confidence_intervals) + - geom_tile(aes(x = 1, y = 1:n, fill = weights)) + - scale_fill_viridis(limits=c(0, quantile(ordered_weights,weights_clip )), - oob=squish, - name = hm_name) + - theme_void() - - } else{ wt_heatmap = NULL} #if weights aren't provided, return NULL - - return(list("ci_estimates_plot" = ci_plot, - "feature_order" = rownames(confidence_intervals), - "weights_heatmap" = wt_heatmap)) -} - ####################################################################################################################################### #' projectionDriveR diff --git a/tests/testthat/test_projectR.R b/tests/testthat/test_projectR.R index 3d7e072..3a28bf7 100644 --- a/tests/testthat/test_projectR.R +++ b/tests/testthat/test_projectR.R @@ -65,3 +65,73 @@ test_that("results are as expected",{ expect_true("CI" %in% names(output[[1]])) }) + +#projectionDriveR check +#test that expected output is present and in correct format + +test_that("results are correctly formatted",{ + + pattern_to_weight <- "Pattern.24" + drivers <- projectionDriveR(microglial_counts, #expression matrix + glial_counts, #expression matrix + loadings = retinal_patterns, #feature x pattern dataframe + loadingsNames = NULL, + pattern_name = pattern_to_weight, #column name + pvalue = 1e-5, #pvalue before bonferroni correction + display = T, + normalize_pattern = T) #normalize feature weights +#check output is in list format +expect_is(drivers, "list") + +#check length of dfs +expect_length(drivers, 12) +expect_length(drivers$mean_ci, 9) +expect_length(drivers$weighted_mean_ci, 9) + +#check that genes used for calculations overlap both datasets and loadings +expect_true(unique(drivers$mean_ci$gene %in% rownames(microglial_counts))) +expect_true(unique(drivers$mean_ci$gene %in% rownames(glial_counts))) +expect_true(unique(drivers$mean_ci$gene %in% rownames(retinal_patterns))) +expect_true(unique(drivers$weighted_mean_ci$gene %in% rownames(microglial_counts))) +expect_true(unique(drivers$weighted_mean_ci$gene %in% rownames(glial_counts))) +expect_true(unique(drivers$weighted_mean_ci$gene %in% rownames(retinal_patterns))) + +#name and class checks +expect_true("mean_ci" %in% names(drivers)) +expect_is(drivers$mean_ci, "data.frame") + +expect_true("weighted_mean_ci" %in% names(drivers)) +expect_is(drivers$mean_ci, "data.frame") + +expect_true("significant_shared_genes" %in% names(drivers)) +expect_is(drivers$significant_shared_genes, "character") + +expect_true("weighted_sig_genes" %in% names(drivers)) +expect_is(drivers$weighted_sig_genes, "character") + +expect_true("unweighted_sig_genes" %in% names(drivers)) +expect_is(drivers$unweighted_sig_genes, "character") + +expect_true("welch_sig" %in% names(drivers)) +expect_is(drivers$welch_sig, "character") + +expect_true("weighted_welch_sig" %in% names(drivers)) +expect_is(drivers$weighted_welch_sig, "character") + +expect_true("welch_significant_shared_genes" %in% names(drivers)) +expect_is(drivers$welch_significant_shared_genes, "character") + +expect_true("pvalue" %in% names(drivers)) +expect_is(drivers$pvalue, "numeric") + +#check that matrix names are proper and match source names +expect_true(deparse(substitute(microglial_counts)) == drivers$test_matrix) +expect_type(drivers$test_matrix, "character") + +expect_true(deparse(substitute(glial_counts)) == drivers$reference_matrix) +expect_type(drivers$reference_matrix, "character") + +#check that plot length is correct +expect_true("plotted_ci" %in% names(drivers)) +expect_length(drivers$plotted_ci, 2) +}) From a38a12c15fd3edc16b97246245fc952baf422f6a Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Tue, 9 Jan 2024 15:02:02 -0500 Subject: [PATCH 04/33] Updated projectionDriveR function and added volcano plotting function Updated projectionDriveR function with mode argument, user specifies generation of confidence intervals or pvalues Updated plotting.R with volcano plotting function Added tests for updated functions --- R/plotting.R | 170 ++++++++++++++++- R/projectionDriveRfun.R | 330 +++++++++++++++++---------------- tests/testthat/test_projectR.R | 108 ++++++++--- 3 files changed, 420 insertions(+), 188 deletions(-) diff --git a/R/plotting.R b/R/plotting.R index fb62274..0b73ccd 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -9,10 +9,6 @@ # superheat(test$projection,row.dendrogram=TRUE, pretty.order.cols = TRUE, # heat.pal.values = c(0, 0.5, 1),yt=colSums(test$projection),yt.plot.type='scatterline',yt.axis.name="Sum of\nProjections",X.text=tmp,X.text.size=8,bottom.label.text.angle = 90) # - - - - ####################################################################################################################################### #' #' plotConfidenceIntervals @@ -137,3 +133,169 @@ plotConfidenceIntervals <- function( "weights_heatmap" = wt_heatmap)) } +####################################################################################################################################### +#' pdVolcano +#' +#' Generate volcano plot and gate genes based on fold change and pvalue, includes vectors that can be used with fast gene set enrichment (fgsea) +#' @param result result output from projectionDriveR function with PI method selected +#' @param FC fold change threshold, default at 0.2 +#' @param pvalue significance threshold, default set to pvalue stored in projectionDriveR output +#' @param subset vector of gene names to subset the plot by +#' @param filter.inf remove genes that have pvalues below machine double minimum value +#' @param label.no Number of genes to label on either side of the volcano plot, default 5 +#' @importFrom stats var +#' @importFrom ggrepel geom_label_repel +#' @importFrom ggrepel geom_text_repel +#' @import dplyr +#plot FC, weighted and unweighted. Designed for use with the output of projectionDriveRs +pdVolcano <- function(result, + FC = 0.2, + pvalue = NULL, + subset = NULL, + filter.inf = FALSE, + label.num = 5) { + + #if a genelist is provided, use them to subset the output of projectiondrivers + if(!is.null(subset)){ + #subset the mean_stats object by provided gene list + result$mean_stats <- result$mean_stats[(which(rownames(result$mean_stats) %in% subset)),] + #subset the weighted_mean_stats object by provided gene list + result$weighted_mean_stats <- result$weighted_mean_stats[(which(rownames(result$weighted_mean_stats) %in% subset)),] + + } + + if(filter.inf == TRUE){ + #remove p values below the machine limit representation for plotting purposes + cat("Filtering", length(which(result$mean_stats$welch_padj <= .Machine$double.xmin)),"unweighted genes and", + length(which(result$weighted_mean_stats$welch_padj <= .Machine$double.xmin)), "weighted genes", "\n") + result$mean_stats <- subset(result$mean_stats, welch_padj > .Machine$double.xmin) + result$weighted_mean_stats <- subset(result$weighted_mean_stats, welch_padj > .Machine$double.xmin) + } + + if(is.numeric(FC) == FALSE){ + stop('FC must be a number') + } + + if(is.null(pvalue) == FALSE) { + pvalue = pvalue + } else { + pvalue <- result$meta_data$pvalue + } + + #extract object meta data + metadata <- result$meta_data + + #volcano plotting unweighted + #extract unweighted confidence intervals / statistics + mean_stats <- result$mean_stats + #fold change / significance calls + mean_stats$Color <- paste("NS or FC", FC) + mean_stats$Color[mean_stats$welch_padj < pvalue & mean_stats$mean_diff > FC] <- paste("Enriched in", metadata$test_matrix) + mean_stats$Color[mean_stats$welch_padj < pvalue & mean_stats$mean_diff < -FC] <- paste("Enriched in", metadata$reference_matrix) + mean_stats$Color <- factor(mean_stats$Color, + levels = c(paste("NS or FC <", FC), paste("Enriched in", metadata$reference_matrix), paste("Enriched in", metadata$test_matrix))) + + #label the most significant genes for enrichment + mean_stats$invert_P <- (-log10(mean_stats$welch_padj)) * (mean_stats$mean_diff) + + top_indices <- order(mean_stats$invert_P, decreasing = TRUE)[1:label.num] + bottom_indices <- order(mean_stats$invert_P)[1:label.num] + + # Add labels to the dataframe + mean_stats$label <- NA + mean_stats$label[top_indices] <- paste(rownames(mean_stats)[top_indices]) + mean_stats$label[bottom_indices] <- paste(rownames(mean_stats)[bottom_indices]) + + #set custom colors + myColors <- c("gray","red","dodgerblue") + names(myColors) <- levels(mean_stats$Color) + custom_colors <- scale_color_manual(values = myColors, drop = FALSE) + + #plot + unweightedvolcano = ggplot(data = mean_stats, aes(x = mean_diff, y = -log10(welch_padj), color = Color, label = label)) + + geom_vline(xintercept = c(FC, -FC), lty = "dashed") + + geom_hline(yintercept = -log10(pvalue), lty = "dashed") + + geom_point(na.rm = TRUE) + + custom_colors + + coord_cartesian(ylim = c(0, 350), xlim = c(-2, 2)) + + ggrepel::geom_text_repel(size = 3, point.padding = 1, color = "black", + min.segment.length = .1, box.padding = 0.15, + max.overlaps = Inf, na.rm = TRUE) + + labs(x = "FC", + y = "Significance (-log10pval)", + color = NULL) + + ggtitle("Differential Expression") + + theme_bw() + + theme(plot.title = element_text(size = 16), + legend.position = "bottom", + axis.title=element_text(size=14), + legend.text = element_text(size=12)) + + #weighted volcano plot + weighted_mean_stats <- result$weighted_mean_stats + weighted_mean_stats$Color <- paste("NS or FC <", FC) + weighted_mean_stats$Color[weighted_mean_stats$welch_padj < pvalue & weighted_mean_stats$mean_diff > FC] <- paste("Enriched in", metadata$test_matrix) + weighted_mean_stats$Color[weighted_mean_stats$welch_padj < pvalue & weighted_mean_stats$mean_diff < -FC] <- paste("Enriched in", metadata$reference_matrix) + weighted_mean_stats$Color <- factor(weighted_mean_stats$Color, + levels = c(paste("NS or FC <", FC), paste("Enriched in", metadata$reference_matrix), paste("Enriched in", metadata$test_matrix))) + + weighted_mean_stats$invert_P <- (-log10(weighted_mean_stats$welch_padj)) * (weighted_mean_stats$mean_diff) + + + top_indices <- order(weighted_mean_stats$invert_P, decreasing = TRUE)[1:label.num] + bottom_indices <- order(weighted_mean_stats$invert_P)[1:label.num] + + # Add labels to the dataframe + weighted_mean_stats$label <- NA + weighted_mean_stats$label[top_indices] <- paste(rownames(weighted_mean_stats)[top_indices]) + weighted_mean_stats$label[bottom_indices] <- paste(rownames(weighted_mean_stats)[bottom_indices]) + + myColors <- c("gray","red","dodgerblue") + names(myColors) <- levels(weighted_mean_stats$Color) + custom_colors <- scale_color_manual(values = myColors, drop = FALSE) + + weightedvolcano = ggplot(data = weighted_mean_stats, aes(x = mean_diff, y = -log10(welch_padj), color = Color, label = label)) + + geom_vline(xintercept = c(FC, -FC), lty = "dashed") + + geom_hline(yintercept = -log10(pvalue), lty = "dashed") + + geom_point(na.rm = TRUE) + + custom_colors + + coord_cartesian(ylim = c(0, 350), xlim = c(-2, 2)) + + ggrepel::geom_text_repel(size = 3, point.padding = 1, color = "black", + min.segment.length = .1, box.padding = 0.15, + max.overlaps = Inf, na.rm = TRUE) + + labs(x = "FC", + y = "Significance (-log10pval)", + color = NULL) + + ggtitle("Weighted Differential Expression") + + theme_bw() + + theme(plot.title = element_text(size = 16), + legend.position = "bottom", + axis.title=element_text(size=14), + legend.text = element_text(size=12)) + + plt <- ggpubr::ggarrange(unweightedvolcano, weightedvolcano, common.legend = TRUE, legend = "bottom") + print(plt) + + #return a list of genes that can be used as input to fgsea + difexdf <- subset(mean_stats, Color == paste("Enriched in", metadata$reference_matrix) | Color == paste("Enriched in", metadata$test_matrix)) + vec <- difexdf$estimate + names(vec) <- rownames(difexdf) + + weighted_difexdf <- subset(weighted_mean_stats, Color == paste("Enriched in", metadata$reference_matrix) | Color == paste("Enriched in", metadata$test_matrix)) + weighted_vec <- weighted_difexdf$estimate + names(weighted_vec) <- rownames(weighted_difexdf) + names(vec) <- rownames(difexdf) + vol_result <- list(mean_stats = mean_stats, + weighted_mean_stats = weighted_mean_stats, + sig_genes = result$sig_genes, + difexpgenes = difexdf, + weighted_difexpgenes = weighted_difexdf, + fgseavecs = list(unweightedvec = vec, + weightedvec = weighted_vec), + meta_data = metadata, + plt = plt) + return(vol_result) +} + + + diff --git a/R/projectionDriveRfun.R b/R/projectionDriveRfun.R index fa09bb9..14a201d 100644 --- a/R/projectionDriveRfun.R +++ b/R/projectionDriveRfun.R @@ -5,19 +5,20 @@ #' @param group1 count matrix 1 #' @param group2 count matrix 2 #' @param diff_weights loadings to weight the differential expression between the groups -#' @param pvalue significance value to threshold at +#' @param pvalue significance value to threshold +#' @param mode user specified statistical approach, confidence intervals (CI) or pvalues (PV) - default to CI #' @importFrom stats var #' @importFrom ggrepel geom_label_repel #' @import dplyr bonferroniCorrectedDifferences <- function( group1, group2, - diff_weights = NULL, + diff_weights = NULL, + mode = "CI", pvalue) { #if passed from projectionDrivers, cellgroup1 and cellgroup 1 will have the same rows (genes) - if(!(dim(group1)[1] == dim(group2)[1])){ stop("Rows of two cell group matrices are not identical") } @@ -43,8 +44,6 @@ bonferroniCorrectedDifferences <- function( mean_diff <- mean_diff * diff_weights } - - ##Stats and corrections beginning here #calculate confidence intervals dimensionality <- length(mean_diff) #number of measurements (genes) @@ -56,67 +55,80 @@ bonferroniCorrectedDifferences <- function( tval <- qt(p = qval, df = n1_samples + n2_samples -2) #critical value - #calculate p values group1_var <- apply(group1, 1, var) #variance of genes across group 1 group2_var <- apply(group2, 1, var) #variance of genes across group 2 - #vartest <- group1_var / group2_var #test to see if variance across groups is equal, often not equal - - pooled <- ((n1_samples-1)*group1_var + (n2_samples-1)*group2_var) / (n1_samples+n2_samples-2) #pooled standard deviation - - #welch t test - deltaS <- sqrt((group1_var / n1_samples) + (group2_var / n2_samples)) #variance calculation - - welch_estimate <- round(mean_diff / deltaS, digits = 10) #welch t statistic, rounded to 10 digits to avoid infinite decimals - - df <- (((group1_var / n1_samples) + (group2_var / n2_samples))^2) / ((((group1_var / n1_samples)^2) / (n1_samples - 1)) + (((group2_var / n2_samples)^2) / (n2_samples - 1))) #Welch-Satterthwaite equation for degrees of freedom - - welch_pvalue <- 2*pt(-abs(welch_estimate), df=df) #calculate p value from estimate (tvalue) - - welch_p_value_boncorrected <- p.adjust(welch_pvalue, method = "bonferroni", n = dimensionality) #bonferroni correction - - # replace p values equal to zero with the smallest machine value possible - if (min(welch_p_value_boncorrected, na.rm=TRUE) <= .Machine$double.xmin) { - zp <- length(which(welch_p_value_boncorrected <= .Machine$double.xmin)) - warning(paste(zp,"P value(s) equal 0.", - "Converting any values less than", .Machine$double.xmin, "to minimum possible value..."), - call. = FALSE) - welch_p_value_boncorrected[welch_p_value_boncorrected <= .Machine$double.xmin] <- .Machine$double.xmin - } - - #establish dataframe to populate in the following for loop - plusminus = data.frame(low = rep(NA_integer_, dimensionality), - high = rep(NA_integer_, dimensionality), - estimate = rep(NA_integer_, dimensionality), - mean_diff = rep(NA_integer_, dimensionality), - welch_pvalue = rep(NA_integer_, dimensionality), - welch_p_value_boncorrected = rep(NA_integer_, dimensionality), - ref_mean = rep(NA_integer_, dimensionality), - test_mean = rep(NA_integer_, dimensionality), - gene = rep(NA_integer_, dimensionality)) - rownames(plusminus) <- names(mean_diff) - - - #for each gene, calculate confidence interval around mean - for(i in 1:dimensionality){ + + + if(mode == "CI") { - scale = tval * sqrt(pooled[i] * (1/n1_samples + 1/n2_samples)) + #pooled standard deviation + pooled <- ((n1_samples-1)*group1_var + (n2_samples-1)*group2_var) / (n1_samples+n2_samples-2) + #establish dataframe to populate in the following for loop + plusminus = data.frame(low = rep(NA_integer_, dimensionality), + high = rep(NA_integer_, dimensionality), + gene = rep(NA_integer_, dimensionality)) + rownames(plusminus) <- names(mean_diff) - plusminus[i, "low"] <- mean_diff[i] - scale #low estimate - plusminus[i, "high"] <- mean_diff[i] + scale #high estimate - plusminus[i, "estimate"] <- welch_estimate[i] - plusminus[i, "mean_diff"] <- mean_diff[i] - plusminus[i, "welch_pvalue"] <- welch_pvalue[i] - plusminus[i, "welch_p_value_boncorrected"] <- welch_p_value_boncorrected[i] - plusminus[i, "ref_mean"] <- group2_mean[i] - plusminus[i, "test_mean"] <- group1_mean[i] - plusminus[i, "gene"] <- names(mean_diff[i]) + #for each gene, calculate confidence interval around mean + for(i in 1:dimensionality){ + + scale = tval * sqrt(pooled[i] * (1/n1_samples + 1/n2_samples)) + + plusminus[i, "low"] <- mean_diff[i] - scale #low estimate + plusminus[i, "high"] <- mean_diff[i] + scale #high estimate + plusminus[i, "gene"] <- names(mean_diff[i]) #gene names for easy sorting + } + + } else if (mode == "PV") { + #welch t test + #vartest <- group1_var / group2_var #test to see if variance across groups is equal, often not equal + #variance calculation + deltaS <- sqrt((group1_var / n1_samples) + (group2_var / n2_samples)) + #welch t statistic, rounded to 10 digits to avoid infinite decimals + welch_estimate <- round(mean_diff / deltaS, digits = 10) + #Welch-Satterthwaite equation for degrees of freedom + df <- (((group1_var / n1_samples) + (group2_var / n2_samples))^2) / ((((group1_var / n1_samples)^2) / (n1_samples - 1)) + (((group2_var / n2_samples)^2) / (n2_samples - 1))) + #calculate p value from estimate/tvalue + welch_pvalue <- 2*pt(-abs(welch_estimate), df=df) + #bonferroni correction + welch_padj <- p.adjust(welch_pvalue, method = "bonferroni", n = dimensionality) + # replace p values equal to zero with the smallest machine value possible + if (min(welch_padj, na.rm=TRUE) <= .Machine$double.xmin) { + zp <- length(which(welch_padj <= .Machine$double.xmin)) + warning(paste(zp,"P value(s) equal 0.", + "Converting any values less than", .Machine$double.xmin, "to minimum possible value..."), + call. = FALSE) + welch_padj[welch_padj <= .Machine$double.xmin] <- .Machine$double.xmin + } + #establish dataframe to populate in the following for loop + plusminus = data.frame(ref_mean = rep(NA_integer_, dimensionality), + test_mean = rep(NA_integer_, dimensionality), + mean_diff = rep(NA_integer_, dimensionality), + estimate = rep(NA_integer_, dimensionality), + welch_pvalue = rep(NA_integer_, dimensionality), + welch_padj = rep(NA_integer_, dimensionality), + gene = rep(NA_integer_, dimensionality)) + rownames(plusminus) <- names(mean_diff) + #input stats gene-wise + for(i in 1:dimensionality){ + plusminus[i, "ref_mean"] <- group2_mean[i] + plusminus[i, "test_mean"] <- group1_mean[i] + plusminus[i, "mean_diff"] <- mean_diff[i] + plusminus[i, "estimate"] <- welch_estimate[i] + plusminus[i, "welch_pvalue"] <- welch_pvalue[i] + plusminus[i, "welch_padj"] <- welch_padj[i] + plusminus[i, "gene"] <- names(mean_diff[i]) + } + } else { + stop("Invalid mode selection") } return(plusminus) } + ####################################################################################################################################### @@ -134,6 +146,7 @@ bonferroniCorrectedDifferences <- function( #' @param loadingsNames a vector with names of loading rows. Defaults to rownames. #' @param display boolean. Whether or not to plot and display confidence intervals #' @param normalize_pattern Boolean. Whether or not to normalize pattern weights. +#' @param mode user specified statistical approach, confidence intervals (CI) or pvalues (PV) - default to CI #' @return A list with weighted mean differences, mean differences, and differential genes that meet the provided signficance threshold. #' @export #' @@ -146,7 +159,8 @@ projectionDriveR<-function( pattern_name, pvalue = 1e-5, display = TRUE, - normalize_pattern = TRUE + normalize_pattern = TRUE, + mode = "CI" ){ #Count matrices can be class matrix, data.frame, sparse.matrix, ... anything that is coercible by as.matrix() @@ -173,12 +187,11 @@ projectionDriveR<-function( } else { stop(paste0("Provided pattern_name ",pattern_name, " is not a column in provided loadings")) } - + print(paste("Mode:",mode)) #extract names of data objects group1name <- deparse(substitute(cellgroup1)) group2name <- deparse(substitute(cellgroup2)) - #Filter the two count matrices and the pattern weights to include the intersection of their features #shared rows in two data matrices @@ -217,118 +230,115 @@ projectionDriveR<-function( pattern_normalized_vec <- pattern_filtered[,1] names(pattern_normalized_vec) <- rownames(pattern_filtered) - - - #weighted confidence intervals of differences in cluster means weighted_drivers_bonferroni <- bonferroniCorrectedDifferences(group1 = cellgroup1_filtered, group2 = cellgroup2_filtered, diff_weights = pattern_normalized_vec, - pvalue = pvalue) - weighted_welch_sig <- rownames(weighted_drivers_bonferroni[which(weighted_drivers_bonferroni$welch_p_value_boncorrected <= pvalue),]) - - # Apply the t test function to unweighted expression matrices and call significance - n_tests <- nrow(cellgroup1_filtered) - cat("ntests are:", n_tests, "\n") - - + pvalue = pvalue, + mode = mode) #unweighted confidence intervals of difference in cluster means mean_bonferroni <- bonferroniCorrectedDifferences(group1 = cellgroup1_filtered, group2 = cellgroup2_filtered, diff_weights = NULL, - pvalue = pvalue) - welch_sig <- rownames(mean_bonferroni[which(mean_bonferroni$welch_p_value_boncorrected <= pvalue),]) - - #Determine which genes have significantly non-zero mean difference and weighted mean difference - #significant - weighted_sig_idx <- apply(weighted_drivers_bonferroni[,1:2], 1, function(interval){ - (interval[1] > 0 & interval[2] > 0) | (interval[1] < 0 & interval[2] < 0) - }) - - weighted_sig_genes <- weighted_drivers_bonferroni[weighted_sig_idx,] - - mean_sig_idx <- apply(mean_bonferroni[,1:2], 1, function(interval){ - (interval[1] > 0 & interval[2] > 0) | (interval[1] < 0 & interval[2] < 0) - }) - - unweighted_sig_genes <- mean_bonferroni[mean_sig_idx,] - - - #genes that are collectively either up or down - shared_genes <- base::intersect( - rownames(weighted_drivers_bonferroni)[weighted_sig_idx], - rownames(mean_bonferroni)[mean_sig_idx]) - cat("the length of shared genes are:", length(shared_genes), '\n') - - shared_genes2 <- base::intersect( - welch_sig, weighted_welch_sig) - - - if(length(shared_genes) == 0){ - #no genes were significant. Return info we have and skip plotting. - warning("No features (and weighted features) were significantly differentially used between the two groups") - return(list( + pvalue = pvalue, + mode = mode) +#generate confidence interval mode + if(mode == "CI"){ + #Determine which genes have significantly non-zero mean difference and weighted mean difference + #significant + weighted_sig_idx <- apply(weighted_drivers_bonferroni[,1:2], 1, function(interval){ + (interval[1] > 0 & interval[2] > 0) | (interval[1] < 0 & interval[2] < 0) + }) + + mean_sig_idx <- apply(mean_bonferroni[,1:2], 1, function(interval){ + (interval[1] > 0 & interval[2] > 0) | (interval[1] < 0 & interval[2] < 0) + }) + + weighted_sig_genes <- weighted_drivers_bonferroni[weighted_sig_idx,] + unweighted_sig_genes <- mean_bonferroni[mean_sig_idx,] + #genes that are collectively either up or down + shared_genes <- base::intersect( + rownames(weighted_drivers_bonferroni)[weighted_sig_idx], + rownames(mean_bonferroni)[mean_sig_idx]) + cat("the length of shared genes are:", length(shared_genes), '\n') + conf_intervals <- mean_bonferroni[shared_genes,] + sig_weights <- pattern_normalized_vec[shared_genes] + + weighted_conf_intervals <- weighted_drivers_bonferroni[shared_genes,] + #create confidence interval plot (unweighted) + pl <- plotConfidenceIntervals(conf_intervals, + weights = sig_weights, + pattern_name = pattern_name, + weighted = F) + #weighted + pl_w <- plotConfidenceIntervals(weighted_conf_intervals, + weights = sig_weights, + pattern_name = pattern_name, + weighted = T) + + plots <- list(unweighted = pl,weighted = pl_w) + if(display){ + #print confidence interval pointrange plot + pl1_u <- (cowplot::plot_grid(pl[["ci_estimates_plot"]], + pl[["weights_heatmap"]], + ncol = 2, + align = "h", + rel_widths = c(1,.3))) + print(pl1_u) + pl2_w <- (cowplot::plot_grid(pl_w[["ci_estimates_plot"]], + pl_w[["weights_heatmap"]], + ncol = 2, + align = "h", + rel_widths = c(1,.3))) + print(pl2_w) + plt <- ggpubr::ggarrange(pl1_u, pl2_w, common.legend = TRUE, legend = "bottom") + print(plt) + } + + if(length(shared_genes) == 0){ + #no genes were significant. Return info we have and skip plotting. + warning("No features (and weighted features) were significantly differentially used between the two groups") + + result <- list(mean_ci = mean_bonferroni, + weighted_mean_ci = weighted_drivers_bonferroni, + significant_shared_genes = shared_genes, + plotted_ci = NULL, + sig_genes = list(unweighted_sig_genes = rownames(unweighted_sig_genes), + weighted_sig_genes = rownames(weighted_sig_genes)), + meta_data = list(reference_matrix = paste0(group2name), + test_matrix = paste0(group1name)) + ) + } + + result <- list( mean_ci = mean_bonferroni, weighted_mean_ci = weighted_drivers_bonferroni, - significant_shared_genes = shared_genes, - plotted_ci = NULL, - weighted_sig_genes = rownames(weighted_sig_genes), - unweighted_sig_genes = rownames(unweighted_sig_genes), - reference_matrix = paste0(group2name), - test_matrix = paste0(group1name), - welch_sig = welch_sig, - weighted_welch_sig = weighted_welch_sig, - welch_significant_shared_genes = shared_genes2, - pvalue = pvalue)) + sig_genes = list(unweighted_sig_genes = rownames(unweighted_sig_genes), + weighted_sig_genes = rownames(weighted_sig_genes), + significant_shared_genes = shared_genes), + plotted_ci = plots, + meta_data = list(reference_matrix = paste0(group2name), + test_matrix = paste0(group1name)) + ) + } else if (mode == "PV"){ + #create vector of significant genes from weighted and unweighted tests + weighted_PV_sig <- rownames(weighted_drivers_bonferroni[which(weighted_drivers_bonferroni$welch_padj <= pvalue),]) + PV_sig <- rownames(mean_bonferroni[which(mean_bonferroni$welch_padj <= pvalue),]) + #create vector of significant genes shared between weighted and unweighted tests + shared_genes_PV <- base::intersect( + PV_sig, weighted_PV_sig) + result <- list(mean_stats = mean_bonferroni, + weighted_mean_stats = weighted_drivers_bonferroni, + sig_genes = list(PV_sig = PV_sig, + weighted_PV_sig = weighted_PV_sig, + PV_significant_shared_genes = shared_genes_PV), + meta_data = list(reference_matrix = paste0(group2name), + test_matrix = paste0(group1name), + pvalue = pvalue) + ) + } else { + stop("Invalid mode selection") } - - - conf_intervals <- mean_bonferroni[shared_genes,] - sig_weights <- pattern_normalized_vec[shared_genes] - - weighted_conf_intervals <- weighted_drivers_bonferroni[shared_genes,] - - #create confidence interval plot (unweighted) - pl <- plotConfidenceIntervals(conf_intervals, - weights = sig_weights, - pattern_name = pattern_name, - weighted = F) - #weighted - pl_w <- plotConfidenceIntervals(weighted_conf_intervals, - weights = sig_weights, - pattern_name = pattern_name, - weighted = T) - plots <- list(unweighted = pl,weighted = pl_w) - if(display){ - #print confidence interval pointrange plot - pl1_u <- (cowplot::plot_grid(pl[["ci_estimates_plot"]], - pl[["weights_heatmap"]], - ncol = 2, - align = "h", - rel_widths = c(1,.3))) - print(pl1_u) - pl2_w <- (cowplot::plot_grid(pl_w[["ci_estimates_plot"]], - pl_w[["weights_heatmap"]], - ncol = 2, - align = "h", - rel_widths = c(1,.3))) - print(pl2_w) - plt <- ggpubr::ggarrange(pl1_u, pl2_w, common.legend = TRUE, legend = "bottom") - print(plt) - } - - return(list( - mean_ci = mean_bonferroni, - weighted_mean_ci = weighted_drivers_bonferroni, - significant_shared_genes = shared_genes, - plotted_ci = plots, - weighted_sig_genes = rownames(weighted_sig_genes), - unweighted_sig_genes = rownames(unweighted_sig_genes), - reference_matrix = paste0(group2name), - test_matrix = paste0(group1name), - welch_sig = welch_sig, - weighted_welch_sig = weighted_welch_sig, - welch_significant_shared_genes = shared_genes2, - pvalue = pvalue)) + return(result) } diff --git a/tests/testthat/test_projectR.R b/tests/testthat/test_projectR.R index 3a28bf7..99a18c1 100644 --- a/tests/testthat/test_projectR.R +++ b/tests/testthat/test_projectR.R @@ -69,7 +69,7 @@ test_that("results are as expected",{ #projectionDriveR check #test that expected output is present and in correct format -test_that("results are correctly formatted",{ +test_that("results are correctly formatted for confidence interval mode",{ pattern_to_weight <- "Pattern.24" drivers <- projectionDriveR(microglial_counts, #expression matrix @@ -79,14 +79,15 @@ test_that("results are correctly formatted",{ pattern_name = pattern_to_weight, #column name pvalue = 1e-5, #pvalue before bonferroni correction display = T, - normalize_pattern = T) #normalize feature weights + normalize_pattern = T, #normalize feature weights + mode = "CI") #set to confidence interval mode #check output is in list format expect_is(drivers, "list") #check length of dfs -expect_length(drivers, 12) -expect_length(drivers$mean_ci, 9) -expect_length(drivers$weighted_mean_ci, 9) +expect_length(drivers, 5) +expect_length(drivers$mean_ci, 3) +expect_length(drivers$weighted_mean_ci, 3) #check that genes used for calculations overlap both datasets and loadings expect_true(unique(drivers$mean_ci$gene %in% rownames(microglial_counts))) @@ -103,35 +104,94 @@ expect_is(drivers$mean_ci, "data.frame") expect_true("weighted_mean_ci" %in% names(drivers)) expect_is(drivers$mean_ci, "data.frame") -expect_true("significant_shared_genes" %in% names(drivers)) -expect_is(drivers$significant_shared_genes, "character") +expect_true("sig_genes" %in% names(drivers)) +expect_is(drivers$sig_genes, "list") +expect_length(drivers$sig_genes, 3) -expect_true("weighted_sig_genes" %in% names(drivers)) -expect_is(drivers$weighted_sig_genes, "character") +expect_true(unique(c("unweighted_sig_genes", "weighted_sig_genes", "significant_shared_genes") %in% names(drivers$sig_genes))) -expect_true("unweighted_sig_genes" %in% names(drivers)) -expect_is(drivers$unweighted_sig_genes, "character") +expect_is(drivers$sig_genes$unweighted_sig_genes, "character") -expect_true("welch_sig" %in% names(drivers)) -expect_is(drivers$welch_sig, "character") +expect_is(drivers$sig_genes$weighted_sig_genes, "character") -expect_true("weighted_welch_sig" %in% names(drivers)) -expect_is(drivers$weighted_welch_sig, "character") +expect_is(drivers$sig_genes$significant_shared_genes, "character") -expect_true("welch_significant_shared_genes" %in% names(drivers)) -expect_is(drivers$welch_significant_shared_genes, "character") - -expect_true("pvalue" %in% names(drivers)) -expect_is(drivers$pvalue, "numeric") +expect_true("meta_data" %in% names(drivers)) +expect_is(drivers$meta_data, "list") +expect_length(drivers$meta_data, 2) #check that matrix names are proper and match source names -expect_true(deparse(substitute(microglial_counts)) == drivers$test_matrix) -expect_type(drivers$test_matrix, "character") +expect_true(deparse(substitute(microglial_counts)) == drivers$meta_data$test_matrix) +expect_type(drivers$meta_data$test_matrix, "character") -expect_true(deparse(substitute(glial_counts)) == drivers$reference_matrix) -expect_type(drivers$reference_matrix, "character") +expect_true(deparse(substitute(glial_counts)) == drivers$meta_data$reference_matrix) +expect_type(drivers$meta_data$reference_matrix, "character") #check that plot length is correct expect_true("plotted_ci" %in% names(drivers)) expect_length(drivers$plotted_ci, 2) }) + +test_that("results are correctly formatted for P value mode",{ + + pattern_to_weight <- "Pattern.24" + drivers <- projectionDriveR(microglial_counts, #expression matrix + glial_counts, #expression matrix + loadings = retinal_patterns, #feature x pattern dataframe + loadingsNames = NULL, + pattern_name = pattern_to_weight, #column name + pvalue = 1e-5, #pvalue before bonferroni correction + display = T, + normalize_pattern = T, #normalize feature weights + mode = "PV") #set to p value mode + #check output is in list format + expect_is(drivers, "list") + + #check length of dfs + expect_length(drivers, 4) + expect_length(drivers$mean_stats, 7) + expect_length(drivers$weighted_mean_stats, 7) + + #check that genes used for calculations overlap both datasets and loadings + expect_true(unique(drivers$mean_stats$gene %in% rownames(microglial_counts))) + expect_true(unique(drivers$mean_stats$gene %in% rownames(glial_counts))) + expect_true(unique(drivers$mean_stats$gene %in% rownames(retinal_patterns))) + expect_true(unique(drivers$weighted_mean_stats$gene %in% rownames(microglial_counts))) + expect_true(unique(drivers$weighted_mean_stats$gene %in% rownames(glial_counts))) + expect_true(unique(drivers$weighted_mean_stats$gene %in% rownames(retinal_patterns))) + + #name and class checks + expect_true("mean_stats" %in% names(drivers)) + expect_is(drivers$mean_stats, "data.frame") + + expect_true("weighted_mean_stats" %in% names(drivers)) + expect_is(drivers$mean_stats, "data.frame") + + expect_true("sig_genes" %in% names(drivers)) + expect_is(drivers$sig_genes, "list") + expect_length(drivers$sig_genes, 3) + + expect_true(unique(c("PV_sig", "weighted_PV_sig", "PV_significant_shared_genes") %in% names(drivers$sig_genes))) + + expect_is(drivers$sig_genes$PV_sig, "character") + + expect_is(drivers$sig_genes$weighted_PV_sig, "character") + + expect_is(drivers$sig_genes$PV_significant_shared_genes, "character") + + expect_true("meta_data" %in% names(drivers)) + expect_is(drivers$meta_data, "list") + expect_length(drivers$meta_data, 3) + expect_true("pvalue" %in% names(drivers$meta_data)) + expect_is(drivers$meta_data$pvalue, "numeric") + + #check that matrix names are proper and match source names + expect_true(deparse(substitute(microglial_counts)) == drivers$meta_data$test_matrix) + expect_type(drivers$meta_data$test_matrix, "character") + + expect_true(deparse(substitute(glial_counts)) == drivers$meta_data$reference_matrix) + expect_type(drivers$meta_data$reference_matrix, "character") + +}) + + From fcbab62dc8536d31615c75cb2274d1b2de3a96a2 Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Tue, 16 Jan 2024 13:47:42 -0500 Subject: [PATCH 05/33] Updated vignette Updated vignette to reflect updated projectionDriveR function --- vignettes/projectR.Rmd | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/vignettes/projectR.Rmd b/vignettes/projectR.Rmd index af6ccba..02ad9fd 100644 --- a/vignettes/projectR.Rmd +++ b/vignettes/projectR.Rmd @@ -367,7 +367,7 @@ Given loadings that define the weight of features (genes) in a given latent spac ``` library(projectR) projectionDriveR(cellgroup1, cellgroup2, loadings, loadingsNames = NULL, - pvalue, pattern_name, display = T, normalize_pattern = T) + pvalue, pattern_name, display = T, normalize_pattern = T, mode = "CI") ``` @@ -384,10 +384,10 @@ The arguments for projectionDriveR are: **`pvalue`** Determines the significance of the confidence interval to be calculated between the difference of means **`display`** Boolean. Whether or not to plot the estimates of significant features. Default = T **`normalize_pattern`** Boolean. Whether or not to normalize the average feature weight. Default = T - +**`mode`** 'CI' or 'PV'. Specifies whether to run projectionDriveR in confidence interval mode or to generate p values. Default = "CI" ### Output -The output of `projectionDriveR` is a list of length five `mean_ci` holds the confidence intervals for the difference in means for all features, `weighted_ci` holds the confidence intervals for the weighted difference in means for all features, and normalized_weights are the weights themselves. In addition, `significant_genes` is a vector of gene names that are significantly different at the threshold provided. `plotted_ci` returns the ggplot figure of the confidence intervals, see `plotConfidenceIntervals` for documentation. +The output of `projectionDriveR` is a list of length five `mean_ci` holds the confidence intervals for the difference in means for all features, `weighted_mean_ci` holds the confidence intervals for the weighted difference in means for all features, and normalized_weights are the weights themselves. In addition, `sig_genes` is a list of three vectors of gene names that are significantly different at the threshold provided generated from the mean confidence intervals (`unweighted_sig_genes`), the weighted mean confidence intervals (`weighted_sig_genes`) and genes shared between the two (`significant_shared_genes`) . `plotted_ci` returns the ggplot figure of the confidence intervals, see `plotConfidenceIntervals` for documentation. ### Identifying differential features associated with learned patterns @@ -415,9 +415,11 @@ drivers <- projectionDriveR(microglial_counts, #expression matrix pattern_name = pattern_to_weight, #column name pvalue = 1e-5, #pvalue before bonferroni correction display = T, - normalize_pattern = T) #normalize feature weights + normalize_pattern = T, #normalize feature weights + mode = "CI") #confidence interval mode + -conf_intervals <- drivers$mean_ci[drivers$significant_genes,] +conf_intervals <- drivers$mean_ci[drivers$sig_genes$significant_shared_genes,] str(conf_intervals) @@ -436,7 +438,7 @@ The arguments for plotConfidenceIntervals are: **`weights`** weights of features to include as annotation (default = NULL will not include heatmap) **`weights_clip`** quantile of data to clip color scale for improved visualization (default: 0.99) **`weights_vis_norm`** Which processed version of weights to visualize as a heatmap. One of c("none", "quantile"). default = "none" - +**`weighted`** Boolean. Specifies whether confidence intervals are weighted by a pattern or not. Default = "F" ### Output A list of the length three that includes confidence interval plots and relevant info. `ci_estimates_plot` is the point-range plot for the provided estimates. If called from within `projectionDriveR`, the unweighted estimates are used. `feature_order` is the vector of gene names in the order shown in the figure. `weights_heatmap` is a heatmap annotation of the gene loadings, in the same order as above. @@ -473,7 +475,8 @@ pl2 <- plots_list[["weights_heatmap"]] weighted_conf_intervals <- drivers$weighted_mean_ci[gene_order,] plots_list_weighted <- plotConfidenceIntervals(weighted_conf_intervals, sort = F, - pattern_name = pattern_to_weight) + pattern_name = pattern_to_weight, + weighted = T) pl3 <- plots_list_weighted[["ci_estimates_plot"]] + xlab("Difference in weighted group means") + From bf5ff9f1190829a36472024014e05c94f4152235 Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Tue, 16 Jan 2024 14:22:17 -0500 Subject: [PATCH 06/33] Update projectR.Rmd include magick package --- vignettes/projectR.Rmd | 1 + 1 file changed, 1 insertion(+) diff --git a/vignettes/projectR.Rmd b/vignettes/projectR.Rmd index 02ad9fd..ae2f5aa 100644 --- a/vignettes/projectR.Rmd +++ b/vignettes/projectR.Rmd @@ -396,6 +396,7 @@ The output of `projectionDriveR` is a list of length five `mean_ci` holds the co options(width = 60) library(projectR) library(dplyr, warn.conflicts = F) +library(magick) #gene weights x pattern data("retinal_patterns") From 0777b3edd7f761deccafb2613505d48aea043db9 Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Tue, 16 Jan 2024 14:25:16 -0500 Subject: [PATCH 07/33] Update plotting.R include ggpubr import --- R/plotting.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/plotting.R b/R/plotting.R index 0b73ccd..fdc74f2 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -146,6 +146,7 @@ plotConfidenceIntervals <- function( #' @importFrom stats var #' @importFrom ggrepel geom_label_repel #' @importFrom ggrepel geom_text_repel +#' @importFrom ggpubr ggarrange #' @import dplyr #plot FC, weighted and unweighted. Designed for use with the output of projectionDriveRs pdVolcano <- function(result, From 6c5bf026d26a7abc23fe6c131a3877459403009a Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Tue, 16 Jan 2024 14:45:37 -0500 Subject: [PATCH 08/33] Update projectR.Rmd install specific branch --- vignettes/projectR.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/projectR.Rmd b/vignettes/projectR.Rmd index ae2f5aa..fb4c08e 100644 --- a/vignettes/projectR.Rmd +++ b/vignettes/projectR.Rmd @@ -33,7 +33,7 @@ Technological advances continue to spur the exponential growth of biological dat For automatic Bioconductor package installation, start R, and run: ``` -BiocManager::install("projectR") +BiocManager::install("genesofeve/projectR@projectionDriveR") ``` ## Methods From 6baf395c8fca8034ab40481c7e3577b3095540b5 Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Tue, 16 Jan 2024 15:04:15 -0500 Subject: [PATCH 09/33] remove rmd Removed rmd file to check if it is causing build errors --- vignettes/projectR.Rmd | 535 --------------- vignettes/projectR.bib | 99 --- vignettes/projectR.html | 1403 --------------------------------------- vignettes/projectR.tex | 758 --------------------- 4 files changed, 2795 deletions(-) delete mode 100644 vignettes/projectR.Rmd delete mode 100644 vignettes/projectR.bib delete mode 100644 vignettes/projectR.html delete mode 100644 vignettes/projectR.tex diff --git a/vignettes/projectR.Rmd b/vignettes/projectR.Rmd deleted file mode 100644 index fb4c08e..0000000 --- a/vignettes/projectR.Rmd +++ /dev/null @@ -1,535 +0,0 @@ ---- -title: "projectR Vignette" -author: -- "Gaurav Sharma" -- "Charles Shin" -- "Jared N. Slosberg" -- "Loyal A. Goff" -- "Genevieve L. Stein-O'Brien" - -date: "`r BiocStyle::doc_date()`" -output: BiocStyle::html_document -bibliography: projectR.bib -description: | - Functions for the Projection of Weights from PCA, CoGAPS, NMF, Correlation, and Clustering -vignette: > - %\VignetteIndexEntry{projectR} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) -options(scipen = 1, digits = 2) -set.seed(1234) -``` - -# Introduction -Technological advances continue to spur the exponential growth of biological data as illustrated by the rise of the omics—genomics, transcriptomics, epigenomics, proteomics, etc.—each with there own high throughput technologies. In order to leverage the full power of these resources, methods to integrate multiple data sets and data types must be developed. The reciprocal nature of the genomic, transcriptomic, epigenomic, and proteomic biology requires that the data provides a complementary view of cellular function and regulatory organization; however, the technical heterogeneity and massive size of high-throughput data even within a particular omic makes integrated analysis challenging. To address these challenges, we developed projectR, an R package for integrated analysis of high dimensional omic data. projectR uses the relationships defined within a given high dimensional data set, to interrogate related biological phenomena in an entirely new data set. By relying on relative comparisons within data type, projectR is able to circumvent many issues arising from technological variation. For a more extensive example of how the tools in the projectR package can be used for *in silico* experiments, or additional information on the algorithm, see [Stein-O'Brien, et al](https://www.sciencedirect.com/science/article/pii/S2405471219301462) and [Sharma, et al](https://academic.oup.com/bioinformatics/article/36/11/3592/5804979). - -# Getting started with projectR - -## Installation Instructions - -For automatic Bioconductor package installation, start R, and run: -``` -BiocManager::install("genesofeve/projectR@projectionDriveR") -``` - -## Methods - -Projection can roughly be defined as a mapping or transformation of points from one space to another often lower dimensional space. Mathematically, this can described as a function $\varphi(x)=y : \Re^{D} \mapsto \Re^{d}$ s.t. $d \leq D$ for $x \in \Re^{D}, y \in \Re^{d}$ @Barbakh:2009bw . The projectR package uses projection functions defined in a training dataset to interrogate related biological phenomena in an entirely new data set. These functions can be the product of any one of several methods common to "omic" analyses including regression, PCA, NMF, clustering. Individual sections focusing on one specific method are included in the vignette. However, the general design of the projectR function is the same regardless. - -## The base projectR function - -The generic projectR function is executed as follows: -``` -library(projectR) -projectR(data, loadings, dataNames=NULL, loadingsNames=NULL, NP = NULL, full = false) -``` - -### Input Arguments -The inputs that must be set each time are only the data and loadings, with all other inputs having default values. However, incongruities in the feature mapping between the data and loadings, i.e. a different format for the rownames of each object, will throw errors or result in an empty mapping and should be checked before running. To overcoming mismatched feature names in the objects themselves, the ``dataNames`` and `loadingNames` arguments can be manually supplied by the user. - -The arguments are as follows: -**`data`** a dataset to be projected into the pattern space -**`loadings`** a matrix of continous values with unique rownames to be projected -**`dataNames`** a vector containing unique name, i.e. gene names, for the rows of the target dataset to be used to match features with the loadings, if not provided by `rownames(data)`. Order of names in vector must match order of rows in data. -**`loadingsNames`** a vector containing unique names, i.e. gene names, for the rows of loadings to be used to match features with the data, if not provided by `rownames(loadings)`. Order of names in vector must match order of rows in loadings. -**`NP`** vector of integers indicating which columns of loadings object to use. The default of NP = NA will use entire matrix. -**`full`** logical indicating whether to return the full model solution. By default only the new pattern object is returned. - -The `loadings` argument in the generic projectR function is suitable for use with any genernal feature space, or set of feature spaces, whose rows annotation links them to the data to be projected. Ex: the coeffients associated with individual genes as the result of regression analysis or the amplituded values of individual genes as the result of non-negative matrix factorization (NMF). - -### Output -The basic output of the base projectR function, i.e. `full=FALSE`, returns `projectionPatterns` representing relative weights for the samples from the new data in this previously defined feature space, or set of feature spaces. The full output of the base projectR function, i.e. `full=TRUE`, returns `projectionFit`, a list containing `projectionPatterns` and `Projection`. The `Projection` object contains additional information from the proceedure used to obtain the `projectionPatterns`. For the the the base projectR function, `Projection` is the full `lmFit` model from the package `r BiocStyle::Biocpkg("limma")`. - -# PCA projection -Projection of principal components is achieved by matrix multiplication of a new data set by previously generated eigenvectors, or gene loadings. If the original data were standardized such that each gene is centered to zero average expression level, the principal components are normalized eigenvectors of the covariance matrix of the genes. Each PC is ordered according to how much of the variation present in the data they contain. Projection of the original samples into each PC will maximize the variance of the samples in the direction of that component and uncorrelated to previous components. Projection of new data places the new samples into the PCs defined by the original data. Because the components define an orthonormal basis set, they provide an isomorphism between a vector space, $V$, and $\Re^n$ which preserves inner products. If $V$ is an inner product space over $\Re$ with orthonormal basis $B = v_1,...,v_n$ and $v \epsilon V s.t [v]_B = (r_1,...,r_n)$, then finding the coordinate of $v_i$ in $v$ is precisely the inner product of $v$ with $v_i$, i.e. $r_i = \langle v,v_i \rangle$. This formulation is implemented for only those genes belonging to both the new data and the PC space. The **`projectR`** function has S4 method for class `prcomp`. - -## Obtaining PCs to project. -```{r prcomp, warning=FALSE} -# data to define PCs -library(projectR) -data(p.RNAseq6l3c3t) - -# do PCA on RNAseq6l3c3t expression data -pc.RNAseq6l3c3t<-prcomp(t(p.RNAseq6l3c3t)) -pcVAR <- round(((pc.RNAseq6l3c3t$sdev)^2/sum(pc.RNAseq6l3c3t$sdev^2))*100,2) -dPCA <- data.frame(cbind(pc.RNAseq6l3c3t$x,pd.RNAseq6l3c3t)) - -#plot pca -library(ggplot2) -setCOL <- scale_colour_manual(values = c("blue","black","red"), name="Condition:") -setFILL <- scale_fill_manual(values = c("blue","black","red"),guide = FALSE) -setPCH <- scale_shape_manual(values=c(23,22,25,25,21,24),name="Cell Line:") - -pPCA <- ggplot(dPCA, aes(x=PC1, y=PC2, colour=ID.cond, shape=ID.line, - fill=ID.cond)) + - geom_point(aes(size=days),alpha=.6)+ - setCOL + setPCH + setFILL + - scale_size_area(breaks = c(2,4,6), name="Day") + - theme(legend.position=c(0,0), legend.justification=c(0,0), - legend.direction = "horizontal", - panel.background = element_rect(fill = "white",colour=NA), - legend.background = element_rect(fill = "transparent",colour=NA), - plot.title = element_text(vjust = 0,hjust=0,face="bold")) + - labs(title = "PCA of hPSC PolyA RNAseq", - x=paste("PC1 (",pcVAR[1],"% of varience)",sep=""), - y=paste("PC2 (",pcVAR[2],"% of varience)",sep="")) -``` - -## Projecting prcomp objects -```{r projectR.prcomp, warning=FALSE} -# data to project into PCs from RNAseq6l3c3t expression data -data(p.ESepiGen4c1l) - -library(projectR) -PCA2ESepi <- projectR(data = p.ESepiGen4c1l$mRNA.Seq,loadings=pc.RNAseq6l3c3t, -full=TRUE, dataNames=map.ESepiGen4c1l[["GeneSymbols"]]) - -pd.ESepiGen4c1l<-data.frame(Condition=sapply(colnames(p.ESepiGen4c1l$mRNA.Seq), - function(x) unlist(strsplit(x,'_'))[1]),stringsAsFactors=FALSE) -pd.ESepiGen4c1l$color<-c(rep("red",2),rep("green",3),rep("blue",2),rep("black",2)) -names(pd.ESepiGen4c1l$color)<-pd.ESepiGen4c1l$Cond - -dPCA2ESepi<- data.frame(cbind(t(PCA2ESepi[[1]]),pd.ESepiGen4c1l)) - -#plot pca -library(ggplot2) -setEpiCOL <- scale_colour_manual(values = c("red","green","blue","black"), - guide = guide_legend(title="Lineage")) - -pPC2ESepiGen4c1l <- ggplot(dPCA2ESepi, aes(x=PC1, y=PC2, colour=Condition)) + - geom_point(size=5) + setEpiCOL + - theme(legend.position=c(0,0), legend.justification=c(0,0), - panel.background = element_rect(fill = "white"), - legend.direction = "horizontal", - plot.title = element_text(vjust = 0,hjust=0,face="bold")) + - labs(title = "Encode RNAseq in target PC1 & PC2", - x=paste("Projected PC1 (",round(PCA2ESepi[[2]][1],2),"% of varience)",sep=""), - y=paste("Projected PC2 (",round(PCA2ESepi[[2]][2],2),"% of varience)",sep="")) - -``` - -```{r, fig.show='hold', fig.width=10, fig.height=5, echo=FALSE, message= FALSE} -library(gridExtra) -#grid.arrange(pPCA,pPC2ESepiGen4c1l,nrow=1) -``` - -# NMF projection -NMF decomposes a data matrix of $D$ with $N$ genes as rows and $M$ samples as columns, into two matrices, as $D ~ AP$. The pattern matrix P has rows associated with BPs in samples and the amplitude matrix A has columns indicating the relative association of a given gene, where the total number of BPs (k) is an input parameter. CoGAPS and GWCoGAPS seek a pattern matrix (${\bf{P}}$) and the corresponding distribution matrix of weights (${\bf{A}}$) whose product forms a mock data matrix (${\bf{M}}$) that represents the gene-wise data ${\bf{D}}$ within noise limits ($\boldsymbol{\varepsilon}$). That is, -\begin{equation} -{\bf{D}} = {\bf{M}} + \boldsymbol{\varepsilon} = {\bf{A}}{\bf{P}} + \boldsymbol{\varepsilon} ..............(1) -\label{eq:matrixDecomp} -\end{equation} -The number of rows in ${\bf{P}}$ (columns in ${\bf{A}}$) defines the number of biological patterns (k) that CoGAPS/GWCoGAPS will infer from the number of nonorthogonal basis vectors required to span the data space. As in the Bayesian Decomposition algorithm @Ochs2006, the matrices ${\bf{A}}$ and ${\bf{P}}$ in CoGAPS are assumed to have the atomic prior described in @Sibisi1997. In the CoGAPS/GWCoGAPS implementation, $\alpha_{A}$ and $\alpha_{P}$ are corresponding parameters for the expected number of atoms which map to each matrix element in ${\bf{A}}$ and ${\bf{P}}$, respectively. The corresponding matrices ${\bf{A}}$ and ${\bf{P}}$ are found by MCMC sampling. - -Projection of CoGAPS/GWCoGAPS patterns is implemented by solving the factorization in (1) for the new data matrix where ${\bf{A}}$ is the fixed nonorthogonal basis vectors comprising the average of the posterior mean for the CoGAPS/GWCoGAPS simulations performed on the original data. The patterns ${\bf{P}}$ in the new data associated with this amplitude matrix is estimated using the least-squares fit to the new data implemented with the `lmFit` function in the `r BiocStyle::Biocpkg("limma")` package. The `projectR` function has S4 method for class `Linear Embedding Matrix, LME`. - -``` -library(projectR) -projectR(data, loadings,dataNames = NULL, loadingsNames = NULL, - NP = NA, full = FALSE) -``` - -### Input Arguments -The inputs that must be set each time are only the data and patterns, with all other inputs having default values. However, inconguities between gene names--rownames of the loadings object and either rownames of the data object will throw errors and, subsequently, should be checked before running. - -The arguments are as follows: - -**`data`** a target dataset to be projected into the pattern space -**`loadings`** a CogapsResult object -**`dataNames`** rownames (eg. gene names) of the target dataset, if different from existing rownames of data -**`loadingsNames`** loadingsNames rownames (eg. gene names) of the loadings to be matched with dataNames -**`NP`** vector of integers indicating which columns of loadings object to use. The default of NP = NA will use entire matrix. -**`full`** logical indicating whether to return the full model solution. By default only the new pattern object is returned. - - -### Output -The basic output of the base projectR function, i.e. `full=FALSE`, returns `projectionPatterns` representing relative weights for the samples from the new data in this previously defined feature space, or set of feature spaces. The full output of the base projectR function, i.e. `full=TRUE`, returns `projectionFit`, a list containing `projectionPatterns` and `Projection`. The `Projection` object contains additional information from the procedure used to obtain the `projectionPatterns`. For the the the base projectR function, `Projection` is the full `lmFit` model from the package `r BiocStyle::Biocpkg('limma')`. - -## Obtaining CoGAPS patterns to project. - -```{r} -# get data -library(projectR) -AP <- get(data("AP.RNAseq6l3c3t")) #CoGAPS run data -AP <- AP$Amean -# heatmap of gene weights for CoGAPs patterns -library(gplots) -par(mar=c(1,1,1,1)) -pNMF<-heatmap.2(as.matrix(AP),col=bluered, trace='none', - distfun=function(c) as.dist(1-cor(t(c))) , - cexCol=1,cexRow=.5,scale = "row", - hclustfun=function(x) hclust(x, method="average") - ) -``` - -## Projecting CoGAPS objects -```{r} -# data to project into PCs from RNAseq6l3c3t expression data -library(projectR) -data('p.ESepiGen4c1l4') -data('p.RNAseq6l3c3t') - -NMF2ESepi <- projectR(p.ESepiGen4c1l$mRNA.Seq,loadings=AP,full=TRUE, - dataNames=map.ESepiGen4c1l[["GeneSymbols"]]) - -dNMF2ESepi<- data.frame(cbind(t(NMF2ESepi),pd.ESepiGen4c1l)) - -#plot pca -library(ggplot2) -setEpiCOL <- scale_colour_manual(values = c("red","green","blue","black"), -guide = guide_legend(title="Lineage")) - -pNMF2ESepiGen4c1l <- ggplot(dNMF2ESepi, aes(x=X1, y=X2, colour=Condition)) + - geom_point(size=5) + setEpiCOL + - theme(legend.position=c(0,0), legend.justification=c(0,0), - panel.background = element_rect(fill = "white"), - legend.direction = "horizontal", - plot.title = element_text(vjust = 0,hjust=0,face="bold")) - labs(title = "Encode RNAseq in target PC1 & PC2", - x=paste("Projected PC1 (",round(PCA2ESepi[[2]][1],2),"% of varience)",sep=""), - y=paste("Projected PC2 (",round(PCA2ESepi[[2]][2],2),"% of varience)",sep="")) -``` - -# Clustering projection - -As canonical projection is not defined for clustering objects, the projectR package offers two transfer learning inspired methods to achieve the "projection" of clustering objects. These methods are defined by the function used to quantify and transfer the relationships which define each cluster in the original data set to the new dataset. Briefly, `cluster2pattern` uses the corelation of each genes expression to the mean of each cluster to define continuous weights. These weights are output as a `pclust` object which can serve as input to `projectR`. Alternatively, the `intersectoR` function can be used to test for significant overlap between two clustering objects. Both `cluster2pattern` and `intersectoR` methods are coded for a generic list structure with additional S4 class methods for kmeans and hclust objects. Further details and examples are provided in the followin respecitive sections. - -## cluster2pattern - -`cluster2pattern` uses the corelation of each genes expression to the mean of each cluster to define continuous weights. - -``` -library(projectR) -data(p.RNAseq6l3c3t) - - -nP<-5 -kClust<-kmeans(t(p.RNAseq6l3c3t),centers=nP) -kpattern<-cluster2pattern(clusters = kClust, NP = nP, data = p.RNAseq6l3c3t) -kpattern - -cluster2pattern(clusters = NA, NP = NA, data = NA) -``` - -### Input Arguments -The inputs that must be set each time are the clusters and data. - -The arguments are as follows: - -**`clusters`** a clustering object -**`NP`** either the number of clusters desired or the subset of clusters to use -**`data`** data used to make clusters object - - -### Output -The output of the `cluster2pattern` function is a `pclust` class object; specifically, a matrix of genes (rows) by clusters (columns). A gene's value outside of its assigned cluster is zero. For the cluster containing a given gene, the gene's value is the correlation of the gene's expression to the mean of that cluster. - - -## intersectoR - -`intersectoR` function can be used to test for significant overlap between two clustering objects. The base function finds and tests the intersecting values of two sets of lists, presumably the genes associated with patterns in two different datasets. S4 class methods for `hclust` and `kmeans` objects are also available. - -``` -library(projectR) -intersectoR(pSet1 = NA, pSet2 = NA, pval = 0.05, full = FALSE, k = NULL) -``` - -### Input Arguments -The inputs that must be set each time are the clusters and data. - -The arguments are as follows: - -**`pSet1`** a list for a set of patterns where each entry is a set of genes associated with a single pattern -**`pSet2`** a list for a second set of patterns where each entry is a set of genes associated with a single pattern -**`pval`** the maximum p-value considered significant -**`full`** logical indicating whether to return full data frame of signigicantly overlapping sets. Default is false will return summary matrix. -**`k`** numeric giving cut height for hclust objects, if vector arguments will be applied to pSet1 and pSet2 in that order - - -### Output -The output of the `intersectoR` function is a summary matrix showing the sets with statistically significant overlap under the specified $p$-value threshold based on a hypergeometric test. If `full==TRUE` the full data frame of significantly overlapping sets will also be returned. - -# Correlation based projection - -Correlation based projection requires a matrix of gene-wise correlation values to serve as the Pattern input to the `projectR` function. This matrix can be user-generated or the result of the `correlateR` function included in the projectR package. User-generated matrixes with each row corresponding to an individual gene can be input to the generic `projectR` function. The `correlateR` function allows users to create a weight matrix for projection with values quantifying the within dataset correlation of each genes expression to the expression pattern of a particular gene or set of genes as follows. - -## correlateR - -``` -library(projectR) -correlateR(genes = NA, dat = NA, threshtype = "R", threshold = 0.7, absR = FALSE, ...) -``` - -### Input Arguments -The inputs that must be set each time are only the genes and data, with all other inputs having default values. - -The arguments are as follows: - -**`genes`** gene or character vector of genes for reference expression pattern dat -**`data`** matrix or data frame with genes to be used for to calculate correlation -**`threshtype`** Default "R" indicates thresholding by R value or equivalent. Alternatively, "N" indicates a numerical cut off. -**`threshold`** numeric indicating value at which to make threshold -**`absR`** logical indicating where to include both positive and negatively correlated genes -**`...`** addtion imputes to the cor function - - -### Output -The output of the `correlateR` function is a `correlateR` class object. Specifically, a matrix of correlation values for those genes whose expression pattern pattern in the dataset is correlated (and anti-correlated if absR=TRUE) above the value given in as the threshold arguement. As this information may be useful in its own right, it is recommended that users inspect the `correlateR` object before using it as input to the `projectR` function. - -## Obtaining and visualizing `correlateR` objects. - -```{r correlateR-exp} -# data to -library(projectR) -data("p.RNAseq6l3c3t") - -# get genes correlated to T -cor2T<-correlateR(genes="T", dat=p.RNAseq6l3c3t, threshtype="N", threshold=10, absR=TRUE) -cor2T <- cor2T@corM -### heatmap of genes more correlated to T -indx<-unlist(sapply(cor2T,rownames)) -indx <- as.vector(indx) -colnames(p.RNAseq6l3c3t)<-pd.RNAseq6l3c3t$sampleX -library(reshape2) -pm.RNAseq6l3c3t<-melt(cbind(p.RNAseq6l3c3t[indx,],indx)) - -library(gplots) -library(ggplot2) -library(viridis) -pCorT<-ggplot(pm.RNAseq6l3c3t, aes(variable, indx, fill = value)) + - geom_tile(colour="gray20", size=1.5, stat="identity") + - scale_fill_viridis(option="B") + - xlab("") + ylab("") + - scale_y_discrete(limits=indx) + - ggtitle("Ten genes most highly pos & neg correlated with T") + - theme( - panel.background = element_rect(fill="gray20"), - panel.border = element_rect(fill=NA,color="gray20", size=0.5, linetype="solid"), - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - axis.line = element_blank(), - axis.ticks = element_blank(), - axis.text = element_text(size=rel(1),hjust=1), - axis.text.x = element_text(angle = 90,vjust=.5), - legend.text = element_text(color="white", size=rel(1)), - legend.background = element_rect(fill="gray20"), - legend.position = "bottom", - legend.title=element_blank() -) - -``` -```{r, fig.show='hold', fig.width=10, fig.height=5, echo=FALSE} -pCorT -``` - -## Projecting correlateR objects. -```{r} -# data to project into from RNAseq6l3c3t expression data -data(p.ESepiGen4c1l) - -library(projectR) -cor2ESepi <- projectR(p.ESepiGen4c1l$mRNA.Seq,loadings=cor2T[[1]],full=FALSE, - dataNames=map.ESepiGen4c1l$GeneSymbols) - -``` - - -# Differential features identification. - -## projectionDriveR - -Given loadings that define the weight of features (genes) in a given latent space (e.g. PCA, NMF), and the use of these patterns in samples, it is of interest to look at differential usage of these features between conditions. These conditions may be defined by user-defined annotations of cell type or by differential usage of a (projected) pattern. By examining differences in gene expression, weighted by the loadings that define their importance in a specific latent space, a unique understanding of differential expression in that context can be gained. This approach was originally proposed and developed in (Baraban et al, 2021), which demonstrates its utility in cross-celltype and cross-species interpretation of pattern usages. - -``` -library(projectR) -projectionDriveR(cellgroup1, cellgroup2, loadings, loadingsNames = NULL, - pvalue, pattern_name, display = T, normalize_pattern = T, mode = "CI") - -``` - -### Input Arguments -The required inputs are two feature by sample (e.g. gene by cell) matrices to be compared, the loadings that define the feature weights, and the name of the pattern (column of feature loadings). If applicable, the expression matrices should already be corrected for variables such as sequencing depth. - -The arguments for projectionDriveR are: - -**`cellgroup1`** Matrix 1 with features as rows, samples as columns. -**`cellgroup2`** Matrix 2 with features as rows, samples as columns. -**`loadings`** Matrix or dataframe with features as rows, columns as patterns. Values define feature weights in that space -**`loadingsNames`** Vector of names corresponding to rows of loadings. By default the rownames of loadings will be used -**`pattern_name`** the column name of the loadings by which the features will be weighted -**`pvalue`** Determines the significance of the confidence interval to be calculated between the difference of means -**`display`** Boolean. Whether or not to plot the estimates of significant features. Default = T -**`normalize_pattern`** Boolean. Whether or not to normalize the average feature weight. Default = T -**`mode`** 'CI' or 'PV'. Specifies whether to run projectionDriveR in confidence interval mode or to generate p values. Default = "CI" - -### Output -The output of `projectionDriveR` is a list of length five `mean_ci` holds the confidence intervals for the difference in means for all features, `weighted_mean_ci` holds the confidence intervals for the weighted difference in means for all features, and normalized_weights are the weights themselves. In addition, `sig_genes` is a list of three vectors of gene names that are significantly different at the threshold provided generated from the mean confidence intervals (`unweighted_sig_genes`), the weighted mean confidence intervals (`weighted_sig_genes`) and genes shared between the two (`significant_shared_genes`) . `plotted_ci` returns the ggplot figure of the confidence intervals, see `plotConfidenceIntervals` for documentation. - -### Identifying differential features associated with learned patterns - - -```{r projectionDriver, message = F, out.width="100%"} -options(width = 60) -library(projectR) -library(dplyr, warn.conflicts = F) -library(magick) - -#gene weights x pattern -data("retinal_patterns") - -#size-normed, log expression -data("microglial_counts") - -#size-normed, log expression -data("glial_counts") - -#the features by which to weight the difference in expression -pattern_to_weight <- "Pattern.24" -drivers <- projectionDriveR(microglial_counts, #expression matrix - glial_counts, #expression matrix - loadings = retinal_patterns, #feature x pattern dataframe - loadingsNames = NULL, - pattern_name = pattern_to_weight, #column name - pvalue = 1e-5, #pvalue before bonferroni correction - display = T, - normalize_pattern = T, #normalize feature weights - mode = "CI") #confidence interval mode - - -conf_intervals <- drivers$mean_ci[drivers$sig_genes$significant_shared_genes,] - -str(conf_intervals) - -``` - -## plotConfidenceIntervals - -### Input -The arguments for plotConfidenceIntervals are: - -**`confidence_intervals`** A dataframe of features x estimates -**`interval_name`** names of columns that contain the low and high estimates, respectively. (default: c("low","high")) -**`pattern_name`** string to use as the title for the plots -**`sort`** Boolean. Whether or not to sort genes by their estimates (default = T) -**`genes`** a vector with names of genes to include in plot. If sort=F, estimates will be plotted in this order (default = NULL will include all genes.) -**`weights`** weights of features to include as annotation (default = NULL will not include heatmap) -**`weights_clip`** quantile of data to clip color scale for improved visualization (default: 0.99) -**`weights_vis_norm`** Which processed version of weights to visualize as a heatmap. One of c("none", "quantile"). default = "none" -**`weighted`** Boolean. Specifies whether confidence intervals are weighted by a pattern or not. Default = "F" - -### Output -A list of the length three that includes confidence interval plots and relevant info. `ci_estimates_plot` is the point-range plot for the provided estimates. If called from within `projectionDriveR`, the unweighted estimates are used. `feature_order` is the vector of gene names in the order shown in the figure. `weights_heatmap` is a heatmap annotation of the gene loadings, in the same order as above. - -### Customize plotting of confidence intervals - -```{r} -suppressWarnings(library(cowplot)) -#order in ascending order of estimates -conf_intervals <- conf_intervals %>% mutate(mid = (high+low)/2) %>% arrange(mid) -gene_order <- rownames(conf_intervals) - -#add text labels for top and bottom n genes -conf_intervals$label_name <- NA_character_ -n <- 2 -idx <- c(1:n, (dim(conf_intervals)[1]-(n-1)):dim(conf_intervals)[1]) -gene_ids <- gene_order[idx] -conf_intervals$label_name[idx] <- gene_ids - -#the labels above can now be used as ggplot aesthetics -plots_list <- plotConfidenceIntervals(conf_intervals, #mean difference in expression confidence intervals - sort = F, #should genes be sorted by estimates - weights = drivers$normalized_weights[rownames(conf_intervals)], - pattern_name = pattern_to_weight, - weights_clip = 0.99, - weights_vis_norm = "none") - -pl1 <- plots_list[["ci_estimates_plot"]] + - ggrepel::geom_label_repel(aes(label = label_name), max.overlaps = 20, force = 50) - -pl2 <- plots_list[["weights_heatmap"]] - -#now plot the weighted differences -weighted_conf_intervals <- drivers$weighted_mean_ci[gene_order,] -plots_list_weighted <- plotConfidenceIntervals(weighted_conf_intervals, - sort = F, - pattern_name = pattern_to_weight, - weighted = T) - -pl3 <- plots_list_weighted[["ci_estimates_plot"]] + - xlab("Difference in weighted group means") + - theme(axis.title.y = element_blank(), axis.ticks.y = element_blank(), axis.text.y = element_blank()) - -cowplot::plot_grid(pl1, pl2, pl3, align = "h", rel_widths = c(1,.4, 1), ncol = 3) - - - -``` - -## multivariateAnalysisR - -This function performs multivariate analysis on different clusters within a dataset, which in this case is restricted to Seurat Object. Clusters are identified as those satisfying the conditions specified in their corresponding dictionaries. `multivariateAnalysisR` performs two tests: Analysis of Variance (ANOVA) and Confidence Interval (CI) evaluations. ANOVA is performed to understand the general differentiation between clusters through the lens of a specified pattern. CI is to visualize pair-wise differential expressions between two clusters for each pattern. Researchers can visually understand both the macroscopic and microscopic differential uses of each pattern across different clusters through this function. - -``` -library(projectR) -multivariateAnalysisR <- function(significanceLevel = 0.05, patternKeys, seuratobj, - dictionaries, customNames = NULL, exclusive = TRUE, - exportFolder = "", ANOVAwidth = 1000, - ANOVAheight = 1000, CIwidth = 1000, CIheight = 1000, - CIspacing = 1) -``` - -### Input Arguments -The required inputs are `patternKeys` (list of strings indicating the patterns to be evaluated), `seuratobj` (the Seurat Object data containing both clusters and patterns), and `dictionaries` (list of dictionary where each dictionary indicates the conditions each corresponding cluster has to satisfy). - -The arguments for `multivariateAnalysisR` are: - -**`significanceLevel`** Double value for testing significance in ANOVA test. -**`patternKeys`** List of strings indicating pattern subsets from seuratobj to be analyzed. -**`seuratobj`** Seurat Object Data containing patternKeys in meta.data. -**`dictionaries`** List of dictionaries indicating clusters to be compared. -**`customNames`** List of custom names for clusters in corresponding order. -**`exclusive`** Boolean value for determining interpolation between params in clusters. -**`exportFolder`** Name of folder to store exported graphs and CSV files. -**`ANOVAwidth`** Width of ANOVA png. -**`ANOVAheight`** Height of ANOVA png. -**`CIwidth`** Width of CI png. -**`CIheight`** Height of CI png. -**`CIspacing`** Spacing between each CI in CI graph. - - -### Output -`multivariateAnalysisR` returns a sorted list of the generated ANOVA and CI values. It also exports two pairs of exported PNG/CSV files: one for ANOVA analysis, another for CI. From the ANOVA analysis, researchers can see the general ranking of differential uses of patterns across the specified clusters. From the CI analysis, researchers can identify the specific differential use cases between every pair of clusters. - -### Comparing differential uses of patterns across different clusters -Demonstrative example will be added soon. - -# References - - - - - diff --git a/vignettes/projectR.bib b/vignettes/projectR.bib deleted file mode 100644 index 2b203e8..0000000 --- a/vignettes/projectR.bib +++ /dev/null @@ -1,99 +0,0 @@ -%% Created using Papers on Tue, 21 Mar 2017. -%% http://papersapp.com/papers/ - -@article{Li:2004ey, -author = {Li, Q and Ye, J and Kambhamettu, C}, -title = {{Linear projection methods in face recognition under unconstrained illuminations: A comparative study}}, -journal = {Computer Vision and Pattern {\ldots}}, -year = {2004} -} - -@article{Fertig:2010ei, -author = {Fertig, Elana J and Ding, Jie and Favorov, Alexander V and Parmigiani, Giovanni and Ochs, Michael F}, -title = {{CoGAPS: an R/C++ package to identify patterns and biological process activity in transcriptomic data.}}, -journal = {Bioinformatics}, -year = {2010}, -volume = {26}, -number = {21}, -pages = {2792--2793}, -month = nov -} - -@article{Baffi:1999jz, -author = {Baffi, G and Martin, E B and Morris, A J}, -title = {{Non-linear projection to latent structures revisited: the quadratic PLS algorithm}}, -journal = {Computers {\&} Chemical Engineering}, -year = {1999}, -volume = {23}, -number = {3}, -pages = {395--411}, -month = feb -} - -@book{Anonymous:kur3KWsv, -title = {{Non-Standard Parameter Adaptation for Exploratory Data Analysis}} -} - -@article{Pan:2010dm, -author = {Pan, Sinno Jialin and Yang, Qiang}, -title = {{A Survey on Transfer Learning}}, -journal = {IEEE Transactions on Knowledge and Data Engineering}, -year = {2010}, -volume = {22}, -number = {10}, -pages = {1345--1359} -} - -@incollection{Barbakh:2009bw, -author = {Barbakh, Wesam Ashour and Wu, Ying and Fyfe, Colin}, -title = {{Review of Linear Projection Methods}}, -booktitle = {Non-Standard Parameter Adaptation for Exploratory Data Analysis}, -year = {2009}, -pages = {29--48}, -publisher = {Springer Berlin Heidelberg}, -address = {Berlin, Heidelberg} -} - -@article{Smyth:2004vq, -author = {Smyth, Gordon K}, -title = {{Linear models and empirical bayes methods for assessing differential expression in microarray experiments}}, -journal = {Stat Appl Genet Mol Biol}, -year = {2004}, -volume = {3}, -number = {1}, -pages = {3} -} - -@article{Sibisi1997, -author = {Sibisi, Sibusiso and Skilling, John}, -title = {Prior Distributions on Measure Space}, -journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, -volume = {59}, -number = {1}, -pages = {217-235}, -keywords = {density estimation, infinite divisibility, infinitely divisible process, kernel function, Lévy measure, π-process, spatial correlation}, -doi = {10.1111/1467-9868.00065}, -url = { -https://rss.onlinelibrary.wiley.com/doi/abs/10.1111/1467-9868.00065}, -eprint = {https://rss.onlinelibrary.wiley.com/doi/pdf/10.1111/1467-9868.00065}, -abstract = {A measure is the formal representation of the non-negative additive functions that abound in science. We review and develop the art of assigning Bayesian priors to measures. Where necessary, spatial correlation is delegated to correlating kernels imposed on otherwise uncorrelated priors. The latter must be infinitely divisible (ID) and hence described by the Lévy–Khinchin representation. Thus the fundamental object is the Lévy measure, the choice of which corresponds to different ID process priors. The general case of a Lévy measure comprising a mixture of assigned base measures leads to a prior process comprising a convolution of corresponding processes. Examples involving a single base measure are the gamma process, the Dirichlet process (for the normalized case) and the Poisson process. We also discuss processes that we call the supergamma and super-Dirichlet processes, which are double base measure generalizations of the gamma and Dirichlet processes. Examples of multiple and continuum base measures are also discussed. We conclude with numerical examples of density estimation.}, -year = {1997} -} - -@article{Ochs2006, -author="Wang, Guoli -and Kossenkov, Andrew V. -and Ochs, Michael F.", -title="LS-NMF: A modified non-negative matrix factorization algorithm utilizing uncertainty estimates", -journal="BMC Bioinformatics", -year="2006", -month="Mar", -day="28", -volume="7", -number="1", -pages="175", -abstract="Non-negative matrix factorisation (NMF), a machine learning algorithm, has been applied to the analysis of microarray data. A key feature of NMF is the ability to identify patterns that together explain the data as a linear combination of expression signatures. Microarray data generally includes individual estimates of uncertainty for each gene in each condition, however NMF does not exploit this information. Previous work has shown that such uncertainties can be extremely valuable for pattern recognition.", -issn="1471-2105", -doi="10.1186/1471-2105-7-175", -url="https://doi.org/10.1186/1471-2105-7-175" -} diff --git a/vignettes/projectR.html b/vignettes/projectR.html deleted file mode 100644 index 90bada9..0000000 --- a/vignettes/projectR.html +++ /dev/null @@ -1,1403 +0,0 @@ - - - - - - - - - - - - - - - - - - - -projectR Vignette - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - -

Contents

- - -
-

1 Introduction

-

Technological advances continue to spur the exponential growth of biological data as illustrated by the rise of the omics—genomics, transcriptomics, epigenomics, proteomics, etc.—each with there own high throughput technologies. In order to leverage the full power of these resources, methods to integrate multiple data sets and data types must be developed. The reciprocal nature of the genomic, transcriptomic, epigenomic, and proteomic biology requires that the data provides a complementary view of cellular function and regulatory organization; however, the technical heterogeneity and massive size of high-throughput data even within a particular omic makes integrated analysis challenging. To address these challenges, we developed projectR, an R package for integrated analysis of high dimensional omic data. projectR uses the relationships defined within a given high dimensional data set, to interrogate related biological phenomena in an entirely new data set. By relying on relative comparisons within data type, projectR is able to circumvent many issues arising from technological variation. For a more extensive example of how the tools in the projectR package can be used for in silico experiments, or additional information on the algorithm, see Stein-O’Brien, et al and Sharma, et al.

-
-
-

2 Getting started with projectR

-
-

2.1 Installation Instructions

-

For automatic Bioconductor package installation, start R, and run:

-
BiocManager::install("projectR")
-
-
-

2.2 Methods

-

Projection can roughly be defined as a mapping or transformation of points from one space to another often lower dimensional space. Mathematically, this can described as a function \(\varphi(x)=y : \Re^{D} \mapsto \Re^{d}\) s.t. \(d \leq D\) for \(x \in \Re^{D}, y \in \Re^{d}\) Barbakh, Wu, and Fyfe (2009) . The projectR package uses projection functions defined in a training dataset to interrogate related biological phenomena in an entirely new data set. These functions can be the product of any one of several methods common to “omic” analyses including regression, PCA, NMF, clustering. Individual sections focusing on one specific method are included in the vignette. However, the general design of the projectR function is the same regardless.

-
-
-

2.3 The base projectR function

-

The generic projectR function is executed as follows:

-
library(projectR)
-projectR(data, loadings, dataNames=NULL, loadingsNames=NULL, NP = NULL, full = false)
-
-

2.3.1 Input Arguments

-

The inputs that must be set each time are only the data and loadings, with all other inputs having default values. However, incongruities in the feature mapping between the data and loadings, i.e. a different format for the rownames of each object, will throw errors or result in an empty mapping and should be checked before running. To overcoming mismatched feature names in the objects themselves, the dataNames and loadingNames arguments can be manually supplied by the user.

-

The arguments are as follows:
-data a dataset to be projected into the pattern space
-loadings a matrix of continous values with unique rownames to be projected
-dataNames a vector containing unique name, i.e. gene names, for the rows of the target dataset to be used to match features with the loadings, if not provided by rownames(data). Order of names in vector must match order of rows in data.
-loadingsNames a vector containing unique names, i.e. gene names, for the rows of loadings to be used to match features with the data, if not provided by rownames(loadings). Order of names in vector must match order of rows in loadings.
-NP vector of integers indicating which columns of loadings object to use. The default of NP = NA will use entire matrix.
-full logical indicating whether to return the full model solution. By default only the new pattern object is returned.

-

The loadings argument in the generic projectR function is suitable for use with any genernal feature space, or set of feature spaces, whose rows annotation links them to the data to be projected. Ex: the coeffients associated with individual genes as the result of regression analysis or the amplituded values of individual genes as the result of non-negative matrix factorization (NMF).

-
-
-

2.3.2 Output

-

The basic output of the base projectR function, i.e. full=FALSE, returns projectionPatterns representing relative weights for the samples from the new data in this previously defined feature space, or set of feature spaces. The full output of the base projectR function, i.e. full=TRUE, returns projectionFit, a list containing projectionPatterns and Projection. The Projection object contains additional information from the proceedure used to obtain the projectionPatterns. For the the the base projectR function, Projection is the full lmFit model from the package limma.

-
-
-
-
-

3 PCA projection

-

Projection of principal components is achieved by matrix multiplication of a new data set by previously generated eigenvectors, or gene loadings. If the original data were standardized such that each gene is centered to zero average expression level, the principal components are normalized eigenvectors of the covariance matrix of the genes. Each PC is ordered according to how much of the variation present in the data they contain. Projection of the original samples into each PC will maximize the variance of the samples in the direction of that component and uncorrelated to previous components. Projection of new data places the new samples into the PCs defined by the original data. Because the components define an orthonormal basis set, they provide an isomorphism between a vector space, \(V\), and \(\Re^n\) which preserves inner products. If \(V\) is an inner product space over \(\Re\) with orthonormal basis \(B = v_1,...,v_n\) and \(v \epsilon V s.t [v]_B = (r_1,...,r_n)\), then finding the coordinate of \(v_i\) in \(v\) is precisely the inner product of \(v\) with \(v_i\), i.e. \(r_i = \langle v,v_i \rangle\). This formulation is implemented for only those genes belonging to both the new data and the PC space. The projectR function has S4 method for class prcomp.

-
-

3.1 Obtaining PCs to project.

-
# data to define PCs
-library(projectR)
-data(p.RNAseq6l3c3t)
-
-# do PCA on RNAseq6l3c3t expression data
-pc.RNAseq6l3c3t<-prcomp(t(p.RNAseq6l3c3t))
-pcVAR <- round(((pc.RNAseq6l3c3t$sdev)^2/sum(pc.RNAseq6l3c3t$sdev^2))*100,2)
-dPCA <- data.frame(cbind(pc.RNAseq6l3c3t$x,pd.RNAseq6l3c3t))
-
-#plot pca
-library(ggplot2)
-setCOL <- scale_colour_manual(values = c("blue","black","red"), name="Condition:")
-setFILL <- scale_fill_manual(values = c("blue","black","red"),guide = FALSE)
-setPCH <- scale_shape_manual(values=c(23,22,25,25,21,24),name="Cell Line:")
-
-pPCA <- ggplot(dPCA, aes(x=PC1, y=PC2, colour=ID.cond, shape=ID.line,
-        fill=ID.cond)) +
-        geom_point(aes(size=days),alpha=.6)+
-        setCOL + setPCH  + setFILL +
-        scale_size_area(breaks = c(2,4,6), name="Day") +
-        theme(legend.position=c(0,0), legend.justification=c(0,0),
-              legend.direction = "horizontal",
-              panel.background = element_rect(fill = "white",colour=NA),
-              legend.background = element_rect(fill = "transparent",colour=NA),
-              plot.title = element_text(vjust = 0,hjust=0,face="bold")) +
-        labs(title = "PCA of hPSC PolyA RNAseq",
-            x=paste("PC1 (",pcVAR[1],"% of varience)",sep=""),
-            y=paste("PC2 (",pcVAR[2],"% of varience)",sep=""))
-
-
-

3.2 Projecting prcomp objects

-
# data to project into PCs from RNAseq6l3c3t expression data
-data(p.ESepiGen4c1l)
-
-library(projectR)
-PCA2ESepi <- projectR(data = p.ESepiGen4c1l$mRNA.Seq,loadings=pc.RNAseq6l3c3t,
-full=TRUE, dataNames=map.ESepiGen4c1l[["GeneSymbols"]])
-
## [1] "93 row names matched between data and loadings"
-## [1] "Updated dimension of data: 93 9"
-
pd.ESepiGen4c1l<-data.frame(Condition=sapply(colnames(p.ESepiGen4c1l$mRNA.Seq),
-  function(x) unlist(strsplit(x,'_'))[1]),stringsAsFactors=FALSE)
-pd.ESepiGen4c1l$color<-c(rep("red",2),rep("green",3),rep("blue",2),rep("black",2))
-names(pd.ESepiGen4c1l$color)<-pd.ESepiGen4c1l$Cond
-
-dPCA2ESepi<- data.frame(cbind(t(PCA2ESepi[[1]]),pd.ESepiGen4c1l))
-
-#plot pca
-library(ggplot2)
-setEpiCOL <- scale_colour_manual(values = c("red","green","blue","black"),
-  guide = guide_legend(title="Lineage"))
-
-pPC2ESepiGen4c1l <- ggplot(dPCA2ESepi, aes(x=PC1, y=PC2, colour=Condition)) +
-  geom_point(size=5) + setEpiCOL +
-  theme(legend.position=c(0,0), legend.justification=c(0,0),
-  panel.background = element_rect(fill = "white"),
-  legend.direction = "horizontal",
-  plot.title = element_text(vjust = 0,hjust=0,face="bold")) +
-  labs(title = "Encode RNAseq in target PC1 & PC2",
-  x=paste("Projected PC1 (",round(PCA2ESepi[[2]][1],2),"% of varience)",sep=""),
-  y=paste("Projected PC2 (",round(PCA2ESepi[[2]][2],2),"% of varience)",sep=""))
-
-
-
-

4 NMF projection

-

NMF decomposes a data matrix of \(D\) with \(N\) genes as rows and \(M\) samples as columns, into two matrices, as \(D ~ AP\). The pattern matrix P has rows associated with BPs in samples and the amplitude matrix A has columns indicating the relative association of a given gene, where the total number of BPs (k) is an input parameter. CoGAPS and GWCoGAPS seek a pattern matrix (\({\bf{P}}\)) and the corresponding distribution matrix of weights (\({\bf{A}}\)) whose product forms a mock data matrix (\({\bf{M}}\)) that represents the gene-wise data \({\bf{D}}\) within noise limits (\(\boldsymbol{\varepsilon}\)). That is, -\[\begin{equation} -{\bf{D}} = {\bf{M}} + \boldsymbol{\varepsilon} = {\bf{A}}{\bf{P}} + \boldsymbol{\varepsilon} ..............(1) -\label{eq:matrixDecomp} -\end{equation}\] -The number of rows in \({\bf{P}}\) (columns in \({\bf{A}}\)) defines the number of biological patterns (k) that CoGAPS/GWCoGAPS will infer from the number of nonorthogonal basis vectors required to span the data space. As in the Bayesian Decomposition algorithm Wang, Kossenkov, and Ochs (2006), the matrices \({\bf{A}}\) and \({\bf{P}}\) in CoGAPS are assumed to have the atomic prior described in Sibisi and Skilling (1997). In the CoGAPS/GWCoGAPS implementation, \(\alpha_{A}\) and \(\alpha_{P}\) are corresponding parameters for the expected number of atoms which map to each matrix element in \({\bf{A}}\) and \({\bf{P}}\), respectively. The corresponding matrices \({\bf{A}}\) and \({\bf{P}}\) are found by MCMC sampling.

-

Projection of CoGAPS/GWCoGAPS patterns is implemented by solving the factorization in (1) for the new data matrix where \({\bf{A}}\) is the fixed nonorthogonal basis vectors comprising the average of the posterior mean for the CoGAPS/GWCoGAPS simulations performed on the original data. The patterns \({\bf{P}}\) in the new data associated with this amplitude matrix is estimated using the least-squares fit to the new data implemented with the lmFit function in the limma package. The projectR function has S4 method for class Linear Embedding Matrix, LME.

-
library(projectR)
-projectR(data, loadings,dataNames = NULL, loadingsNames = NULL,
-     NP = NA, full = FALSE)
-
-

4.0.1 Input Arguments

-

The inputs that must be set each time are only the data and patterns, with all other inputs having default values. However, inconguities between gene names–rownames of the loadings object and either rownames of the data object will throw errors and, subsequently, should be checked before running.

-

The arguments are as follows:

-

data a target dataset to be projected into the pattern space
-loadings a CogapsResult object
-dataNames rownames (eg. gene names) of the target dataset, if different from existing rownames of data
-loadingsNames loadingsNames rownames (eg. gene names) of the loadings to be matched with dataNames
-NP vector of integers indicating which columns of loadings object to use. The default of NP = NA will use entire matrix.
-full logical indicating whether to return the full model solution. By default only the new pattern object is returned.

-
-
-

4.0.2 Output

-

The basic output of the base projectR function, i.e. full=FALSE, returns projectionPatterns representing relative weights for the samples from the new data in this previously defined feature space, or set of feature spaces. The full output of the base projectR function, i.e. full=TRUE, returns projectionFit, a list containing projectionPatterns and Projection. The Projection object contains additional information from the procedure used to obtain the projectionPatterns. For the the the base projectR function, Projection is the full lmFit model from the package limma.

-
-
-

4.1 Obtaining CoGAPS patterns to project.

-
# get data
-library(projectR)
-AP <- get(data("AP.RNAseq6l3c3t")) #CoGAPS run data
-AP <- AP$Amean
-# heatmap of gene weights for CoGAPs patterns
-library(gplots)
-
## 
-## Attaching package: 'gplots'
-
## The following object is masked from 'package:stats':
-## 
-##     lowess
-
par(mar=c(1,1,1,1))
-pNMF<-heatmap.2(as.matrix(AP),col=bluered, trace='none',
-          distfun=function(c) as.dist(1-cor(t(c))) ,
-          cexCol=1,cexRow=.5,scale = "row",
-          hclustfun=function(x) hclust(x, method="average")
-      )
-

-
-
-

4.2 Projecting CoGAPS objects

-
# data to project into PCs from RNAseq6l3c3t expression data
-library(projectR)
-data('p.ESepiGen4c1l4')
-
## Warning in data("p.ESepiGen4c1l4"): data set 'p.ESepiGen4c1l4' not found
-
data('p.RNAseq6l3c3t')
-
-NMF2ESepi <- projectR(p.ESepiGen4c1l$mRNA.Seq,loadings=AP,full=TRUE,
-    dataNames=map.ESepiGen4c1l[["GeneSymbols"]])
-
## [1] "93 row names matched between data and loadings"
-## [1] "Updated dimension of data: 93 9"
-
dNMF2ESepi<- data.frame(cbind(t(NMF2ESepi),pd.ESepiGen4c1l))
-
-#plot pca
-library(ggplot2)
-setEpiCOL <- scale_colour_manual(values = c("red","green","blue","black"),
-guide = guide_legend(title="Lineage"))
-
-pNMF2ESepiGen4c1l <- ggplot(dNMF2ESepi, aes(x=X1, y=X2, colour=Condition)) +
-  geom_point(size=5) + setEpiCOL +
-  theme(legend.position=c(0,0), legend.justification=c(0,0),
-  panel.background = element_rect(fill = "white"),
-  legend.direction = "horizontal",
-  plot.title = element_text(vjust = 0,hjust=0,face="bold"))
-  labs(title = "Encode RNAseq in target PC1 & PC2",
-  x=paste("Projected PC1 (",round(PCA2ESepi[[2]][1],2),"% of varience)",sep=""),
-  y=paste("Projected PC2 (",round(PCA2ESepi[[2]][2],2),"% of varience)",sep=""))
-
## $x
-## [1] "Projected PC1 (18.32% of varience)"
-## 
-## $y
-## [1] "Projected PC2 (17.12% of varience)"
-## 
-## $title
-## [1] "Encode RNAseq in target PC1 & PC2"
-## 
-## attr(,"class")
-## [1] "labels"
-
-
-
-

5 Clustering projection

-

As canonical projection is not defined for clustering objects, the projectR package offers two transfer learning inspired methods to achieve the “projection” of clustering objects. These methods are defined by the function used to quantify and transfer the relationships which define each cluster in the original data set to the new dataset. Briefly, cluster2pattern uses the corelation of each genes expression to the mean of each cluster to define continuous weights. These weights are output as a pclust object which can serve as input to projectR. Alternatively, the intersectoR function can be used to test for significant overlap between two clustering objects. Both cluster2pattern and intersectoR methods are coded for a generic list structure with additional S4 class methods for kmeans and hclust objects. Further details and examples are provided in the followin respecitive sections.

-
-

5.1 cluster2pattern

-

cluster2pattern uses the corelation of each genes expression to the mean of each cluster to define continuous weights.

-
library(projectR)
-data(p.RNAseq6l3c3t)
-
-
-nP<-5
-kClust<-kmeans(t(p.RNAseq6l3c3t),centers=nP)
-kpattern<-cluster2pattern(clusters = kClust, NP = nP, data = p.RNAseq6l3c3t)
-kpattern
-
-cluster2pattern(clusters = NA, NP = NA, data = NA)
-
-

5.1.1 Input Arguments

-

The inputs that must be set each time are the clusters and data.

-

The arguments are as follows:

-

clusters a clustering object
-NP either the number of clusters desired or the subset of clusters to use
-data data used to make clusters object

-
-
-

5.1.2 Output

-

The output of the cluster2pattern function is a pclust class object; specifically, a matrix of genes (rows) by clusters (columns). A gene’s value outside of its assigned cluster is zero. For the cluster containing a given gene, the gene’s value is the correlation of the gene’s expression to the mean of that cluster.

-
-
-
-

5.2 intersectoR

-

intersectoR function can be used to test for significant overlap between two clustering objects. The base function finds and tests the intersecting values of two sets of lists, presumably the genes associated with patterns in two different datasets. S4 class methods for hclust and kmeans objects are also available.

-
library(projectR)
-intersectoR(pSet1 = NA, pSet2 = NA, pval = 0.05, full = FALSE, k = NULL)
-
-

5.2.1 Input Arguments

-

The inputs that must be set each time are the clusters and data.

-

The arguments are as follows:

-

pSet1 a list for a set of patterns where each entry is a set of genes associated with a single pattern
-pSet2 a list for a second set of patterns where each entry is a set of genes associated with a single pattern
-pval the maximum p-value considered significant
-full logical indicating whether to return full data frame of signigicantly overlapping sets. Default is false will return summary matrix.
-k numeric giving cut height for hclust objects, if vector arguments will be applied to pSet1 and pSet2 in that order

-
-
-

5.2.2 Output

-

The output of the intersectoR function is a summary matrix showing the sets with statistically significant overlap under the specified \(p\)-value threshold based on a hypergeometric test. If full==TRUE the full data frame of significantly overlapping sets will also be returned.

-
-
-
-
-

6 Correlation based projection

-

Correlation based projection requires a matrix of gene-wise correlation values to serve as the Pattern input to the projectR function. This matrix can be user-generated or the result of the correlateR function included in the projectR package. User-generated matrixes with each row corresponding to an individual gene can be input to the generic projectR function. The correlateR function allows users to create a weight matrix for projection with values quantifying the within dataset correlation of each genes expression to the expression pattern of a particular gene or set of genes as follows.

-
-

6.1 correlateR

-
library(projectR)
-correlateR(genes = NA, dat = NA, threshtype = "R", threshold = 0.7, absR = FALSE, ...)
-
-

6.1.1 Input Arguments

-

The inputs that must be set each time are only the genes and data, with all other inputs having default values.

-

The arguments are as follows:

-

genes gene or character vector of genes for reference expression pattern dat
-data matrix or data frame with genes to be used for to calculate correlation
-threshtype Default “R” indicates thresholding by R value or equivalent. Alternatively, “N” indicates a numerical cut off.
-threshold numeric indicating value at which to make threshold
-absR logical indicating where to include both positive and negatively correlated genes
-... addtion imputes to the cor function

-
-
-

6.1.2 Output

-

The output of the correlateR function is a correlateR class object. Specifically, a matrix of correlation values for those genes whose expression pattern pattern in the dataset is correlated (and anti-correlated if absR=TRUE) above the value given in as the threshold arguement. As this information may be useful in its own right, it is recommended that users inspect the correlateR object before using it as input to the projectR function.

-
-
-
-

6.2 Obtaining and visualizing correlateR objects.

-
# data to
-library(projectR)
-data("p.RNAseq6l3c3t")
-
-# get genes correlated to T
-cor2T<-correlateR(genes="T", dat=p.RNAseq6l3c3t, threshtype="N", threshold=10, absR=TRUE)
-cor2T <- cor2T@corM
-### heatmap of genes more correlated to T
-indx<-unlist(sapply(cor2T,rownames))
-indx <- as.vector(indx)
-colnames(p.RNAseq6l3c3t)<-pd.RNAseq6l3c3t$sampleX
-library(reshape2)
-pm.RNAseq6l3c3t<-melt(cbind(p.RNAseq6l3c3t[indx,],indx))
-
## Using indx as id variables
-
library(gplots)
-library(ggplot2)
-library(viridis)
-
## Loading required package: viridisLite
-
pCorT<-ggplot(pm.RNAseq6l3c3t, aes(variable, indx, fill = value)) +
-  geom_tile(colour="gray20", size=1.5, stat="identity") +
-  scale_fill_viridis(option="B") +
-  xlab("") +  ylab("") +
-  scale_y_discrete(limits=indx) +
-  ggtitle("Ten genes most highly pos & neg correlated with T") +
-  theme(
-    panel.background = element_rect(fill="gray20"),
-    panel.border = element_rect(fill=NA,color="gray20", size=0.5, linetype="solid"),
-    panel.grid.major = element_blank(),
-    panel.grid.minor = element_blank(),
-    axis.line = element_blank(),
-    axis.ticks = element_blank(),
-    axis.text = element_text(size=rel(1),hjust=1),
-    axis.text.x = element_text(angle = 90,vjust=.5),
-    legend.text = element_text(color="white", size=rel(1)),
-    legend.background = element_rect(fill="gray20"),
-    legend.position = "bottom",
-    legend.title=element_blank()
-)
-
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
-## ℹ Please use `linewidth` instead.
-## This warning is displayed once every 8 hours.
-## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
-## generated.
-
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
-## ℹ Please use the `linewidth` argument instead.
-## This warning is displayed once every 8 hours.
-## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
-## generated.
-

-
-
-

6.3 Projecting correlateR objects.

-
# data to project into from RNAseq6l3c3t expression data
-data(p.ESepiGen4c1l)
-
-library(projectR)
-cor2ESepi <- projectR(p.ESepiGen4c1l$mRNA.Seq,loadings=cor2T[[1]],full=FALSE,
-    dataNames=map.ESepiGen4c1l$GeneSymbols)
-
## [1] "9 row names matched between data and loadings"
-## [1] "Updated dimension of data: 9 9"
-
-
-
-

7 Differential features identification.

-
-

7.1 projectionDriveR

-

Given loadings that define the weight of features (genes) in a given latent space (e.g. PCA, NMF), and the use of these patterns in samples, it is of interest to look at differential usage of these features between conditions. These conditions may be defined by user-defined annotations of cell type or by differential usage of a (projected) pattern. By examining differences in gene expression, weighted by the loadings that define their importance in a specific latent space, a unique understanding of differential expression in that context can be gained. This approach was originally proposed and developed in (Baraban et al, 2021), which demonstrates its utility in cross-celltype and cross-species interpretation of pattern usages.

-
library(projectR)
-projectionDriveR(cellgroup1, cellgroup2, loadings, loadingsNames = NULL,
-                 pvalue, pattern_name, display = T, normalize_pattern = T)
-
-
-

7.1.1 Input Arguments

-

The required inputs are two feature by sample (e.g. gene by cell) matrices to be compared, the loadings that define the feature weights, and the name of the pattern (column of feature loadings). If applicable, the expression matrices should already be corrected for variables such as sequencing depth.

-

The arguments for projectionDriveR are:

-

cellgroup1 Matrix 1 with features as rows, samples as columns.
-cellgroup2 Matrix 2 with features as rows, samples as columns.
-loadings Matrix or dataframe with features as rows, columns as patterns. Values define feature weights in that space
-loadingsNames Vector of names corresponding to rows of loadings. By default the rownames of loadings will be used
-pattern_name the column name of the loadings by which the features will be weighted
-pvalue Determines the significance of the confidence interval to be calculated between the difference of means
-display Boolean. Whether or not to plot the estimates of significant features. Default = T
-normalize_pattern Boolean. Whether or not to normalize the average feature weight. Default = T

-
-
-

7.1.2 Output

-

The output of projectionDriveR is a list of length five mean_ci holds the confidence intervals for the difference in means for all features, weighted_ci holds the confidence intervals for the weighted difference in means for all features, and normalized_weights are the weights themselves. In addition, significant_genes is a vector of gene names that are significantly different at the threshold provided. plotted_ci returns the ggplot figure of the confidence intervals, see plotConfidenceIntervals for documentation.

-
-
-

7.1.3 Identifying differential features associated with learned patterns

-
options(width = 60)
-library(projectR)
-library(dplyr, warn.conflicts = F)
-
-#gene weights x pattern
-data("retinal_patterns")
-
-#size-normed, log expression
-data("microglial_counts")
-
-#size-normed, log expression
-data("glial_counts")
-
-#the features by which to weight the difference in expression 
-pattern_to_weight <- "Pattern.24"
-drivers <- projectionDriveR(microglial_counts, #expression matrix
-                                       glial_counts, #expression matrix
-                                       loadings = retinal_patterns, #feature x pattern dataframe
-                                       loadingsNames = NULL,
-                                       pattern_name = pattern_to_weight, #column name
-                                       pvalue = 1e-5, #pvalue before bonferroni correction
-                                       display = T,
-                                       normalize_pattern = T) #normalize feature weights
-
## [1] "2996 row names matched between datasets"
-## [1] "2996"
-## [1] "Updated dimension of data: 2996"
-

-
conf_intervals <- drivers$mean_ci[drivers$significant_genes,]
-
-str(conf_intervals)
-
## 'data.frame':    253 obs. of  2 variables:
-##  $ low : num  1.86 0.158 -0.562 -0.756 0.155 ...
-##  $ high: num  2.03943 0.26729 -0.00197 -0.18521 0.23239 ...
-
-
-
-

7.2 plotConfidenceIntervals

-
-

7.2.1 Input

-

The arguments for plotConfidenceIntervals are:

-

confidence_intervals A dataframe of features x estimates
-interval_name names of columns that contain the low and high estimates, respectively. (default: c(“low”,“high”)) -pattern_name string to use as the title for the plots
-sort Boolean. Whether or not to sort genes by their estimates (default = T)
-genes a vector with names of genes to include in plot. If sort=F, estimates will be plotted in this order (default = NULL will include all genes.)
-weights weights of features to include as annotation (default = NULL will not include heatmap)
-weights_clip quantile of data to clip color scale for improved visualization (default: 0.99)
-weights_vis_norm Which processed version of weights to visualize as a heatmap. One of c(“none”, “quantile”). default = “none”

-
-
-

7.2.2 Output

-

A list of the length three that includes confidence interval plots and relevant info. ci_estimates_plot is the point-range plot for the provided estimates. If called from within projectionDriveR, the unweighted estimates are used. feature_order is the vector of gene names in the order shown in the figure. weights_heatmap is a heatmap annotation of the gene loadings, in the same order as above.

-
-
-

7.2.3 Customize plotting of confidence intervals

-
suppressWarnings(library(cowplot))
-#order in ascending order of estimates
-conf_intervals <- conf_intervals %>% mutate(mid = (high+low)/2) %>% arrange(mid)
-gene_order <- rownames(conf_intervals)
-
-#add text labels for top and bottom n genes
-conf_intervals$label_name <- NA_character_
-n <- 2
-idx <- c(1:n, (dim(conf_intervals)[1]-(n-1)):dim(conf_intervals)[1])
-gene_ids <- gene_order[idx]
-conf_intervals$label_name[idx] <- gene_ids
-
-#the labels above can now be used as ggplot aesthetics
-plots_list <- plotConfidenceIntervals(conf_intervals, #mean difference in expression confidence intervals
-                                      sort = F, #should genes be sorted by estimates
-                                      weights = drivers$normalized_weights[rownames(conf_intervals)],
-                                      pattern_name = pattern_to_weight,
-                                      weights_clip = 0.99,
-                                      weights_vis_norm = "none")
-
-pl1 <- plots_list[["ci_estimates_plot"]] +
-  ggrepel::geom_label_repel(aes(label = label_name), max.overlaps = 20, force = 50)
-
-pl2 <- plots_list[["weights_heatmap"]]
-
-#now plot the weighted differences
-weighted_conf_intervals <- drivers$weighted_mean_ci[gene_order,]
-plots_list_weighted <- plotConfidenceIntervals(weighted_conf_intervals,
-                                               sort = F,
-                                               pattern_name = pattern_to_weight)
-
-pl3 <- plots_list_weighted[["ci_estimates_plot"]] +
-  xlab("Difference in weighted group means") +
-  theme(axis.title.y = element_blank(), axis.ticks.y = element_blank(), axis.text.y = element_blank())
-
-cowplot::plot_grid(pl1, pl2, pl3, align = "h", rel_widths = c(1,.4, 1), ncol = 3)
-
## Warning: Removed 249 rows containing missing values
-## (`geom_label_repel()`).
-

-
-
-
-

7.3 multivariateAnalysisR

-

This function performs multivariate analysis on different clusters within a dataset, which in this case is restricted to Seurat Object. Clusters are identified as those satisfying the conditions specified in their corresponding dictionaries. multivariateAnalysisR performs two tests: Analysis of Variance (ANOVA) and Confidence Interval (CI) evaluations. ANOVA is performed to understand the general differentiation between clusters through the lens of a specified pattern. CI is to visualize pair-wise differential expressions between two clusters for each pattern. Researchers can visually understand both the macroscopic and microscopic differential uses of each pattern across different clusters through this function.

-
library(projectR)
-multivariateAnalysisR <- function(significanceLevel = 0.05, patternKeys, seuratobj,
-                                  dictionaries, customNames = NULL, exclusive = TRUE,
-                                  exportFolder = "", ANOVAwidth = 1000,
-                                  ANOVAheight = 1000, CIwidth = 1000, CIheight = 1000,
-                                  CIspacing = 1)
-
-

7.3.1 Input Arguments

-

The required inputs are patternKeys (list of strings indicating the patterns to be evaluated), seuratobj (the Seurat Object data containing both clusters and patterns), and dictionaries (list of dictionary where each dictionary indicates the conditions each corresponding cluster has to satisfy).

-

The arguments for multivariateAnalysisR are:

-

significanceLevel Double value for testing significance in ANOVA test.
-patternKeys List of strings indicating pattern subsets from seuratobj to be analyzed.
-seuratobj Seurat Object Data containing patternKeys in meta.data. -dictionaries List of dictionaries indicating clusters to be compared.
-customNames List of custom names for clusters in corresponding order. -exclusive Boolean value for determining interpolation between params in clusters. -exportFolder Name of folder to store exported graphs and CSV files. -ANOVAwidth Width of ANOVA png. -ANOVAheight Height of ANOVA png. -CIwidth Width of CI png. -CIheight Height of CI png. -CIspacing Spacing between each CI in CI graph.

-
-
-

7.3.2 Output

-

multivariateAnalysisR returns a sorted list of the generated ANOVA and CI values. It also exports two pairs of exported PNG/CSV files: one for ANOVA analysis, another for CI. From the ANOVA analysis, researchers can see the general ranking of differential uses of patterns across the specified clusters. From the CI analysis, researchers can identify the specific differential use cases between every pair of clusters.

-
-
-

7.3.3 Comparing differential uses of patterns across different clusters

-

Demonstrative example will be added soon.

-
-
-
-
-

References

-
-
-Barbakh, Wesam Ashour, Ying Wu, and Colin Fyfe. 2009. Review of Linear Projection Methods.” In Non-Standard Parameter Adaptation for Exploratory Data Analysis, 29–48. Berlin, Heidelberg: Springer Berlin Heidelberg. -
-
-Sibisi, Sibusiso, and John Skilling. 1997. “Prior Distributions on Measure Space.” Journal of the Royal Statistical Society: Series B (Statistical Methodology) 59 (1): 217–35. https://doi.org/10.1111/1467-9868.00065. -
-
-Wang, Guoli, Andrew V. Kossenkov, and Michael F. Ochs. 2006. “LS-NMF: A Modified Non-Negative Matrix Factorization Algorithm Utilizing Uncertainty Estimates.” BMC Bioinformatics 7 (1): 175. https://doi.org/10.1186/1471-2105-7-175. -
-
-
- - - - -
- - - - - - - - - - - - - - - - - - diff --git a/vignettes/projectR.tex b/vignettes/projectR.tex deleted file mode 100644 index 5789790..0000000 --- a/vignettes/projectR.tex +++ /dev/null @@ -1,758 +0,0 @@ -\documentclass[]{article} -\usepackage{lmodern} -\usepackage{amssymb,amsmath} -\usepackage{ifxetex,ifluatex} -\usepackage{fixltx2e} % provides \textsubscript -\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex - \usepackage[T1]{fontenc} - \usepackage[utf8]{inputenc} -\else % if luatex or xelatex - \ifxetex - \usepackage{mathspec} - \else - \usepackage{fontspec} - \fi - \defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase} -\fi -% use upquote if available, for straight quotes in verbatim environments -\IfFileExists{upquote.sty}{\usepackage{upquote}}{} -% use microtype if available -\IfFileExists{microtype.sty}{% -\usepackage{microtype} -\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts -}{} - - -\usepackage{longtable,booktabs} -\usepackage{graphicx} -% grffile has become a legacy package: https://ctan.org/pkg/grffile -\IfFileExists{grffile.sty}{% -\usepackage{grffile} -}{} -\makeatletter -\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi} -\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi} -\makeatother -% Scale images if necessary, so that they will not overflow the page -% margins by default, and it is still possible to overwrite the defaults -% using explicit options in \includegraphics[width, height, ...]{} -\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio} -\IfFileExists{parskip.sty}{% -\usepackage{parskip} -}{% else -\setlength{\parindent}{0pt} -\setlength{\parskip}{6pt plus 2pt minus 1pt} -} -\setlength{\emergencystretch}{3em} % prevent overfull lines -\providecommand{\tightlist}{% - \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} -\setcounter{secnumdepth}{5} - -%%% Use protect on footnotes to avoid problems with footnotes in titles -\let\rmarkdownfootnote\footnote% -\def\footnote{\protect\rmarkdownfootnote} - -%%% Change title format to be more compact -\usepackage{titling} - -% Create subtitle command for use in maketitle -\providecommand{\subtitle}[1]{ - \posttitle{ - \begin{center}\large#1\end{center} - } -} - -\setlength{\droptitle}{-2em} - -\RequirePackage[]{C:/Users/Gaurav/Documents/R/win-library/4.0/BiocStyle/resources/tex/Bioconductor} - -\bioctitle[]{projectR Vignette} - \pretitle{\vspace{\droptitle}\centering\huge} - \posttitle{\par} -\author{Gaurav Sharma, Charles Shin, Jared N. Slosberg, Loyal A. Goff and Genevieve L. Stein-O'Brien} - \preauthor{\centering\large\emph} - \postauthor{\par} - \predate{\centering\large\emph} - \postdate{\par} - \date{20 May 2022} - -% code highlighting -\definecolor{fgcolor}{rgb}{0.251, 0.251, 0.251} -\makeatletter -\@ifundefined{AddToHook}{}{\AddToHook{package/xcolor/after}{\definecolor{fgcolor}{rgb}{0.251, 0.251, 0.251}}} -\makeatother -\newcommand{\hlnum}[1]{\textcolor[rgb]{0.816,0.125,0.439}{#1}}% -\newcommand{\hlstr}[1]{\textcolor[rgb]{0.251,0.627,0.251}{#1}}% -\newcommand{\hlcom}[1]{\textcolor[rgb]{0.502,0.502,0.502}{\textit{#1}}}% -\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}% -\newcommand{\hlstd}[1]{\textcolor[rgb]{0.251,0.251,0.251}{#1}}% -\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.125,0.125,0.941}{#1}}% -\newcommand{\hlkwb}[1]{\textcolor[rgb]{0,0,0}{#1}}% -\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.251,0.251,0.251}{#1}}% -\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.878,0.439,0.125}{#1}}% -\let\hlipl\hlkwb -% -\usepackage{fancyvrb} -\newcommand{\VerbBar}{|} -\newcommand{\VERB}{\Verb[commandchars=\\\{\}]} -\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}} -% -\newenvironment{Shaded}{\begin{myshaded}}{\end{myshaded}} -% set background for result chunks -\let\oldverbatim\verbatim -\renewenvironment{verbatim}{\color{codecolor}\begin{myshaded}\begin{oldverbatim}}{\end{oldverbatim}\end{myshaded}} -% -\newcommand{\KeywordTok}[1]{\hlkwd{#1}} -\newcommand{\DataTypeTok}[1]{\hlkwc{#1}} -\newcommand{\DecValTok}[1]{\hlnum{#1}} -\newcommand{\BaseNTok}[1]{\hlnum{#1}} -\newcommand{\FloatTok}[1]{\hlnum{#1}} -\newcommand{\ConstantTok}[1]{\hlnum{#1}} -\newcommand{\CharTok}[1]{\hlstr{#1}} -\newcommand{\SpecialCharTok}[1]{\hlstr{#1}} -\newcommand{\StringTok}[1]{\hlstr{#1}} -\newcommand{\VerbatimStringTok}[1]{\hlstr{#1}} -\newcommand{\SpecialStringTok}[1]{\hlstr{#1}} -\newcommand{\ImportTok}[1]{{#1}} -\newcommand{\CommentTok}[1]{\hlcom{#1}} -\newcommand{\DocumentationTok}[1]{\hlcom{#1}} -\newcommand{\AnnotationTok}[1]{\hlcom{#1}} -\newcommand{\CommentVarTok}[1]{\hlcom{#1}} -\newcommand{\OtherTok}[1]{{#1}} -\newcommand{\FunctionTok}[1]{\hlstd{#1}} -\newcommand{\VariableTok}[1]{\hlstd{#1}} -\newcommand{\ControlFlowTok}[1]{\hlkwd{#1}} -\newcommand{\OperatorTok}[1]{\hlopt{#1}} -\newcommand{\BuiltInTok}[1]{{#1}} -\newcommand{\ExtensionTok}[1]{{#1}} -\newcommand{\PreprocessorTok}[1]{\textit{#1}} -\newcommand{\AttributeTok}[1]{{#1}} -\newcommand{\RegionMarkerTok}[1]{{#1}} -\newcommand{\InformationTok}[1]{\textcolor{messagecolor}{#1}} -\newcommand{\WarningTok}[1]{\textcolor{warningcolor}{#1}} -\newcommand{\AlertTok}[1]{\textcolor{errorcolor}{#1}} -\newcommand{\ErrorTok}[1]{\textcolor{errorcolor}{#1}} -\newcommand{\NormalTok}[1]{\hlstd{#1}} -% -\AtBeginDocument{\bibliographystyle{C:/Users/Gaurav/Documents/R/win-library/4.0/BiocStyle/resources/tex/unsrturl}} - - -\begin{document} -\maketitle - - -{ -\setcounter{tocdepth}{2} -\tableofcontents -\newpage -} -\hypertarget{introduction}{% -\section{Introduction}\label{introduction}} - -Technological advances continue to spur the exponential growth of biological data as illustrated by the rise of the omics---genomics, transcriptomics, epigenomics, proteomics, etc.---each with there own high throughput technologies. In order to leverage the full power of these resources, methods to integrate multiple data sets and data types must be developed. The reciprocal nature of the genomic, transcriptomic, epigenomic, and proteomic biology requires that the data provides a complementary view of cellular function and regulatory organization; however, the technical heterogeneity and massive size of high-throughput data even within a particular omic makes integrated analysis challenging. To address these challenges, we developed projectR, an R package for integrated analysis of high dimensional omic data. projectR uses the relationships defined within a given high dimensional data set, to interrogate related biological phenomena in an entirely new data set. By relying on relative comparisons within data type, projectR is able to circumvent many issues arising from technological variation. For a more extensive example of how the tools in the projectR package can be used for \emph{in silico} experiments, or additional information on the algorithm, see \href{https://www.sciencedirect.com/science/article/pii/S2405471219301462}{Stein-O'Brien, et al}. - -\hypertarget{getting-started-with-projectr}{% -\section{Getting started with projectR}\label{getting-started-with-projectr}} - -\hypertarget{installation-instructions}{% -\subsection{Installation Instructions}\label{installation-instructions}} - -For automatic Bioconductor package installation, start R, and run: - -\begin{verbatim} -BiocManager::install("projectR") -\end{verbatim} - -\hypertarget{methods}{% -\subsection{Methods}\label{methods}} - -Projection can roughly be defined as a mapping or transformation of points from one space to another often lower dimensional space. Mathematically, this can described as a function \(\varphi(x)=y : \Re^{D} \mapsto \Re^{d}\) s.t. \(d \leq D\) for \(x \in \Re^{D}, y \in \Re^{d}\) Barbakh, Wu, and Fyfe (2009) . The projectR package uses projection functions defined in a training dataset to interrogate related biological phenomena in an entirely new data set. These functions can be the product of any one of several methods common to ``omic'' analyses including regression, PCA, NMF, clustering. Individual sections focusing on one specific method are included in the vignette. However, the general design of the projectR function is the same regardless. - -\hypertarget{the-base-projectr-function}{% -\subsection{The base projectR function}\label{the-base-projectr-function}} - -The generic projectR function is executed as follows: - -\begin{verbatim} -library(projectR) -projectR(data, loadings, dataNames=NULL, loadingsNames=NULL, NP = NULL, full = false) -\end{verbatim} - -\hypertarget{input-arguments}{% -\subsubsection{Input Arguments}\label{input-arguments}} - -The inputs that must be set each time are only the data and loadings, with all other inputs having default values. However, incongruities in the feature mapping between the data and loadings, i.e.~a different format for the rownames of each object, will throw errors or result in an empty mapping and should be checked before running. To overcoming mismatched feature names in the objects themselves, the /code\{dataNames\} and /code\{loadingNames\} arguments can be manually supplied by the user. - -The arguments are as follows: - -\begin{description} -\item[data]{a dataset to be projected into the pattern space} -\item[loadings]{a matrix of continous values with unique rownames to be projected} -\item[dataNames]{a vector containing unique name, i.e. gene names, for the rows of the target dataset to be used to match features with the loadings, if not provided by \texttt{rownames(data)}. Order of names in vector must match order of rows in data.} -\item[loadingsNames]{a vector containing unique names, i.e. gene names, for the rows of loadings to be used to match features with the data, if not provided by \texttt{rownames(loadings)}. Order of names in vector must match order of rows in loadings.} -\item[NP]{vector of integers indicating which columns of loadings object to use. The default of NP = NA will use entire matrix.} -\item[full]{logical indicating whether to return the full model solution. By default only the new pattern object is returned.} -\end{description} - -The \texttt{loadings} argument in the generic projectR function is suitable for use with any genernal feature space, or set of feature spaces, whose rows annotation links them to the data to be projected. Ex: the coeffients associated with individual genes as the result of regression analysis or the amplituded values of individual genes as the result of non-negative matrix factorization (NMF). - -\hypertarget{output}{% -\subsubsection{Output}\label{output}} - -The basic output of the base projectR function, i.e.~\texttt{full=FALSE}, returns \texttt{projectionPatterns} representing relative weights for the samples from the new data in this previously defined feature space, or set of feature spaces. The full output of the base projectR function, i.e.~\texttt{full=TRUE}, returns \texttt{projectionFit}, a list containing \texttt{projectionPatterns} and \texttt{Projection}. The \texttt{Projection} object contains additional information from the proceedure used to obtain the \texttt{projectionPatterns}. For the the the base projectR function, \texttt{Projection} is the full lmFit model from the package \emph{\href{https://bioconductor.org/packages/3.12/limma}{limma}}. - -\hypertarget{pca-projection}{% -\section{PCA projection}\label{pca-projection}} - -Projection of principal components is achieved by matrix multiplication of a new data set by previously generated eigenvectors, or gene loadings. If the original data were standardized such that each gene is centered to zero average expression level, the principal components are normalized eigenvectors of the covariance matrix of the genes. Each PC is ordered according to how much of the variation present in the data they contain. Projection of the original samples into each PC will maximize the variance of the samples in the direction of that component and uncorrelated to previous components. Projection of new data places the new samples into the PCs defined by the original data. Because the components define an orthonormal basis set, they provide an isomorphism between a vector space, \(V\), and \(\Re^n\) which preserves inner products. If \(V\) is an inner product space over \(\Re\) with orthonormal basis \(B = v_1,...,v_n\) and \(v \epsilon V s.t [v]_B = (r_1,...,r_n)\), then finding the coordinate of \(v_i\) in \(v\) is precisely the inner product of \(v\) with \(v_i\), i.e.~\(r_i = \langle v,v_i \rangle\). This formulation is implemented for only those genes belonging to both the new data and the PC space. The \texttt{projectR} function has S4 method for class \texttt{prcomp}. - -\hypertarget{obtaining-pcs-to-project.}{% -\subsection{Obtaining PCs to project.}\label{obtaining-pcs-to-project.}} - -\begin{Shaded} -\begin{Highlighting}[] -\CommentTok{\# data to define PCs} -\KeywordTok{library}\NormalTok{(projectR)} -\KeywordTok{data}\NormalTok{(p.RNAseq6l3c3t)} - -\CommentTok{\# do PCA on RNAseq6l3c3t expression data} -\NormalTok{pc.RNAseq6l3c3t<{-}}\KeywordTok{prcomp}\NormalTok{(}\KeywordTok{t}\NormalTok{(p.RNAseq6l3c3t))} -\NormalTok{pcVAR <{-}}\StringTok{ }\KeywordTok{round}\NormalTok{(((pc.RNAseq6l3c3t}\OperatorTok{$}\NormalTok{sdev)}\OperatorTok{\^{}}\DecValTok{2}\OperatorTok{/}\KeywordTok{sum}\NormalTok{(pc.RNAseq6l3c3t}\OperatorTok{$}\NormalTok{sdev}\OperatorTok{\^{}}\DecValTok{2}\NormalTok{))}\OperatorTok{*}\DecValTok{100}\NormalTok{,}\DecValTok{2}\NormalTok{)} -\NormalTok{dPCA <{-}}\StringTok{ }\KeywordTok{data.frame}\NormalTok{(}\KeywordTok{cbind}\NormalTok{(pc.RNAseq6l3c3t}\OperatorTok{$}\NormalTok{x,pd.RNAseq6l3c3t))} - -\CommentTok{\#plot pca} -\KeywordTok{library}\NormalTok{(ggplot2)} -\NormalTok{setCOL <{-}}\StringTok{ }\KeywordTok{scale\_colour\_manual}\NormalTok{(}\DataTypeTok{values =} \KeywordTok{c}\NormalTok{(}\StringTok{"blue"}\NormalTok{,}\StringTok{"black"}\NormalTok{,}\StringTok{"red"}\NormalTok{), }\DataTypeTok{name=}\StringTok{"Condition:"}\NormalTok{)} -\NormalTok{setFILL <{-}}\StringTok{ }\KeywordTok{scale\_fill\_manual}\NormalTok{(}\DataTypeTok{values =} \KeywordTok{c}\NormalTok{(}\StringTok{"blue"}\NormalTok{,}\StringTok{"black"}\NormalTok{,}\StringTok{"red"}\NormalTok{),}\DataTypeTok{guide =} \OtherTok{FALSE}\NormalTok{)} -\NormalTok{setPCH <{-}}\StringTok{ }\KeywordTok{scale\_shape\_manual}\NormalTok{(}\DataTypeTok{values=}\KeywordTok{c}\NormalTok{(}\DecValTok{23}\NormalTok{,}\DecValTok{22}\NormalTok{,}\DecValTok{25}\NormalTok{,}\DecValTok{25}\NormalTok{,}\DecValTok{21}\NormalTok{,}\DecValTok{24}\NormalTok{),}\DataTypeTok{name=}\StringTok{"Cell Line:"}\NormalTok{)} - -\NormalTok{pPCA <{-}}\StringTok{ }\KeywordTok{ggplot}\NormalTok{(dPCA, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x=}\NormalTok{PC1, }\DataTypeTok{y=}\NormalTok{PC2, }\DataTypeTok{colour=}\NormalTok{ID.cond, }\DataTypeTok{shape=}\NormalTok{ID.line,} - \DataTypeTok{fill=}\NormalTok{ID.cond)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{geom\_point}\NormalTok{(}\KeywordTok{aes}\NormalTok{(}\DataTypeTok{size=}\NormalTok{days),}\DataTypeTok{alpha=}\NormalTok{.}\DecValTok{6}\NormalTok{)}\OperatorTok{+} -\StringTok{ }\NormalTok{setCOL }\OperatorTok{+}\StringTok{ }\NormalTok{setPCH }\OperatorTok{+}\StringTok{ }\NormalTok{setFILL }\OperatorTok{+} -\StringTok{ }\KeywordTok{scale\_size\_area}\NormalTok{(}\DataTypeTok{breaks =} \KeywordTok{c}\NormalTok{(}\DecValTok{2}\NormalTok{,}\DecValTok{4}\NormalTok{,}\DecValTok{6}\NormalTok{), }\DataTypeTok{name=}\StringTok{"Day"}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{legend.position=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{), }\DataTypeTok{legend.justification=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{),} - \DataTypeTok{legend.direction =} \StringTok{"horizontal"}\NormalTok{,} - \DataTypeTok{panel.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill =} \StringTok{"white"}\NormalTok{,}\DataTypeTok{colour=}\OtherTok{NA}\NormalTok{),} - \DataTypeTok{legend.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill =} \StringTok{"transparent"}\NormalTok{,}\DataTypeTok{colour=}\OtherTok{NA}\NormalTok{),} - \DataTypeTok{plot.title =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{vjust =} \DecValTok{0}\NormalTok{,}\DataTypeTok{hjust=}\DecValTok{0}\NormalTok{,}\DataTypeTok{face=}\StringTok{"bold"}\NormalTok{)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{labs}\NormalTok{(}\DataTypeTok{title =} \StringTok{"PCA of hPSC PolyA RNAseq"}\NormalTok{,} - \DataTypeTok{x=}\KeywordTok{paste}\NormalTok{(}\StringTok{"PC1 ("}\NormalTok{,pcVAR[}\DecValTok{1}\NormalTok{],}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{),} - \DataTypeTok{y=}\KeywordTok{paste}\NormalTok{(}\StringTok{"PC2 ("}\NormalTok{,pcVAR[}\DecValTok{2}\NormalTok{],}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{))} -\end{Highlighting} -\end{Shaded} - -\hypertarget{projecting-prcomp-objects}{% -\subsection{Projecting prcomp objects}\label{projecting-prcomp-objects}} - -\begin{Shaded} -\begin{Highlighting}[] -\CommentTok{\# data to project into PCs from RNAseq6l3c3t expression data} -\KeywordTok{data}\NormalTok{(p.ESepiGen4c1l)} - -\KeywordTok{library}\NormalTok{(projectR)} -\NormalTok{PCA2ESepi <{-}}\StringTok{ }\KeywordTok{projectR}\NormalTok{(}\DataTypeTok{data =}\NormalTok{ p.ESepiGen4c1l}\OperatorTok{$}\NormalTok{mRNA.Seq,}\DataTypeTok{loadings=}\NormalTok{pc.RNAseq6l3c3t,} -\DataTypeTok{full=}\OtherTok{TRUE}\NormalTok{, }\DataTypeTok{dataNames=}\NormalTok{map.ESepiGen4c1l[[}\StringTok{"GeneSymbols"}\NormalTok{]])} -\CommentTok{\#\# [1] "93 row names matched between data and loadings"} -\CommentTok{\#\# [1] "Updated dimension of data: 93 9"} - -\NormalTok{pd.ESepiGen4c1l<{-}}\KeywordTok{data.frame}\NormalTok{(}\DataTypeTok{Condition=}\KeywordTok{sapply}\NormalTok{(}\KeywordTok{colnames}\NormalTok{(p.ESepiGen4c1l}\OperatorTok{$}\NormalTok{mRNA.Seq),} - \ControlFlowTok{function}\NormalTok{(x) }\KeywordTok{unlist}\NormalTok{(}\KeywordTok{strsplit}\NormalTok{(x,}\StringTok{\textquotesingle{}\_\textquotesingle{}}\NormalTok{))[}\DecValTok{1}\NormalTok{]),}\DataTypeTok{stringsAsFactors=}\OtherTok{FALSE}\NormalTok{)} -\NormalTok{pd.ESepiGen4c1l}\OperatorTok{$}\NormalTok{color<{-}}\KeywordTok{c}\NormalTok{(}\KeywordTok{rep}\NormalTok{(}\StringTok{"red"}\NormalTok{,}\DecValTok{2}\NormalTok{),}\KeywordTok{rep}\NormalTok{(}\StringTok{"green"}\NormalTok{,}\DecValTok{3}\NormalTok{),}\KeywordTok{rep}\NormalTok{(}\StringTok{"blue"}\NormalTok{,}\DecValTok{2}\NormalTok{),}\KeywordTok{rep}\NormalTok{(}\StringTok{"black"}\NormalTok{,}\DecValTok{2}\NormalTok{))} -\KeywordTok{names}\NormalTok{(pd.ESepiGen4c1l}\OperatorTok{$}\NormalTok{color)<{-}pd.ESepiGen4c1l}\OperatorTok{$}\NormalTok{Cond} - -\NormalTok{dPCA2ESepi<{-}}\StringTok{ }\KeywordTok{data.frame}\NormalTok{(}\KeywordTok{cbind}\NormalTok{(}\KeywordTok{t}\NormalTok{(PCA2ESepi[[}\DecValTok{1}\NormalTok{]]),pd.ESepiGen4c1l))} - -\CommentTok{\#plot pca} -\KeywordTok{library}\NormalTok{(ggplot2)} -\NormalTok{setEpiCOL <{-}}\StringTok{ }\KeywordTok{scale\_colour\_manual}\NormalTok{(}\DataTypeTok{values =} \KeywordTok{c}\NormalTok{(}\StringTok{"red"}\NormalTok{,}\StringTok{"green"}\NormalTok{,}\StringTok{"blue"}\NormalTok{,}\StringTok{"black"}\NormalTok{),} - \DataTypeTok{guide =} \KeywordTok{guide\_legend}\NormalTok{(}\DataTypeTok{title=}\StringTok{"Lineage"}\NormalTok{))} - -\NormalTok{pPC2ESepiGen4c1l <{-}}\StringTok{ }\KeywordTok{ggplot}\NormalTok{(dPCA2ESepi, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x=}\NormalTok{PC1, }\DataTypeTok{y=}\NormalTok{PC2, }\DataTypeTok{colour=}\NormalTok{Condition)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{geom\_point}\NormalTok{(}\DataTypeTok{size=}\DecValTok{5}\NormalTok{) }\OperatorTok{+}\StringTok{ }\NormalTok{setEpiCOL }\OperatorTok{+} -\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{legend.position=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{), }\DataTypeTok{legend.justification=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{),} - \DataTypeTok{panel.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill =} \StringTok{"white"}\NormalTok{),} - \DataTypeTok{legend.direction =} \StringTok{"horizontal"}\NormalTok{,} - \DataTypeTok{plot.title =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{vjust =} \DecValTok{0}\NormalTok{,}\DataTypeTok{hjust=}\DecValTok{0}\NormalTok{,}\DataTypeTok{face=}\StringTok{"bold"}\NormalTok{)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{labs}\NormalTok{(}\DataTypeTok{title =} \StringTok{"Encode RNAseq in target PC1 \& PC2"}\NormalTok{,} - \DataTypeTok{x=}\KeywordTok{paste}\NormalTok{(}\StringTok{"Projected PC1 ("}\NormalTok{,}\KeywordTok{round}\NormalTok{(PCA2ESepi[[}\DecValTok{2}\NormalTok{]][}\DecValTok{1}\NormalTok{],}\DecValTok{2}\NormalTok{),}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{),} - \DataTypeTok{y=}\KeywordTok{paste}\NormalTok{(}\StringTok{"Projected PC2 ("}\NormalTok{,}\KeywordTok{round}\NormalTok{(PCA2ESepi[[}\DecValTok{2}\NormalTok{]][}\DecValTok{2}\NormalTok{],}\DecValTok{2}\NormalTok{),}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{))} -\end{Highlighting} -\end{Shaded} - -\begin{verbatim} -## Warning: package 'gridExtra' was built under R version 4.0.5 -## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please -## use `guide = "none"` instead. -\end{verbatim} - -\begin{adjustwidth}{\fltoffset}{0mm} -\includegraphics[width=1\linewidth,]{E:/Projects/Fertiglab/projectR/vignettes/projectR_files/figure-latex/unnamed-chunk-2-1} \end{adjustwidth} - -\hypertarget{nmf-projection}{% -\section{NMF projection}\label{nmf-projection}} - -NMF decomposes a data matrix of \(D\) with \(N\) genes as rows and \(M\) samples as columns, into two matrices, as \(D ~ AP\). The pattern matrix P has rows associated with BPs in samples and the amplitude matrix A has columns indicating the relative association of a given gene, where the total number of BPs (k) is an input parameter. CoGAPS and GWCoGAPS seek a pattern matrix (\({\bf{P}}\)) and the corresponding distribution matrix of weights (\({\bf{A}}\)) whose product forms a mock data matrix (\({\bf{M}}\)) that represents the gene-wise data \({\bf{D}}\) within noise limits (\(\boldsymbol{\varepsilon}\)). That is, -\begin{equation} -{\bf{D}} = {\bf{M}} + \boldsymbol{\varepsilon} = {\bf{A}}{\bf{P}} + \boldsymbol{\varepsilon}. -\label{eq:matrixDecomp} -\end{equation} -The number of rows in \({\bf{P}}\) (columns in \({\bf{A}}\)) defines the number of biological patterns (k) that CoGAPS/GWCoGAPS will infer from the number of nonorthogonal basis vectors required to span the data space. As in the Bayesian Decomposition algorithm Wang, Kossenkov, and Ochs (2006), the matrices \({\bf{A}}\) and \({\bf{P}}\) in CoGAPS are assumed to have the atomic prior described in Sibisi and Skilling (1997). In the CoGAPS/GWCoGAPS implementation, \(\alpha_{A}\) and \(\alpha_{P}\) are corresponding parameters for the expected number of atoms which map to each matrix element in \({\bf{A}}\) and \({\bf{P}}\), respectively. The corresponding matrices \({\bf{A}}\) and \({\bf{P}}\) are found by MCMC sampling. - -Projection of CoGAPS/GWCoGAPS patterns is implemented by solving the factorization in \ref{eq:matrixDecomp} for the new data matrix where \({\bf{A}}\) is the fixed nonorthogonal basis vectors comprising the average of the posterior mean for the CoGAPS/GWCoGAPS simulations performed on the original data. The patterns \({\bf{P}}\) in the new data associated with this amplitude matrix is estimated using the least-squares fit to the new data implemented with the lmFit function in the \emph{\href{https://bioconductor.org/packages/3.12/limma}{limma}} package. The \texttt{projectR} function has S4 method for class \texttt{Linear Embedding Matrix, LME}. - -\begin{verbatim} -library(projectR) -projectR(data, loadings,dataNames = NULL, loadingsNames = NULL, - NP = NA, full = FALSE) -\end{verbatim} - -\hypertarget{input-arguments-1}{% -\subsubsection{Input Arguments}\label{input-arguments-1}} - -The inputs that must be set each time are only the data and patterns, with all other inputs having default values. However, inconguities between gene names--rownames of the loadings object and either rownames of the data object will throw errors and, subsequently, should be checked before running. - -The arguments are as follows: - -\begin{description} -\item[data]{a target dataset to be projected into the pattern space} -\item[loadings]{a CogapsResult object} -\item[dataNames]{rownames (eg. gene names) of the target dataset, if different from existing rownames of data} -\item[loadingsNames] loadingsNames rownames (eg. gene names) of the loadings to be matched with dataNames -\item[NP]{vector of integers indicating which columns of loadings object to use. The default of NP = NA will use entire matrix.} -\item[full]{logical indicating whether to return the full model solution. By default only the new pattern object is returned.} -\end{description} - -\hypertarget{output-1}{% -\subsubsection{Output}\label{output-1}} - -The basic output of the base projectR function, i.e.~\texttt{full=FALSE}, returns \texttt{projectionPatterns} representing relative weights for the samples from the new data in this previously defined feature space, or set of feature spaces. The full output of the base projectR function, i.e.~\texttt{full=TRUE}, returns \texttt{projectionFit}, a list containing \texttt{projectionPatterns} and \texttt{Projection}. The \texttt{Projection} object contains additional information from the procedure used to obtain the \texttt{projectionPatterns}. For the the the base projectR function, \texttt{Projection} is the full lmFit model from the package \emph{\href{https://bioconductor.org/packages/3.12/limma}{limma}}. - -\hypertarget{obtaining-cogaps-patterns-to-project.}{% -\subsection{Obtaining CoGAPS patterns to project.}\label{obtaining-cogaps-patterns-to-project.}} - -\begin{Shaded} -\begin{Highlighting}[] -\CommentTok{\# get data} -\KeywordTok{library}\NormalTok{(projectR)} -\NormalTok{AP <{-}}\StringTok{ }\KeywordTok{get}\NormalTok{(}\KeywordTok{data}\NormalTok{(}\StringTok{"AP.RNAseq6l3c3t"}\NormalTok{)) }\CommentTok{\#CoGAPS run data} -\NormalTok{AP <{-}}\StringTok{ }\NormalTok{AP}\OperatorTok{$}\NormalTok{Amean} -\CommentTok{\# heatmap of gene weights for CoGAPs patterns} -\KeywordTok{library}\NormalTok{(gplots)} -\CommentTok{\#\# Warning: package \textquotesingle{}gplots\textquotesingle{} was built under R version 4.0.5} -\CommentTok{\#\# } -\CommentTok{\#\# Attaching package: \textquotesingle{}gplots\textquotesingle{}} -\CommentTok{\#\# The following object is masked from \textquotesingle{}package:projectR\textquotesingle{}:} -\CommentTok{\#\# } -\CommentTok{\#\# lowess} -\CommentTok{\#\# The following object is masked from \textquotesingle{}package:stats\textquotesingle{}:} -\CommentTok{\#\# } -\CommentTok{\#\# lowess} -\KeywordTok{par}\NormalTok{(}\DataTypeTok{mar=}\KeywordTok{c}\NormalTok{(}\DecValTok{1}\NormalTok{,}\DecValTok{1}\NormalTok{,}\DecValTok{1}\NormalTok{,}\DecValTok{1}\NormalTok{))} -\NormalTok{pNMF<{-}}\KeywordTok{heatmap.2}\NormalTok{(}\KeywordTok{as.matrix}\NormalTok{(AP),}\DataTypeTok{col=}\NormalTok{bluered, }\DataTypeTok{trace=}\StringTok{\textquotesingle{}none\textquotesingle{}}\NormalTok{,} - \DataTypeTok{distfun=}\ControlFlowTok{function}\NormalTok{(c) }\KeywordTok{as.dist}\NormalTok{(}\DecValTok{1}\OperatorTok{{-}}\KeywordTok{cor}\NormalTok{(}\KeywordTok{t}\NormalTok{(c))) ,} - \DataTypeTok{cexCol=}\DecValTok{1}\NormalTok{,}\DataTypeTok{cexRow=}\NormalTok{.}\DecValTok{5}\NormalTok{,}\DataTypeTok{scale =} \StringTok{"row"}\NormalTok{,} - \DataTypeTok{hclustfun=}\ControlFlowTok{function}\NormalTok{(x) }\KeywordTok{hclust}\NormalTok{(x, }\DataTypeTok{method=}\StringTok{"average"}\NormalTok{)} -\NormalTok{ )} -\end{Highlighting} -\end{Shaded} - -\begin{adjustwidth}{\fltoffset}{0mm} -\includegraphics[width=1\linewidth,]{E:/Projects/Fertiglab/projectR/vignettes/projectR_files/figure-latex/unnamed-chunk-3-1} \end{adjustwidth} - -\hypertarget{projecting-cogaps-objects}{% -\subsection{Projecting CoGAPS objects}\label{projecting-cogaps-objects}} - -\begin{Shaded} -\begin{Highlighting}[] -\CommentTok{\# data to project into PCs from RNAseq6l3c3t expression data} -\KeywordTok{library}\NormalTok{(projectR)} -\KeywordTok{data}\NormalTok{(}\StringTok{\textquotesingle{}p.ESepiGen4c1l4\textquotesingle{}}\NormalTok{)} -\CommentTok{\#\# Warning in data("p.ESepiGen4c1l4"): data set \textquotesingle{}p.ESepiGen4c1l4\textquotesingle{} not found} -\KeywordTok{data}\NormalTok{(}\StringTok{\textquotesingle{}p.RNAseq6l3c3t\textquotesingle{}}\NormalTok{)} - -\NormalTok{NMF2ESepi <{-}}\StringTok{ }\KeywordTok{projectR}\NormalTok{(p.ESepiGen4c1l}\OperatorTok{$}\NormalTok{mRNA.Seq,}\DataTypeTok{loadings=}\NormalTok{AP,}\DataTypeTok{full=}\OtherTok{TRUE}\NormalTok{,} - \DataTypeTok{dataNames=}\NormalTok{map.ESepiGen4c1l[[}\StringTok{"GeneSymbols"}\NormalTok{]])} -\CommentTok{\#\# [1] "93 row names matched between data and loadings"} -\CommentTok{\#\# [1] "Updated dimension of data: 93 9"} - -\NormalTok{dNMF2ESepi<{-}}\StringTok{ }\KeywordTok{data.frame}\NormalTok{(}\KeywordTok{cbind}\NormalTok{(}\KeywordTok{t}\NormalTok{(NMF2ESepi),pd.ESepiGen4c1l))} - -\CommentTok{\#plot pca} -\KeywordTok{library}\NormalTok{(ggplot2)} -\NormalTok{setEpiCOL <{-}}\StringTok{ }\KeywordTok{scale\_colour\_manual}\NormalTok{(}\DataTypeTok{values =} \KeywordTok{c}\NormalTok{(}\StringTok{"red"}\NormalTok{,}\StringTok{"green"}\NormalTok{,}\StringTok{"blue"}\NormalTok{,}\StringTok{"black"}\NormalTok{),} -\DataTypeTok{guide =} \KeywordTok{guide\_legend}\NormalTok{(}\DataTypeTok{title=}\StringTok{"Lineage"}\NormalTok{))} - -\NormalTok{pNMF2ESepiGen4c1l <{-}}\StringTok{ }\KeywordTok{ggplot}\NormalTok{(dNMF2ESepi, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x=}\NormalTok{X1, }\DataTypeTok{y=}\NormalTok{X2, }\DataTypeTok{colour=}\NormalTok{Condition)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{geom\_point}\NormalTok{(}\DataTypeTok{size=}\DecValTok{5}\NormalTok{) }\OperatorTok{+}\StringTok{ }\NormalTok{setEpiCOL }\OperatorTok{+} -\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{legend.position=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{), }\DataTypeTok{legend.justification=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{),} - \DataTypeTok{panel.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill =} \StringTok{"white"}\NormalTok{),} - \DataTypeTok{legend.direction =} \StringTok{"horizontal"}\NormalTok{,} - \DataTypeTok{plot.title =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{vjust =} \DecValTok{0}\NormalTok{,}\DataTypeTok{hjust=}\DecValTok{0}\NormalTok{,}\DataTypeTok{face=}\StringTok{"bold"}\NormalTok{))} - \KeywordTok{labs}\NormalTok{(}\DataTypeTok{title =} \StringTok{"Encode RNAseq in target PC1 \& PC2"}\NormalTok{,} - \DataTypeTok{x=}\KeywordTok{paste}\NormalTok{(}\StringTok{"Projected PC1 ("}\NormalTok{,}\KeywordTok{round}\NormalTok{(PCA2ESepi[[}\DecValTok{2}\NormalTok{]][}\DecValTok{1}\NormalTok{],}\DecValTok{2}\NormalTok{),}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{),} - \DataTypeTok{y=}\KeywordTok{paste}\NormalTok{(}\StringTok{"Projected PC2 ("}\NormalTok{,}\KeywordTok{round}\NormalTok{(PCA2ESepi[[}\DecValTok{2}\NormalTok{]][}\DecValTok{2}\NormalTok{],}\DecValTok{2}\NormalTok{),}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{))} -\CommentTok{\#\# $x} -\CommentTok{\#\# [1] "Projected PC1 (18.36\% of varience)"} -\CommentTok{\#\# } -\CommentTok{\#\# $y} -\CommentTok{\#\# [1] "Projected PC2 (17.15\% of varience)"} -\CommentTok{\#\# } -\CommentTok{\#\# $title} -\CommentTok{\#\# [1] "Encode RNAseq in target PC1 \& PC2"} -\CommentTok{\#\# } -\CommentTok{\#\# attr(,"class")} -\CommentTok{\#\# [1] "labels"} -\end{Highlighting} -\end{Shaded} - -\hypertarget{clustering-projection}{% -\section{Clustering projection}\label{clustering-projection}} - -As canonical projection is not defined for clustering objects, the projectR package offers two transfer learning inspired methods to achieve the ``projection'' of clustering objects. These methods are defined by the function used to quantify and transfer the relationships which define each cluster in the original data set to the new dataset. Briefly, \texttt{cluster2pattern} uses the corelation of each genes expression to the mean of each cluster to define continuous weights. These weights are output as a \texttt{pclust} object which can serve as input to \texttt{projectR}. Alternatively, the \texttt{intersectoR} function can be used to test for significant overlap between two clustering objects. Both \texttt{cluster2pattern} and \texttt{intersectoR} methods are coded for a generic list structure with additional S4 class methods for kmeans and hclust objects. Further details and examples are provided in the followin respecitive sections. - -\hypertarget{cluster2pattern}{% -\subsection{cluster2pattern}\label{cluster2pattern}} - -\texttt{cluster2pattern} uses the corelation of each genes expression to the mean of each cluster to define continuous weights. - -\begin{verbatim} -library(projectR) -data(p.RNAseq6l3c3t) - - -nP<-5 -kClust<-kmeans(p.RNAseq6l3c3t,centers=nP) -kpattern<-cluster2pattern(clusters = kClust, NP = nP, data = p.RNAseq6l3c3t) -kpattern - -cluster2pattern(clusters = NA, NP = NA, data = NA) -\end{verbatim} - -\hypertarget{input-arguments-2}{% -\subsubsection{Input Arguments}\label{input-arguments-2}} - -The inputs that must be set each time are the clusters and data. - -The arguments are as follows: - -\begin{description} -\item[clusters]{a clustering object} -\item[NP]{either the number of clusters desired or the subset of clusters to use} -\item[data]{data used to make clusters object} -\end{description} - -\hypertarget{output-2}{% -\subsubsection{Output}\label{output-2}} - -The output of the \texttt{cluster2pattern} function is a \texttt{pclust} class object; specifically, a matrix of genes (rows) by clusters (columns). A gene's value outside of its assigned cluster is zero. For the cluster containing a given gene, the gene's value is the correlation of the gene's expression to the mean of that cluster. - -\hypertarget{intersector}{% -\subsection{intersectoR}\label{intersector}} - -\texttt{intersectoR} function can be used to test for significant overlap between two clustering objects. The base function finds and tests the intersecting values of two sets of lists, presumably the genes associated with patterns in two different datasets. S4 class methods for \texttt{hclust} and \texttt{kmeans} objects are also available. - -\begin{verbatim} -library(projectR) -intersectoR(pSet1 = NA, pSet2 = NA, pval = 0.05, full = FALSE, k = NULL) -\end{verbatim} - -\hypertarget{input-arguments-3}{% -\subsubsection{Input Arguments}\label{input-arguments-3}} - -The inputs that must be set each time are the clusters and data. - -The arguments are as follows: - -\begin{description} -\item[pSet1]{a list for a set of patterns where each entry is a set of genes associated with a single pattern} -\item[pSet2]{a list for a second set of patterns where each entry is a set of genes associated with a single pattern} -\item[pval]{the maximum p-value considered significant} -\item[full]{logical indicating whether to return full data frame of signigicantly overlapping sets. Default is false will return summary matrix.} -\item[k]{numeric giving cut height for hclust objects, if vector arguments will be applied to pSet1 and pSet2 in that order} -\end{description} - -\hypertarget{output-3}{% -\subsubsection{Output}\label{output-3}} - -The output of the \texttt{intersectoR} function is a summary matrix showing the sets with statistically significant overlap under the specified \(p\)-value threshold based on a hypergeometric test. If \texttt{full==TRUE} the full data frame of significantly overlapping sets will also be returned. - -\hypertarget{correlation-based-projection}{% -\section{Correlation based projection}\label{correlation-based-projection}} - -Correlation based projection requires a matrix of gene-wise correlation values to serve as the Pattern input to the \texttt{projectR} function. This matrix can be user-generated or the result of the \texttt{correlateR} function included in the projectR package. User-generated matrixes with each row corresponding to an individual gene can be input to the generic \texttt{projectR} function. The \texttt{correlateR} function allows users to create a weight matrix for projection with values quantifying the within dataset correlation of each genes expression to the expression pattern of a particular gene or set of genes as follows. - -\hypertarget{correlater}{% -\subsection{correlateR}\label{correlater}} - -\begin{verbatim} -library(projectR) -correlateR(genes = NA, dat = NA, threshtype = "R", threshold = 0.7, absR = FALSE, ...) -\end{verbatim} - -\hypertarget{input-arguments-4}{% -\subsubsection{Input Arguments}\label{input-arguments-4}} - -The inputs that must be set each time are only the genes and data, with all other inputs having default values. - -The arguments are as follows: - -\begin{description} -\item[genes]{gene or character vector of genes for reference expression pattern dat} -\item[data]{matrix or data frame with genes to be used for to calculate correlation} -\item[threshtype]{Default "R" indicates thresholding by R value or equivalent. Alternatively, "N" indicates a numerical cut off.} -\item[threshold]{numeric indicating value at which to make threshold} -\item[absR]{logical indicating where to include both positive and negatively correlated genes} -\item[...]{addtion imputes to the cor function} -\end{description} - -\hypertarget{output-4}{% -\subsubsection{Output}\label{output-4}} - -The output of the \texttt{correlateR} function is a \texttt{correlateR} class object. Specifically, a matrix of correlation values for those genes whose expression pattern pattern in the dataset is correlated (and anti-correlated if absR=TRUE) above the value given in as the threshold arguement. As this information may be useful in its own right, it is recommended that users inspect the \texttt{correlateR} object before using it as input to the \texttt{projectR} function. - -\hypertarget{obtaining-and-visualizing-objects.}{% -\subsection{\texorpdfstring{Obtaining and visualizing \texttt{correlateR} objects.}{Obtaining and visualizing objects.}}\label{obtaining-and-visualizing-objects.}} - -\begin{Shaded} -\begin{Highlighting}[] -\CommentTok{\# data to} -\KeywordTok{library}\NormalTok{(projectR)} -\KeywordTok{data}\NormalTok{(}\StringTok{"p.RNAseq6l3c3t"}\NormalTok{)} - -\CommentTok{\# get genes correlated to T} -\NormalTok{cor2T<{-}}\KeywordTok{correlateR}\NormalTok{(}\DataTypeTok{genes=}\StringTok{"T"}\NormalTok{, }\DataTypeTok{dat=}\NormalTok{p.RNAseq6l3c3t, }\DataTypeTok{threshtype=}\StringTok{"N"}\NormalTok{, }\DataTypeTok{threshold=}\DecValTok{10}\NormalTok{, }\DataTypeTok{absR=}\OtherTok{TRUE}\NormalTok{)} -\NormalTok{cor2T <{-}}\StringTok{ }\NormalTok{cor2T}\OperatorTok{@}\NormalTok{corM} -\CommentTok{\#\#\# heatmap of genes more correlated to T} -\NormalTok{indx<{-}}\KeywordTok{unlist}\NormalTok{(}\KeywordTok{sapply}\NormalTok{(cor2T,rownames))} -\NormalTok{indx <{-}}\StringTok{ }\KeywordTok{as.vector}\NormalTok{(indx)} -\KeywordTok{colnames}\NormalTok{(p.RNAseq6l3c3t)<{-}pd.RNAseq6l3c3t}\OperatorTok{$}\NormalTok{sampleX} -\KeywordTok{library}\NormalTok{(reshape2)} -\CommentTok{\#\# Warning: package \textquotesingle{}reshape2\textquotesingle{} was built under R version 4.0.5} -\NormalTok{pm.RNAseq6l3c3t<{-}}\KeywordTok{melt}\NormalTok{(}\KeywordTok{cbind}\NormalTok{(p.RNAseq6l3c3t[indx,],indx))} -\CommentTok{\#\# Using indx as id variables} - -\KeywordTok{library}\NormalTok{(gplots)} -\KeywordTok{library}\NormalTok{(ggplot2)} -\KeywordTok{library}\NormalTok{(viridis)} -\CommentTok{\#\# Warning: package \textquotesingle{}viridis\textquotesingle{} was built under R version 4.0.5} -\CommentTok{\#\# Loading required package: viridisLite} -\CommentTok{\#\# Warning: package \textquotesingle{}viridisLite\textquotesingle{} was built under R version 4.0.5} -\NormalTok{pCorT<{-}}\KeywordTok{ggplot}\NormalTok{(pm.RNAseq6l3c3t, }\KeywordTok{aes}\NormalTok{(variable, indx, }\DataTypeTok{fill =}\NormalTok{ value)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{geom\_tile}\NormalTok{(}\DataTypeTok{colour=}\StringTok{"gray20"}\NormalTok{, }\DataTypeTok{size=}\FloatTok{1.5}\NormalTok{, }\DataTypeTok{stat=}\StringTok{"identity"}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{scale\_fill\_viridis}\NormalTok{(}\DataTypeTok{option=}\StringTok{"B"}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{xlab}\NormalTok{(}\StringTok{""}\NormalTok{) }\OperatorTok{+}\StringTok{ }\KeywordTok{ylab}\NormalTok{(}\StringTok{""}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{scale\_y\_discrete}\NormalTok{(}\DataTypeTok{limits=}\NormalTok{indx) }\OperatorTok{+} -\StringTok{ }\KeywordTok{ggtitle}\NormalTok{(}\StringTok{"Ten genes most highly pos \& neg correlated with T"}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{theme}\NormalTok{(} - \DataTypeTok{panel.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill=}\StringTok{"gray20"}\NormalTok{),} - \DataTypeTok{panel.border =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill=}\OtherTok{NA}\NormalTok{,}\DataTypeTok{color=}\StringTok{"gray20"}\NormalTok{, }\DataTypeTok{size=}\FloatTok{0.5}\NormalTok{, }\DataTypeTok{linetype=}\StringTok{"solid"}\NormalTok{),} - \DataTypeTok{panel.grid.major =} \KeywordTok{element\_blank}\NormalTok{(),} - \DataTypeTok{panel.grid.minor =} \KeywordTok{element\_blank}\NormalTok{(),} - \DataTypeTok{axis.line =} \KeywordTok{element\_blank}\NormalTok{(),} - \DataTypeTok{axis.ticks =} \KeywordTok{element\_blank}\NormalTok{(),} - \DataTypeTok{axis.text =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{size=}\KeywordTok{rel}\NormalTok{(}\DecValTok{1}\NormalTok{),}\DataTypeTok{hjust=}\DecValTok{1}\NormalTok{),} - \DataTypeTok{axis.text.x =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{angle =} \DecValTok{90}\NormalTok{,}\DataTypeTok{vjust=}\NormalTok{.}\DecValTok{5}\NormalTok{),} - \DataTypeTok{legend.text =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{color=}\StringTok{"white"}\NormalTok{, }\DataTypeTok{size=}\KeywordTok{rel}\NormalTok{(}\DecValTok{1}\NormalTok{)),} - \DataTypeTok{legend.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill=}\StringTok{"gray20"}\NormalTok{),} - \DataTypeTok{legend.position =} \StringTok{"bottom"}\NormalTok{,} - \DataTypeTok{legend.title=}\KeywordTok{element\_blank}\NormalTok{()} -\NormalTok{)} -\end{Highlighting} -\end{Shaded} - -\begin{adjustwidth}{\fltoffset}{0mm} -\includegraphics[width=1\linewidth,]{E:/Projects/Fertiglab/projectR/vignettes/projectR_files/figure-latex/unnamed-chunk-5-1} \end{adjustwidth} - -\hypertarget{projecting-correlater-objects.}{% -\subsection{Projecting correlateR objects.}\label{projecting-correlater-objects.}} - -\begin{Shaded} -\begin{Highlighting}[] -\CommentTok{\# data to project into from RNAseq6l3c3t expression data} -\KeywordTok{data}\NormalTok{(p.ESepiGen4c1l)} - -\KeywordTok{library}\NormalTok{(projectR)} -\NormalTok{cor2ESepi <{-}}\StringTok{ }\KeywordTok{projectR}\NormalTok{(p.ESepiGen4c1l}\OperatorTok{$}\NormalTok{mRNA.Seq,}\DataTypeTok{loadings=}\NormalTok{cor2T[[}\DecValTok{1}\NormalTok{]],}\DataTypeTok{full=}\OtherTok{FALSE}\NormalTok{,} - \DataTypeTok{dataNames=}\NormalTok{map.ESepiGen4c1l}\OperatorTok{$}\NormalTok{GeneSymbols)} -\CommentTok{\#\# [1] "9 row names matched between data and loadings"} -\CommentTok{\#\# [1] "Updated dimension of data: 9 9"} -\end{Highlighting} -\end{Shaded} - -\hypertarget{differential-features-identification.}{% -\section{Differential features identification.}\label{differential-features-identification.}} - -\hypertarget{projectiondriver}{% -\subsection{projectionDriveR}\label{projectiondriver}} - -Given loadings that define the weight of features (genes) in a given latent space (e.g.~PCA, NMF), and the use of these patterns in samples, it is of interest to look at differential usage of these features between conditions. These conditions may be defined by user-defined annotations of cell type or by differential usage of a (projected) pattern. By examining differences in gene expression, weighted by the loadings that define their importance in a specific latent space, a unique understanding of differential expression in that context can be gained. This approach was originally proposed and developed in (Baraban et al, 2021), which demonstrates its utility in cross-celltype and cross-species interpretation of pattern usages. - -\begin{verbatim} -library(projectR) -projectionDriveR(cellgroup1, cellgroup2, loadings, loadingsNames = NULL, - pvalue, pattern_name, display = T, normalize_pattern = T) -\end{verbatim} - -\hypertarget{input-arguments-5}{% -\subsubsection{Input Arguments}\label{input-arguments-5}} - -The required inputs are two feature by sample (e.g.~gene by cell) matrices to be compared, the loadings that define the feature weights, and the name of the pattern (column of feature loadings). If applicable, the expression matrices should already be corrected for variables such as sequencing depth. - -The arguments for projectionDriveR are: - -\begin{description} -\item[cellgroup1]{Matrix 1 with features as rows, samples as columns.} -\item[cellgroup2]{Matrix 2 with features as rows, samples as columns.} -\item[loadings]{Matrix or dataframe with features as rows, columns as patterns. Values define feature weights in that space} -\item[loadingsNames]{Vector of names corresponding to rows of loadings. By default the rownames of loadings will be used} -\item[pattern\_name]{the column name of the loadings by which the features will be weighted} -\item[pvalue]{Determines the significance of the confidence interval to be calculated between the difference of means} -\item[display]{Boolean. Whether or not to plot the estimates of significant features. Default = T} -\item[normalize\_pattern]{Boolean. Whether or not to normalize the average feature weight. Default = T} -\end{description} - -\hypertarget{output-5}{% -\subsubsection{Output}\label{output-5}} - -The output of \texttt{projectionDriveR} is a list of length five \texttt{mean\_ci} holds the confidence intervals for the difference in means for all features, \texttt{weighted\_ci} holds the confidence intervals for the weighted difference in means for all features, and normalized\_weights are the weights themselves. In addition, \texttt{significant\_genes} is a vector of gene names that are significantly different at the threshold provided. \texttt{plotted\_ci} returns the ggplot figure of the confidence intervals, see \texttt{plotConfidenceIntervals} for documentation. - -\hypertarget{identifying-differential-features-associated-with-learned-patterns}{% -\subsubsection{Identifying differential features associated with learned patterns}\label{identifying-differential-features-associated-with-learned-patterns}} - -\begin{Shaded} -\begin{Highlighting}[] -\KeywordTok{options}\NormalTok{(}\DataTypeTok{width =} \DecValTok{60}\NormalTok{)} -\KeywordTok{library}\NormalTok{(projectR)} -\KeywordTok{library}\NormalTok{(dplyr, }\DataTypeTok{warn.conflicts =}\NormalTok{ F)} -\CommentTok{\#\# Warning: package \textquotesingle{}dplyr\textquotesingle{} was built under R version 4.0.5} - -\CommentTok{\#gene weights x pattern} -\KeywordTok{data}\NormalTok{(}\StringTok{"retinal\_patterns"}\NormalTok{)} - -\CommentTok{\#size{-}normed, log expression} -\KeywordTok{data}\NormalTok{(}\StringTok{"microglial\_counts"}\NormalTok{)} - -\CommentTok{\#size{-}normed, log expression} -\KeywordTok{data}\NormalTok{(}\StringTok{"glial\_counts"}\NormalTok{)} - -\CommentTok{\#the features by which to weight the difference in expression } -\NormalTok{pattern\_to\_weight <{-}}\StringTok{ "Pattern.24"} -\NormalTok{drivers <{-}}\StringTok{ }\KeywordTok{projectionDriveR}\NormalTok{(microglial\_counts, }\CommentTok{\#expression matrix} -\NormalTok{ glial\_counts, }\CommentTok{\#expression matrix} - \DataTypeTok{loadings =}\NormalTok{ retinal\_patterns, }\CommentTok{\#feature x pattern dataframe} - \DataTypeTok{loadingsNames =} \OtherTok{NULL}\NormalTok{,} - \DataTypeTok{pattern\_name =}\NormalTok{ pattern\_to\_weight, }\CommentTok{\#column name} - \DataTypeTok{pvalue =} \FloatTok{1e{-}5}\NormalTok{, }\CommentTok{\#pvalue before bonferroni correction} - \DataTypeTok{display =}\NormalTok{ T,} - \DataTypeTok{normalize\_pattern =}\NormalTok{ T) }\CommentTok{\#normalize feature weights} -\CommentTok{\#\# [1] "2996 row names matched between datasets"} -\CommentTok{\#\# [1] "2996"} -\CommentTok{\#\# [1] "Updated dimension of data: 2996"} -\end{Highlighting} -\end{Shaded} - -\begin{adjustwidth}{\fltoffset}{0mm} -\includegraphics[width=1\linewidth,]{E:/Projects/Fertiglab/projectR/vignettes/projectR_files/figure-latex/projectionDriver-1} \end{adjustwidth} - -\begin{Shaded} -\begin{Highlighting}[] - -\NormalTok{conf\_intervals <{-}}\StringTok{ }\NormalTok{drivers}\OperatorTok{$}\NormalTok{mean\_ci[drivers}\OperatorTok{$}\NormalTok{significant\_genes,]} - -\KeywordTok{str}\NormalTok{(conf\_intervals)} -\CommentTok{\#\# \textquotesingle{}data.frame\textquotesingle{}: 253 obs. of 2 variables:} -\CommentTok{\#\# $ low : num 1.86 0.158 {-}0.562 {-}0.756 0.155 ...} -\CommentTok{\#\# $ high: num 2.03943 0.26729 {-}0.00197 {-}0.18521 0.23239 ...} -\end{Highlighting} -\end{Shaded} - -\hypertarget{plotconfidenceintervals}{% -\subsection{plotConfidenceIntervals}\label{plotconfidenceintervals}} - -\hypertarget{input}{% -\subsubsection{Input}\label{input}} - -The arguments for plotConfidenceIntervals are: - -\begin{description} -\item[confidence\_intervals]{A dataframe of features x estimates} -\item[interval\_name]{names of columns that contain the low and high estimates, respectively. -(default: c("low","high"))} -\item[pattern\_name]{string to use as the title for the plots} -\item[sort]{Boolean. Whether or not to sort genes by their estimates (default = T)} -\item[genes]{a vector with names of genes to include in plot. If sort=F, estimates will be plotted in this order (default = NULL will include all genes.)} -\item[weights]{weights of features to include as annotation (default = NULL will not include heatmap)} -\item[weights\_clip]{quantile of data to clip color scale for improved visualization (default: 0.99)} -\item[weights\_vis\_norm]{Which processed version of weights to visualize as a heatmap. One of c("none", "quantile"). default = "none"} -\end{description} - -\hypertarget{output-6}{% -\subsubsection{Output}\label{output-6}} - -A list of the length three that includes confidence interval plots and relevant info. \texttt{ci\_estimates\_plot} is the point-range plot for the provided estimates. If called from within \texttt{projectionDriveR}, the unweighted estimates are used. \texttt{feature\_order} is the vector of gene names in the order shown in the figure. \texttt{weights\_heatmap} is a heatmap annotation of the gene loadings, in the same order as above. - -\hypertarget{customize-plotting-of-confidence-intervals}{% -\subsubsection{Customize plotting of confidence intervals}\label{customize-plotting-of-confidence-intervals}} - -\begin{Shaded} -\begin{Highlighting}[] -\KeywordTok{library}\NormalTok{(cowplot)} -\CommentTok{\#\# Warning: package \textquotesingle{}cowplot\textquotesingle{} was built under R version 4.0.5} -\CommentTok{\#order in ascending order of estimates} -\NormalTok{conf\_intervals <{-}}\StringTok{ }\NormalTok{conf\_intervals }\OperatorTok{\%>\%}\StringTok{ }\KeywordTok{mutate}\NormalTok{(}\DataTypeTok{mid =}\NormalTok{ (high}\OperatorTok{+}\NormalTok{low)}\OperatorTok{/}\DecValTok{2}\NormalTok{) }\OperatorTok{\%>\%}\StringTok{ }\KeywordTok{arrange}\NormalTok{(mid)} -\NormalTok{gene\_order <{-}}\StringTok{ }\KeywordTok{rownames}\NormalTok{(conf\_intervals)} - -\CommentTok{\#add text labels for top and bottom n genes} -\NormalTok{conf\_intervals}\OperatorTok{$}\NormalTok{label\_name <{-}}\StringTok{ }\OtherTok{NA\_character\_} -\NormalTok{n <{-}}\StringTok{ }\DecValTok{2} -\NormalTok{idx <{-}}\StringTok{ }\KeywordTok{c}\NormalTok{(}\DecValTok{1}\OperatorTok{:}\NormalTok{n, (}\KeywordTok{dim}\NormalTok{(conf\_intervals)[}\DecValTok{1}\NormalTok{]}\OperatorTok{{-}}\NormalTok{(n}\DecValTok{{-}1}\NormalTok{))}\OperatorTok{:}\KeywordTok{dim}\NormalTok{(conf\_intervals)[}\DecValTok{1}\NormalTok{])} -\NormalTok{gene\_ids <{-}}\StringTok{ }\NormalTok{gene\_order[idx]} -\NormalTok{conf\_intervals}\OperatorTok{$}\NormalTok{label\_name[idx] <{-}}\StringTok{ }\NormalTok{gene\_ids} - -\CommentTok{\#the labels above can now be used as ggplot aesthetics} -\NormalTok{plots\_list <{-}}\StringTok{ }\KeywordTok{plotConfidenceIntervals}\NormalTok{(conf\_intervals, }\CommentTok{\#mean difference in expression confidence intervals} - \DataTypeTok{sort =}\NormalTok{ F, }\CommentTok{\#should genes be sorted by estimates} - \DataTypeTok{weights =}\NormalTok{ drivers}\OperatorTok{$}\NormalTok{normalized\_weights[}\KeywordTok{rownames}\NormalTok{(conf\_intervals)],} - \DataTypeTok{pattern\_name =}\NormalTok{ pattern\_to\_weight,} - \DataTypeTok{weights\_clip =} \FloatTok{0.99}\NormalTok{,} - \DataTypeTok{weights\_vis\_norm =} \StringTok{"none"}\NormalTok{)} - -\NormalTok{pl1 <{-}}\StringTok{ }\NormalTok{plots\_list[[}\StringTok{"ci\_estimates\_plot"}\NormalTok{]] }\OperatorTok{+} -\StringTok{ }\NormalTok{ggrepel}\OperatorTok{::}\KeywordTok{geom\_label\_repel}\NormalTok{(}\KeywordTok{aes}\NormalTok{(}\DataTypeTok{label =}\NormalTok{ label\_name), }\DataTypeTok{max.overlaps =} \DecValTok{20}\NormalTok{, }\DataTypeTok{force =} \DecValTok{50}\NormalTok{)} - -\NormalTok{pl2 <{-}}\StringTok{ }\NormalTok{plots\_list[[}\StringTok{"weights\_heatmap"}\NormalTok{]]} - -\CommentTok{\#now plot the weighted differences} -\NormalTok{weighted\_conf\_intervals <{-}}\StringTok{ }\NormalTok{drivers}\OperatorTok{$}\NormalTok{weighted\_mean\_ci[gene\_order,]} -\NormalTok{plots\_list\_weighted <{-}}\StringTok{ }\KeywordTok{plotConfidenceIntervals}\NormalTok{(weighted\_conf\_intervals,} - \DataTypeTok{sort =}\NormalTok{ F,} - \DataTypeTok{pattern\_name =}\NormalTok{ pattern\_to\_weight)} - -\NormalTok{pl3 <{-}}\StringTok{ }\NormalTok{plots\_list\_weighted[[}\StringTok{"ci\_estimates\_plot"}\NormalTok{]] }\OperatorTok{+} -\StringTok{ }\KeywordTok{xlab}\NormalTok{(}\StringTok{"Difference in weighted group means"}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{axis.title.y =} \KeywordTok{element\_blank}\NormalTok{(), }\DataTypeTok{axis.ticks.y =} \KeywordTok{element\_blank}\NormalTok{(), }\DataTypeTok{axis.text.y =} \KeywordTok{element\_blank}\NormalTok{())} - -\NormalTok{cowplot}\OperatorTok{::}\KeywordTok{plot\_grid}\NormalTok{(pl1, pl2, pl3, }\DataTypeTok{align =} \StringTok{"h"}\NormalTok{, }\DataTypeTok{rel\_widths =} \KeywordTok{c}\NormalTok{(}\DecValTok{1}\NormalTok{,.}\DecValTok{4}\NormalTok{, }\DecValTok{1}\NormalTok{), }\DataTypeTok{ncol =} \DecValTok{3}\NormalTok{)} -\CommentTok{\#\# Warning: Removed 249 rows containing missing values} -\CommentTok{\#\# (geom\_label\_repel).} -\end{Highlighting} -\end{Shaded} - -\begin{adjustwidth}{\fltoffset}{0mm} -\includegraphics[width=1\linewidth,]{E:/Projects/Fertiglab/projectR/vignettes/projectR_files/figure-latex/unnamed-chunk-7-1} \end{adjustwidth} - -\hypertarget{refs}{} -\begin{cslreferences} -\leavevmode\hypertarget{ref-Barbakh:2009bw}{}% -Barbakh, Wesam Ashour, Ying Wu, and Colin Fyfe. 2009. ``Review of Linear Projection Methods.'' In \emph{Non-Standard Parameter Adaptation for Exploratory Data Analysis}, 29--48. Berlin, Heidelberg: Springer Berlin Heidelberg. - -\leavevmode\hypertarget{ref-Sibisi1997}{}% -Sibisi, Sibusiso, and John Skilling. 1997. ``Prior Distributions on Measure Space.'' \emph{Journal of the Royal Statistical Society: Series B (Statistical Methodology)} 59 (1): 217--35. \url{https://doi.org/10.1111/1467-9868.00065}. - -\leavevmode\hypertarget{ref-Ochs2006}{}% -Wang, Guoli, Andrew V. Kossenkov, and Michael F. Ochs. 2006. ``LS-Nmf: A Modified Non-Negative Matrix Factorization Algorithm Utilizing Uncertainty Estimates.'' \emph{BMC Bioinformatics} 7 (1): 175. \url{https://doi.org/10.1186/1471-2105-7-175}. -\end{cslreferences} - - -\end{document} From d2c36274b19464919a44ed9565d1443ecdb6f8bf Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Tue, 16 Jan 2024 15:23:15 -0500 Subject: [PATCH 10/33] update vignette update projectR.html --- vignettes/projectR.Rmd | 535 +++++++++++++++ vignettes/projectR.bib | 99 +++ vignettes/projectR.html | 1410 +++++++++++++++++++++++++++++++++++++++ vignettes/projectR.tex | 758 +++++++++++++++++++++ 4 files changed, 2802 insertions(+) create mode 100644 vignettes/projectR.Rmd create mode 100644 vignettes/projectR.bib create mode 100644 vignettes/projectR.html create mode 100644 vignettes/projectR.tex diff --git a/vignettes/projectR.Rmd b/vignettes/projectR.Rmd new file mode 100644 index 0000000..fb4c08e --- /dev/null +++ b/vignettes/projectR.Rmd @@ -0,0 +1,535 @@ +--- +title: "projectR Vignette" +author: +- "Gaurav Sharma" +- "Charles Shin" +- "Jared N. Slosberg" +- "Loyal A. Goff" +- "Genevieve L. Stein-O'Brien" + +date: "`r BiocStyle::doc_date()`" +output: BiocStyle::html_document +bibliography: projectR.bib +description: | + Functions for the Projection of Weights from PCA, CoGAPS, NMF, Correlation, and Clustering +vignette: > + %\VignetteIndexEntry{projectR} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +options(scipen = 1, digits = 2) +set.seed(1234) +``` + +# Introduction +Technological advances continue to spur the exponential growth of biological data as illustrated by the rise of the omics—genomics, transcriptomics, epigenomics, proteomics, etc.—each with there own high throughput technologies. In order to leverage the full power of these resources, methods to integrate multiple data sets and data types must be developed. The reciprocal nature of the genomic, transcriptomic, epigenomic, and proteomic biology requires that the data provides a complementary view of cellular function and regulatory organization; however, the technical heterogeneity and massive size of high-throughput data even within a particular omic makes integrated analysis challenging. To address these challenges, we developed projectR, an R package for integrated analysis of high dimensional omic data. projectR uses the relationships defined within a given high dimensional data set, to interrogate related biological phenomena in an entirely new data set. By relying on relative comparisons within data type, projectR is able to circumvent many issues arising from technological variation. For a more extensive example of how the tools in the projectR package can be used for *in silico* experiments, or additional information on the algorithm, see [Stein-O'Brien, et al](https://www.sciencedirect.com/science/article/pii/S2405471219301462) and [Sharma, et al](https://academic.oup.com/bioinformatics/article/36/11/3592/5804979). + +# Getting started with projectR + +## Installation Instructions + +For automatic Bioconductor package installation, start R, and run: +``` +BiocManager::install("genesofeve/projectR@projectionDriveR") +``` + +## Methods + +Projection can roughly be defined as a mapping or transformation of points from one space to another often lower dimensional space. Mathematically, this can described as a function $\varphi(x)=y : \Re^{D} \mapsto \Re^{d}$ s.t. $d \leq D$ for $x \in \Re^{D}, y \in \Re^{d}$ @Barbakh:2009bw . The projectR package uses projection functions defined in a training dataset to interrogate related biological phenomena in an entirely new data set. These functions can be the product of any one of several methods common to "omic" analyses including regression, PCA, NMF, clustering. Individual sections focusing on one specific method are included in the vignette. However, the general design of the projectR function is the same regardless. + +## The base projectR function + +The generic projectR function is executed as follows: +``` +library(projectR) +projectR(data, loadings, dataNames=NULL, loadingsNames=NULL, NP = NULL, full = false) +``` + +### Input Arguments +The inputs that must be set each time are only the data and loadings, with all other inputs having default values. However, incongruities in the feature mapping between the data and loadings, i.e. a different format for the rownames of each object, will throw errors or result in an empty mapping and should be checked before running. To overcoming mismatched feature names in the objects themselves, the ``dataNames`` and `loadingNames` arguments can be manually supplied by the user. + +The arguments are as follows: +**`data`** a dataset to be projected into the pattern space +**`loadings`** a matrix of continous values with unique rownames to be projected +**`dataNames`** a vector containing unique name, i.e. gene names, for the rows of the target dataset to be used to match features with the loadings, if not provided by `rownames(data)`. Order of names in vector must match order of rows in data. +**`loadingsNames`** a vector containing unique names, i.e. gene names, for the rows of loadings to be used to match features with the data, if not provided by `rownames(loadings)`. Order of names in vector must match order of rows in loadings. +**`NP`** vector of integers indicating which columns of loadings object to use. The default of NP = NA will use entire matrix. +**`full`** logical indicating whether to return the full model solution. By default only the new pattern object is returned. + +The `loadings` argument in the generic projectR function is suitable for use with any genernal feature space, or set of feature spaces, whose rows annotation links them to the data to be projected. Ex: the coeffients associated with individual genes as the result of regression analysis or the amplituded values of individual genes as the result of non-negative matrix factorization (NMF). + +### Output +The basic output of the base projectR function, i.e. `full=FALSE`, returns `projectionPatterns` representing relative weights for the samples from the new data in this previously defined feature space, or set of feature spaces. The full output of the base projectR function, i.e. `full=TRUE`, returns `projectionFit`, a list containing `projectionPatterns` and `Projection`. The `Projection` object contains additional information from the proceedure used to obtain the `projectionPatterns`. For the the the base projectR function, `Projection` is the full `lmFit` model from the package `r BiocStyle::Biocpkg("limma")`. + +# PCA projection +Projection of principal components is achieved by matrix multiplication of a new data set by previously generated eigenvectors, or gene loadings. If the original data were standardized such that each gene is centered to zero average expression level, the principal components are normalized eigenvectors of the covariance matrix of the genes. Each PC is ordered according to how much of the variation present in the data they contain. Projection of the original samples into each PC will maximize the variance of the samples in the direction of that component and uncorrelated to previous components. Projection of new data places the new samples into the PCs defined by the original data. Because the components define an orthonormal basis set, they provide an isomorphism between a vector space, $V$, and $\Re^n$ which preserves inner products. If $V$ is an inner product space over $\Re$ with orthonormal basis $B = v_1,...,v_n$ and $v \epsilon V s.t [v]_B = (r_1,...,r_n)$, then finding the coordinate of $v_i$ in $v$ is precisely the inner product of $v$ with $v_i$, i.e. $r_i = \langle v,v_i \rangle$. This formulation is implemented for only those genes belonging to both the new data and the PC space. The **`projectR`** function has S4 method for class `prcomp`. + +## Obtaining PCs to project. +```{r prcomp, warning=FALSE} +# data to define PCs +library(projectR) +data(p.RNAseq6l3c3t) + +# do PCA on RNAseq6l3c3t expression data +pc.RNAseq6l3c3t<-prcomp(t(p.RNAseq6l3c3t)) +pcVAR <- round(((pc.RNAseq6l3c3t$sdev)^2/sum(pc.RNAseq6l3c3t$sdev^2))*100,2) +dPCA <- data.frame(cbind(pc.RNAseq6l3c3t$x,pd.RNAseq6l3c3t)) + +#plot pca +library(ggplot2) +setCOL <- scale_colour_manual(values = c("blue","black","red"), name="Condition:") +setFILL <- scale_fill_manual(values = c("blue","black","red"),guide = FALSE) +setPCH <- scale_shape_manual(values=c(23,22,25,25,21,24),name="Cell Line:") + +pPCA <- ggplot(dPCA, aes(x=PC1, y=PC2, colour=ID.cond, shape=ID.line, + fill=ID.cond)) + + geom_point(aes(size=days),alpha=.6)+ + setCOL + setPCH + setFILL + + scale_size_area(breaks = c(2,4,6), name="Day") + + theme(legend.position=c(0,0), legend.justification=c(0,0), + legend.direction = "horizontal", + panel.background = element_rect(fill = "white",colour=NA), + legend.background = element_rect(fill = "transparent",colour=NA), + plot.title = element_text(vjust = 0,hjust=0,face="bold")) + + labs(title = "PCA of hPSC PolyA RNAseq", + x=paste("PC1 (",pcVAR[1],"% of varience)",sep=""), + y=paste("PC2 (",pcVAR[2],"% of varience)",sep="")) +``` + +## Projecting prcomp objects +```{r projectR.prcomp, warning=FALSE} +# data to project into PCs from RNAseq6l3c3t expression data +data(p.ESepiGen4c1l) + +library(projectR) +PCA2ESepi <- projectR(data = p.ESepiGen4c1l$mRNA.Seq,loadings=pc.RNAseq6l3c3t, +full=TRUE, dataNames=map.ESepiGen4c1l[["GeneSymbols"]]) + +pd.ESepiGen4c1l<-data.frame(Condition=sapply(colnames(p.ESepiGen4c1l$mRNA.Seq), + function(x) unlist(strsplit(x,'_'))[1]),stringsAsFactors=FALSE) +pd.ESepiGen4c1l$color<-c(rep("red",2),rep("green",3),rep("blue",2),rep("black",2)) +names(pd.ESepiGen4c1l$color)<-pd.ESepiGen4c1l$Cond + +dPCA2ESepi<- data.frame(cbind(t(PCA2ESepi[[1]]),pd.ESepiGen4c1l)) + +#plot pca +library(ggplot2) +setEpiCOL <- scale_colour_manual(values = c("red","green","blue","black"), + guide = guide_legend(title="Lineage")) + +pPC2ESepiGen4c1l <- ggplot(dPCA2ESepi, aes(x=PC1, y=PC2, colour=Condition)) + + geom_point(size=5) + setEpiCOL + + theme(legend.position=c(0,0), legend.justification=c(0,0), + panel.background = element_rect(fill = "white"), + legend.direction = "horizontal", + plot.title = element_text(vjust = 0,hjust=0,face="bold")) + + labs(title = "Encode RNAseq in target PC1 & PC2", + x=paste("Projected PC1 (",round(PCA2ESepi[[2]][1],2),"% of varience)",sep=""), + y=paste("Projected PC2 (",round(PCA2ESepi[[2]][2],2),"% of varience)",sep="")) + +``` + +```{r, fig.show='hold', fig.width=10, fig.height=5, echo=FALSE, message= FALSE} +library(gridExtra) +#grid.arrange(pPCA,pPC2ESepiGen4c1l,nrow=1) +``` + +# NMF projection +NMF decomposes a data matrix of $D$ with $N$ genes as rows and $M$ samples as columns, into two matrices, as $D ~ AP$. The pattern matrix P has rows associated with BPs in samples and the amplitude matrix A has columns indicating the relative association of a given gene, where the total number of BPs (k) is an input parameter. CoGAPS and GWCoGAPS seek a pattern matrix (${\bf{P}}$) and the corresponding distribution matrix of weights (${\bf{A}}$) whose product forms a mock data matrix (${\bf{M}}$) that represents the gene-wise data ${\bf{D}}$ within noise limits ($\boldsymbol{\varepsilon}$). That is, +\begin{equation} +{\bf{D}} = {\bf{M}} + \boldsymbol{\varepsilon} = {\bf{A}}{\bf{P}} + \boldsymbol{\varepsilon} ..............(1) +\label{eq:matrixDecomp} +\end{equation} +The number of rows in ${\bf{P}}$ (columns in ${\bf{A}}$) defines the number of biological patterns (k) that CoGAPS/GWCoGAPS will infer from the number of nonorthogonal basis vectors required to span the data space. As in the Bayesian Decomposition algorithm @Ochs2006, the matrices ${\bf{A}}$ and ${\bf{P}}$ in CoGAPS are assumed to have the atomic prior described in @Sibisi1997. In the CoGAPS/GWCoGAPS implementation, $\alpha_{A}$ and $\alpha_{P}$ are corresponding parameters for the expected number of atoms which map to each matrix element in ${\bf{A}}$ and ${\bf{P}}$, respectively. The corresponding matrices ${\bf{A}}$ and ${\bf{P}}$ are found by MCMC sampling. + +Projection of CoGAPS/GWCoGAPS patterns is implemented by solving the factorization in (1) for the new data matrix where ${\bf{A}}$ is the fixed nonorthogonal basis vectors comprising the average of the posterior mean for the CoGAPS/GWCoGAPS simulations performed on the original data. The patterns ${\bf{P}}$ in the new data associated with this amplitude matrix is estimated using the least-squares fit to the new data implemented with the `lmFit` function in the `r BiocStyle::Biocpkg("limma")` package. The `projectR` function has S4 method for class `Linear Embedding Matrix, LME`. + +``` +library(projectR) +projectR(data, loadings,dataNames = NULL, loadingsNames = NULL, + NP = NA, full = FALSE) +``` + +### Input Arguments +The inputs that must be set each time are only the data and patterns, with all other inputs having default values. However, inconguities between gene names--rownames of the loadings object and either rownames of the data object will throw errors and, subsequently, should be checked before running. + +The arguments are as follows: + +**`data`** a target dataset to be projected into the pattern space +**`loadings`** a CogapsResult object +**`dataNames`** rownames (eg. gene names) of the target dataset, if different from existing rownames of data +**`loadingsNames`** loadingsNames rownames (eg. gene names) of the loadings to be matched with dataNames +**`NP`** vector of integers indicating which columns of loadings object to use. The default of NP = NA will use entire matrix. +**`full`** logical indicating whether to return the full model solution. By default only the new pattern object is returned. + + +### Output +The basic output of the base projectR function, i.e. `full=FALSE`, returns `projectionPatterns` representing relative weights for the samples from the new data in this previously defined feature space, or set of feature spaces. The full output of the base projectR function, i.e. `full=TRUE`, returns `projectionFit`, a list containing `projectionPatterns` and `Projection`. The `Projection` object contains additional information from the procedure used to obtain the `projectionPatterns`. For the the the base projectR function, `Projection` is the full `lmFit` model from the package `r BiocStyle::Biocpkg('limma')`. + +## Obtaining CoGAPS patterns to project. + +```{r} +# get data +library(projectR) +AP <- get(data("AP.RNAseq6l3c3t")) #CoGAPS run data +AP <- AP$Amean +# heatmap of gene weights for CoGAPs patterns +library(gplots) +par(mar=c(1,1,1,1)) +pNMF<-heatmap.2(as.matrix(AP),col=bluered, trace='none', + distfun=function(c) as.dist(1-cor(t(c))) , + cexCol=1,cexRow=.5,scale = "row", + hclustfun=function(x) hclust(x, method="average") + ) +``` + +## Projecting CoGAPS objects +```{r} +# data to project into PCs from RNAseq6l3c3t expression data +library(projectR) +data('p.ESepiGen4c1l4') +data('p.RNAseq6l3c3t') + +NMF2ESepi <- projectR(p.ESepiGen4c1l$mRNA.Seq,loadings=AP,full=TRUE, + dataNames=map.ESepiGen4c1l[["GeneSymbols"]]) + +dNMF2ESepi<- data.frame(cbind(t(NMF2ESepi),pd.ESepiGen4c1l)) + +#plot pca +library(ggplot2) +setEpiCOL <- scale_colour_manual(values = c("red","green","blue","black"), +guide = guide_legend(title="Lineage")) + +pNMF2ESepiGen4c1l <- ggplot(dNMF2ESepi, aes(x=X1, y=X2, colour=Condition)) + + geom_point(size=5) + setEpiCOL + + theme(legend.position=c(0,0), legend.justification=c(0,0), + panel.background = element_rect(fill = "white"), + legend.direction = "horizontal", + plot.title = element_text(vjust = 0,hjust=0,face="bold")) + labs(title = "Encode RNAseq in target PC1 & PC2", + x=paste("Projected PC1 (",round(PCA2ESepi[[2]][1],2),"% of varience)",sep=""), + y=paste("Projected PC2 (",round(PCA2ESepi[[2]][2],2),"% of varience)",sep="")) +``` + +# Clustering projection + +As canonical projection is not defined for clustering objects, the projectR package offers two transfer learning inspired methods to achieve the "projection" of clustering objects. These methods are defined by the function used to quantify and transfer the relationships which define each cluster in the original data set to the new dataset. Briefly, `cluster2pattern` uses the corelation of each genes expression to the mean of each cluster to define continuous weights. These weights are output as a `pclust` object which can serve as input to `projectR`. Alternatively, the `intersectoR` function can be used to test for significant overlap between two clustering objects. Both `cluster2pattern` and `intersectoR` methods are coded for a generic list structure with additional S4 class methods for kmeans and hclust objects. Further details and examples are provided in the followin respecitive sections. + +## cluster2pattern + +`cluster2pattern` uses the corelation of each genes expression to the mean of each cluster to define continuous weights. + +``` +library(projectR) +data(p.RNAseq6l3c3t) + + +nP<-5 +kClust<-kmeans(t(p.RNAseq6l3c3t),centers=nP) +kpattern<-cluster2pattern(clusters = kClust, NP = nP, data = p.RNAseq6l3c3t) +kpattern + +cluster2pattern(clusters = NA, NP = NA, data = NA) +``` + +### Input Arguments +The inputs that must be set each time are the clusters and data. + +The arguments are as follows: + +**`clusters`** a clustering object +**`NP`** either the number of clusters desired or the subset of clusters to use +**`data`** data used to make clusters object + + +### Output +The output of the `cluster2pattern` function is a `pclust` class object; specifically, a matrix of genes (rows) by clusters (columns). A gene's value outside of its assigned cluster is zero. For the cluster containing a given gene, the gene's value is the correlation of the gene's expression to the mean of that cluster. + + +## intersectoR + +`intersectoR` function can be used to test for significant overlap between two clustering objects. The base function finds and tests the intersecting values of two sets of lists, presumably the genes associated with patterns in two different datasets. S4 class methods for `hclust` and `kmeans` objects are also available. + +``` +library(projectR) +intersectoR(pSet1 = NA, pSet2 = NA, pval = 0.05, full = FALSE, k = NULL) +``` + +### Input Arguments +The inputs that must be set each time are the clusters and data. + +The arguments are as follows: + +**`pSet1`** a list for a set of patterns where each entry is a set of genes associated with a single pattern +**`pSet2`** a list for a second set of patterns where each entry is a set of genes associated with a single pattern +**`pval`** the maximum p-value considered significant +**`full`** logical indicating whether to return full data frame of signigicantly overlapping sets. Default is false will return summary matrix. +**`k`** numeric giving cut height for hclust objects, if vector arguments will be applied to pSet1 and pSet2 in that order + + +### Output +The output of the `intersectoR` function is a summary matrix showing the sets with statistically significant overlap under the specified $p$-value threshold based on a hypergeometric test. If `full==TRUE` the full data frame of significantly overlapping sets will also be returned. + +# Correlation based projection + +Correlation based projection requires a matrix of gene-wise correlation values to serve as the Pattern input to the `projectR` function. This matrix can be user-generated or the result of the `correlateR` function included in the projectR package. User-generated matrixes with each row corresponding to an individual gene can be input to the generic `projectR` function. The `correlateR` function allows users to create a weight matrix for projection with values quantifying the within dataset correlation of each genes expression to the expression pattern of a particular gene or set of genes as follows. + +## correlateR + +``` +library(projectR) +correlateR(genes = NA, dat = NA, threshtype = "R", threshold = 0.7, absR = FALSE, ...) +``` + +### Input Arguments +The inputs that must be set each time are only the genes and data, with all other inputs having default values. + +The arguments are as follows: + +**`genes`** gene or character vector of genes for reference expression pattern dat +**`data`** matrix or data frame with genes to be used for to calculate correlation +**`threshtype`** Default "R" indicates thresholding by R value or equivalent. Alternatively, "N" indicates a numerical cut off. +**`threshold`** numeric indicating value at which to make threshold +**`absR`** logical indicating where to include both positive and negatively correlated genes +**`...`** addtion imputes to the cor function + + +### Output +The output of the `correlateR` function is a `correlateR` class object. Specifically, a matrix of correlation values for those genes whose expression pattern pattern in the dataset is correlated (and anti-correlated if absR=TRUE) above the value given in as the threshold arguement. As this information may be useful in its own right, it is recommended that users inspect the `correlateR` object before using it as input to the `projectR` function. + +## Obtaining and visualizing `correlateR` objects. + +```{r correlateR-exp} +# data to +library(projectR) +data("p.RNAseq6l3c3t") + +# get genes correlated to T +cor2T<-correlateR(genes="T", dat=p.RNAseq6l3c3t, threshtype="N", threshold=10, absR=TRUE) +cor2T <- cor2T@corM +### heatmap of genes more correlated to T +indx<-unlist(sapply(cor2T,rownames)) +indx <- as.vector(indx) +colnames(p.RNAseq6l3c3t)<-pd.RNAseq6l3c3t$sampleX +library(reshape2) +pm.RNAseq6l3c3t<-melt(cbind(p.RNAseq6l3c3t[indx,],indx)) + +library(gplots) +library(ggplot2) +library(viridis) +pCorT<-ggplot(pm.RNAseq6l3c3t, aes(variable, indx, fill = value)) + + geom_tile(colour="gray20", size=1.5, stat="identity") + + scale_fill_viridis(option="B") + + xlab("") + ylab("") + + scale_y_discrete(limits=indx) + + ggtitle("Ten genes most highly pos & neg correlated with T") + + theme( + panel.background = element_rect(fill="gray20"), + panel.border = element_rect(fill=NA,color="gray20", size=0.5, linetype="solid"), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + axis.line = element_blank(), + axis.ticks = element_blank(), + axis.text = element_text(size=rel(1),hjust=1), + axis.text.x = element_text(angle = 90,vjust=.5), + legend.text = element_text(color="white", size=rel(1)), + legend.background = element_rect(fill="gray20"), + legend.position = "bottom", + legend.title=element_blank() +) + +``` +```{r, fig.show='hold', fig.width=10, fig.height=5, echo=FALSE} +pCorT +``` + +## Projecting correlateR objects. +```{r} +# data to project into from RNAseq6l3c3t expression data +data(p.ESepiGen4c1l) + +library(projectR) +cor2ESepi <- projectR(p.ESepiGen4c1l$mRNA.Seq,loadings=cor2T[[1]],full=FALSE, + dataNames=map.ESepiGen4c1l$GeneSymbols) + +``` + + +# Differential features identification. + +## projectionDriveR + +Given loadings that define the weight of features (genes) in a given latent space (e.g. PCA, NMF), and the use of these patterns in samples, it is of interest to look at differential usage of these features between conditions. These conditions may be defined by user-defined annotations of cell type or by differential usage of a (projected) pattern. By examining differences in gene expression, weighted by the loadings that define their importance in a specific latent space, a unique understanding of differential expression in that context can be gained. This approach was originally proposed and developed in (Baraban et al, 2021), which demonstrates its utility in cross-celltype and cross-species interpretation of pattern usages. + +``` +library(projectR) +projectionDriveR(cellgroup1, cellgroup2, loadings, loadingsNames = NULL, + pvalue, pattern_name, display = T, normalize_pattern = T, mode = "CI") + +``` + +### Input Arguments +The required inputs are two feature by sample (e.g. gene by cell) matrices to be compared, the loadings that define the feature weights, and the name of the pattern (column of feature loadings). If applicable, the expression matrices should already be corrected for variables such as sequencing depth. + +The arguments for projectionDriveR are: + +**`cellgroup1`** Matrix 1 with features as rows, samples as columns. +**`cellgroup2`** Matrix 2 with features as rows, samples as columns. +**`loadings`** Matrix or dataframe with features as rows, columns as patterns. Values define feature weights in that space +**`loadingsNames`** Vector of names corresponding to rows of loadings. By default the rownames of loadings will be used +**`pattern_name`** the column name of the loadings by which the features will be weighted +**`pvalue`** Determines the significance of the confidence interval to be calculated between the difference of means +**`display`** Boolean. Whether or not to plot the estimates of significant features. Default = T +**`normalize_pattern`** Boolean. Whether or not to normalize the average feature weight. Default = T +**`mode`** 'CI' or 'PV'. Specifies whether to run projectionDriveR in confidence interval mode or to generate p values. Default = "CI" + +### Output +The output of `projectionDriveR` is a list of length five `mean_ci` holds the confidence intervals for the difference in means for all features, `weighted_mean_ci` holds the confidence intervals for the weighted difference in means for all features, and normalized_weights are the weights themselves. In addition, `sig_genes` is a list of three vectors of gene names that are significantly different at the threshold provided generated from the mean confidence intervals (`unweighted_sig_genes`), the weighted mean confidence intervals (`weighted_sig_genes`) and genes shared between the two (`significant_shared_genes`) . `plotted_ci` returns the ggplot figure of the confidence intervals, see `plotConfidenceIntervals` for documentation. + +### Identifying differential features associated with learned patterns + + +```{r projectionDriver, message = F, out.width="100%"} +options(width = 60) +library(projectR) +library(dplyr, warn.conflicts = F) +library(magick) + +#gene weights x pattern +data("retinal_patterns") + +#size-normed, log expression +data("microglial_counts") + +#size-normed, log expression +data("glial_counts") + +#the features by which to weight the difference in expression +pattern_to_weight <- "Pattern.24" +drivers <- projectionDriveR(microglial_counts, #expression matrix + glial_counts, #expression matrix + loadings = retinal_patterns, #feature x pattern dataframe + loadingsNames = NULL, + pattern_name = pattern_to_weight, #column name + pvalue = 1e-5, #pvalue before bonferroni correction + display = T, + normalize_pattern = T, #normalize feature weights + mode = "CI") #confidence interval mode + + +conf_intervals <- drivers$mean_ci[drivers$sig_genes$significant_shared_genes,] + +str(conf_intervals) + +``` + +## plotConfidenceIntervals + +### Input +The arguments for plotConfidenceIntervals are: + +**`confidence_intervals`** A dataframe of features x estimates +**`interval_name`** names of columns that contain the low and high estimates, respectively. (default: c("low","high")) +**`pattern_name`** string to use as the title for the plots +**`sort`** Boolean. Whether or not to sort genes by their estimates (default = T) +**`genes`** a vector with names of genes to include in plot. If sort=F, estimates will be plotted in this order (default = NULL will include all genes.) +**`weights`** weights of features to include as annotation (default = NULL will not include heatmap) +**`weights_clip`** quantile of data to clip color scale for improved visualization (default: 0.99) +**`weights_vis_norm`** Which processed version of weights to visualize as a heatmap. One of c("none", "quantile"). default = "none" +**`weighted`** Boolean. Specifies whether confidence intervals are weighted by a pattern or not. Default = "F" + +### Output +A list of the length three that includes confidence interval plots and relevant info. `ci_estimates_plot` is the point-range plot for the provided estimates. If called from within `projectionDriveR`, the unweighted estimates are used. `feature_order` is the vector of gene names in the order shown in the figure. `weights_heatmap` is a heatmap annotation of the gene loadings, in the same order as above. + +### Customize plotting of confidence intervals + +```{r} +suppressWarnings(library(cowplot)) +#order in ascending order of estimates +conf_intervals <- conf_intervals %>% mutate(mid = (high+low)/2) %>% arrange(mid) +gene_order <- rownames(conf_intervals) + +#add text labels for top and bottom n genes +conf_intervals$label_name <- NA_character_ +n <- 2 +idx <- c(1:n, (dim(conf_intervals)[1]-(n-1)):dim(conf_intervals)[1]) +gene_ids <- gene_order[idx] +conf_intervals$label_name[idx] <- gene_ids + +#the labels above can now be used as ggplot aesthetics +plots_list <- plotConfidenceIntervals(conf_intervals, #mean difference in expression confidence intervals + sort = F, #should genes be sorted by estimates + weights = drivers$normalized_weights[rownames(conf_intervals)], + pattern_name = pattern_to_weight, + weights_clip = 0.99, + weights_vis_norm = "none") + +pl1 <- plots_list[["ci_estimates_plot"]] + + ggrepel::geom_label_repel(aes(label = label_name), max.overlaps = 20, force = 50) + +pl2 <- plots_list[["weights_heatmap"]] + +#now plot the weighted differences +weighted_conf_intervals <- drivers$weighted_mean_ci[gene_order,] +plots_list_weighted <- plotConfidenceIntervals(weighted_conf_intervals, + sort = F, + pattern_name = pattern_to_weight, + weighted = T) + +pl3 <- plots_list_weighted[["ci_estimates_plot"]] + + xlab("Difference in weighted group means") + + theme(axis.title.y = element_blank(), axis.ticks.y = element_blank(), axis.text.y = element_blank()) + +cowplot::plot_grid(pl1, pl2, pl3, align = "h", rel_widths = c(1,.4, 1), ncol = 3) + + + +``` + +## multivariateAnalysisR + +This function performs multivariate analysis on different clusters within a dataset, which in this case is restricted to Seurat Object. Clusters are identified as those satisfying the conditions specified in their corresponding dictionaries. `multivariateAnalysisR` performs two tests: Analysis of Variance (ANOVA) and Confidence Interval (CI) evaluations. ANOVA is performed to understand the general differentiation between clusters through the lens of a specified pattern. CI is to visualize pair-wise differential expressions between two clusters for each pattern. Researchers can visually understand both the macroscopic and microscopic differential uses of each pattern across different clusters through this function. + +``` +library(projectR) +multivariateAnalysisR <- function(significanceLevel = 0.05, patternKeys, seuratobj, + dictionaries, customNames = NULL, exclusive = TRUE, + exportFolder = "", ANOVAwidth = 1000, + ANOVAheight = 1000, CIwidth = 1000, CIheight = 1000, + CIspacing = 1) +``` + +### Input Arguments +The required inputs are `patternKeys` (list of strings indicating the patterns to be evaluated), `seuratobj` (the Seurat Object data containing both clusters and patterns), and `dictionaries` (list of dictionary where each dictionary indicates the conditions each corresponding cluster has to satisfy). + +The arguments for `multivariateAnalysisR` are: + +**`significanceLevel`** Double value for testing significance in ANOVA test. +**`patternKeys`** List of strings indicating pattern subsets from seuratobj to be analyzed. +**`seuratobj`** Seurat Object Data containing patternKeys in meta.data. +**`dictionaries`** List of dictionaries indicating clusters to be compared. +**`customNames`** List of custom names for clusters in corresponding order. +**`exclusive`** Boolean value for determining interpolation between params in clusters. +**`exportFolder`** Name of folder to store exported graphs and CSV files. +**`ANOVAwidth`** Width of ANOVA png. +**`ANOVAheight`** Height of ANOVA png. +**`CIwidth`** Width of CI png. +**`CIheight`** Height of CI png. +**`CIspacing`** Spacing between each CI in CI graph. + + +### Output +`multivariateAnalysisR` returns a sorted list of the generated ANOVA and CI values. It also exports two pairs of exported PNG/CSV files: one for ANOVA analysis, another for CI. From the ANOVA analysis, researchers can see the general ranking of differential uses of patterns across the specified clusters. From the CI analysis, researchers can identify the specific differential use cases between every pair of clusters. + +### Comparing differential uses of patterns across different clusters +Demonstrative example will be added soon. + +# References + + + + + diff --git a/vignettes/projectR.bib b/vignettes/projectR.bib new file mode 100644 index 0000000..2b203e8 --- /dev/null +++ b/vignettes/projectR.bib @@ -0,0 +1,99 @@ +%% Created using Papers on Tue, 21 Mar 2017. +%% http://papersapp.com/papers/ + +@article{Li:2004ey, +author = {Li, Q and Ye, J and Kambhamettu, C}, +title = {{Linear projection methods in face recognition under unconstrained illuminations: A comparative study}}, +journal = {Computer Vision and Pattern {\ldots}}, +year = {2004} +} + +@article{Fertig:2010ei, +author = {Fertig, Elana J and Ding, Jie and Favorov, Alexander V and Parmigiani, Giovanni and Ochs, Michael F}, +title = {{CoGAPS: an R/C++ package to identify patterns and biological process activity in transcriptomic data.}}, +journal = {Bioinformatics}, +year = {2010}, +volume = {26}, +number = {21}, +pages = {2792--2793}, +month = nov +} + +@article{Baffi:1999jz, +author = {Baffi, G and Martin, E B and Morris, A J}, +title = {{Non-linear projection to latent structures revisited: the quadratic PLS algorithm}}, +journal = {Computers {\&} Chemical Engineering}, +year = {1999}, +volume = {23}, +number = {3}, +pages = {395--411}, +month = feb +} + +@book{Anonymous:kur3KWsv, +title = {{Non-Standard Parameter Adaptation for Exploratory Data Analysis}} +} + +@article{Pan:2010dm, +author = {Pan, Sinno Jialin and Yang, Qiang}, +title = {{A Survey on Transfer Learning}}, +journal = {IEEE Transactions on Knowledge and Data Engineering}, +year = {2010}, +volume = {22}, +number = {10}, +pages = {1345--1359} +} + +@incollection{Barbakh:2009bw, +author = {Barbakh, Wesam Ashour and Wu, Ying and Fyfe, Colin}, +title = {{Review of Linear Projection Methods}}, +booktitle = {Non-Standard Parameter Adaptation for Exploratory Data Analysis}, +year = {2009}, +pages = {29--48}, +publisher = {Springer Berlin Heidelberg}, +address = {Berlin, Heidelberg} +} + +@article{Smyth:2004vq, +author = {Smyth, Gordon K}, +title = {{Linear models and empirical bayes methods for assessing differential expression in microarray experiments}}, +journal = {Stat Appl Genet Mol Biol}, +year = {2004}, +volume = {3}, +number = {1}, +pages = {3} +} + +@article{Sibisi1997, +author = {Sibisi, Sibusiso and Skilling, John}, +title = {Prior Distributions on Measure Space}, +journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, +volume = {59}, +number = {1}, +pages = {217-235}, +keywords = {density estimation, infinite divisibility, infinitely divisible process, kernel function, Lévy measure, π-process, spatial correlation}, +doi = {10.1111/1467-9868.00065}, +url = { +https://rss.onlinelibrary.wiley.com/doi/abs/10.1111/1467-9868.00065}, +eprint = {https://rss.onlinelibrary.wiley.com/doi/pdf/10.1111/1467-9868.00065}, +abstract = {A measure is the formal representation of the non-negative additive functions that abound in science. We review and develop the art of assigning Bayesian priors to measures. Where necessary, spatial correlation is delegated to correlating kernels imposed on otherwise uncorrelated priors. The latter must be infinitely divisible (ID) and hence described by the Lévy–Khinchin representation. Thus the fundamental object is the Lévy measure, the choice of which corresponds to different ID process priors. The general case of a Lévy measure comprising a mixture of assigned base measures leads to a prior process comprising a convolution of corresponding processes. Examples involving a single base measure are the gamma process, the Dirichlet process (for the normalized case) and the Poisson process. We also discuss processes that we call the supergamma and super-Dirichlet processes, which are double base measure generalizations of the gamma and Dirichlet processes. Examples of multiple and continuum base measures are also discussed. We conclude with numerical examples of density estimation.}, +year = {1997} +} + +@article{Ochs2006, +author="Wang, Guoli +and Kossenkov, Andrew V. +and Ochs, Michael F.", +title="LS-NMF: A modified non-negative matrix factorization algorithm utilizing uncertainty estimates", +journal="BMC Bioinformatics", +year="2006", +month="Mar", +day="28", +volume="7", +number="1", +pages="175", +abstract="Non-negative matrix factorisation (NMF), a machine learning algorithm, has been applied to the analysis of microarray data. A key feature of NMF is the ability to identify patterns that together explain the data as a linear combination of expression signatures. Microarray data generally includes individual estimates of uncertainty for each gene in each condition, however NMF does not exploit this information. Previous work has shown that such uncertainties can be extremely valuable for pattern recognition.", +issn="1471-2105", +doi="10.1186/1471-2105-7-175", +url="https://doi.org/10.1186/1471-2105-7-175" +} diff --git a/vignettes/projectR.html b/vignettes/projectR.html new file mode 100644 index 0000000..4fc7ea7 --- /dev/null +++ b/vignettes/projectR.html @@ -0,0 +1,1410 @@ + + + + + + + + + + + + + + + + + + + +projectR Vignette + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + +

Contents

+ + +
+

1 Introduction

+

Technological advances continue to spur the exponential growth of biological data as illustrated by the rise of the omics—genomics, transcriptomics, epigenomics, proteomics, etc.—each with there own high throughput technologies. In order to leverage the full power of these resources, methods to integrate multiple data sets and data types must be developed. The reciprocal nature of the genomic, transcriptomic, epigenomic, and proteomic biology requires that the data provides a complementary view of cellular function and regulatory organization; however, the technical heterogeneity and massive size of high-throughput data even within a particular omic makes integrated analysis challenging. To address these challenges, we developed projectR, an R package for integrated analysis of high dimensional omic data. projectR uses the relationships defined within a given high dimensional data set, to interrogate related biological phenomena in an entirely new data set. By relying on relative comparisons within data type, projectR is able to circumvent many issues arising from technological variation. For a more extensive example of how the tools in the projectR package can be used for in silico experiments, or additional information on the algorithm, see Stein-O’Brien, et al and Sharma, et al.

+
+
+

2 Getting started with projectR

+
+

2.1 Installation Instructions

+

For automatic Bioconductor package installation, start R, and run:

+
BiocManager::install("genesofeve/projectR@projectionDriveR")
+
+
+

2.2 Methods

+

Projection can roughly be defined as a mapping or transformation of points from one space to another often lower dimensional space. Mathematically, this can described as a function \(\varphi(x)=y : \Re^{D} \mapsto \Re^{d}\) s.t. \(d \leq D\) for \(x \in \Re^{D}, y \in \Re^{d}\) Barbakh, Wu, and Fyfe (2009) . The projectR package uses projection functions defined in a training dataset to interrogate related biological phenomena in an entirely new data set. These functions can be the product of any one of several methods common to “omic” analyses including regression, PCA, NMF, clustering. Individual sections focusing on one specific method are included in the vignette. However, the general design of the projectR function is the same regardless.

+
+
+

2.3 The base projectR function

+

The generic projectR function is executed as follows:

+
library(projectR)
+projectR(data, loadings, dataNames=NULL, loadingsNames=NULL, NP = NULL, full = false)
+
+

2.3.1 Input Arguments

+

The inputs that must be set each time are only the data and loadings, with all other inputs having default values. However, incongruities in the feature mapping between the data and loadings, i.e. a different format for the rownames of each object, will throw errors or result in an empty mapping and should be checked before running. To overcoming mismatched feature names in the objects themselves, the dataNames and loadingNames arguments can be manually supplied by the user.

+

The arguments are as follows:
+data a dataset to be projected into the pattern space
+loadings a matrix of continous values with unique rownames to be projected
+dataNames a vector containing unique name, i.e. gene names, for the rows of the target dataset to be used to match features with the loadings, if not provided by rownames(data). Order of names in vector must match order of rows in data.
+loadingsNames a vector containing unique names, i.e. gene names, for the rows of loadings to be used to match features with the data, if not provided by rownames(loadings). Order of names in vector must match order of rows in loadings.
+NP vector of integers indicating which columns of loadings object to use. The default of NP = NA will use entire matrix.
+full logical indicating whether to return the full model solution. By default only the new pattern object is returned.

+

The loadings argument in the generic projectR function is suitable for use with any genernal feature space, or set of feature spaces, whose rows annotation links them to the data to be projected. Ex: the coeffients associated with individual genes as the result of regression analysis or the amplituded values of individual genes as the result of non-negative matrix factorization (NMF).

+
+
+

2.3.2 Output

+

The basic output of the base projectR function, i.e. full=FALSE, returns projectionPatterns representing relative weights for the samples from the new data in this previously defined feature space, or set of feature spaces. The full output of the base projectR function, i.e. full=TRUE, returns projectionFit, a list containing projectionPatterns and Projection. The Projection object contains additional information from the proceedure used to obtain the projectionPatterns. For the the the base projectR function, Projection is the full lmFit model from the package limma.

+
+
+
+
+

3 PCA projection

+

Projection of principal components is achieved by matrix multiplication of a new data set by previously generated eigenvectors, or gene loadings. If the original data were standardized such that each gene is centered to zero average expression level, the principal components are normalized eigenvectors of the covariance matrix of the genes. Each PC is ordered according to how much of the variation present in the data they contain. Projection of the original samples into each PC will maximize the variance of the samples in the direction of that component and uncorrelated to previous components. Projection of new data places the new samples into the PCs defined by the original data. Because the components define an orthonormal basis set, they provide an isomorphism between a vector space, \(V\), and \(\Re^n\) which preserves inner products. If \(V\) is an inner product space over \(\Re\) with orthonormal basis \(B = v_1,...,v_n\) and \(v \epsilon V s.t [v]_B = (r_1,...,r_n)\), then finding the coordinate of \(v_i\) in \(v\) is precisely the inner product of \(v\) with \(v_i\), i.e. \(r_i = \langle v,v_i \rangle\). This formulation is implemented for only those genes belonging to both the new data and the PC space. The projectR function has S4 method for class prcomp.

+
+

3.1 Obtaining PCs to project.

+
# data to define PCs
+library(projectR)
+data(p.RNAseq6l3c3t)
+
+# do PCA on RNAseq6l3c3t expression data
+pc.RNAseq6l3c3t<-prcomp(t(p.RNAseq6l3c3t))
+pcVAR <- round(((pc.RNAseq6l3c3t$sdev)^2/sum(pc.RNAseq6l3c3t$sdev^2))*100,2)
+dPCA <- data.frame(cbind(pc.RNAseq6l3c3t$x,pd.RNAseq6l3c3t))
+
+#plot pca
+library(ggplot2)
+setCOL <- scale_colour_manual(values = c("blue","black","red"), name="Condition:")
+setFILL <- scale_fill_manual(values = c("blue","black","red"),guide = FALSE)
+setPCH <- scale_shape_manual(values=c(23,22,25,25,21,24),name="Cell Line:")
+
+pPCA <- ggplot(dPCA, aes(x=PC1, y=PC2, colour=ID.cond, shape=ID.line,
+        fill=ID.cond)) +
+        geom_point(aes(size=days),alpha=.6)+
+        setCOL + setPCH  + setFILL +
+        scale_size_area(breaks = c(2,4,6), name="Day") +
+        theme(legend.position=c(0,0), legend.justification=c(0,0),
+              legend.direction = "horizontal",
+              panel.background = element_rect(fill = "white",colour=NA),
+              legend.background = element_rect(fill = "transparent",colour=NA),
+              plot.title = element_text(vjust = 0,hjust=0,face="bold")) +
+        labs(title = "PCA of hPSC PolyA RNAseq",
+            x=paste("PC1 (",pcVAR[1],"% of varience)",sep=""),
+            y=paste("PC2 (",pcVAR[2],"% of varience)",sep=""))
+
+
+

3.2 Projecting prcomp objects

+
# data to project into PCs from RNAseq6l3c3t expression data
+data(p.ESepiGen4c1l)
+
+library(projectR)
+PCA2ESepi <- projectR(data = p.ESepiGen4c1l$mRNA.Seq,loadings=pc.RNAseq6l3c3t,
+full=TRUE, dataNames=map.ESepiGen4c1l[["GeneSymbols"]])
+
## [1] "93 row names matched between data and loadings"
+## [1] "Updated dimension of data: 93 9"
+
pd.ESepiGen4c1l<-data.frame(Condition=sapply(colnames(p.ESepiGen4c1l$mRNA.Seq),
+  function(x) unlist(strsplit(x,'_'))[1]),stringsAsFactors=FALSE)
+pd.ESepiGen4c1l$color<-c(rep("red",2),rep("green",3),rep("blue",2),rep("black",2))
+names(pd.ESepiGen4c1l$color)<-pd.ESepiGen4c1l$Cond
+
+dPCA2ESepi<- data.frame(cbind(t(PCA2ESepi[[1]]),pd.ESepiGen4c1l))
+
+#plot pca
+library(ggplot2)
+setEpiCOL <- scale_colour_manual(values = c("red","green","blue","black"),
+  guide = guide_legend(title="Lineage"))
+
+pPC2ESepiGen4c1l <- ggplot(dPCA2ESepi, aes(x=PC1, y=PC2, colour=Condition)) +
+  geom_point(size=5) + setEpiCOL +
+  theme(legend.position=c(0,0), legend.justification=c(0,0),
+  panel.background = element_rect(fill = "white"),
+  legend.direction = "horizontal",
+  plot.title = element_text(vjust = 0,hjust=0,face="bold")) +
+  labs(title = "Encode RNAseq in target PC1 & PC2",
+  x=paste("Projected PC1 (",round(PCA2ESepi[[2]][1],2),"% of varience)",sep=""),
+  y=paste("Projected PC2 (",round(PCA2ESepi[[2]][2],2),"% of varience)",sep=""))
+
+
+
+

4 NMF projection

+

NMF decomposes a data matrix of \(D\) with \(N\) genes as rows and \(M\) samples as columns, into two matrices, as \(D ~ AP\). The pattern matrix P has rows associated with BPs in samples and the amplitude matrix A has columns indicating the relative association of a given gene, where the total number of BPs (k) is an input parameter. CoGAPS and GWCoGAPS seek a pattern matrix (\({\bf{P}}\)) and the corresponding distribution matrix of weights (\({\bf{A}}\)) whose product forms a mock data matrix (\({\bf{M}}\)) that represents the gene-wise data \({\bf{D}}\) within noise limits (\(\boldsymbol{\varepsilon}\)). That is, +\[\begin{equation} +{\bf{D}} = {\bf{M}} + \boldsymbol{\varepsilon} = {\bf{A}}{\bf{P}} + \boldsymbol{\varepsilon} ..............(1) +\label{eq:matrixDecomp} +\end{equation}\] +The number of rows in \({\bf{P}}\) (columns in \({\bf{A}}\)) defines the number of biological patterns (k) that CoGAPS/GWCoGAPS will infer from the number of nonorthogonal basis vectors required to span the data space. As in the Bayesian Decomposition algorithm Wang, Kossenkov, and Ochs (2006), the matrices \({\bf{A}}\) and \({\bf{P}}\) in CoGAPS are assumed to have the atomic prior described in Sibisi and Skilling (1997). In the CoGAPS/GWCoGAPS implementation, \(\alpha_{A}\) and \(\alpha_{P}\) are corresponding parameters for the expected number of atoms which map to each matrix element in \({\bf{A}}\) and \({\bf{P}}\), respectively. The corresponding matrices \({\bf{A}}\) and \({\bf{P}}\) are found by MCMC sampling.

+

Projection of CoGAPS/GWCoGAPS patterns is implemented by solving the factorization in (1) for the new data matrix where \({\bf{A}}\) is the fixed nonorthogonal basis vectors comprising the average of the posterior mean for the CoGAPS/GWCoGAPS simulations performed on the original data. The patterns \({\bf{P}}\) in the new data associated with this amplitude matrix is estimated using the least-squares fit to the new data implemented with the lmFit function in the limma package. The projectR function has S4 method for class Linear Embedding Matrix, LME.

+
library(projectR)
+projectR(data, loadings,dataNames = NULL, loadingsNames = NULL,
+     NP = NA, full = FALSE)
+
+

4.0.1 Input Arguments

+

The inputs that must be set each time are only the data and patterns, with all other inputs having default values. However, inconguities between gene names–rownames of the loadings object and either rownames of the data object will throw errors and, subsequently, should be checked before running.

+

The arguments are as follows:

+

data a target dataset to be projected into the pattern space
+loadings a CogapsResult object
+dataNames rownames (eg. gene names) of the target dataset, if different from existing rownames of data
+loadingsNames loadingsNames rownames (eg. gene names) of the loadings to be matched with dataNames
+NP vector of integers indicating which columns of loadings object to use. The default of NP = NA will use entire matrix.
+full logical indicating whether to return the full model solution. By default only the new pattern object is returned.

+
+
+

4.0.2 Output

+

The basic output of the base projectR function, i.e. full=FALSE, returns projectionPatterns representing relative weights for the samples from the new data in this previously defined feature space, or set of feature spaces. The full output of the base projectR function, i.e. full=TRUE, returns projectionFit, a list containing projectionPatterns and Projection. The Projection object contains additional information from the procedure used to obtain the projectionPatterns. For the the the base projectR function, Projection is the full lmFit model from the package limma.

+
+
+

4.1 Obtaining CoGAPS patterns to project.

+
# get data
+library(projectR)
+AP <- get(data("AP.RNAseq6l3c3t")) #CoGAPS run data
+AP <- AP$Amean
+# heatmap of gene weights for CoGAPs patterns
+library(gplots)
+
## 
+## Attaching package: 'gplots'
+
## The following object is masked from 'package:stats':
+## 
+##     lowess
+
par(mar=c(1,1,1,1))
+pNMF<-heatmap.2(as.matrix(AP),col=bluered, trace='none',
+          distfun=function(c) as.dist(1-cor(t(c))) ,
+          cexCol=1,cexRow=.5,scale = "row",
+          hclustfun=function(x) hclust(x, method="average")
+      )
+

+
+
+

4.2 Projecting CoGAPS objects

+
# data to project into PCs from RNAseq6l3c3t expression data
+library(projectR)
+data('p.ESepiGen4c1l4')
+
## Warning in data("p.ESepiGen4c1l4"): data set 'p.ESepiGen4c1l4' not found
+
data('p.RNAseq6l3c3t')
+
+NMF2ESepi <- projectR(p.ESepiGen4c1l$mRNA.Seq,loadings=AP,full=TRUE,
+    dataNames=map.ESepiGen4c1l[["GeneSymbols"]])
+
## [1] "93 row names matched between data and loadings"
+## [1] "Updated dimension of data: 93 9"
+
dNMF2ESepi<- data.frame(cbind(t(NMF2ESepi),pd.ESepiGen4c1l))
+
+#plot pca
+library(ggplot2)
+setEpiCOL <- scale_colour_manual(values = c("red","green","blue","black"),
+guide = guide_legend(title="Lineage"))
+
+pNMF2ESepiGen4c1l <- ggplot(dNMF2ESepi, aes(x=X1, y=X2, colour=Condition)) +
+  geom_point(size=5) + setEpiCOL +
+  theme(legend.position=c(0,0), legend.justification=c(0,0),
+  panel.background = element_rect(fill = "white"),
+  legend.direction = "horizontal",
+  plot.title = element_text(vjust = 0,hjust=0,face="bold"))
+  labs(title = "Encode RNAseq in target PC1 & PC2",
+  x=paste("Projected PC1 (",round(PCA2ESepi[[2]][1],2),"% of varience)",sep=""),
+  y=paste("Projected PC2 (",round(PCA2ESepi[[2]][2],2),"% of varience)",sep=""))
+
## $x
+## [1] "Projected PC1 (18.37% of varience)"
+## 
+## $y
+## [1] "Projected PC2 (17.16% of varience)"
+## 
+## $title
+## [1] "Encode RNAseq in target PC1 & PC2"
+## 
+## attr(,"class")
+## [1] "labels"
+
+
+
+

5 Clustering projection

+

As canonical projection is not defined for clustering objects, the projectR package offers two transfer learning inspired methods to achieve the “projection” of clustering objects. These methods are defined by the function used to quantify and transfer the relationships which define each cluster in the original data set to the new dataset. Briefly, cluster2pattern uses the corelation of each genes expression to the mean of each cluster to define continuous weights. These weights are output as a pclust object which can serve as input to projectR. Alternatively, the intersectoR function can be used to test for significant overlap between two clustering objects. Both cluster2pattern and intersectoR methods are coded for a generic list structure with additional S4 class methods for kmeans and hclust objects. Further details and examples are provided in the followin respecitive sections.

+
+

5.1 cluster2pattern

+

cluster2pattern uses the corelation of each genes expression to the mean of each cluster to define continuous weights.

+
library(projectR)
+data(p.RNAseq6l3c3t)
+
+
+nP<-5
+kClust<-kmeans(t(p.RNAseq6l3c3t),centers=nP)
+kpattern<-cluster2pattern(clusters = kClust, NP = nP, data = p.RNAseq6l3c3t)
+kpattern
+
+cluster2pattern(clusters = NA, NP = NA, data = NA)
+
+

5.1.1 Input Arguments

+

The inputs that must be set each time are the clusters and data.

+

The arguments are as follows:

+

clusters a clustering object
+NP either the number of clusters desired or the subset of clusters to use
+data data used to make clusters object

+
+
+

5.1.2 Output

+

The output of the cluster2pattern function is a pclust class object; specifically, a matrix of genes (rows) by clusters (columns). A gene’s value outside of its assigned cluster is zero. For the cluster containing a given gene, the gene’s value is the correlation of the gene’s expression to the mean of that cluster.

+
+
+
+

5.2 intersectoR

+

intersectoR function can be used to test for significant overlap between two clustering objects. The base function finds and tests the intersecting values of two sets of lists, presumably the genes associated with patterns in two different datasets. S4 class methods for hclust and kmeans objects are also available.

+
library(projectR)
+intersectoR(pSet1 = NA, pSet2 = NA, pval = 0.05, full = FALSE, k = NULL)
+
+

5.2.1 Input Arguments

+

The inputs that must be set each time are the clusters and data.

+

The arguments are as follows:

+

pSet1 a list for a set of patterns where each entry is a set of genes associated with a single pattern
+pSet2 a list for a second set of patterns where each entry is a set of genes associated with a single pattern
+pval the maximum p-value considered significant
+full logical indicating whether to return full data frame of signigicantly overlapping sets. Default is false will return summary matrix.
+k numeric giving cut height for hclust objects, if vector arguments will be applied to pSet1 and pSet2 in that order

+
+
+

5.2.2 Output

+

The output of the intersectoR function is a summary matrix showing the sets with statistically significant overlap under the specified \(p\)-value threshold based on a hypergeometric test. If full==TRUE the full data frame of significantly overlapping sets will also be returned.

+
+
+
+
+

6 Correlation based projection

+

Correlation based projection requires a matrix of gene-wise correlation values to serve as the Pattern input to the projectR function. This matrix can be user-generated or the result of the correlateR function included in the projectR package. User-generated matrixes with each row corresponding to an individual gene can be input to the generic projectR function. The correlateR function allows users to create a weight matrix for projection with values quantifying the within dataset correlation of each genes expression to the expression pattern of a particular gene or set of genes as follows.

+
+

6.1 correlateR

+
library(projectR)
+correlateR(genes = NA, dat = NA, threshtype = "R", threshold = 0.7, absR = FALSE, ...)
+
+

6.1.1 Input Arguments

+

The inputs that must be set each time are only the genes and data, with all other inputs having default values.

+

The arguments are as follows:

+

genes gene or character vector of genes for reference expression pattern dat
+data matrix or data frame with genes to be used for to calculate correlation
+threshtype Default “R” indicates thresholding by R value or equivalent. Alternatively, “N” indicates a numerical cut off.
+threshold numeric indicating value at which to make threshold
+absR logical indicating where to include both positive and negatively correlated genes
+... addtion imputes to the cor function

+
+
+

6.1.2 Output

+

The output of the correlateR function is a correlateR class object. Specifically, a matrix of correlation values for those genes whose expression pattern pattern in the dataset is correlated (and anti-correlated if absR=TRUE) above the value given in as the threshold arguement. As this information may be useful in its own right, it is recommended that users inspect the correlateR object before using it as input to the projectR function.

+
+
+
+

6.2 Obtaining and visualizing correlateR objects.

+
# data to
+library(projectR)
+data("p.RNAseq6l3c3t")
+
+# get genes correlated to T
+cor2T<-correlateR(genes="T", dat=p.RNAseq6l3c3t, threshtype="N", threshold=10, absR=TRUE)
+cor2T <- cor2T@corM
+### heatmap of genes more correlated to T
+indx<-unlist(sapply(cor2T,rownames))
+indx <- as.vector(indx)
+colnames(p.RNAseq6l3c3t)<-pd.RNAseq6l3c3t$sampleX
+library(reshape2)
+pm.RNAseq6l3c3t<-melt(cbind(p.RNAseq6l3c3t[indx,],indx))
+
## Using indx as id variables
+
library(gplots)
+library(ggplot2)
+library(viridis)
+
## Loading required package: viridisLite
+
pCorT<-ggplot(pm.RNAseq6l3c3t, aes(variable, indx, fill = value)) +
+  geom_tile(colour="gray20", size=1.5, stat="identity") +
+  scale_fill_viridis(option="B") +
+  xlab("") +  ylab("") +
+  scale_y_discrete(limits=indx) +
+  ggtitle("Ten genes most highly pos & neg correlated with T") +
+  theme(
+    panel.background = element_rect(fill="gray20"),
+    panel.border = element_rect(fill=NA,color="gray20", size=0.5, linetype="solid"),
+    panel.grid.major = element_blank(),
+    panel.grid.minor = element_blank(),
+    axis.line = element_blank(),
+    axis.ticks = element_blank(),
+    axis.text = element_text(size=rel(1),hjust=1),
+    axis.text.x = element_text(angle = 90,vjust=.5),
+    legend.text = element_text(color="white", size=rel(1)),
+    legend.background = element_rect(fill="gray20"),
+    legend.position = "bottom",
+    legend.title=element_blank()
+)
+
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
+## ℹ Please use `linewidth` instead.
+## This warning is displayed once every 8 hours.
+## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
+## generated.
+
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
+## ℹ Please use the `linewidth` argument instead.
+## This warning is displayed once every 8 hours.
+## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
+## generated.
+

+
+
+

6.3 Projecting correlateR objects.

+
# data to project into from RNAseq6l3c3t expression data
+data(p.ESepiGen4c1l)
+
+library(projectR)
+cor2ESepi <- projectR(p.ESepiGen4c1l$mRNA.Seq,loadings=cor2T[[1]],full=FALSE,
+    dataNames=map.ESepiGen4c1l$GeneSymbols)
+
## [1] "9 row names matched between data and loadings"
+## [1] "Updated dimension of data: 9 9"
+
+
+
+

7 Differential features identification.

+
+

7.1 projectionDriveR

+

Given loadings that define the weight of features (genes) in a given latent space (e.g. PCA, NMF), and the use of these patterns in samples, it is of interest to look at differential usage of these features between conditions. These conditions may be defined by user-defined annotations of cell type or by differential usage of a (projected) pattern. By examining differences in gene expression, weighted by the loadings that define their importance in a specific latent space, a unique understanding of differential expression in that context can be gained. This approach was originally proposed and developed in (Baraban et al, 2021), which demonstrates its utility in cross-celltype and cross-species interpretation of pattern usages.

+
library(projectR)
+projectionDriveR(cellgroup1, cellgroup2, loadings, loadingsNames = NULL,
+                 pvalue, pattern_name, display = T, normalize_pattern = T, mode = "CI")
+
+
+

7.1.1 Input Arguments

+

The required inputs are two feature by sample (e.g. gene by cell) matrices to be compared, the loadings that define the feature weights, and the name of the pattern (column of feature loadings). If applicable, the expression matrices should already be corrected for variables such as sequencing depth.

+

The arguments for projectionDriveR are:

+

cellgroup1 Matrix 1 with features as rows, samples as columns.
+cellgroup2 Matrix 2 with features as rows, samples as columns.
+loadings Matrix or dataframe with features as rows, columns as patterns. Values define feature weights in that space
+loadingsNames Vector of names corresponding to rows of loadings. By default the rownames of loadings will be used
+pattern_name the column name of the loadings by which the features will be weighted
+pvalue Determines the significance of the confidence interval to be calculated between the difference of means
+display Boolean. Whether or not to plot the estimates of significant features. Default = T
+normalize_pattern Boolean. Whether or not to normalize the average feature weight. Default = T
+mode ‘CI’ or ‘PV’. Specifies whether to run projectionDriveR in confidence interval mode or to generate p values. Default = “CI”

+
+
+

7.1.2 Output

+

The output of projectionDriveR is a list of length five mean_ci holds the confidence intervals for the difference in means for all features, weighted_mean_ci holds the confidence intervals for the weighted difference in means for all features, and normalized_weights are the weights themselves. In addition, sig_genes is a list of three vectors of gene names that are significantly different at the threshold provided generated from the mean confidence intervals (unweighted_sig_genes), the weighted mean confidence intervals (weighted_sig_genes) and genes shared between the two (significant_shared_genes) . plotted_ci returns the ggplot figure of the confidence intervals, see plotConfidenceIntervals for documentation.

+
+
+

7.1.3 Identifying differential features associated with learned patterns

+
options(width = 60)
+library(projectR)
+library(dplyr, warn.conflicts = F)
+library(magick)
+
+#gene weights x pattern
+data("retinal_patterns")
+
+#size-normed, log expression
+data("microglial_counts")
+
+#size-normed, log expression
+data("glial_counts")
+
+#the features by which to weight the difference in expression 
+pattern_to_weight <- "Pattern.24"
+drivers <- projectionDriveR(microglial_counts, #expression matrix
+                                       glial_counts, #expression matrix
+                                       loadings = retinal_patterns, #feature x pattern dataframe
+                                       loadingsNames = NULL,
+                                       pattern_name = pattern_to_weight, #column name
+                                       pvalue = 1e-5, #pvalue before bonferroni correction
+                                       display = T,
+                                       normalize_pattern = T, #normalize feature weights
+                                       mode = "CI") #confidence interval mode
+
## [1] "Mode: CI"
+## [1] "2996 row names matched between datasets"
+## [1] "Updated dimension of data: 2996"
+## the length of shared genes are: 253
+

+
conf_intervals <- drivers$mean_ci[drivers$sig_genes$significant_shared_genes,]
+
+str(conf_intervals)
+
## 'data.frame':    253 obs. of  3 variables:
+##  $ low : num  1.86 0.158 -0.562 -0.756 0.155 ...
+##  $ high: num  2.03943 0.26729 -0.00197 -0.18521 0.23239 ...
+##  $ gene: chr  "ENSMUSG00000026126" "ENSMUSG00000025993" "ENSMUSG00000025959" "ENSMUSG00000045658" ...
+
+
+
+

7.2 plotConfidenceIntervals

+
+

7.2.1 Input

+

The arguments for plotConfidenceIntervals are:

+

confidence_intervals A dataframe of features x estimates
+interval_name names of columns that contain the low and high estimates, respectively. (default: c(“low”,“high”)) +pattern_name string to use as the title for the plots
+sort Boolean. Whether or not to sort genes by their estimates (default = T)
+genes a vector with names of genes to include in plot. If sort=F, estimates will be plotted in this order (default = NULL will include all genes.)
+weights weights of features to include as annotation (default = NULL will not include heatmap)
+weights_clip quantile of data to clip color scale for improved visualization (default: 0.99)
+weights_vis_norm Which processed version of weights to visualize as a heatmap. One of c(“none”, “quantile”). default = “none”
+weighted Boolean. Specifies whether confidence intervals are weighted by a pattern or not. Default = “F”

+
+
+

7.2.2 Output

+

A list of the length three that includes confidence interval plots and relevant info. ci_estimates_plot is the point-range plot for the provided estimates. If called from within projectionDriveR, the unweighted estimates are used. feature_order is the vector of gene names in the order shown in the figure. weights_heatmap is a heatmap annotation of the gene loadings, in the same order as above.

+
+
+

7.2.3 Customize plotting of confidence intervals

+
suppressWarnings(library(cowplot))
+#order in ascending order of estimates
+conf_intervals <- conf_intervals %>% mutate(mid = (high+low)/2) %>% arrange(mid)
+gene_order <- rownames(conf_intervals)
+
+#add text labels for top and bottom n genes
+conf_intervals$label_name <- NA_character_
+n <- 2
+idx <- c(1:n, (dim(conf_intervals)[1]-(n-1)):dim(conf_intervals)[1])
+gene_ids <- gene_order[idx]
+conf_intervals$label_name[idx] <- gene_ids
+
+#the labels above can now be used as ggplot aesthetics
+plots_list <- plotConfidenceIntervals(conf_intervals, #mean difference in expression confidence intervals
+                                      sort = F, #should genes be sorted by estimates
+                                      weights = drivers$normalized_weights[rownames(conf_intervals)],
+                                      pattern_name = pattern_to_weight,
+                                      weights_clip = 0.99,
+                                      weights_vis_norm = "none")
+
+pl1 <- plots_list[["ci_estimates_plot"]] +
+  ggrepel::geom_label_repel(aes(label = label_name), max.overlaps = 20, force = 50)
+
+pl2 <- plots_list[["weights_heatmap"]]
+
+#now plot the weighted differences
+weighted_conf_intervals <- drivers$weighted_mean_ci[gene_order,]
+plots_list_weighted <- plotConfidenceIntervals(weighted_conf_intervals,
+                                               sort = F,
+                                               pattern_name = pattern_to_weight,
+                                               weighted = T)
+
+pl3 <- plots_list_weighted[["ci_estimates_plot"]] +
+  xlab("Difference in weighted group means") +
+  theme(axis.title.y = element_blank(), axis.ticks.y = element_blank(), axis.text.y = element_blank())
+
+cowplot::plot_grid(pl1, pl2, pl3, align = "h", rel_widths = c(1,.4, 1), ncol = 3)
+
## Warning: Removed 249 rows containing missing values
+## (`geom_label_repel()`).
+

+
+
+
+

7.3 multivariateAnalysisR

+

This function performs multivariate analysis on different clusters within a dataset, which in this case is restricted to Seurat Object. Clusters are identified as those satisfying the conditions specified in their corresponding dictionaries. multivariateAnalysisR performs two tests: Analysis of Variance (ANOVA) and Confidence Interval (CI) evaluations. ANOVA is performed to understand the general differentiation between clusters through the lens of a specified pattern. CI is to visualize pair-wise differential expressions between two clusters for each pattern. Researchers can visually understand both the macroscopic and microscopic differential uses of each pattern across different clusters through this function.

+
library(projectR)
+multivariateAnalysisR <- function(significanceLevel = 0.05, patternKeys, seuratobj,
+                                  dictionaries, customNames = NULL, exclusive = TRUE,
+                                  exportFolder = "", ANOVAwidth = 1000,
+                                  ANOVAheight = 1000, CIwidth = 1000, CIheight = 1000,
+                                  CIspacing = 1)
+
+

7.3.1 Input Arguments

+

The required inputs are patternKeys (list of strings indicating the patterns to be evaluated), seuratobj (the Seurat Object data containing both clusters and patterns), and dictionaries (list of dictionary where each dictionary indicates the conditions each corresponding cluster has to satisfy).

+

The arguments for multivariateAnalysisR are:

+

significanceLevel Double value for testing significance in ANOVA test.
+patternKeys List of strings indicating pattern subsets from seuratobj to be analyzed.
+seuratobj Seurat Object Data containing patternKeys in meta.data. +dictionaries List of dictionaries indicating clusters to be compared.
+customNames List of custom names for clusters in corresponding order. +exclusive Boolean value for determining interpolation between params in clusters. +exportFolder Name of folder to store exported graphs and CSV files. +ANOVAwidth Width of ANOVA png. +ANOVAheight Height of ANOVA png. +CIwidth Width of CI png. +CIheight Height of CI png. +CIspacing Spacing between each CI in CI graph.

+
+
+

7.3.2 Output

+

multivariateAnalysisR returns a sorted list of the generated ANOVA and CI values. It also exports two pairs of exported PNG/CSV files: one for ANOVA analysis, another for CI. From the ANOVA analysis, researchers can see the general ranking of differential uses of patterns across the specified clusters. From the CI analysis, researchers can identify the specific differential use cases between every pair of clusters.

+
+
+

7.3.3 Comparing differential uses of patterns across different clusters

+

Demonstrative example will be added soon.

+
+
+
+
+

References

+
+
+Barbakh, Wesam Ashour, Ying Wu, and Colin Fyfe. 2009. Review of Linear Projection Methods.” In Non-Standard Parameter Adaptation for Exploratory Data Analysis, 29–48. Berlin, Heidelberg: Springer Berlin Heidelberg. +
+
+Sibisi, Sibusiso, and John Skilling. 1997. “Prior Distributions on Measure Space.” Journal of the Royal Statistical Society: Series B (Statistical Methodology) 59 (1): 217–35. https://doi.org/10.1111/1467-9868.00065. +
+
+Wang, Guoli, Andrew V. Kossenkov, and Michael F. Ochs. 2006. “LS-NMF: A Modified Non-Negative Matrix Factorization Algorithm Utilizing Uncertainty Estimates.” BMC Bioinformatics 7 (1): 175. https://doi.org/10.1186/1471-2105-7-175. +
+
+
+ + + + +
+ + + + + + + + + + + + + + + + + + diff --git a/vignettes/projectR.tex b/vignettes/projectR.tex new file mode 100644 index 0000000..5789790 --- /dev/null +++ b/vignettes/projectR.tex @@ -0,0 +1,758 @@ +\documentclass[]{article} +\usepackage{lmodern} +\usepackage{amssymb,amsmath} +\usepackage{ifxetex,ifluatex} +\usepackage{fixltx2e} % provides \textsubscript +\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex + \usepackage[T1]{fontenc} + \usepackage[utf8]{inputenc} +\else % if luatex or xelatex + \ifxetex + \usepackage{mathspec} + \else + \usepackage{fontspec} + \fi + \defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase} +\fi +% use upquote if available, for straight quotes in verbatim environments +\IfFileExists{upquote.sty}{\usepackage{upquote}}{} +% use microtype if available +\IfFileExists{microtype.sty}{% +\usepackage{microtype} +\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts +}{} + + +\usepackage{longtable,booktabs} +\usepackage{graphicx} +% grffile has become a legacy package: https://ctan.org/pkg/grffile +\IfFileExists{grffile.sty}{% +\usepackage{grffile} +}{} +\makeatletter +\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi} +\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi} +\makeatother +% Scale images if necessary, so that they will not overflow the page +% margins by default, and it is still possible to overwrite the defaults +% using explicit options in \includegraphics[width, height, ...]{} +\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio} +\IfFileExists{parskip.sty}{% +\usepackage{parskip} +}{% else +\setlength{\parindent}{0pt} +\setlength{\parskip}{6pt plus 2pt minus 1pt} +} +\setlength{\emergencystretch}{3em} % prevent overfull lines +\providecommand{\tightlist}{% + \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} +\setcounter{secnumdepth}{5} + +%%% Use protect on footnotes to avoid problems with footnotes in titles +\let\rmarkdownfootnote\footnote% +\def\footnote{\protect\rmarkdownfootnote} + +%%% Change title format to be more compact +\usepackage{titling} + +% Create subtitle command for use in maketitle +\providecommand{\subtitle}[1]{ + \posttitle{ + \begin{center}\large#1\end{center} + } +} + +\setlength{\droptitle}{-2em} + +\RequirePackage[]{C:/Users/Gaurav/Documents/R/win-library/4.0/BiocStyle/resources/tex/Bioconductor} + +\bioctitle[]{projectR Vignette} + \pretitle{\vspace{\droptitle}\centering\huge} + \posttitle{\par} +\author{Gaurav Sharma, Charles Shin, Jared N. Slosberg, Loyal A. Goff and Genevieve L. Stein-O'Brien} + \preauthor{\centering\large\emph} + \postauthor{\par} + \predate{\centering\large\emph} + \postdate{\par} + \date{20 May 2022} + +% code highlighting +\definecolor{fgcolor}{rgb}{0.251, 0.251, 0.251} +\makeatletter +\@ifundefined{AddToHook}{}{\AddToHook{package/xcolor/after}{\definecolor{fgcolor}{rgb}{0.251, 0.251, 0.251}}} +\makeatother +\newcommand{\hlnum}[1]{\textcolor[rgb]{0.816,0.125,0.439}{#1}}% +\newcommand{\hlstr}[1]{\textcolor[rgb]{0.251,0.627,0.251}{#1}}% +\newcommand{\hlcom}[1]{\textcolor[rgb]{0.502,0.502,0.502}{\textit{#1}}}% +\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}% +\newcommand{\hlstd}[1]{\textcolor[rgb]{0.251,0.251,0.251}{#1}}% +\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.125,0.125,0.941}{#1}}% +\newcommand{\hlkwb}[1]{\textcolor[rgb]{0,0,0}{#1}}% +\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.251,0.251,0.251}{#1}}% +\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.878,0.439,0.125}{#1}}% +\let\hlipl\hlkwb +% +\usepackage{fancyvrb} +\newcommand{\VerbBar}{|} +\newcommand{\VERB}{\Verb[commandchars=\\\{\}]} +\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}} +% +\newenvironment{Shaded}{\begin{myshaded}}{\end{myshaded}} +% set background for result chunks +\let\oldverbatim\verbatim +\renewenvironment{verbatim}{\color{codecolor}\begin{myshaded}\begin{oldverbatim}}{\end{oldverbatim}\end{myshaded}} +% +\newcommand{\KeywordTok}[1]{\hlkwd{#1}} +\newcommand{\DataTypeTok}[1]{\hlkwc{#1}} +\newcommand{\DecValTok}[1]{\hlnum{#1}} +\newcommand{\BaseNTok}[1]{\hlnum{#1}} +\newcommand{\FloatTok}[1]{\hlnum{#1}} +\newcommand{\ConstantTok}[1]{\hlnum{#1}} +\newcommand{\CharTok}[1]{\hlstr{#1}} +\newcommand{\SpecialCharTok}[1]{\hlstr{#1}} +\newcommand{\StringTok}[1]{\hlstr{#1}} +\newcommand{\VerbatimStringTok}[1]{\hlstr{#1}} +\newcommand{\SpecialStringTok}[1]{\hlstr{#1}} +\newcommand{\ImportTok}[1]{{#1}} +\newcommand{\CommentTok}[1]{\hlcom{#1}} +\newcommand{\DocumentationTok}[1]{\hlcom{#1}} +\newcommand{\AnnotationTok}[1]{\hlcom{#1}} +\newcommand{\CommentVarTok}[1]{\hlcom{#1}} +\newcommand{\OtherTok}[1]{{#1}} +\newcommand{\FunctionTok}[1]{\hlstd{#1}} +\newcommand{\VariableTok}[1]{\hlstd{#1}} +\newcommand{\ControlFlowTok}[1]{\hlkwd{#1}} +\newcommand{\OperatorTok}[1]{\hlopt{#1}} +\newcommand{\BuiltInTok}[1]{{#1}} +\newcommand{\ExtensionTok}[1]{{#1}} +\newcommand{\PreprocessorTok}[1]{\textit{#1}} +\newcommand{\AttributeTok}[1]{{#1}} +\newcommand{\RegionMarkerTok}[1]{{#1}} +\newcommand{\InformationTok}[1]{\textcolor{messagecolor}{#1}} +\newcommand{\WarningTok}[1]{\textcolor{warningcolor}{#1}} +\newcommand{\AlertTok}[1]{\textcolor{errorcolor}{#1}} +\newcommand{\ErrorTok}[1]{\textcolor{errorcolor}{#1}} +\newcommand{\NormalTok}[1]{\hlstd{#1}} +% +\AtBeginDocument{\bibliographystyle{C:/Users/Gaurav/Documents/R/win-library/4.0/BiocStyle/resources/tex/unsrturl}} + + +\begin{document} +\maketitle + + +{ +\setcounter{tocdepth}{2} +\tableofcontents +\newpage +} +\hypertarget{introduction}{% +\section{Introduction}\label{introduction}} + +Technological advances continue to spur the exponential growth of biological data as illustrated by the rise of the omics---genomics, transcriptomics, epigenomics, proteomics, etc.---each with there own high throughput technologies. In order to leverage the full power of these resources, methods to integrate multiple data sets and data types must be developed. The reciprocal nature of the genomic, transcriptomic, epigenomic, and proteomic biology requires that the data provides a complementary view of cellular function and regulatory organization; however, the technical heterogeneity and massive size of high-throughput data even within a particular omic makes integrated analysis challenging. To address these challenges, we developed projectR, an R package for integrated analysis of high dimensional omic data. projectR uses the relationships defined within a given high dimensional data set, to interrogate related biological phenomena in an entirely new data set. By relying on relative comparisons within data type, projectR is able to circumvent many issues arising from technological variation. For a more extensive example of how the tools in the projectR package can be used for \emph{in silico} experiments, or additional information on the algorithm, see \href{https://www.sciencedirect.com/science/article/pii/S2405471219301462}{Stein-O'Brien, et al}. + +\hypertarget{getting-started-with-projectr}{% +\section{Getting started with projectR}\label{getting-started-with-projectr}} + +\hypertarget{installation-instructions}{% +\subsection{Installation Instructions}\label{installation-instructions}} + +For automatic Bioconductor package installation, start R, and run: + +\begin{verbatim} +BiocManager::install("projectR") +\end{verbatim} + +\hypertarget{methods}{% +\subsection{Methods}\label{methods}} + +Projection can roughly be defined as a mapping or transformation of points from one space to another often lower dimensional space. Mathematically, this can described as a function \(\varphi(x)=y : \Re^{D} \mapsto \Re^{d}\) s.t. \(d \leq D\) for \(x \in \Re^{D}, y \in \Re^{d}\) Barbakh, Wu, and Fyfe (2009) . The projectR package uses projection functions defined in a training dataset to interrogate related biological phenomena in an entirely new data set. These functions can be the product of any one of several methods common to ``omic'' analyses including regression, PCA, NMF, clustering. Individual sections focusing on one specific method are included in the vignette. However, the general design of the projectR function is the same regardless. + +\hypertarget{the-base-projectr-function}{% +\subsection{The base projectR function}\label{the-base-projectr-function}} + +The generic projectR function is executed as follows: + +\begin{verbatim} +library(projectR) +projectR(data, loadings, dataNames=NULL, loadingsNames=NULL, NP = NULL, full = false) +\end{verbatim} + +\hypertarget{input-arguments}{% +\subsubsection{Input Arguments}\label{input-arguments}} + +The inputs that must be set each time are only the data and loadings, with all other inputs having default values. However, incongruities in the feature mapping between the data and loadings, i.e.~a different format for the rownames of each object, will throw errors or result in an empty mapping and should be checked before running. To overcoming mismatched feature names in the objects themselves, the /code\{dataNames\} and /code\{loadingNames\} arguments can be manually supplied by the user. + +The arguments are as follows: + +\begin{description} +\item[data]{a dataset to be projected into the pattern space} +\item[loadings]{a matrix of continous values with unique rownames to be projected} +\item[dataNames]{a vector containing unique name, i.e. gene names, for the rows of the target dataset to be used to match features with the loadings, if not provided by \texttt{rownames(data)}. Order of names in vector must match order of rows in data.} +\item[loadingsNames]{a vector containing unique names, i.e. gene names, for the rows of loadings to be used to match features with the data, if not provided by \texttt{rownames(loadings)}. Order of names in vector must match order of rows in loadings.} +\item[NP]{vector of integers indicating which columns of loadings object to use. The default of NP = NA will use entire matrix.} +\item[full]{logical indicating whether to return the full model solution. By default only the new pattern object is returned.} +\end{description} + +The \texttt{loadings} argument in the generic projectR function is suitable for use with any genernal feature space, or set of feature spaces, whose rows annotation links them to the data to be projected. Ex: the coeffients associated with individual genes as the result of regression analysis or the amplituded values of individual genes as the result of non-negative matrix factorization (NMF). + +\hypertarget{output}{% +\subsubsection{Output}\label{output}} + +The basic output of the base projectR function, i.e.~\texttt{full=FALSE}, returns \texttt{projectionPatterns} representing relative weights for the samples from the new data in this previously defined feature space, or set of feature spaces. The full output of the base projectR function, i.e.~\texttt{full=TRUE}, returns \texttt{projectionFit}, a list containing \texttt{projectionPatterns} and \texttt{Projection}. The \texttt{Projection} object contains additional information from the proceedure used to obtain the \texttt{projectionPatterns}. For the the the base projectR function, \texttt{Projection} is the full lmFit model from the package \emph{\href{https://bioconductor.org/packages/3.12/limma}{limma}}. + +\hypertarget{pca-projection}{% +\section{PCA projection}\label{pca-projection}} + +Projection of principal components is achieved by matrix multiplication of a new data set by previously generated eigenvectors, or gene loadings. If the original data were standardized such that each gene is centered to zero average expression level, the principal components are normalized eigenvectors of the covariance matrix of the genes. Each PC is ordered according to how much of the variation present in the data they contain. Projection of the original samples into each PC will maximize the variance of the samples in the direction of that component and uncorrelated to previous components. Projection of new data places the new samples into the PCs defined by the original data. Because the components define an orthonormal basis set, they provide an isomorphism between a vector space, \(V\), and \(\Re^n\) which preserves inner products. If \(V\) is an inner product space over \(\Re\) with orthonormal basis \(B = v_1,...,v_n\) and \(v \epsilon V s.t [v]_B = (r_1,...,r_n)\), then finding the coordinate of \(v_i\) in \(v\) is precisely the inner product of \(v\) with \(v_i\), i.e.~\(r_i = \langle v,v_i \rangle\). This formulation is implemented for only those genes belonging to both the new data and the PC space. The \texttt{projectR} function has S4 method for class \texttt{prcomp}. + +\hypertarget{obtaining-pcs-to-project.}{% +\subsection{Obtaining PCs to project.}\label{obtaining-pcs-to-project.}} + +\begin{Shaded} +\begin{Highlighting}[] +\CommentTok{\# data to define PCs} +\KeywordTok{library}\NormalTok{(projectR)} +\KeywordTok{data}\NormalTok{(p.RNAseq6l3c3t)} + +\CommentTok{\# do PCA on RNAseq6l3c3t expression data} +\NormalTok{pc.RNAseq6l3c3t<{-}}\KeywordTok{prcomp}\NormalTok{(}\KeywordTok{t}\NormalTok{(p.RNAseq6l3c3t))} +\NormalTok{pcVAR <{-}}\StringTok{ }\KeywordTok{round}\NormalTok{(((pc.RNAseq6l3c3t}\OperatorTok{$}\NormalTok{sdev)}\OperatorTok{\^{}}\DecValTok{2}\OperatorTok{/}\KeywordTok{sum}\NormalTok{(pc.RNAseq6l3c3t}\OperatorTok{$}\NormalTok{sdev}\OperatorTok{\^{}}\DecValTok{2}\NormalTok{))}\OperatorTok{*}\DecValTok{100}\NormalTok{,}\DecValTok{2}\NormalTok{)} +\NormalTok{dPCA <{-}}\StringTok{ }\KeywordTok{data.frame}\NormalTok{(}\KeywordTok{cbind}\NormalTok{(pc.RNAseq6l3c3t}\OperatorTok{$}\NormalTok{x,pd.RNAseq6l3c3t))} + +\CommentTok{\#plot pca} +\KeywordTok{library}\NormalTok{(ggplot2)} +\NormalTok{setCOL <{-}}\StringTok{ }\KeywordTok{scale\_colour\_manual}\NormalTok{(}\DataTypeTok{values =} \KeywordTok{c}\NormalTok{(}\StringTok{"blue"}\NormalTok{,}\StringTok{"black"}\NormalTok{,}\StringTok{"red"}\NormalTok{), }\DataTypeTok{name=}\StringTok{"Condition:"}\NormalTok{)} +\NormalTok{setFILL <{-}}\StringTok{ }\KeywordTok{scale\_fill\_manual}\NormalTok{(}\DataTypeTok{values =} \KeywordTok{c}\NormalTok{(}\StringTok{"blue"}\NormalTok{,}\StringTok{"black"}\NormalTok{,}\StringTok{"red"}\NormalTok{),}\DataTypeTok{guide =} \OtherTok{FALSE}\NormalTok{)} +\NormalTok{setPCH <{-}}\StringTok{ }\KeywordTok{scale\_shape\_manual}\NormalTok{(}\DataTypeTok{values=}\KeywordTok{c}\NormalTok{(}\DecValTok{23}\NormalTok{,}\DecValTok{22}\NormalTok{,}\DecValTok{25}\NormalTok{,}\DecValTok{25}\NormalTok{,}\DecValTok{21}\NormalTok{,}\DecValTok{24}\NormalTok{),}\DataTypeTok{name=}\StringTok{"Cell Line:"}\NormalTok{)} + +\NormalTok{pPCA <{-}}\StringTok{ }\KeywordTok{ggplot}\NormalTok{(dPCA, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x=}\NormalTok{PC1, }\DataTypeTok{y=}\NormalTok{PC2, }\DataTypeTok{colour=}\NormalTok{ID.cond, }\DataTypeTok{shape=}\NormalTok{ID.line,} + \DataTypeTok{fill=}\NormalTok{ID.cond)) }\OperatorTok{+} +\StringTok{ }\KeywordTok{geom\_point}\NormalTok{(}\KeywordTok{aes}\NormalTok{(}\DataTypeTok{size=}\NormalTok{days),}\DataTypeTok{alpha=}\NormalTok{.}\DecValTok{6}\NormalTok{)}\OperatorTok{+} +\StringTok{ }\NormalTok{setCOL }\OperatorTok{+}\StringTok{ }\NormalTok{setPCH }\OperatorTok{+}\StringTok{ }\NormalTok{setFILL }\OperatorTok{+} +\StringTok{ }\KeywordTok{scale\_size\_area}\NormalTok{(}\DataTypeTok{breaks =} \KeywordTok{c}\NormalTok{(}\DecValTok{2}\NormalTok{,}\DecValTok{4}\NormalTok{,}\DecValTok{6}\NormalTok{), }\DataTypeTok{name=}\StringTok{"Day"}\NormalTok{) }\OperatorTok{+} +\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{legend.position=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{), }\DataTypeTok{legend.justification=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{),} + \DataTypeTok{legend.direction =} \StringTok{"horizontal"}\NormalTok{,} + \DataTypeTok{panel.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill =} \StringTok{"white"}\NormalTok{,}\DataTypeTok{colour=}\OtherTok{NA}\NormalTok{),} + \DataTypeTok{legend.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill =} \StringTok{"transparent"}\NormalTok{,}\DataTypeTok{colour=}\OtherTok{NA}\NormalTok{),} + \DataTypeTok{plot.title =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{vjust =} \DecValTok{0}\NormalTok{,}\DataTypeTok{hjust=}\DecValTok{0}\NormalTok{,}\DataTypeTok{face=}\StringTok{"bold"}\NormalTok{)) }\OperatorTok{+} +\StringTok{ }\KeywordTok{labs}\NormalTok{(}\DataTypeTok{title =} \StringTok{"PCA of hPSC PolyA RNAseq"}\NormalTok{,} + \DataTypeTok{x=}\KeywordTok{paste}\NormalTok{(}\StringTok{"PC1 ("}\NormalTok{,pcVAR[}\DecValTok{1}\NormalTok{],}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{),} + \DataTypeTok{y=}\KeywordTok{paste}\NormalTok{(}\StringTok{"PC2 ("}\NormalTok{,pcVAR[}\DecValTok{2}\NormalTok{],}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{))} +\end{Highlighting} +\end{Shaded} + +\hypertarget{projecting-prcomp-objects}{% +\subsection{Projecting prcomp objects}\label{projecting-prcomp-objects}} + +\begin{Shaded} +\begin{Highlighting}[] +\CommentTok{\# data to project into PCs from RNAseq6l3c3t expression data} +\KeywordTok{data}\NormalTok{(p.ESepiGen4c1l)} + +\KeywordTok{library}\NormalTok{(projectR)} +\NormalTok{PCA2ESepi <{-}}\StringTok{ }\KeywordTok{projectR}\NormalTok{(}\DataTypeTok{data =}\NormalTok{ p.ESepiGen4c1l}\OperatorTok{$}\NormalTok{mRNA.Seq,}\DataTypeTok{loadings=}\NormalTok{pc.RNAseq6l3c3t,} +\DataTypeTok{full=}\OtherTok{TRUE}\NormalTok{, }\DataTypeTok{dataNames=}\NormalTok{map.ESepiGen4c1l[[}\StringTok{"GeneSymbols"}\NormalTok{]])} +\CommentTok{\#\# [1] "93 row names matched between data and loadings"} +\CommentTok{\#\# [1] "Updated dimension of data: 93 9"} + +\NormalTok{pd.ESepiGen4c1l<{-}}\KeywordTok{data.frame}\NormalTok{(}\DataTypeTok{Condition=}\KeywordTok{sapply}\NormalTok{(}\KeywordTok{colnames}\NormalTok{(p.ESepiGen4c1l}\OperatorTok{$}\NormalTok{mRNA.Seq),} + \ControlFlowTok{function}\NormalTok{(x) }\KeywordTok{unlist}\NormalTok{(}\KeywordTok{strsplit}\NormalTok{(x,}\StringTok{\textquotesingle{}\_\textquotesingle{}}\NormalTok{))[}\DecValTok{1}\NormalTok{]),}\DataTypeTok{stringsAsFactors=}\OtherTok{FALSE}\NormalTok{)} +\NormalTok{pd.ESepiGen4c1l}\OperatorTok{$}\NormalTok{color<{-}}\KeywordTok{c}\NormalTok{(}\KeywordTok{rep}\NormalTok{(}\StringTok{"red"}\NormalTok{,}\DecValTok{2}\NormalTok{),}\KeywordTok{rep}\NormalTok{(}\StringTok{"green"}\NormalTok{,}\DecValTok{3}\NormalTok{),}\KeywordTok{rep}\NormalTok{(}\StringTok{"blue"}\NormalTok{,}\DecValTok{2}\NormalTok{),}\KeywordTok{rep}\NormalTok{(}\StringTok{"black"}\NormalTok{,}\DecValTok{2}\NormalTok{))} +\KeywordTok{names}\NormalTok{(pd.ESepiGen4c1l}\OperatorTok{$}\NormalTok{color)<{-}pd.ESepiGen4c1l}\OperatorTok{$}\NormalTok{Cond} + +\NormalTok{dPCA2ESepi<{-}}\StringTok{ }\KeywordTok{data.frame}\NormalTok{(}\KeywordTok{cbind}\NormalTok{(}\KeywordTok{t}\NormalTok{(PCA2ESepi[[}\DecValTok{1}\NormalTok{]]),pd.ESepiGen4c1l))} + +\CommentTok{\#plot pca} +\KeywordTok{library}\NormalTok{(ggplot2)} +\NormalTok{setEpiCOL <{-}}\StringTok{ }\KeywordTok{scale\_colour\_manual}\NormalTok{(}\DataTypeTok{values =} \KeywordTok{c}\NormalTok{(}\StringTok{"red"}\NormalTok{,}\StringTok{"green"}\NormalTok{,}\StringTok{"blue"}\NormalTok{,}\StringTok{"black"}\NormalTok{),} + \DataTypeTok{guide =} \KeywordTok{guide\_legend}\NormalTok{(}\DataTypeTok{title=}\StringTok{"Lineage"}\NormalTok{))} + +\NormalTok{pPC2ESepiGen4c1l <{-}}\StringTok{ }\KeywordTok{ggplot}\NormalTok{(dPCA2ESepi, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x=}\NormalTok{PC1, }\DataTypeTok{y=}\NormalTok{PC2, }\DataTypeTok{colour=}\NormalTok{Condition)) }\OperatorTok{+} +\StringTok{ }\KeywordTok{geom\_point}\NormalTok{(}\DataTypeTok{size=}\DecValTok{5}\NormalTok{) }\OperatorTok{+}\StringTok{ }\NormalTok{setEpiCOL }\OperatorTok{+} +\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{legend.position=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{), }\DataTypeTok{legend.justification=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{),} + \DataTypeTok{panel.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill =} \StringTok{"white"}\NormalTok{),} + \DataTypeTok{legend.direction =} \StringTok{"horizontal"}\NormalTok{,} + \DataTypeTok{plot.title =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{vjust =} \DecValTok{0}\NormalTok{,}\DataTypeTok{hjust=}\DecValTok{0}\NormalTok{,}\DataTypeTok{face=}\StringTok{"bold"}\NormalTok{)) }\OperatorTok{+} +\StringTok{ }\KeywordTok{labs}\NormalTok{(}\DataTypeTok{title =} \StringTok{"Encode RNAseq in target PC1 \& PC2"}\NormalTok{,} + \DataTypeTok{x=}\KeywordTok{paste}\NormalTok{(}\StringTok{"Projected PC1 ("}\NormalTok{,}\KeywordTok{round}\NormalTok{(PCA2ESepi[[}\DecValTok{2}\NormalTok{]][}\DecValTok{1}\NormalTok{],}\DecValTok{2}\NormalTok{),}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{),} + \DataTypeTok{y=}\KeywordTok{paste}\NormalTok{(}\StringTok{"Projected PC2 ("}\NormalTok{,}\KeywordTok{round}\NormalTok{(PCA2ESepi[[}\DecValTok{2}\NormalTok{]][}\DecValTok{2}\NormalTok{],}\DecValTok{2}\NormalTok{),}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{))} +\end{Highlighting} +\end{Shaded} + +\begin{verbatim} +## Warning: package 'gridExtra' was built under R version 4.0.5 +## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please +## use `guide = "none"` instead. +\end{verbatim} + +\begin{adjustwidth}{\fltoffset}{0mm} +\includegraphics[width=1\linewidth,]{E:/Projects/Fertiglab/projectR/vignettes/projectR_files/figure-latex/unnamed-chunk-2-1} \end{adjustwidth} + +\hypertarget{nmf-projection}{% +\section{NMF projection}\label{nmf-projection}} + +NMF decomposes a data matrix of \(D\) with \(N\) genes as rows and \(M\) samples as columns, into two matrices, as \(D ~ AP\). The pattern matrix P has rows associated with BPs in samples and the amplitude matrix A has columns indicating the relative association of a given gene, where the total number of BPs (k) is an input parameter. CoGAPS and GWCoGAPS seek a pattern matrix (\({\bf{P}}\)) and the corresponding distribution matrix of weights (\({\bf{A}}\)) whose product forms a mock data matrix (\({\bf{M}}\)) that represents the gene-wise data \({\bf{D}}\) within noise limits (\(\boldsymbol{\varepsilon}\)). That is, +\begin{equation} +{\bf{D}} = {\bf{M}} + \boldsymbol{\varepsilon} = {\bf{A}}{\bf{P}} + \boldsymbol{\varepsilon}. +\label{eq:matrixDecomp} +\end{equation} +The number of rows in \({\bf{P}}\) (columns in \({\bf{A}}\)) defines the number of biological patterns (k) that CoGAPS/GWCoGAPS will infer from the number of nonorthogonal basis vectors required to span the data space. As in the Bayesian Decomposition algorithm Wang, Kossenkov, and Ochs (2006), the matrices \({\bf{A}}\) and \({\bf{P}}\) in CoGAPS are assumed to have the atomic prior described in Sibisi and Skilling (1997). In the CoGAPS/GWCoGAPS implementation, \(\alpha_{A}\) and \(\alpha_{P}\) are corresponding parameters for the expected number of atoms which map to each matrix element in \({\bf{A}}\) and \({\bf{P}}\), respectively. The corresponding matrices \({\bf{A}}\) and \({\bf{P}}\) are found by MCMC sampling. + +Projection of CoGAPS/GWCoGAPS patterns is implemented by solving the factorization in \ref{eq:matrixDecomp} for the new data matrix where \({\bf{A}}\) is the fixed nonorthogonal basis vectors comprising the average of the posterior mean for the CoGAPS/GWCoGAPS simulations performed on the original data. The patterns \({\bf{P}}\) in the new data associated with this amplitude matrix is estimated using the least-squares fit to the new data implemented with the lmFit function in the \emph{\href{https://bioconductor.org/packages/3.12/limma}{limma}} package. The \texttt{projectR} function has S4 method for class \texttt{Linear Embedding Matrix, LME}. + +\begin{verbatim} +library(projectR) +projectR(data, loadings,dataNames = NULL, loadingsNames = NULL, + NP = NA, full = FALSE) +\end{verbatim} + +\hypertarget{input-arguments-1}{% +\subsubsection{Input Arguments}\label{input-arguments-1}} + +The inputs that must be set each time are only the data and patterns, with all other inputs having default values. However, inconguities between gene names--rownames of the loadings object and either rownames of the data object will throw errors and, subsequently, should be checked before running. + +The arguments are as follows: + +\begin{description} +\item[data]{a target dataset to be projected into the pattern space} +\item[loadings]{a CogapsResult object} +\item[dataNames]{rownames (eg. gene names) of the target dataset, if different from existing rownames of data} +\item[loadingsNames] loadingsNames rownames (eg. gene names) of the loadings to be matched with dataNames +\item[NP]{vector of integers indicating which columns of loadings object to use. The default of NP = NA will use entire matrix.} +\item[full]{logical indicating whether to return the full model solution. By default only the new pattern object is returned.} +\end{description} + +\hypertarget{output-1}{% +\subsubsection{Output}\label{output-1}} + +The basic output of the base projectR function, i.e.~\texttt{full=FALSE}, returns \texttt{projectionPatterns} representing relative weights for the samples from the new data in this previously defined feature space, or set of feature spaces. The full output of the base projectR function, i.e.~\texttt{full=TRUE}, returns \texttt{projectionFit}, a list containing \texttt{projectionPatterns} and \texttt{Projection}. The \texttt{Projection} object contains additional information from the procedure used to obtain the \texttt{projectionPatterns}. For the the the base projectR function, \texttt{Projection} is the full lmFit model from the package \emph{\href{https://bioconductor.org/packages/3.12/limma}{limma}}. + +\hypertarget{obtaining-cogaps-patterns-to-project.}{% +\subsection{Obtaining CoGAPS patterns to project.}\label{obtaining-cogaps-patterns-to-project.}} + +\begin{Shaded} +\begin{Highlighting}[] +\CommentTok{\# get data} +\KeywordTok{library}\NormalTok{(projectR)} +\NormalTok{AP <{-}}\StringTok{ }\KeywordTok{get}\NormalTok{(}\KeywordTok{data}\NormalTok{(}\StringTok{"AP.RNAseq6l3c3t"}\NormalTok{)) }\CommentTok{\#CoGAPS run data} +\NormalTok{AP <{-}}\StringTok{ }\NormalTok{AP}\OperatorTok{$}\NormalTok{Amean} +\CommentTok{\# heatmap of gene weights for CoGAPs patterns} +\KeywordTok{library}\NormalTok{(gplots)} +\CommentTok{\#\# Warning: package \textquotesingle{}gplots\textquotesingle{} was built under R version 4.0.5} +\CommentTok{\#\# } +\CommentTok{\#\# Attaching package: \textquotesingle{}gplots\textquotesingle{}} +\CommentTok{\#\# The following object is masked from \textquotesingle{}package:projectR\textquotesingle{}:} +\CommentTok{\#\# } +\CommentTok{\#\# lowess} +\CommentTok{\#\# The following object is masked from \textquotesingle{}package:stats\textquotesingle{}:} +\CommentTok{\#\# } +\CommentTok{\#\# lowess} +\KeywordTok{par}\NormalTok{(}\DataTypeTok{mar=}\KeywordTok{c}\NormalTok{(}\DecValTok{1}\NormalTok{,}\DecValTok{1}\NormalTok{,}\DecValTok{1}\NormalTok{,}\DecValTok{1}\NormalTok{))} +\NormalTok{pNMF<{-}}\KeywordTok{heatmap.2}\NormalTok{(}\KeywordTok{as.matrix}\NormalTok{(AP),}\DataTypeTok{col=}\NormalTok{bluered, }\DataTypeTok{trace=}\StringTok{\textquotesingle{}none\textquotesingle{}}\NormalTok{,} + \DataTypeTok{distfun=}\ControlFlowTok{function}\NormalTok{(c) }\KeywordTok{as.dist}\NormalTok{(}\DecValTok{1}\OperatorTok{{-}}\KeywordTok{cor}\NormalTok{(}\KeywordTok{t}\NormalTok{(c))) ,} + \DataTypeTok{cexCol=}\DecValTok{1}\NormalTok{,}\DataTypeTok{cexRow=}\NormalTok{.}\DecValTok{5}\NormalTok{,}\DataTypeTok{scale =} \StringTok{"row"}\NormalTok{,} + \DataTypeTok{hclustfun=}\ControlFlowTok{function}\NormalTok{(x) }\KeywordTok{hclust}\NormalTok{(x, }\DataTypeTok{method=}\StringTok{"average"}\NormalTok{)} +\NormalTok{ )} +\end{Highlighting} +\end{Shaded} + +\begin{adjustwidth}{\fltoffset}{0mm} +\includegraphics[width=1\linewidth,]{E:/Projects/Fertiglab/projectR/vignettes/projectR_files/figure-latex/unnamed-chunk-3-1} \end{adjustwidth} + +\hypertarget{projecting-cogaps-objects}{% +\subsection{Projecting CoGAPS objects}\label{projecting-cogaps-objects}} + +\begin{Shaded} +\begin{Highlighting}[] +\CommentTok{\# data to project into PCs from RNAseq6l3c3t expression data} +\KeywordTok{library}\NormalTok{(projectR)} +\KeywordTok{data}\NormalTok{(}\StringTok{\textquotesingle{}p.ESepiGen4c1l4\textquotesingle{}}\NormalTok{)} +\CommentTok{\#\# Warning in data("p.ESepiGen4c1l4"): data set \textquotesingle{}p.ESepiGen4c1l4\textquotesingle{} not found} +\KeywordTok{data}\NormalTok{(}\StringTok{\textquotesingle{}p.RNAseq6l3c3t\textquotesingle{}}\NormalTok{)} + +\NormalTok{NMF2ESepi <{-}}\StringTok{ }\KeywordTok{projectR}\NormalTok{(p.ESepiGen4c1l}\OperatorTok{$}\NormalTok{mRNA.Seq,}\DataTypeTok{loadings=}\NormalTok{AP,}\DataTypeTok{full=}\OtherTok{TRUE}\NormalTok{,} + \DataTypeTok{dataNames=}\NormalTok{map.ESepiGen4c1l[[}\StringTok{"GeneSymbols"}\NormalTok{]])} +\CommentTok{\#\# [1] "93 row names matched between data and loadings"} +\CommentTok{\#\# [1] "Updated dimension of data: 93 9"} + +\NormalTok{dNMF2ESepi<{-}}\StringTok{ }\KeywordTok{data.frame}\NormalTok{(}\KeywordTok{cbind}\NormalTok{(}\KeywordTok{t}\NormalTok{(NMF2ESepi),pd.ESepiGen4c1l))} + +\CommentTok{\#plot pca} +\KeywordTok{library}\NormalTok{(ggplot2)} +\NormalTok{setEpiCOL <{-}}\StringTok{ }\KeywordTok{scale\_colour\_manual}\NormalTok{(}\DataTypeTok{values =} \KeywordTok{c}\NormalTok{(}\StringTok{"red"}\NormalTok{,}\StringTok{"green"}\NormalTok{,}\StringTok{"blue"}\NormalTok{,}\StringTok{"black"}\NormalTok{),} +\DataTypeTok{guide =} \KeywordTok{guide\_legend}\NormalTok{(}\DataTypeTok{title=}\StringTok{"Lineage"}\NormalTok{))} + +\NormalTok{pNMF2ESepiGen4c1l <{-}}\StringTok{ }\KeywordTok{ggplot}\NormalTok{(dNMF2ESepi, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x=}\NormalTok{X1, }\DataTypeTok{y=}\NormalTok{X2, }\DataTypeTok{colour=}\NormalTok{Condition)) }\OperatorTok{+} +\StringTok{ }\KeywordTok{geom\_point}\NormalTok{(}\DataTypeTok{size=}\DecValTok{5}\NormalTok{) }\OperatorTok{+}\StringTok{ }\NormalTok{setEpiCOL }\OperatorTok{+} +\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{legend.position=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{), }\DataTypeTok{legend.justification=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{),} + \DataTypeTok{panel.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill =} \StringTok{"white"}\NormalTok{),} + \DataTypeTok{legend.direction =} \StringTok{"horizontal"}\NormalTok{,} + \DataTypeTok{plot.title =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{vjust =} \DecValTok{0}\NormalTok{,}\DataTypeTok{hjust=}\DecValTok{0}\NormalTok{,}\DataTypeTok{face=}\StringTok{"bold"}\NormalTok{))} + \KeywordTok{labs}\NormalTok{(}\DataTypeTok{title =} \StringTok{"Encode RNAseq in target PC1 \& PC2"}\NormalTok{,} + \DataTypeTok{x=}\KeywordTok{paste}\NormalTok{(}\StringTok{"Projected PC1 ("}\NormalTok{,}\KeywordTok{round}\NormalTok{(PCA2ESepi[[}\DecValTok{2}\NormalTok{]][}\DecValTok{1}\NormalTok{],}\DecValTok{2}\NormalTok{),}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{),} + \DataTypeTok{y=}\KeywordTok{paste}\NormalTok{(}\StringTok{"Projected PC2 ("}\NormalTok{,}\KeywordTok{round}\NormalTok{(PCA2ESepi[[}\DecValTok{2}\NormalTok{]][}\DecValTok{2}\NormalTok{],}\DecValTok{2}\NormalTok{),}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{))} +\CommentTok{\#\# $x} +\CommentTok{\#\# [1] "Projected PC1 (18.36\% of varience)"} +\CommentTok{\#\# } +\CommentTok{\#\# $y} +\CommentTok{\#\# [1] "Projected PC2 (17.15\% of varience)"} +\CommentTok{\#\# } +\CommentTok{\#\# $title} +\CommentTok{\#\# [1] "Encode RNAseq in target PC1 \& PC2"} +\CommentTok{\#\# } +\CommentTok{\#\# attr(,"class")} +\CommentTok{\#\# [1] "labels"} +\end{Highlighting} +\end{Shaded} + +\hypertarget{clustering-projection}{% +\section{Clustering projection}\label{clustering-projection}} + +As canonical projection is not defined for clustering objects, the projectR package offers two transfer learning inspired methods to achieve the ``projection'' of clustering objects. These methods are defined by the function used to quantify and transfer the relationships which define each cluster in the original data set to the new dataset. Briefly, \texttt{cluster2pattern} uses the corelation of each genes expression to the mean of each cluster to define continuous weights. These weights are output as a \texttt{pclust} object which can serve as input to \texttt{projectR}. Alternatively, the \texttt{intersectoR} function can be used to test for significant overlap between two clustering objects. Both \texttt{cluster2pattern} and \texttt{intersectoR} methods are coded for a generic list structure with additional S4 class methods for kmeans and hclust objects. Further details and examples are provided in the followin respecitive sections. + +\hypertarget{cluster2pattern}{% +\subsection{cluster2pattern}\label{cluster2pattern}} + +\texttt{cluster2pattern} uses the corelation of each genes expression to the mean of each cluster to define continuous weights. + +\begin{verbatim} +library(projectR) +data(p.RNAseq6l3c3t) + + +nP<-5 +kClust<-kmeans(p.RNAseq6l3c3t,centers=nP) +kpattern<-cluster2pattern(clusters = kClust, NP = nP, data = p.RNAseq6l3c3t) +kpattern + +cluster2pattern(clusters = NA, NP = NA, data = NA) +\end{verbatim} + +\hypertarget{input-arguments-2}{% +\subsubsection{Input Arguments}\label{input-arguments-2}} + +The inputs that must be set each time are the clusters and data. + +The arguments are as follows: + +\begin{description} +\item[clusters]{a clustering object} +\item[NP]{either the number of clusters desired or the subset of clusters to use} +\item[data]{data used to make clusters object} +\end{description} + +\hypertarget{output-2}{% +\subsubsection{Output}\label{output-2}} + +The output of the \texttt{cluster2pattern} function is a \texttt{pclust} class object; specifically, a matrix of genes (rows) by clusters (columns). A gene's value outside of its assigned cluster is zero. For the cluster containing a given gene, the gene's value is the correlation of the gene's expression to the mean of that cluster. + +\hypertarget{intersector}{% +\subsection{intersectoR}\label{intersector}} + +\texttt{intersectoR} function can be used to test for significant overlap between two clustering objects. The base function finds and tests the intersecting values of two sets of lists, presumably the genes associated with patterns in two different datasets. S4 class methods for \texttt{hclust} and \texttt{kmeans} objects are also available. + +\begin{verbatim} +library(projectR) +intersectoR(pSet1 = NA, pSet2 = NA, pval = 0.05, full = FALSE, k = NULL) +\end{verbatim} + +\hypertarget{input-arguments-3}{% +\subsubsection{Input Arguments}\label{input-arguments-3}} + +The inputs that must be set each time are the clusters and data. + +The arguments are as follows: + +\begin{description} +\item[pSet1]{a list for a set of patterns where each entry is a set of genes associated with a single pattern} +\item[pSet2]{a list for a second set of patterns where each entry is a set of genes associated with a single pattern} +\item[pval]{the maximum p-value considered significant} +\item[full]{logical indicating whether to return full data frame of signigicantly overlapping sets. Default is false will return summary matrix.} +\item[k]{numeric giving cut height for hclust objects, if vector arguments will be applied to pSet1 and pSet2 in that order} +\end{description} + +\hypertarget{output-3}{% +\subsubsection{Output}\label{output-3}} + +The output of the \texttt{intersectoR} function is a summary matrix showing the sets with statistically significant overlap under the specified \(p\)-value threshold based on a hypergeometric test. If \texttt{full==TRUE} the full data frame of significantly overlapping sets will also be returned. + +\hypertarget{correlation-based-projection}{% +\section{Correlation based projection}\label{correlation-based-projection}} + +Correlation based projection requires a matrix of gene-wise correlation values to serve as the Pattern input to the \texttt{projectR} function. This matrix can be user-generated or the result of the \texttt{correlateR} function included in the projectR package. User-generated matrixes with each row corresponding to an individual gene can be input to the generic \texttt{projectR} function. The \texttt{correlateR} function allows users to create a weight matrix for projection with values quantifying the within dataset correlation of each genes expression to the expression pattern of a particular gene or set of genes as follows. + +\hypertarget{correlater}{% +\subsection{correlateR}\label{correlater}} + +\begin{verbatim} +library(projectR) +correlateR(genes = NA, dat = NA, threshtype = "R", threshold = 0.7, absR = FALSE, ...) +\end{verbatim} + +\hypertarget{input-arguments-4}{% +\subsubsection{Input Arguments}\label{input-arguments-4}} + +The inputs that must be set each time are only the genes and data, with all other inputs having default values. + +The arguments are as follows: + +\begin{description} +\item[genes]{gene or character vector of genes for reference expression pattern dat} +\item[data]{matrix or data frame with genes to be used for to calculate correlation} +\item[threshtype]{Default "R" indicates thresholding by R value or equivalent. Alternatively, "N" indicates a numerical cut off.} +\item[threshold]{numeric indicating value at which to make threshold} +\item[absR]{logical indicating where to include both positive and negatively correlated genes} +\item[...]{addtion imputes to the cor function} +\end{description} + +\hypertarget{output-4}{% +\subsubsection{Output}\label{output-4}} + +The output of the \texttt{correlateR} function is a \texttt{correlateR} class object. Specifically, a matrix of correlation values for those genes whose expression pattern pattern in the dataset is correlated (and anti-correlated if absR=TRUE) above the value given in as the threshold arguement. As this information may be useful in its own right, it is recommended that users inspect the \texttt{correlateR} object before using it as input to the \texttt{projectR} function. + +\hypertarget{obtaining-and-visualizing-objects.}{% +\subsection{\texorpdfstring{Obtaining and visualizing \texttt{correlateR} objects.}{Obtaining and visualizing objects.}}\label{obtaining-and-visualizing-objects.}} + +\begin{Shaded} +\begin{Highlighting}[] +\CommentTok{\# data to} +\KeywordTok{library}\NormalTok{(projectR)} +\KeywordTok{data}\NormalTok{(}\StringTok{"p.RNAseq6l3c3t"}\NormalTok{)} + +\CommentTok{\# get genes correlated to T} +\NormalTok{cor2T<{-}}\KeywordTok{correlateR}\NormalTok{(}\DataTypeTok{genes=}\StringTok{"T"}\NormalTok{, }\DataTypeTok{dat=}\NormalTok{p.RNAseq6l3c3t, }\DataTypeTok{threshtype=}\StringTok{"N"}\NormalTok{, }\DataTypeTok{threshold=}\DecValTok{10}\NormalTok{, }\DataTypeTok{absR=}\OtherTok{TRUE}\NormalTok{)} +\NormalTok{cor2T <{-}}\StringTok{ }\NormalTok{cor2T}\OperatorTok{@}\NormalTok{corM} +\CommentTok{\#\#\# heatmap of genes more correlated to T} +\NormalTok{indx<{-}}\KeywordTok{unlist}\NormalTok{(}\KeywordTok{sapply}\NormalTok{(cor2T,rownames))} +\NormalTok{indx <{-}}\StringTok{ }\KeywordTok{as.vector}\NormalTok{(indx)} +\KeywordTok{colnames}\NormalTok{(p.RNAseq6l3c3t)<{-}pd.RNAseq6l3c3t}\OperatorTok{$}\NormalTok{sampleX} +\KeywordTok{library}\NormalTok{(reshape2)} +\CommentTok{\#\# Warning: package \textquotesingle{}reshape2\textquotesingle{} was built under R version 4.0.5} +\NormalTok{pm.RNAseq6l3c3t<{-}}\KeywordTok{melt}\NormalTok{(}\KeywordTok{cbind}\NormalTok{(p.RNAseq6l3c3t[indx,],indx))} +\CommentTok{\#\# Using indx as id variables} + +\KeywordTok{library}\NormalTok{(gplots)} +\KeywordTok{library}\NormalTok{(ggplot2)} +\KeywordTok{library}\NormalTok{(viridis)} +\CommentTok{\#\# Warning: package \textquotesingle{}viridis\textquotesingle{} was built under R version 4.0.5} +\CommentTok{\#\# Loading required package: viridisLite} +\CommentTok{\#\# Warning: package \textquotesingle{}viridisLite\textquotesingle{} was built under R version 4.0.5} +\NormalTok{pCorT<{-}}\KeywordTok{ggplot}\NormalTok{(pm.RNAseq6l3c3t, }\KeywordTok{aes}\NormalTok{(variable, indx, }\DataTypeTok{fill =}\NormalTok{ value)) }\OperatorTok{+} +\StringTok{ }\KeywordTok{geom\_tile}\NormalTok{(}\DataTypeTok{colour=}\StringTok{"gray20"}\NormalTok{, }\DataTypeTok{size=}\FloatTok{1.5}\NormalTok{, }\DataTypeTok{stat=}\StringTok{"identity"}\NormalTok{) }\OperatorTok{+} +\StringTok{ }\KeywordTok{scale\_fill\_viridis}\NormalTok{(}\DataTypeTok{option=}\StringTok{"B"}\NormalTok{) }\OperatorTok{+} +\StringTok{ }\KeywordTok{xlab}\NormalTok{(}\StringTok{""}\NormalTok{) }\OperatorTok{+}\StringTok{ }\KeywordTok{ylab}\NormalTok{(}\StringTok{""}\NormalTok{) }\OperatorTok{+} +\StringTok{ }\KeywordTok{scale\_y\_discrete}\NormalTok{(}\DataTypeTok{limits=}\NormalTok{indx) }\OperatorTok{+} +\StringTok{ }\KeywordTok{ggtitle}\NormalTok{(}\StringTok{"Ten genes most highly pos \& neg correlated with T"}\NormalTok{) }\OperatorTok{+} +\StringTok{ }\KeywordTok{theme}\NormalTok{(} + \DataTypeTok{panel.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill=}\StringTok{"gray20"}\NormalTok{),} + \DataTypeTok{panel.border =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill=}\OtherTok{NA}\NormalTok{,}\DataTypeTok{color=}\StringTok{"gray20"}\NormalTok{, }\DataTypeTok{size=}\FloatTok{0.5}\NormalTok{, }\DataTypeTok{linetype=}\StringTok{"solid"}\NormalTok{),} + \DataTypeTok{panel.grid.major =} \KeywordTok{element\_blank}\NormalTok{(),} + \DataTypeTok{panel.grid.minor =} \KeywordTok{element\_blank}\NormalTok{(),} + \DataTypeTok{axis.line =} \KeywordTok{element\_blank}\NormalTok{(),} + \DataTypeTok{axis.ticks =} \KeywordTok{element\_blank}\NormalTok{(),} + \DataTypeTok{axis.text =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{size=}\KeywordTok{rel}\NormalTok{(}\DecValTok{1}\NormalTok{),}\DataTypeTok{hjust=}\DecValTok{1}\NormalTok{),} + \DataTypeTok{axis.text.x =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{angle =} \DecValTok{90}\NormalTok{,}\DataTypeTok{vjust=}\NormalTok{.}\DecValTok{5}\NormalTok{),} + \DataTypeTok{legend.text =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{color=}\StringTok{"white"}\NormalTok{, }\DataTypeTok{size=}\KeywordTok{rel}\NormalTok{(}\DecValTok{1}\NormalTok{)),} + \DataTypeTok{legend.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill=}\StringTok{"gray20"}\NormalTok{),} + \DataTypeTok{legend.position =} \StringTok{"bottom"}\NormalTok{,} + \DataTypeTok{legend.title=}\KeywordTok{element\_blank}\NormalTok{()} +\NormalTok{)} +\end{Highlighting} +\end{Shaded} + +\begin{adjustwidth}{\fltoffset}{0mm} +\includegraphics[width=1\linewidth,]{E:/Projects/Fertiglab/projectR/vignettes/projectR_files/figure-latex/unnamed-chunk-5-1} \end{adjustwidth} + +\hypertarget{projecting-correlater-objects.}{% +\subsection{Projecting correlateR objects.}\label{projecting-correlater-objects.}} + +\begin{Shaded} +\begin{Highlighting}[] +\CommentTok{\# data to project into from RNAseq6l3c3t expression data} +\KeywordTok{data}\NormalTok{(p.ESepiGen4c1l)} + +\KeywordTok{library}\NormalTok{(projectR)} +\NormalTok{cor2ESepi <{-}}\StringTok{ }\KeywordTok{projectR}\NormalTok{(p.ESepiGen4c1l}\OperatorTok{$}\NormalTok{mRNA.Seq,}\DataTypeTok{loadings=}\NormalTok{cor2T[[}\DecValTok{1}\NormalTok{]],}\DataTypeTok{full=}\OtherTok{FALSE}\NormalTok{,} + \DataTypeTok{dataNames=}\NormalTok{map.ESepiGen4c1l}\OperatorTok{$}\NormalTok{GeneSymbols)} +\CommentTok{\#\# [1] "9 row names matched between data and loadings"} +\CommentTok{\#\# [1] "Updated dimension of data: 9 9"} +\end{Highlighting} +\end{Shaded} + +\hypertarget{differential-features-identification.}{% +\section{Differential features identification.}\label{differential-features-identification.}} + +\hypertarget{projectiondriver}{% +\subsection{projectionDriveR}\label{projectiondriver}} + +Given loadings that define the weight of features (genes) in a given latent space (e.g.~PCA, NMF), and the use of these patterns in samples, it is of interest to look at differential usage of these features between conditions. These conditions may be defined by user-defined annotations of cell type or by differential usage of a (projected) pattern. By examining differences in gene expression, weighted by the loadings that define their importance in a specific latent space, a unique understanding of differential expression in that context can be gained. This approach was originally proposed and developed in (Baraban et al, 2021), which demonstrates its utility in cross-celltype and cross-species interpretation of pattern usages. + +\begin{verbatim} +library(projectR) +projectionDriveR(cellgroup1, cellgroup2, loadings, loadingsNames = NULL, + pvalue, pattern_name, display = T, normalize_pattern = T) +\end{verbatim} + +\hypertarget{input-arguments-5}{% +\subsubsection{Input Arguments}\label{input-arguments-5}} + +The required inputs are two feature by sample (e.g.~gene by cell) matrices to be compared, the loadings that define the feature weights, and the name of the pattern (column of feature loadings). If applicable, the expression matrices should already be corrected for variables such as sequencing depth. + +The arguments for projectionDriveR are: + +\begin{description} +\item[cellgroup1]{Matrix 1 with features as rows, samples as columns.} +\item[cellgroup2]{Matrix 2 with features as rows, samples as columns.} +\item[loadings]{Matrix or dataframe with features as rows, columns as patterns. Values define feature weights in that space} +\item[loadingsNames]{Vector of names corresponding to rows of loadings. By default the rownames of loadings will be used} +\item[pattern\_name]{the column name of the loadings by which the features will be weighted} +\item[pvalue]{Determines the significance of the confidence interval to be calculated between the difference of means} +\item[display]{Boolean. Whether or not to plot the estimates of significant features. Default = T} +\item[normalize\_pattern]{Boolean. Whether or not to normalize the average feature weight. Default = T} +\end{description} + +\hypertarget{output-5}{% +\subsubsection{Output}\label{output-5}} + +The output of \texttt{projectionDriveR} is a list of length five \texttt{mean\_ci} holds the confidence intervals for the difference in means for all features, \texttt{weighted\_ci} holds the confidence intervals for the weighted difference in means for all features, and normalized\_weights are the weights themselves. In addition, \texttt{significant\_genes} is a vector of gene names that are significantly different at the threshold provided. \texttt{plotted\_ci} returns the ggplot figure of the confidence intervals, see \texttt{plotConfidenceIntervals} for documentation. + +\hypertarget{identifying-differential-features-associated-with-learned-patterns}{% +\subsubsection{Identifying differential features associated with learned patterns}\label{identifying-differential-features-associated-with-learned-patterns}} + +\begin{Shaded} +\begin{Highlighting}[] +\KeywordTok{options}\NormalTok{(}\DataTypeTok{width =} \DecValTok{60}\NormalTok{)} +\KeywordTok{library}\NormalTok{(projectR)} +\KeywordTok{library}\NormalTok{(dplyr, }\DataTypeTok{warn.conflicts =}\NormalTok{ F)} +\CommentTok{\#\# Warning: package \textquotesingle{}dplyr\textquotesingle{} was built under R version 4.0.5} + +\CommentTok{\#gene weights x pattern} +\KeywordTok{data}\NormalTok{(}\StringTok{"retinal\_patterns"}\NormalTok{)} + +\CommentTok{\#size{-}normed, log expression} +\KeywordTok{data}\NormalTok{(}\StringTok{"microglial\_counts"}\NormalTok{)} + +\CommentTok{\#size{-}normed, log expression} +\KeywordTok{data}\NormalTok{(}\StringTok{"glial\_counts"}\NormalTok{)} + +\CommentTok{\#the features by which to weight the difference in expression } +\NormalTok{pattern\_to\_weight <{-}}\StringTok{ "Pattern.24"} +\NormalTok{drivers <{-}}\StringTok{ }\KeywordTok{projectionDriveR}\NormalTok{(microglial\_counts, }\CommentTok{\#expression matrix} +\NormalTok{ glial\_counts, }\CommentTok{\#expression matrix} + \DataTypeTok{loadings =}\NormalTok{ retinal\_patterns, }\CommentTok{\#feature x pattern dataframe} + \DataTypeTok{loadingsNames =} \OtherTok{NULL}\NormalTok{,} + \DataTypeTok{pattern\_name =}\NormalTok{ pattern\_to\_weight, }\CommentTok{\#column name} + \DataTypeTok{pvalue =} \FloatTok{1e{-}5}\NormalTok{, }\CommentTok{\#pvalue before bonferroni correction} + \DataTypeTok{display =}\NormalTok{ T,} + \DataTypeTok{normalize\_pattern =}\NormalTok{ T) }\CommentTok{\#normalize feature weights} +\CommentTok{\#\# [1] "2996 row names matched between datasets"} +\CommentTok{\#\# [1] "2996"} +\CommentTok{\#\# [1] "Updated dimension of data: 2996"} +\end{Highlighting} +\end{Shaded} + +\begin{adjustwidth}{\fltoffset}{0mm} +\includegraphics[width=1\linewidth,]{E:/Projects/Fertiglab/projectR/vignettes/projectR_files/figure-latex/projectionDriver-1} \end{adjustwidth} + +\begin{Shaded} +\begin{Highlighting}[] + +\NormalTok{conf\_intervals <{-}}\StringTok{ }\NormalTok{drivers}\OperatorTok{$}\NormalTok{mean\_ci[drivers}\OperatorTok{$}\NormalTok{significant\_genes,]} + +\KeywordTok{str}\NormalTok{(conf\_intervals)} +\CommentTok{\#\# \textquotesingle{}data.frame\textquotesingle{}: 253 obs. of 2 variables:} +\CommentTok{\#\# $ low : num 1.86 0.158 {-}0.562 {-}0.756 0.155 ...} +\CommentTok{\#\# $ high: num 2.03943 0.26729 {-}0.00197 {-}0.18521 0.23239 ...} +\end{Highlighting} +\end{Shaded} + +\hypertarget{plotconfidenceintervals}{% +\subsection{plotConfidenceIntervals}\label{plotconfidenceintervals}} + +\hypertarget{input}{% +\subsubsection{Input}\label{input}} + +The arguments for plotConfidenceIntervals are: + +\begin{description} +\item[confidence\_intervals]{A dataframe of features x estimates} +\item[interval\_name]{names of columns that contain the low and high estimates, respectively. +(default: c("low","high"))} +\item[pattern\_name]{string to use as the title for the plots} +\item[sort]{Boolean. Whether or not to sort genes by their estimates (default = T)} +\item[genes]{a vector with names of genes to include in plot. If sort=F, estimates will be plotted in this order (default = NULL will include all genes.)} +\item[weights]{weights of features to include as annotation (default = NULL will not include heatmap)} +\item[weights\_clip]{quantile of data to clip color scale for improved visualization (default: 0.99)} +\item[weights\_vis\_norm]{Which processed version of weights to visualize as a heatmap. One of c("none", "quantile"). default = "none"} +\end{description} + +\hypertarget{output-6}{% +\subsubsection{Output}\label{output-6}} + +A list of the length three that includes confidence interval plots and relevant info. \texttt{ci\_estimates\_plot} is the point-range plot for the provided estimates. If called from within \texttt{projectionDriveR}, the unweighted estimates are used. \texttt{feature\_order} is the vector of gene names in the order shown in the figure. \texttt{weights\_heatmap} is a heatmap annotation of the gene loadings, in the same order as above. + +\hypertarget{customize-plotting-of-confidence-intervals}{% +\subsubsection{Customize plotting of confidence intervals}\label{customize-plotting-of-confidence-intervals}} + +\begin{Shaded} +\begin{Highlighting}[] +\KeywordTok{library}\NormalTok{(cowplot)} +\CommentTok{\#\# Warning: package \textquotesingle{}cowplot\textquotesingle{} was built under R version 4.0.5} +\CommentTok{\#order in ascending order of estimates} +\NormalTok{conf\_intervals <{-}}\StringTok{ }\NormalTok{conf\_intervals }\OperatorTok{\%>\%}\StringTok{ }\KeywordTok{mutate}\NormalTok{(}\DataTypeTok{mid =}\NormalTok{ (high}\OperatorTok{+}\NormalTok{low)}\OperatorTok{/}\DecValTok{2}\NormalTok{) }\OperatorTok{\%>\%}\StringTok{ }\KeywordTok{arrange}\NormalTok{(mid)} +\NormalTok{gene\_order <{-}}\StringTok{ }\KeywordTok{rownames}\NormalTok{(conf\_intervals)} + +\CommentTok{\#add text labels for top and bottom n genes} +\NormalTok{conf\_intervals}\OperatorTok{$}\NormalTok{label\_name <{-}}\StringTok{ }\OtherTok{NA\_character\_} +\NormalTok{n <{-}}\StringTok{ }\DecValTok{2} +\NormalTok{idx <{-}}\StringTok{ }\KeywordTok{c}\NormalTok{(}\DecValTok{1}\OperatorTok{:}\NormalTok{n, (}\KeywordTok{dim}\NormalTok{(conf\_intervals)[}\DecValTok{1}\NormalTok{]}\OperatorTok{{-}}\NormalTok{(n}\DecValTok{{-}1}\NormalTok{))}\OperatorTok{:}\KeywordTok{dim}\NormalTok{(conf\_intervals)[}\DecValTok{1}\NormalTok{])} +\NormalTok{gene\_ids <{-}}\StringTok{ }\NormalTok{gene\_order[idx]} +\NormalTok{conf\_intervals}\OperatorTok{$}\NormalTok{label\_name[idx] <{-}}\StringTok{ }\NormalTok{gene\_ids} + +\CommentTok{\#the labels above can now be used as ggplot aesthetics} +\NormalTok{plots\_list <{-}}\StringTok{ }\KeywordTok{plotConfidenceIntervals}\NormalTok{(conf\_intervals, }\CommentTok{\#mean difference in expression confidence intervals} + \DataTypeTok{sort =}\NormalTok{ F, }\CommentTok{\#should genes be sorted by estimates} + \DataTypeTok{weights =}\NormalTok{ drivers}\OperatorTok{$}\NormalTok{normalized\_weights[}\KeywordTok{rownames}\NormalTok{(conf\_intervals)],} + \DataTypeTok{pattern\_name =}\NormalTok{ pattern\_to\_weight,} + \DataTypeTok{weights\_clip =} \FloatTok{0.99}\NormalTok{,} + \DataTypeTok{weights\_vis\_norm =} \StringTok{"none"}\NormalTok{)} + +\NormalTok{pl1 <{-}}\StringTok{ }\NormalTok{plots\_list[[}\StringTok{"ci\_estimates\_plot"}\NormalTok{]] }\OperatorTok{+} +\StringTok{ }\NormalTok{ggrepel}\OperatorTok{::}\KeywordTok{geom\_label\_repel}\NormalTok{(}\KeywordTok{aes}\NormalTok{(}\DataTypeTok{label =}\NormalTok{ label\_name), }\DataTypeTok{max.overlaps =} \DecValTok{20}\NormalTok{, }\DataTypeTok{force =} \DecValTok{50}\NormalTok{)} + +\NormalTok{pl2 <{-}}\StringTok{ }\NormalTok{plots\_list[[}\StringTok{"weights\_heatmap"}\NormalTok{]]} + +\CommentTok{\#now plot the weighted differences} +\NormalTok{weighted\_conf\_intervals <{-}}\StringTok{ }\NormalTok{drivers}\OperatorTok{$}\NormalTok{weighted\_mean\_ci[gene\_order,]} +\NormalTok{plots\_list\_weighted <{-}}\StringTok{ }\KeywordTok{plotConfidenceIntervals}\NormalTok{(weighted\_conf\_intervals,} + \DataTypeTok{sort =}\NormalTok{ F,} + \DataTypeTok{pattern\_name =}\NormalTok{ pattern\_to\_weight)} + +\NormalTok{pl3 <{-}}\StringTok{ }\NormalTok{plots\_list\_weighted[[}\StringTok{"ci\_estimates\_plot"}\NormalTok{]] }\OperatorTok{+} +\StringTok{ }\KeywordTok{xlab}\NormalTok{(}\StringTok{"Difference in weighted group means"}\NormalTok{) }\OperatorTok{+} +\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{axis.title.y =} \KeywordTok{element\_blank}\NormalTok{(), }\DataTypeTok{axis.ticks.y =} \KeywordTok{element\_blank}\NormalTok{(), }\DataTypeTok{axis.text.y =} \KeywordTok{element\_blank}\NormalTok{())} + +\NormalTok{cowplot}\OperatorTok{::}\KeywordTok{plot\_grid}\NormalTok{(pl1, pl2, pl3, }\DataTypeTok{align =} \StringTok{"h"}\NormalTok{, }\DataTypeTok{rel\_widths =} \KeywordTok{c}\NormalTok{(}\DecValTok{1}\NormalTok{,.}\DecValTok{4}\NormalTok{, }\DecValTok{1}\NormalTok{), }\DataTypeTok{ncol =} \DecValTok{3}\NormalTok{)} +\CommentTok{\#\# Warning: Removed 249 rows containing missing values} +\CommentTok{\#\# (geom\_label\_repel).} +\end{Highlighting} +\end{Shaded} + +\begin{adjustwidth}{\fltoffset}{0mm} +\includegraphics[width=1\linewidth,]{E:/Projects/Fertiglab/projectR/vignettes/projectR_files/figure-latex/unnamed-chunk-7-1} \end{adjustwidth} + +\hypertarget{refs}{} +\begin{cslreferences} +\leavevmode\hypertarget{ref-Barbakh:2009bw}{}% +Barbakh, Wesam Ashour, Ying Wu, and Colin Fyfe. 2009. ``Review of Linear Projection Methods.'' In \emph{Non-Standard Parameter Adaptation for Exploratory Data Analysis}, 29--48. Berlin, Heidelberg: Springer Berlin Heidelberg. + +\leavevmode\hypertarget{ref-Sibisi1997}{}% +Sibisi, Sibusiso, and John Skilling. 1997. ``Prior Distributions on Measure Space.'' \emph{Journal of the Royal Statistical Society: Series B (Statistical Methodology)} 59 (1): 217--35. \url{https://doi.org/10.1111/1467-9868.00065}. + +\leavevmode\hypertarget{ref-Ochs2006}{}% +Wang, Guoli, Andrew V. Kossenkov, and Michael F. Ochs. 2006. ``LS-Nmf: A Modified Non-Negative Matrix Factorization Algorithm Utilizing Uncertainty Estimates.'' \emph{BMC Bioinformatics} 7 (1): 175. \url{https://doi.org/10.1186/1471-2105-7-175}. +\end{cslreferences} + + +\end{document} From 0f44c1e4ba7f8b5e9d01f1fc3f3800261adc724a Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Tue, 16 Jan 2024 15:35:21 -0500 Subject: [PATCH 11/33] Updated man --- NAMESPACE | 3 +++ R/plotting.R | 2 ++ man/bonferroniCorrectedDifferences.Rd | 18 ++++++++++---- man/pdVolcano.Rd | 34 +++++++++++++++++++++++++++ man/plotConfidenceIntervals.Rd | 7 ++++-- man/projectionDriveR.Rd | 7 ++++-- 6 files changed, 62 insertions(+), 9 deletions(-) create mode 100644 man/pdVolcano.Rd diff --git a/NAMESPACE b/NAMESPACE index e05318d..2ab6ef2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,7 @@ export(getTSNE) export(getUMAP) export(intersectoR) export(multivariateAnalysisR) +export(pdVolcano) export(plotConfidenceIntervals) export(projectR) export(projectionDriveR) @@ -36,7 +37,9 @@ importFrom(cowplot,plot_grid) importFrom(dplyr,"%>%") importFrom(dplyr,dense_rank) importFrom(dplyr,mutate) +importFrom(ggpubr,ggarrange) importFrom(ggrepel,geom_label_repel) +importFrom(ggrepel,geom_text_repel) importFrom(grDevices,colorRampPalette) importFrom(methods,callNextMethod) importFrom(scales,squish) diff --git a/R/plotting.R b/R/plotting.R index fdc74f2..bbc4a83 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -148,6 +148,8 @@ plotConfidenceIntervals <- function( #' @importFrom ggrepel geom_text_repel #' @importFrom ggpubr ggarrange #' @import dplyr +#' @return A list with weighted and unweighted differential expression metrics +#' @export #plot FC, weighted and unweighted. Designed for use with the output of projectionDriveRs pdVolcano <- function(result, FC = 0.2, diff --git a/man/bonferroniCorrectedDifferences.Rd b/man/bonferroniCorrectedDifferences.Rd index 74f5f97..e0cfccc 100644 --- a/man/bonferroniCorrectedDifferences.Rd +++ b/man/bonferroniCorrectedDifferences.Rd @@ -1,20 +1,28 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/projectionDriveR.R +% Please edit documentation in R/projectionDriveRfun.R \name{bonferroniCorrectedDifferences} \alias{bonferroniCorrectedDifferences} \title{bonferroniCorrectedDifferences} \usage{ -bonferroniCorrectedDifferences(group1, group2, diff_weights = NULL, pvalue) +bonferroniCorrectedDifferences( + group1, + group2, + diff_weights = NULL, + mode = "CI", + pvalue +) } \arguments{ \item{group1}{count matrix 1} \item{group2}{count matrix 2} -\item{diff_weights}{oadings to weight the differential expression between the groups} +\item{diff_weights}{loadings to weight the differential expression between the groups} -\item{pvalue}{significance value to threshold at} +\item{mode}{user specified statistical approach, confidence intervals (CI) or pvalues (PV) - default to CI} + +\item{pvalue}{significance value to threshold} } \description{ -Calculate the (weighted) difference in means for each measurement between two groups. +Calculate the weighted and unweighted difference in means for each measurement between two groups. } diff --git a/man/pdVolcano.Rd b/man/pdVolcano.Rd new file mode 100644 index 0000000..5db483d --- /dev/null +++ b/man/pdVolcano.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting.R +\name{pdVolcano} +\alias{pdVolcano} +\title{pdVolcano} +\usage{ +pdVolcano( + result, + FC = 0.2, + pvalue = NULL, + subset = NULL, + filter.inf = FALSE, + label.num = 5 +) +} +\arguments{ +\item{result}{result output from projectionDriveR function with PI method selected} + +\item{FC}{fold change threshold, default at 0.2} + +\item{pvalue}{significance threshold, default set to pvalue stored in projectionDriveR output} + +\item{subset}{vector of gene names to subset the plot by} + +\item{filter.inf}{remove genes that have pvalues below machine double minimum value} + +\item{label.no}{Number of genes to label on either side of the volcano plot, default 5} +} +\value{ +A list with weighted and unweighted differential expression metrics +} +\description{ +Generate volcano plot and gate genes based on fold change and pvalue, includes vectors that can be used with fast gene set enrichment (fgsea) +} diff --git a/man/plotConfidenceIntervals.Rd b/man/plotConfidenceIntervals.Rd index ec1c5a5..f67993a 100644 --- a/man/plotConfidenceIntervals.Rd +++ b/man/plotConfidenceIntervals.Rd @@ -12,7 +12,8 @@ plotConfidenceIntervals( genes = NULL, weights = NULL, weights_clip = 0.99, - weights_vis_norm = "none" + weights_vis_norm = "none", + weighted = F ) } \arguments{ @@ -30,7 +31,9 @@ plotConfidenceIntervals( \item{weights_clip}{optional. quantile of data to clip color scale for improved visualization. Default: 0.99} -\item{weights_vis_norm}{Which processed version of weights to visualize as a heatmap. +\item{weights_vis_norm}{Which processed version of weights to visualize as a heatmap.} + +\item{weighted}{specifies whether the confidence intervals in use are weighted by the pattern and labels plots accordingly Options are "none" (which uses provided weights) or "quantiles". Default: none} } \value{ diff --git a/man/projectionDriveR.Rd b/man/projectionDriveR.Rd index e562c2d..3b17249 100644 --- a/man/projectionDriveR.Rd +++ b/man/projectionDriveR.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/projectionDriveR.R +% Please edit documentation in R/projectionDriveRfun.R \name{projectionDriveR} \alias{projectionDriveR} \title{projectionDriveR} @@ -12,7 +12,8 @@ projectionDriveR( pattern_name, pvalue = 1e-05, display = TRUE, - normalize_pattern = TRUE + normalize_pattern = TRUE, + mode = "CI" ) } \arguments{ @@ -31,6 +32,8 @@ projectionDriveR( \item{display}{boolean. Whether or not to plot and display confidence intervals} \item{normalize_pattern}{Boolean. Whether or not to normalize pattern weights.} + +\item{mode}{user specified statistical approach, confidence intervals (CI) or pvalues (PV) - default to CI} } \value{ A list with weighted mean differences, mean differences, and differential genes that meet the provided signficance threshold. From 3634fa7e47cca1d6de0a39a31b025985a64c69e4 Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Tue, 16 Jan 2024 15:58:24 -0500 Subject: [PATCH 12/33] Update namespace and description --- DESCRIPTION | 3 ++- NAMESPACE | 1 + R/plotting.R | 4 ++-- man/pdVolcano.Rd | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c509f99..1272c91 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,7 +26,8 @@ Imports: cowplot, ggrepel, umap, - tsne + tsne, + ggpubr Suggests: BiocStyle, CoGAPS, diff --git a/NAMESPACE b/NAMESPACE index 2ab6ef2..2825649 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,6 +24,7 @@ import(cluster) import(dplyr) import(ggalluvial) import(ggplot2) +import(ggpubr) import(limma) import(reshape2) import(scales, except = viridis_pal) diff --git a/R/plotting.R b/R/plotting.R index bbc4a83..bd1a68c 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -142,11 +142,11 @@ plotConfidenceIntervals <- function( #' @param pvalue significance threshold, default set to pvalue stored in projectionDriveR output #' @param subset vector of gene names to subset the plot by #' @param filter.inf remove genes that have pvalues below machine double minimum value -#' @param label.no Number of genes to label on either side of the volcano plot, default 5 +#' @param label.num Number of genes to label on either side of the volcano plot, default 5 +#' @import ggpubr #' @importFrom stats var #' @importFrom ggrepel geom_label_repel #' @importFrom ggrepel geom_text_repel -#' @importFrom ggpubr ggarrange #' @import dplyr #' @return A list with weighted and unweighted differential expression metrics #' @export diff --git a/man/pdVolcano.Rd b/man/pdVolcano.Rd index 5db483d..7a4967e 100644 --- a/man/pdVolcano.Rd +++ b/man/pdVolcano.Rd @@ -24,7 +24,7 @@ pdVolcano( \item{filter.inf}{remove genes that have pvalues below machine double minimum value} -\item{label.no}{Number of genes to label on either side of the volcano plot, default 5} +\item{label.num}{Number of genes to label on either side of the volcano plot, default 5} } \value{ A list with weighted and unweighted differential expression metrics From 580877f3be8ad8d850e35ba2c5cf1f11c707e2cb Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Tue, 16 Jan 2024 16:07:41 -0500 Subject: [PATCH 13/33] Update DESCRIPTION --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1272c91..9fbcc60 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,8 @@ Imports: ggrepel, umap, tsne, - ggpubr + ggpubr, + magick Suggests: BiocStyle, CoGAPS, From 2c5f667cab6121be89fb8427ceaf1a8d645623ec Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Tue, 16 Jan 2024 16:52:57 -0500 Subject: [PATCH 14/33] vignette update --- vignettes/projectR.Rmd | 2 +- vignettes/projectR.html | 27 ++++++++++++++++----------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/vignettes/projectR.Rmd b/vignettes/projectR.Rmd index fb4c08e..5df583b 100644 --- a/vignettes/projectR.Rmd +++ b/vignettes/projectR.Rmd @@ -415,7 +415,7 @@ drivers <- projectionDriveR(microglial_counts, #expression matrix loadingsNames = NULL, pattern_name = pattern_to_weight, #column name pvalue = 1e-5, #pvalue before bonferroni correction - display = T, + display = F, normalize_pattern = T, #normalize feature weights mode = "CI") #confidence interval mode diff --git a/vignettes/projectR.html b/vignettes/projectR.html index 4fc7ea7..19d5979 100644 --- a/vignettes/projectR.html +++ b/vignettes/projectR.html @@ -949,6 +949,9 @@

4.1 Obtaining CoGAPS patterns to library(gplots)
## 
 ## Attaching package: 'gplots'
+
## The following object is masked from 'package:S4Vectors':
+## 
+##     space
## The following object is masked from 'package:stats':
 ## 
 ##     lowess
@@ -958,7 +961,7 @@

4.1 Obtaining CoGAPS patterns to cexCol=1,cexRow=.5,scale = "row", hclustfun=function(x) hclust(x, method="average") ) -

+

4.2 Projecting CoGAPS objects

@@ -1117,14 +1120,12 @@

6.2 Obtaining and visualizing ## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0. ## ℹ Please use `linewidth` instead. ## This warning is displayed once every 8 hours. -## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was -## generated. +## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
 ## ℹ Please use the `linewidth` argument instead.
 ## This warning is displayed once every 8 hours.
-## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
-## generated.
-

+## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated. +

6.3 Projecting correlateR objects.

@@ -1189,14 +1190,13 @@

7.1.3 Identifying differential fe loadingsNames = NULL, pattern_name = pattern_to_weight, #column name pvalue = 1e-5, #pvalue before bonferroni correction - display = T, + display = F, normalize_pattern = T, #normalize feature weights mode = "CI") #confidence interval mode
## [1] "Mode: CI"
 ## [1] "2996 row names matched between datasets"
 ## [1] "Updated dimension of data: 2996"
 ## the length of shared genes are: 253
-

conf_intervals <- drivers$mean_ci[drivers$sig_genes$significant_shared_genes,]
 
 str(conf_intervals)
@@ -1227,8 +1227,13 @@

7.2.2 Output

7.2.3 Customize plotting of confidence intervals

-
suppressWarnings(library(cowplot))
-#order in ascending order of estimates
+
suppressWarnings(library(cowplot))
+
## 
+## Attaching package: 'cowplot'
+
## The following object is masked from 'package:projectR':
+## 
+##     get_legend
+
#order in ascending order of estimates
 conf_intervals <- conf_intervals %>% mutate(mid = (high+low)/2) %>% arrange(mid)
 gene_order <- rownames(conf_intervals)
 
@@ -1266,7 +1271,7 @@ 

7.2.3 Customize plotting of confi cowplot::plot_grid(pl1, pl2, pl3, align = "h", rel_widths = c(1,.4, 1), ncol = 3)

## Warning: Removed 249 rows containing missing values
 ## (`geom_label_repel()`).
-

+

From ddf882610c72d43588dfcdc8be24ef7f44b86997 Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Wed, 17 Jan 2024 10:13:06 -0500 Subject: [PATCH 15/33] removed dependencies ggpubr, magick --- DESCRIPTION | 4 +--- NAMESPACE | 2 -- R/plotting.R | 3 +-- R/projectionDriveRfun.R | 7 +++---- vignettes/projectR.Rmd | 4 ++-- 5 files changed, 7 insertions(+), 13 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9fbcc60..c509f99 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,9 +26,7 @@ Imports: cowplot, ggrepel, umap, - tsne, - ggpubr, - magick + tsne Suggests: BiocStyle, CoGAPS, diff --git a/NAMESPACE b/NAMESPACE index 2825649..43a7bea 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,7 +24,6 @@ import(cluster) import(dplyr) import(ggalluvial) import(ggplot2) -import(ggpubr) import(limma) import(reshape2) import(scales, except = viridis_pal) @@ -38,7 +37,6 @@ importFrom(cowplot,plot_grid) importFrom(dplyr,"%>%") importFrom(dplyr,dense_rank) importFrom(dplyr,mutate) -importFrom(ggpubr,ggarrange) importFrom(ggrepel,geom_label_repel) importFrom(ggrepel,geom_text_repel) importFrom(grDevices,colorRampPalette) diff --git a/R/plotting.R b/R/plotting.R index bd1a68c..62e43bc 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -143,7 +143,6 @@ plotConfidenceIntervals <- function( #' @param subset vector of gene names to subset the plot by #' @param filter.inf remove genes that have pvalues below machine double minimum value #' @param label.num Number of genes to label on either side of the volcano plot, default 5 -#' @import ggpubr #' @importFrom stats var #' @importFrom ggrepel geom_label_repel #' @importFrom ggrepel geom_text_repel @@ -276,7 +275,7 @@ pdVolcano <- function(result, axis.title=element_text(size=14), legend.text = element_text(size=12)) - plt <- ggpubr::ggarrange(unweightedvolcano, weightedvolcano, common.legend = TRUE, legend = "bottom") + plt <- cowplot::plot_grid(unweightedvolcano, weightedvolcano, ncol = 2, align = "h") print(plt) #return a list of genes that can be used as input to fgsea diff --git a/R/projectionDriveRfun.R b/R/projectionDriveRfun.R index 14a201d..addcc4a 100644 --- a/R/projectionDriveRfun.R +++ b/R/projectionDriveRfun.R @@ -137,7 +137,6 @@ bonferroniCorrectedDifferences <- function( #' Calculate the weighted difference in expression between two groups (group1 - group2) #' #' @importFrom cowplot plot_grid -#' @importFrom ggpubr ggarrange #' @param cellgroup1 gene x cell count matrix for cell group 1 #' @param cellgroup2 gene x cell count matrix for cell group 2 #' @param loadings A matrix of continuous values defining the features @@ -284,14 +283,14 @@ projectionDriveR<-function( ncol = 2, align = "h", rel_widths = c(1,.3))) - print(pl1_u) + #print(pl1_u) pl2_w <- (cowplot::plot_grid(pl_w[["ci_estimates_plot"]], pl_w[["weights_heatmap"]], ncol = 2, align = "h", rel_widths = c(1,.3))) - print(pl2_w) - plt <- ggpubr::ggarrange(pl1_u, pl2_w, common.legend = TRUE, legend = "bottom") + #print(pl2_w) + plt <- cowplot::plot_grid(pl1_u, pl2_w, ncol = 2, align = "h") print(plt) } diff --git a/vignettes/projectR.Rmd b/vignettes/projectR.Rmd index 5df583b..5813237 100644 --- a/vignettes/projectR.Rmd +++ b/vignettes/projectR.Rmd @@ -19,7 +19,7 @@ vignette: > --- ```{r, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) +knitr::opts_chunk$set(echo = TRUE, crop = NULL) options(scipen = 1, digits = 2) set.seed(1234) ``` @@ -415,7 +415,7 @@ drivers <- projectionDriveR(microglial_counts, #expression matrix loadingsNames = NULL, pattern_name = pattern_to_weight, #column name pvalue = 1e-5, #pvalue before bonferroni correction - display = F, + display = T, normalize_pattern = T, #normalize feature weights mode = "CI") #confidence interval mode From 7af3d64fe3ae68738cc921ee57c1a7bc7f8f44f6 Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Wed, 17 Jan 2024 10:28:53 -0500 Subject: [PATCH 16/33] Update projectR.Rmd --- vignettes/projectR.Rmd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vignettes/projectR.Rmd b/vignettes/projectR.Rmd index 5813237..0ef934e 100644 --- a/vignettes/projectR.Rmd +++ b/vignettes/projectR.Rmd @@ -9,6 +9,7 @@ author: date: "`r BiocStyle::doc_date()`" output: BiocStyle::html_document +fig_crop: false bibliography: projectR.bib description: | Functions for the Projection of Weights from PCA, CoGAPS, NMF, Correlation, and Clustering @@ -19,7 +20,7 @@ vignette: > --- ```{r, include=FALSE} -knitr::opts_chunk$set(echo = TRUE, crop = NULL) +knitr::opts_chunk$set(echo = TRUE) options(scipen = 1, digits = 2) set.seed(1234) ``` From 6ea2ca7572d46e7642ea7aab125c9b4812ca4e15 Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Wed, 17 Jan 2024 11:46:08 -0500 Subject: [PATCH 17/33] Build check Removed cropping and magick from vignette --- R/plotting.R | 2 +- vignettes/projectR.Rmd | 3 +-- vignettes/projectR.html | 29 +++++++++++------------------ 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/R/plotting.R b/R/plotting.R index 62e43bc..9fc616e 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -122,7 +122,7 @@ plotConfidenceIntervals <- function( wt_heatmap <- ggplot(data = confidence_intervals) + geom_tile(aes(x = 1, y = 1:n, fill = weights)) + scale_fill_viridis(limits=c(0, quantile(ordered_weights,weights_clip )), - oob=squish, + oob=scales::squish, name = hm_name) + theme_void() diff --git a/vignettes/projectR.Rmd b/vignettes/projectR.Rmd index 0ef934e..16410d5 100644 --- a/vignettes/projectR.Rmd +++ b/vignettes/projectR.Rmd @@ -397,7 +397,6 @@ The output of `projectionDriveR` is a list of length five `mean_ci` holds the co options(width = 60) library(projectR) library(dplyr, warn.conflicts = F) -library(magick) #gene weights x pattern data("retinal_patterns") @@ -416,7 +415,7 @@ drivers <- projectionDriveR(microglial_counts, #expression matrix loadingsNames = NULL, pattern_name = pattern_to_weight, #column name pvalue = 1e-5, #pvalue before bonferroni correction - display = T, + display = F, normalize_pattern = T, #normalize feature weights mode = "CI") #confidence interval mode diff --git a/vignettes/projectR.html b/vignettes/projectR.html index 19d5979..f2b78b9 100644 --- a/vignettes/projectR.html +++ b/vignettes/projectR.html @@ -15,7 +15,7 @@ - + projectR Vignette @@ -729,7 +729,7 @@

projectR Vignette

Gaurav Sharma, Charles Shin, Jared N. Slosberg, Loyal A. Goff and Genevieve L. Stein-O'Brien

-

16 January 2024

+

17 January 2024

@@ -949,9 +949,6 @@

4.1 Obtaining CoGAPS patterns to library(gplots)
## 
 ## Attaching package: 'gplots'
-
## The following object is masked from 'package:S4Vectors':
-## 
-##     space
## The following object is masked from 'package:stats':
 ## 
 ##     lowess
@@ -961,7 +958,7 @@

4.1 Obtaining CoGAPS patterns to cexCol=1,cexRow=.5,scale = "row", hclustfun=function(x) hclust(x, method="average") ) -

+

4.2 Projecting CoGAPS objects

@@ -1120,12 +1117,14 @@

6.2 Obtaining and visualizing ## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0. ## ℹ Please use `linewidth` instead. ## This warning is displayed once every 8 hours. -## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated. +## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was +## generated.
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
 ## ℹ Please use the `linewidth` argument instead.
 ## This warning is displayed once every 8 hours.
-## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
-

+## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was +## generated. +

6.3 Projecting correlateR objects.

@@ -1171,7 +1170,6 @@

7.1.3 Identifying differential fe
options(width = 60)
 library(projectR)
 library(dplyr, warn.conflicts = F)
-library(magick)
 
 #gene weights x pattern
 data("retinal_patterns")
@@ -1227,13 +1225,8 @@ 

7.2.2 Output

7.2.3 Customize plotting of confidence intervals

-
suppressWarnings(library(cowplot))
-
## 
-## Attaching package: 'cowplot'
-
## The following object is masked from 'package:projectR':
-## 
-##     get_legend
-
#order in ascending order of estimates
+
suppressWarnings(library(cowplot))
+#order in ascending order of estimates
 conf_intervals <- conf_intervals %>% mutate(mid = (high+low)/2) %>% arrange(mid)
 gene_order <- rownames(conf_intervals)
 
@@ -1271,7 +1264,7 @@ 

7.2.3 Customize plotting of confi cowplot::plot_grid(pl1, pl2, pl3, align = "h", rel_widths = c(1,.4, 1), ncol = 3)

## Warning: Removed 249 rows containing missing values
 ## (`geom_label_repel()`).
-

+

From 1188a94e0d05c74f2b88851fa32ef55462fe38b4 Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Wed, 17 Jan 2024 11:47:57 -0500 Subject: [PATCH 18/33] Delete projectR.tex --- vignettes/projectR.tex | 758 ----------------------------------------- 1 file changed, 758 deletions(-) delete mode 100644 vignettes/projectR.tex diff --git a/vignettes/projectR.tex b/vignettes/projectR.tex deleted file mode 100644 index 5789790..0000000 --- a/vignettes/projectR.tex +++ /dev/null @@ -1,758 +0,0 @@ -\documentclass[]{article} -\usepackage{lmodern} -\usepackage{amssymb,amsmath} -\usepackage{ifxetex,ifluatex} -\usepackage{fixltx2e} % provides \textsubscript -\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex - \usepackage[T1]{fontenc} - \usepackage[utf8]{inputenc} -\else % if luatex or xelatex - \ifxetex - \usepackage{mathspec} - \else - \usepackage{fontspec} - \fi - \defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase} -\fi -% use upquote if available, for straight quotes in verbatim environments -\IfFileExists{upquote.sty}{\usepackage{upquote}}{} -% use microtype if available -\IfFileExists{microtype.sty}{% -\usepackage{microtype} -\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts -}{} - - -\usepackage{longtable,booktabs} -\usepackage{graphicx} -% grffile has become a legacy package: https://ctan.org/pkg/grffile -\IfFileExists{grffile.sty}{% -\usepackage{grffile} -}{} -\makeatletter -\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi} -\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi} -\makeatother -% Scale images if necessary, so that they will not overflow the page -% margins by default, and it is still possible to overwrite the defaults -% using explicit options in \includegraphics[width, height, ...]{} -\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio} -\IfFileExists{parskip.sty}{% -\usepackage{parskip} -}{% else -\setlength{\parindent}{0pt} -\setlength{\parskip}{6pt plus 2pt minus 1pt} -} -\setlength{\emergencystretch}{3em} % prevent overfull lines -\providecommand{\tightlist}{% - \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} -\setcounter{secnumdepth}{5} - -%%% Use protect on footnotes to avoid problems with footnotes in titles -\let\rmarkdownfootnote\footnote% -\def\footnote{\protect\rmarkdownfootnote} - -%%% Change title format to be more compact -\usepackage{titling} - -% Create subtitle command for use in maketitle -\providecommand{\subtitle}[1]{ - \posttitle{ - \begin{center}\large#1\end{center} - } -} - -\setlength{\droptitle}{-2em} - -\RequirePackage[]{C:/Users/Gaurav/Documents/R/win-library/4.0/BiocStyle/resources/tex/Bioconductor} - -\bioctitle[]{projectR Vignette} - \pretitle{\vspace{\droptitle}\centering\huge} - \posttitle{\par} -\author{Gaurav Sharma, Charles Shin, Jared N. Slosberg, Loyal A. Goff and Genevieve L. Stein-O'Brien} - \preauthor{\centering\large\emph} - \postauthor{\par} - \predate{\centering\large\emph} - \postdate{\par} - \date{20 May 2022} - -% code highlighting -\definecolor{fgcolor}{rgb}{0.251, 0.251, 0.251} -\makeatletter -\@ifundefined{AddToHook}{}{\AddToHook{package/xcolor/after}{\definecolor{fgcolor}{rgb}{0.251, 0.251, 0.251}}} -\makeatother -\newcommand{\hlnum}[1]{\textcolor[rgb]{0.816,0.125,0.439}{#1}}% -\newcommand{\hlstr}[1]{\textcolor[rgb]{0.251,0.627,0.251}{#1}}% -\newcommand{\hlcom}[1]{\textcolor[rgb]{0.502,0.502,0.502}{\textit{#1}}}% -\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}% -\newcommand{\hlstd}[1]{\textcolor[rgb]{0.251,0.251,0.251}{#1}}% -\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.125,0.125,0.941}{#1}}% -\newcommand{\hlkwb}[1]{\textcolor[rgb]{0,0,0}{#1}}% -\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.251,0.251,0.251}{#1}}% -\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.878,0.439,0.125}{#1}}% -\let\hlipl\hlkwb -% -\usepackage{fancyvrb} -\newcommand{\VerbBar}{|} -\newcommand{\VERB}{\Verb[commandchars=\\\{\}]} -\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}} -% -\newenvironment{Shaded}{\begin{myshaded}}{\end{myshaded}} -% set background for result chunks -\let\oldverbatim\verbatim -\renewenvironment{verbatim}{\color{codecolor}\begin{myshaded}\begin{oldverbatim}}{\end{oldverbatim}\end{myshaded}} -% -\newcommand{\KeywordTok}[1]{\hlkwd{#1}} -\newcommand{\DataTypeTok}[1]{\hlkwc{#1}} -\newcommand{\DecValTok}[1]{\hlnum{#1}} -\newcommand{\BaseNTok}[1]{\hlnum{#1}} -\newcommand{\FloatTok}[1]{\hlnum{#1}} -\newcommand{\ConstantTok}[1]{\hlnum{#1}} -\newcommand{\CharTok}[1]{\hlstr{#1}} -\newcommand{\SpecialCharTok}[1]{\hlstr{#1}} -\newcommand{\StringTok}[1]{\hlstr{#1}} -\newcommand{\VerbatimStringTok}[1]{\hlstr{#1}} -\newcommand{\SpecialStringTok}[1]{\hlstr{#1}} -\newcommand{\ImportTok}[1]{{#1}} -\newcommand{\CommentTok}[1]{\hlcom{#1}} -\newcommand{\DocumentationTok}[1]{\hlcom{#1}} -\newcommand{\AnnotationTok}[1]{\hlcom{#1}} -\newcommand{\CommentVarTok}[1]{\hlcom{#1}} -\newcommand{\OtherTok}[1]{{#1}} -\newcommand{\FunctionTok}[1]{\hlstd{#1}} -\newcommand{\VariableTok}[1]{\hlstd{#1}} -\newcommand{\ControlFlowTok}[1]{\hlkwd{#1}} -\newcommand{\OperatorTok}[1]{\hlopt{#1}} -\newcommand{\BuiltInTok}[1]{{#1}} -\newcommand{\ExtensionTok}[1]{{#1}} -\newcommand{\PreprocessorTok}[1]{\textit{#1}} -\newcommand{\AttributeTok}[1]{{#1}} -\newcommand{\RegionMarkerTok}[1]{{#1}} -\newcommand{\InformationTok}[1]{\textcolor{messagecolor}{#1}} -\newcommand{\WarningTok}[1]{\textcolor{warningcolor}{#1}} -\newcommand{\AlertTok}[1]{\textcolor{errorcolor}{#1}} -\newcommand{\ErrorTok}[1]{\textcolor{errorcolor}{#1}} -\newcommand{\NormalTok}[1]{\hlstd{#1}} -% -\AtBeginDocument{\bibliographystyle{C:/Users/Gaurav/Documents/R/win-library/4.0/BiocStyle/resources/tex/unsrturl}} - - -\begin{document} -\maketitle - - -{ -\setcounter{tocdepth}{2} -\tableofcontents -\newpage -} -\hypertarget{introduction}{% -\section{Introduction}\label{introduction}} - -Technological advances continue to spur the exponential growth of biological data as illustrated by the rise of the omics---genomics, transcriptomics, epigenomics, proteomics, etc.---each with there own high throughput technologies. In order to leverage the full power of these resources, methods to integrate multiple data sets and data types must be developed. The reciprocal nature of the genomic, transcriptomic, epigenomic, and proteomic biology requires that the data provides a complementary view of cellular function and regulatory organization; however, the technical heterogeneity and massive size of high-throughput data even within a particular omic makes integrated analysis challenging. To address these challenges, we developed projectR, an R package for integrated analysis of high dimensional omic data. projectR uses the relationships defined within a given high dimensional data set, to interrogate related biological phenomena in an entirely new data set. By relying on relative comparisons within data type, projectR is able to circumvent many issues arising from technological variation. For a more extensive example of how the tools in the projectR package can be used for \emph{in silico} experiments, or additional information on the algorithm, see \href{https://www.sciencedirect.com/science/article/pii/S2405471219301462}{Stein-O'Brien, et al}. - -\hypertarget{getting-started-with-projectr}{% -\section{Getting started with projectR}\label{getting-started-with-projectr}} - -\hypertarget{installation-instructions}{% -\subsection{Installation Instructions}\label{installation-instructions}} - -For automatic Bioconductor package installation, start R, and run: - -\begin{verbatim} -BiocManager::install("projectR") -\end{verbatim} - -\hypertarget{methods}{% -\subsection{Methods}\label{methods}} - -Projection can roughly be defined as a mapping or transformation of points from one space to another often lower dimensional space. Mathematically, this can described as a function \(\varphi(x)=y : \Re^{D} \mapsto \Re^{d}\) s.t. \(d \leq D\) for \(x \in \Re^{D}, y \in \Re^{d}\) Barbakh, Wu, and Fyfe (2009) . The projectR package uses projection functions defined in a training dataset to interrogate related biological phenomena in an entirely new data set. These functions can be the product of any one of several methods common to ``omic'' analyses including regression, PCA, NMF, clustering. Individual sections focusing on one specific method are included in the vignette. However, the general design of the projectR function is the same regardless. - -\hypertarget{the-base-projectr-function}{% -\subsection{The base projectR function}\label{the-base-projectr-function}} - -The generic projectR function is executed as follows: - -\begin{verbatim} -library(projectR) -projectR(data, loadings, dataNames=NULL, loadingsNames=NULL, NP = NULL, full = false) -\end{verbatim} - -\hypertarget{input-arguments}{% -\subsubsection{Input Arguments}\label{input-arguments}} - -The inputs that must be set each time are only the data and loadings, with all other inputs having default values. However, incongruities in the feature mapping between the data and loadings, i.e.~a different format for the rownames of each object, will throw errors or result in an empty mapping and should be checked before running. To overcoming mismatched feature names in the objects themselves, the /code\{dataNames\} and /code\{loadingNames\} arguments can be manually supplied by the user. - -The arguments are as follows: - -\begin{description} -\item[data]{a dataset to be projected into the pattern space} -\item[loadings]{a matrix of continous values with unique rownames to be projected} -\item[dataNames]{a vector containing unique name, i.e. gene names, for the rows of the target dataset to be used to match features with the loadings, if not provided by \texttt{rownames(data)}. Order of names in vector must match order of rows in data.} -\item[loadingsNames]{a vector containing unique names, i.e. gene names, for the rows of loadings to be used to match features with the data, if not provided by \texttt{rownames(loadings)}. Order of names in vector must match order of rows in loadings.} -\item[NP]{vector of integers indicating which columns of loadings object to use. The default of NP = NA will use entire matrix.} -\item[full]{logical indicating whether to return the full model solution. By default only the new pattern object is returned.} -\end{description} - -The \texttt{loadings} argument in the generic projectR function is suitable for use with any genernal feature space, or set of feature spaces, whose rows annotation links them to the data to be projected. Ex: the coeffients associated with individual genes as the result of regression analysis or the amplituded values of individual genes as the result of non-negative matrix factorization (NMF). - -\hypertarget{output}{% -\subsubsection{Output}\label{output}} - -The basic output of the base projectR function, i.e.~\texttt{full=FALSE}, returns \texttt{projectionPatterns} representing relative weights for the samples from the new data in this previously defined feature space, or set of feature spaces. The full output of the base projectR function, i.e.~\texttt{full=TRUE}, returns \texttt{projectionFit}, a list containing \texttt{projectionPatterns} and \texttt{Projection}. The \texttt{Projection} object contains additional information from the proceedure used to obtain the \texttt{projectionPatterns}. For the the the base projectR function, \texttt{Projection} is the full lmFit model from the package \emph{\href{https://bioconductor.org/packages/3.12/limma}{limma}}. - -\hypertarget{pca-projection}{% -\section{PCA projection}\label{pca-projection}} - -Projection of principal components is achieved by matrix multiplication of a new data set by previously generated eigenvectors, or gene loadings. If the original data were standardized such that each gene is centered to zero average expression level, the principal components are normalized eigenvectors of the covariance matrix of the genes. Each PC is ordered according to how much of the variation present in the data they contain. Projection of the original samples into each PC will maximize the variance of the samples in the direction of that component and uncorrelated to previous components. Projection of new data places the new samples into the PCs defined by the original data. Because the components define an orthonormal basis set, they provide an isomorphism between a vector space, \(V\), and \(\Re^n\) which preserves inner products. If \(V\) is an inner product space over \(\Re\) with orthonormal basis \(B = v_1,...,v_n\) and \(v \epsilon V s.t [v]_B = (r_1,...,r_n)\), then finding the coordinate of \(v_i\) in \(v\) is precisely the inner product of \(v\) with \(v_i\), i.e.~\(r_i = \langle v,v_i \rangle\). This formulation is implemented for only those genes belonging to both the new data and the PC space. The \texttt{projectR} function has S4 method for class \texttt{prcomp}. - -\hypertarget{obtaining-pcs-to-project.}{% -\subsection{Obtaining PCs to project.}\label{obtaining-pcs-to-project.}} - -\begin{Shaded} -\begin{Highlighting}[] -\CommentTok{\# data to define PCs} -\KeywordTok{library}\NormalTok{(projectR)} -\KeywordTok{data}\NormalTok{(p.RNAseq6l3c3t)} - -\CommentTok{\# do PCA on RNAseq6l3c3t expression data} -\NormalTok{pc.RNAseq6l3c3t<{-}}\KeywordTok{prcomp}\NormalTok{(}\KeywordTok{t}\NormalTok{(p.RNAseq6l3c3t))} -\NormalTok{pcVAR <{-}}\StringTok{ }\KeywordTok{round}\NormalTok{(((pc.RNAseq6l3c3t}\OperatorTok{$}\NormalTok{sdev)}\OperatorTok{\^{}}\DecValTok{2}\OperatorTok{/}\KeywordTok{sum}\NormalTok{(pc.RNAseq6l3c3t}\OperatorTok{$}\NormalTok{sdev}\OperatorTok{\^{}}\DecValTok{2}\NormalTok{))}\OperatorTok{*}\DecValTok{100}\NormalTok{,}\DecValTok{2}\NormalTok{)} -\NormalTok{dPCA <{-}}\StringTok{ }\KeywordTok{data.frame}\NormalTok{(}\KeywordTok{cbind}\NormalTok{(pc.RNAseq6l3c3t}\OperatorTok{$}\NormalTok{x,pd.RNAseq6l3c3t))} - -\CommentTok{\#plot pca} -\KeywordTok{library}\NormalTok{(ggplot2)} -\NormalTok{setCOL <{-}}\StringTok{ }\KeywordTok{scale\_colour\_manual}\NormalTok{(}\DataTypeTok{values =} \KeywordTok{c}\NormalTok{(}\StringTok{"blue"}\NormalTok{,}\StringTok{"black"}\NormalTok{,}\StringTok{"red"}\NormalTok{), }\DataTypeTok{name=}\StringTok{"Condition:"}\NormalTok{)} -\NormalTok{setFILL <{-}}\StringTok{ }\KeywordTok{scale\_fill\_manual}\NormalTok{(}\DataTypeTok{values =} \KeywordTok{c}\NormalTok{(}\StringTok{"blue"}\NormalTok{,}\StringTok{"black"}\NormalTok{,}\StringTok{"red"}\NormalTok{),}\DataTypeTok{guide =} \OtherTok{FALSE}\NormalTok{)} -\NormalTok{setPCH <{-}}\StringTok{ }\KeywordTok{scale\_shape\_manual}\NormalTok{(}\DataTypeTok{values=}\KeywordTok{c}\NormalTok{(}\DecValTok{23}\NormalTok{,}\DecValTok{22}\NormalTok{,}\DecValTok{25}\NormalTok{,}\DecValTok{25}\NormalTok{,}\DecValTok{21}\NormalTok{,}\DecValTok{24}\NormalTok{),}\DataTypeTok{name=}\StringTok{"Cell Line:"}\NormalTok{)} - -\NormalTok{pPCA <{-}}\StringTok{ }\KeywordTok{ggplot}\NormalTok{(dPCA, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x=}\NormalTok{PC1, }\DataTypeTok{y=}\NormalTok{PC2, }\DataTypeTok{colour=}\NormalTok{ID.cond, }\DataTypeTok{shape=}\NormalTok{ID.line,} - \DataTypeTok{fill=}\NormalTok{ID.cond)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{geom\_point}\NormalTok{(}\KeywordTok{aes}\NormalTok{(}\DataTypeTok{size=}\NormalTok{days),}\DataTypeTok{alpha=}\NormalTok{.}\DecValTok{6}\NormalTok{)}\OperatorTok{+} -\StringTok{ }\NormalTok{setCOL }\OperatorTok{+}\StringTok{ }\NormalTok{setPCH }\OperatorTok{+}\StringTok{ }\NormalTok{setFILL }\OperatorTok{+} -\StringTok{ }\KeywordTok{scale\_size\_area}\NormalTok{(}\DataTypeTok{breaks =} \KeywordTok{c}\NormalTok{(}\DecValTok{2}\NormalTok{,}\DecValTok{4}\NormalTok{,}\DecValTok{6}\NormalTok{), }\DataTypeTok{name=}\StringTok{"Day"}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{legend.position=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{), }\DataTypeTok{legend.justification=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{),} - \DataTypeTok{legend.direction =} \StringTok{"horizontal"}\NormalTok{,} - \DataTypeTok{panel.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill =} \StringTok{"white"}\NormalTok{,}\DataTypeTok{colour=}\OtherTok{NA}\NormalTok{),} - \DataTypeTok{legend.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill =} \StringTok{"transparent"}\NormalTok{,}\DataTypeTok{colour=}\OtherTok{NA}\NormalTok{),} - \DataTypeTok{plot.title =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{vjust =} \DecValTok{0}\NormalTok{,}\DataTypeTok{hjust=}\DecValTok{0}\NormalTok{,}\DataTypeTok{face=}\StringTok{"bold"}\NormalTok{)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{labs}\NormalTok{(}\DataTypeTok{title =} \StringTok{"PCA of hPSC PolyA RNAseq"}\NormalTok{,} - \DataTypeTok{x=}\KeywordTok{paste}\NormalTok{(}\StringTok{"PC1 ("}\NormalTok{,pcVAR[}\DecValTok{1}\NormalTok{],}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{),} - \DataTypeTok{y=}\KeywordTok{paste}\NormalTok{(}\StringTok{"PC2 ("}\NormalTok{,pcVAR[}\DecValTok{2}\NormalTok{],}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{))} -\end{Highlighting} -\end{Shaded} - -\hypertarget{projecting-prcomp-objects}{% -\subsection{Projecting prcomp objects}\label{projecting-prcomp-objects}} - -\begin{Shaded} -\begin{Highlighting}[] -\CommentTok{\# data to project into PCs from RNAseq6l3c3t expression data} -\KeywordTok{data}\NormalTok{(p.ESepiGen4c1l)} - -\KeywordTok{library}\NormalTok{(projectR)} -\NormalTok{PCA2ESepi <{-}}\StringTok{ }\KeywordTok{projectR}\NormalTok{(}\DataTypeTok{data =}\NormalTok{ p.ESepiGen4c1l}\OperatorTok{$}\NormalTok{mRNA.Seq,}\DataTypeTok{loadings=}\NormalTok{pc.RNAseq6l3c3t,} -\DataTypeTok{full=}\OtherTok{TRUE}\NormalTok{, }\DataTypeTok{dataNames=}\NormalTok{map.ESepiGen4c1l[[}\StringTok{"GeneSymbols"}\NormalTok{]])} -\CommentTok{\#\# [1] "93 row names matched between data and loadings"} -\CommentTok{\#\# [1] "Updated dimension of data: 93 9"} - -\NormalTok{pd.ESepiGen4c1l<{-}}\KeywordTok{data.frame}\NormalTok{(}\DataTypeTok{Condition=}\KeywordTok{sapply}\NormalTok{(}\KeywordTok{colnames}\NormalTok{(p.ESepiGen4c1l}\OperatorTok{$}\NormalTok{mRNA.Seq),} - \ControlFlowTok{function}\NormalTok{(x) }\KeywordTok{unlist}\NormalTok{(}\KeywordTok{strsplit}\NormalTok{(x,}\StringTok{\textquotesingle{}\_\textquotesingle{}}\NormalTok{))[}\DecValTok{1}\NormalTok{]),}\DataTypeTok{stringsAsFactors=}\OtherTok{FALSE}\NormalTok{)} -\NormalTok{pd.ESepiGen4c1l}\OperatorTok{$}\NormalTok{color<{-}}\KeywordTok{c}\NormalTok{(}\KeywordTok{rep}\NormalTok{(}\StringTok{"red"}\NormalTok{,}\DecValTok{2}\NormalTok{),}\KeywordTok{rep}\NormalTok{(}\StringTok{"green"}\NormalTok{,}\DecValTok{3}\NormalTok{),}\KeywordTok{rep}\NormalTok{(}\StringTok{"blue"}\NormalTok{,}\DecValTok{2}\NormalTok{),}\KeywordTok{rep}\NormalTok{(}\StringTok{"black"}\NormalTok{,}\DecValTok{2}\NormalTok{))} -\KeywordTok{names}\NormalTok{(pd.ESepiGen4c1l}\OperatorTok{$}\NormalTok{color)<{-}pd.ESepiGen4c1l}\OperatorTok{$}\NormalTok{Cond} - -\NormalTok{dPCA2ESepi<{-}}\StringTok{ }\KeywordTok{data.frame}\NormalTok{(}\KeywordTok{cbind}\NormalTok{(}\KeywordTok{t}\NormalTok{(PCA2ESepi[[}\DecValTok{1}\NormalTok{]]),pd.ESepiGen4c1l))} - -\CommentTok{\#plot pca} -\KeywordTok{library}\NormalTok{(ggplot2)} -\NormalTok{setEpiCOL <{-}}\StringTok{ }\KeywordTok{scale\_colour\_manual}\NormalTok{(}\DataTypeTok{values =} \KeywordTok{c}\NormalTok{(}\StringTok{"red"}\NormalTok{,}\StringTok{"green"}\NormalTok{,}\StringTok{"blue"}\NormalTok{,}\StringTok{"black"}\NormalTok{),} - \DataTypeTok{guide =} \KeywordTok{guide\_legend}\NormalTok{(}\DataTypeTok{title=}\StringTok{"Lineage"}\NormalTok{))} - -\NormalTok{pPC2ESepiGen4c1l <{-}}\StringTok{ }\KeywordTok{ggplot}\NormalTok{(dPCA2ESepi, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x=}\NormalTok{PC1, }\DataTypeTok{y=}\NormalTok{PC2, }\DataTypeTok{colour=}\NormalTok{Condition)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{geom\_point}\NormalTok{(}\DataTypeTok{size=}\DecValTok{5}\NormalTok{) }\OperatorTok{+}\StringTok{ }\NormalTok{setEpiCOL }\OperatorTok{+} -\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{legend.position=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{), }\DataTypeTok{legend.justification=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{),} - \DataTypeTok{panel.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill =} \StringTok{"white"}\NormalTok{),} - \DataTypeTok{legend.direction =} \StringTok{"horizontal"}\NormalTok{,} - \DataTypeTok{plot.title =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{vjust =} \DecValTok{0}\NormalTok{,}\DataTypeTok{hjust=}\DecValTok{0}\NormalTok{,}\DataTypeTok{face=}\StringTok{"bold"}\NormalTok{)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{labs}\NormalTok{(}\DataTypeTok{title =} \StringTok{"Encode RNAseq in target PC1 \& PC2"}\NormalTok{,} - \DataTypeTok{x=}\KeywordTok{paste}\NormalTok{(}\StringTok{"Projected PC1 ("}\NormalTok{,}\KeywordTok{round}\NormalTok{(PCA2ESepi[[}\DecValTok{2}\NormalTok{]][}\DecValTok{1}\NormalTok{],}\DecValTok{2}\NormalTok{),}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{),} - \DataTypeTok{y=}\KeywordTok{paste}\NormalTok{(}\StringTok{"Projected PC2 ("}\NormalTok{,}\KeywordTok{round}\NormalTok{(PCA2ESepi[[}\DecValTok{2}\NormalTok{]][}\DecValTok{2}\NormalTok{],}\DecValTok{2}\NormalTok{),}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{))} -\end{Highlighting} -\end{Shaded} - -\begin{verbatim} -## Warning: package 'gridExtra' was built under R version 4.0.5 -## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please -## use `guide = "none"` instead. -\end{verbatim} - -\begin{adjustwidth}{\fltoffset}{0mm} -\includegraphics[width=1\linewidth,]{E:/Projects/Fertiglab/projectR/vignettes/projectR_files/figure-latex/unnamed-chunk-2-1} \end{adjustwidth} - -\hypertarget{nmf-projection}{% -\section{NMF projection}\label{nmf-projection}} - -NMF decomposes a data matrix of \(D\) with \(N\) genes as rows and \(M\) samples as columns, into two matrices, as \(D ~ AP\). The pattern matrix P has rows associated with BPs in samples and the amplitude matrix A has columns indicating the relative association of a given gene, where the total number of BPs (k) is an input parameter. CoGAPS and GWCoGAPS seek a pattern matrix (\({\bf{P}}\)) and the corresponding distribution matrix of weights (\({\bf{A}}\)) whose product forms a mock data matrix (\({\bf{M}}\)) that represents the gene-wise data \({\bf{D}}\) within noise limits (\(\boldsymbol{\varepsilon}\)). That is, -\begin{equation} -{\bf{D}} = {\bf{M}} + \boldsymbol{\varepsilon} = {\bf{A}}{\bf{P}} + \boldsymbol{\varepsilon}. -\label{eq:matrixDecomp} -\end{equation} -The number of rows in \({\bf{P}}\) (columns in \({\bf{A}}\)) defines the number of biological patterns (k) that CoGAPS/GWCoGAPS will infer from the number of nonorthogonal basis vectors required to span the data space. As in the Bayesian Decomposition algorithm Wang, Kossenkov, and Ochs (2006), the matrices \({\bf{A}}\) and \({\bf{P}}\) in CoGAPS are assumed to have the atomic prior described in Sibisi and Skilling (1997). In the CoGAPS/GWCoGAPS implementation, \(\alpha_{A}\) and \(\alpha_{P}\) are corresponding parameters for the expected number of atoms which map to each matrix element in \({\bf{A}}\) and \({\bf{P}}\), respectively. The corresponding matrices \({\bf{A}}\) and \({\bf{P}}\) are found by MCMC sampling. - -Projection of CoGAPS/GWCoGAPS patterns is implemented by solving the factorization in \ref{eq:matrixDecomp} for the new data matrix where \({\bf{A}}\) is the fixed nonorthogonal basis vectors comprising the average of the posterior mean for the CoGAPS/GWCoGAPS simulations performed on the original data. The patterns \({\bf{P}}\) in the new data associated with this amplitude matrix is estimated using the least-squares fit to the new data implemented with the lmFit function in the \emph{\href{https://bioconductor.org/packages/3.12/limma}{limma}} package. The \texttt{projectR} function has S4 method for class \texttt{Linear Embedding Matrix, LME}. - -\begin{verbatim} -library(projectR) -projectR(data, loadings,dataNames = NULL, loadingsNames = NULL, - NP = NA, full = FALSE) -\end{verbatim} - -\hypertarget{input-arguments-1}{% -\subsubsection{Input Arguments}\label{input-arguments-1}} - -The inputs that must be set each time are only the data and patterns, with all other inputs having default values. However, inconguities between gene names--rownames of the loadings object and either rownames of the data object will throw errors and, subsequently, should be checked before running. - -The arguments are as follows: - -\begin{description} -\item[data]{a target dataset to be projected into the pattern space} -\item[loadings]{a CogapsResult object} -\item[dataNames]{rownames (eg. gene names) of the target dataset, if different from existing rownames of data} -\item[loadingsNames] loadingsNames rownames (eg. gene names) of the loadings to be matched with dataNames -\item[NP]{vector of integers indicating which columns of loadings object to use. The default of NP = NA will use entire matrix.} -\item[full]{logical indicating whether to return the full model solution. By default only the new pattern object is returned.} -\end{description} - -\hypertarget{output-1}{% -\subsubsection{Output}\label{output-1}} - -The basic output of the base projectR function, i.e.~\texttt{full=FALSE}, returns \texttt{projectionPatterns} representing relative weights for the samples from the new data in this previously defined feature space, or set of feature spaces. The full output of the base projectR function, i.e.~\texttt{full=TRUE}, returns \texttt{projectionFit}, a list containing \texttt{projectionPatterns} and \texttt{Projection}. The \texttt{Projection} object contains additional information from the procedure used to obtain the \texttt{projectionPatterns}. For the the the base projectR function, \texttt{Projection} is the full lmFit model from the package \emph{\href{https://bioconductor.org/packages/3.12/limma}{limma}}. - -\hypertarget{obtaining-cogaps-patterns-to-project.}{% -\subsection{Obtaining CoGAPS patterns to project.}\label{obtaining-cogaps-patterns-to-project.}} - -\begin{Shaded} -\begin{Highlighting}[] -\CommentTok{\# get data} -\KeywordTok{library}\NormalTok{(projectR)} -\NormalTok{AP <{-}}\StringTok{ }\KeywordTok{get}\NormalTok{(}\KeywordTok{data}\NormalTok{(}\StringTok{"AP.RNAseq6l3c3t"}\NormalTok{)) }\CommentTok{\#CoGAPS run data} -\NormalTok{AP <{-}}\StringTok{ }\NormalTok{AP}\OperatorTok{$}\NormalTok{Amean} -\CommentTok{\# heatmap of gene weights for CoGAPs patterns} -\KeywordTok{library}\NormalTok{(gplots)} -\CommentTok{\#\# Warning: package \textquotesingle{}gplots\textquotesingle{} was built under R version 4.0.5} -\CommentTok{\#\# } -\CommentTok{\#\# Attaching package: \textquotesingle{}gplots\textquotesingle{}} -\CommentTok{\#\# The following object is masked from \textquotesingle{}package:projectR\textquotesingle{}:} -\CommentTok{\#\# } -\CommentTok{\#\# lowess} -\CommentTok{\#\# The following object is masked from \textquotesingle{}package:stats\textquotesingle{}:} -\CommentTok{\#\# } -\CommentTok{\#\# lowess} -\KeywordTok{par}\NormalTok{(}\DataTypeTok{mar=}\KeywordTok{c}\NormalTok{(}\DecValTok{1}\NormalTok{,}\DecValTok{1}\NormalTok{,}\DecValTok{1}\NormalTok{,}\DecValTok{1}\NormalTok{))} -\NormalTok{pNMF<{-}}\KeywordTok{heatmap.2}\NormalTok{(}\KeywordTok{as.matrix}\NormalTok{(AP),}\DataTypeTok{col=}\NormalTok{bluered, }\DataTypeTok{trace=}\StringTok{\textquotesingle{}none\textquotesingle{}}\NormalTok{,} - \DataTypeTok{distfun=}\ControlFlowTok{function}\NormalTok{(c) }\KeywordTok{as.dist}\NormalTok{(}\DecValTok{1}\OperatorTok{{-}}\KeywordTok{cor}\NormalTok{(}\KeywordTok{t}\NormalTok{(c))) ,} - \DataTypeTok{cexCol=}\DecValTok{1}\NormalTok{,}\DataTypeTok{cexRow=}\NormalTok{.}\DecValTok{5}\NormalTok{,}\DataTypeTok{scale =} \StringTok{"row"}\NormalTok{,} - \DataTypeTok{hclustfun=}\ControlFlowTok{function}\NormalTok{(x) }\KeywordTok{hclust}\NormalTok{(x, }\DataTypeTok{method=}\StringTok{"average"}\NormalTok{)} -\NormalTok{ )} -\end{Highlighting} -\end{Shaded} - -\begin{adjustwidth}{\fltoffset}{0mm} -\includegraphics[width=1\linewidth,]{E:/Projects/Fertiglab/projectR/vignettes/projectR_files/figure-latex/unnamed-chunk-3-1} \end{adjustwidth} - -\hypertarget{projecting-cogaps-objects}{% -\subsection{Projecting CoGAPS objects}\label{projecting-cogaps-objects}} - -\begin{Shaded} -\begin{Highlighting}[] -\CommentTok{\# data to project into PCs from RNAseq6l3c3t expression data} -\KeywordTok{library}\NormalTok{(projectR)} -\KeywordTok{data}\NormalTok{(}\StringTok{\textquotesingle{}p.ESepiGen4c1l4\textquotesingle{}}\NormalTok{)} -\CommentTok{\#\# Warning in data("p.ESepiGen4c1l4"): data set \textquotesingle{}p.ESepiGen4c1l4\textquotesingle{} not found} -\KeywordTok{data}\NormalTok{(}\StringTok{\textquotesingle{}p.RNAseq6l3c3t\textquotesingle{}}\NormalTok{)} - -\NormalTok{NMF2ESepi <{-}}\StringTok{ }\KeywordTok{projectR}\NormalTok{(p.ESepiGen4c1l}\OperatorTok{$}\NormalTok{mRNA.Seq,}\DataTypeTok{loadings=}\NormalTok{AP,}\DataTypeTok{full=}\OtherTok{TRUE}\NormalTok{,} - \DataTypeTok{dataNames=}\NormalTok{map.ESepiGen4c1l[[}\StringTok{"GeneSymbols"}\NormalTok{]])} -\CommentTok{\#\# [1] "93 row names matched between data and loadings"} -\CommentTok{\#\# [1] "Updated dimension of data: 93 9"} - -\NormalTok{dNMF2ESepi<{-}}\StringTok{ }\KeywordTok{data.frame}\NormalTok{(}\KeywordTok{cbind}\NormalTok{(}\KeywordTok{t}\NormalTok{(NMF2ESepi),pd.ESepiGen4c1l))} - -\CommentTok{\#plot pca} -\KeywordTok{library}\NormalTok{(ggplot2)} -\NormalTok{setEpiCOL <{-}}\StringTok{ }\KeywordTok{scale\_colour\_manual}\NormalTok{(}\DataTypeTok{values =} \KeywordTok{c}\NormalTok{(}\StringTok{"red"}\NormalTok{,}\StringTok{"green"}\NormalTok{,}\StringTok{"blue"}\NormalTok{,}\StringTok{"black"}\NormalTok{),} -\DataTypeTok{guide =} \KeywordTok{guide\_legend}\NormalTok{(}\DataTypeTok{title=}\StringTok{"Lineage"}\NormalTok{))} - -\NormalTok{pNMF2ESepiGen4c1l <{-}}\StringTok{ }\KeywordTok{ggplot}\NormalTok{(dNMF2ESepi, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x=}\NormalTok{X1, }\DataTypeTok{y=}\NormalTok{X2, }\DataTypeTok{colour=}\NormalTok{Condition)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{geom\_point}\NormalTok{(}\DataTypeTok{size=}\DecValTok{5}\NormalTok{) }\OperatorTok{+}\StringTok{ }\NormalTok{setEpiCOL }\OperatorTok{+} -\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{legend.position=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{), }\DataTypeTok{legend.justification=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{),} - \DataTypeTok{panel.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill =} \StringTok{"white"}\NormalTok{),} - \DataTypeTok{legend.direction =} \StringTok{"horizontal"}\NormalTok{,} - \DataTypeTok{plot.title =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{vjust =} \DecValTok{0}\NormalTok{,}\DataTypeTok{hjust=}\DecValTok{0}\NormalTok{,}\DataTypeTok{face=}\StringTok{"bold"}\NormalTok{))} - \KeywordTok{labs}\NormalTok{(}\DataTypeTok{title =} \StringTok{"Encode RNAseq in target PC1 \& PC2"}\NormalTok{,} - \DataTypeTok{x=}\KeywordTok{paste}\NormalTok{(}\StringTok{"Projected PC1 ("}\NormalTok{,}\KeywordTok{round}\NormalTok{(PCA2ESepi[[}\DecValTok{2}\NormalTok{]][}\DecValTok{1}\NormalTok{],}\DecValTok{2}\NormalTok{),}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{),} - \DataTypeTok{y=}\KeywordTok{paste}\NormalTok{(}\StringTok{"Projected PC2 ("}\NormalTok{,}\KeywordTok{round}\NormalTok{(PCA2ESepi[[}\DecValTok{2}\NormalTok{]][}\DecValTok{2}\NormalTok{],}\DecValTok{2}\NormalTok{),}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{))} -\CommentTok{\#\# $x} -\CommentTok{\#\# [1] "Projected PC1 (18.36\% of varience)"} -\CommentTok{\#\# } -\CommentTok{\#\# $y} -\CommentTok{\#\# [1] "Projected PC2 (17.15\% of varience)"} -\CommentTok{\#\# } -\CommentTok{\#\# $title} -\CommentTok{\#\# [1] "Encode RNAseq in target PC1 \& PC2"} -\CommentTok{\#\# } -\CommentTok{\#\# attr(,"class")} -\CommentTok{\#\# [1] "labels"} -\end{Highlighting} -\end{Shaded} - -\hypertarget{clustering-projection}{% -\section{Clustering projection}\label{clustering-projection}} - -As canonical projection is not defined for clustering objects, the projectR package offers two transfer learning inspired methods to achieve the ``projection'' of clustering objects. These methods are defined by the function used to quantify and transfer the relationships which define each cluster in the original data set to the new dataset. Briefly, \texttt{cluster2pattern} uses the corelation of each genes expression to the mean of each cluster to define continuous weights. These weights are output as a \texttt{pclust} object which can serve as input to \texttt{projectR}. Alternatively, the \texttt{intersectoR} function can be used to test for significant overlap between two clustering objects. Both \texttt{cluster2pattern} and \texttt{intersectoR} methods are coded for a generic list structure with additional S4 class methods for kmeans and hclust objects. Further details and examples are provided in the followin respecitive sections. - -\hypertarget{cluster2pattern}{% -\subsection{cluster2pattern}\label{cluster2pattern}} - -\texttt{cluster2pattern} uses the corelation of each genes expression to the mean of each cluster to define continuous weights. - -\begin{verbatim} -library(projectR) -data(p.RNAseq6l3c3t) - - -nP<-5 -kClust<-kmeans(p.RNAseq6l3c3t,centers=nP) -kpattern<-cluster2pattern(clusters = kClust, NP = nP, data = p.RNAseq6l3c3t) -kpattern - -cluster2pattern(clusters = NA, NP = NA, data = NA) -\end{verbatim} - -\hypertarget{input-arguments-2}{% -\subsubsection{Input Arguments}\label{input-arguments-2}} - -The inputs that must be set each time are the clusters and data. - -The arguments are as follows: - -\begin{description} -\item[clusters]{a clustering object} -\item[NP]{either the number of clusters desired or the subset of clusters to use} -\item[data]{data used to make clusters object} -\end{description} - -\hypertarget{output-2}{% -\subsubsection{Output}\label{output-2}} - -The output of the \texttt{cluster2pattern} function is a \texttt{pclust} class object; specifically, a matrix of genes (rows) by clusters (columns). A gene's value outside of its assigned cluster is zero. For the cluster containing a given gene, the gene's value is the correlation of the gene's expression to the mean of that cluster. - -\hypertarget{intersector}{% -\subsection{intersectoR}\label{intersector}} - -\texttt{intersectoR} function can be used to test for significant overlap between two clustering objects. The base function finds and tests the intersecting values of two sets of lists, presumably the genes associated with patterns in two different datasets. S4 class methods for \texttt{hclust} and \texttt{kmeans} objects are also available. - -\begin{verbatim} -library(projectR) -intersectoR(pSet1 = NA, pSet2 = NA, pval = 0.05, full = FALSE, k = NULL) -\end{verbatim} - -\hypertarget{input-arguments-3}{% -\subsubsection{Input Arguments}\label{input-arguments-3}} - -The inputs that must be set each time are the clusters and data. - -The arguments are as follows: - -\begin{description} -\item[pSet1]{a list for a set of patterns where each entry is a set of genes associated with a single pattern} -\item[pSet2]{a list for a second set of patterns where each entry is a set of genes associated with a single pattern} -\item[pval]{the maximum p-value considered significant} -\item[full]{logical indicating whether to return full data frame of signigicantly overlapping sets. Default is false will return summary matrix.} -\item[k]{numeric giving cut height for hclust objects, if vector arguments will be applied to pSet1 and pSet2 in that order} -\end{description} - -\hypertarget{output-3}{% -\subsubsection{Output}\label{output-3}} - -The output of the \texttt{intersectoR} function is a summary matrix showing the sets with statistically significant overlap under the specified \(p\)-value threshold based on a hypergeometric test. If \texttt{full==TRUE} the full data frame of significantly overlapping sets will also be returned. - -\hypertarget{correlation-based-projection}{% -\section{Correlation based projection}\label{correlation-based-projection}} - -Correlation based projection requires a matrix of gene-wise correlation values to serve as the Pattern input to the \texttt{projectR} function. This matrix can be user-generated or the result of the \texttt{correlateR} function included in the projectR package. User-generated matrixes with each row corresponding to an individual gene can be input to the generic \texttt{projectR} function. The \texttt{correlateR} function allows users to create a weight matrix for projection with values quantifying the within dataset correlation of each genes expression to the expression pattern of a particular gene or set of genes as follows. - -\hypertarget{correlater}{% -\subsection{correlateR}\label{correlater}} - -\begin{verbatim} -library(projectR) -correlateR(genes = NA, dat = NA, threshtype = "R", threshold = 0.7, absR = FALSE, ...) -\end{verbatim} - -\hypertarget{input-arguments-4}{% -\subsubsection{Input Arguments}\label{input-arguments-4}} - -The inputs that must be set each time are only the genes and data, with all other inputs having default values. - -The arguments are as follows: - -\begin{description} -\item[genes]{gene or character vector of genes for reference expression pattern dat} -\item[data]{matrix or data frame with genes to be used for to calculate correlation} -\item[threshtype]{Default "R" indicates thresholding by R value or equivalent. Alternatively, "N" indicates a numerical cut off.} -\item[threshold]{numeric indicating value at which to make threshold} -\item[absR]{logical indicating where to include both positive and negatively correlated genes} -\item[...]{addtion imputes to the cor function} -\end{description} - -\hypertarget{output-4}{% -\subsubsection{Output}\label{output-4}} - -The output of the \texttt{correlateR} function is a \texttt{correlateR} class object. Specifically, a matrix of correlation values for those genes whose expression pattern pattern in the dataset is correlated (and anti-correlated if absR=TRUE) above the value given in as the threshold arguement. As this information may be useful in its own right, it is recommended that users inspect the \texttt{correlateR} object before using it as input to the \texttt{projectR} function. - -\hypertarget{obtaining-and-visualizing-objects.}{% -\subsection{\texorpdfstring{Obtaining and visualizing \texttt{correlateR} objects.}{Obtaining and visualizing objects.}}\label{obtaining-and-visualizing-objects.}} - -\begin{Shaded} -\begin{Highlighting}[] -\CommentTok{\# data to} -\KeywordTok{library}\NormalTok{(projectR)} -\KeywordTok{data}\NormalTok{(}\StringTok{"p.RNAseq6l3c3t"}\NormalTok{)} - -\CommentTok{\# get genes correlated to T} -\NormalTok{cor2T<{-}}\KeywordTok{correlateR}\NormalTok{(}\DataTypeTok{genes=}\StringTok{"T"}\NormalTok{, }\DataTypeTok{dat=}\NormalTok{p.RNAseq6l3c3t, }\DataTypeTok{threshtype=}\StringTok{"N"}\NormalTok{, }\DataTypeTok{threshold=}\DecValTok{10}\NormalTok{, }\DataTypeTok{absR=}\OtherTok{TRUE}\NormalTok{)} -\NormalTok{cor2T <{-}}\StringTok{ }\NormalTok{cor2T}\OperatorTok{@}\NormalTok{corM} -\CommentTok{\#\#\# heatmap of genes more correlated to T} -\NormalTok{indx<{-}}\KeywordTok{unlist}\NormalTok{(}\KeywordTok{sapply}\NormalTok{(cor2T,rownames))} -\NormalTok{indx <{-}}\StringTok{ }\KeywordTok{as.vector}\NormalTok{(indx)} -\KeywordTok{colnames}\NormalTok{(p.RNAseq6l3c3t)<{-}pd.RNAseq6l3c3t}\OperatorTok{$}\NormalTok{sampleX} -\KeywordTok{library}\NormalTok{(reshape2)} -\CommentTok{\#\# Warning: package \textquotesingle{}reshape2\textquotesingle{} was built under R version 4.0.5} -\NormalTok{pm.RNAseq6l3c3t<{-}}\KeywordTok{melt}\NormalTok{(}\KeywordTok{cbind}\NormalTok{(p.RNAseq6l3c3t[indx,],indx))} -\CommentTok{\#\# Using indx as id variables} - -\KeywordTok{library}\NormalTok{(gplots)} -\KeywordTok{library}\NormalTok{(ggplot2)} -\KeywordTok{library}\NormalTok{(viridis)} -\CommentTok{\#\# Warning: package \textquotesingle{}viridis\textquotesingle{} was built under R version 4.0.5} -\CommentTok{\#\# Loading required package: viridisLite} -\CommentTok{\#\# Warning: package \textquotesingle{}viridisLite\textquotesingle{} was built under R version 4.0.5} -\NormalTok{pCorT<{-}}\KeywordTok{ggplot}\NormalTok{(pm.RNAseq6l3c3t, }\KeywordTok{aes}\NormalTok{(variable, indx, }\DataTypeTok{fill =}\NormalTok{ value)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{geom\_tile}\NormalTok{(}\DataTypeTok{colour=}\StringTok{"gray20"}\NormalTok{, }\DataTypeTok{size=}\FloatTok{1.5}\NormalTok{, }\DataTypeTok{stat=}\StringTok{"identity"}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{scale\_fill\_viridis}\NormalTok{(}\DataTypeTok{option=}\StringTok{"B"}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{xlab}\NormalTok{(}\StringTok{""}\NormalTok{) }\OperatorTok{+}\StringTok{ }\KeywordTok{ylab}\NormalTok{(}\StringTok{""}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{scale\_y\_discrete}\NormalTok{(}\DataTypeTok{limits=}\NormalTok{indx) }\OperatorTok{+} -\StringTok{ }\KeywordTok{ggtitle}\NormalTok{(}\StringTok{"Ten genes most highly pos \& neg correlated with T"}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{theme}\NormalTok{(} - \DataTypeTok{panel.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill=}\StringTok{"gray20"}\NormalTok{),} - \DataTypeTok{panel.border =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill=}\OtherTok{NA}\NormalTok{,}\DataTypeTok{color=}\StringTok{"gray20"}\NormalTok{, }\DataTypeTok{size=}\FloatTok{0.5}\NormalTok{, }\DataTypeTok{linetype=}\StringTok{"solid"}\NormalTok{),} - \DataTypeTok{panel.grid.major =} \KeywordTok{element\_blank}\NormalTok{(),} - \DataTypeTok{panel.grid.minor =} \KeywordTok{element\_blank}\NormalTok{(),} - \DataTypeTok{axis.line =} \KeywordTok{element\_blank}\NormalTok{(),} - \DataTypeTok{axis.ticks =} \KeywordTok{element\_blank}\NormalTok{(),} - \DataTypeTok{axis.text =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{size=}\KeywordTok{rel}\NormalTok{(}\DecValTok{1}\NormalTok{),}\DataTypeTok{hjust=}\DecValTok{1}\NormalTok{),} - \DataTypeTok{axis.text.x =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{angle =} \DecValTok{90}\NormalTok{,}\DataTypeTok{vjust=}\NormalTok{.}\DecValTok{5}\NormalTok{),} - \DataTypeTok{legend.text =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{color=}\StringTok{"white"}\NormalTok{, }\DataTypeTok{size=}\KeywordTok{rel}\NormalTok{(}\DecValTok{1}\NormalTok{)),} - \DataTypeTok{legend.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill=}\StringTok{"gray20"}\NormalTok{),} - \DataTypeTok{legend.position =} \StringTok{"bottom"}\NormalTok{,} - \DataTypeTok{legend.title=}\KeywordTok{element\_blank}\NormalTok{()} -\NormalTok{)} -\end{Highlighting} -\end{Shaded} - -\begin{adjustwidth}{\fltoffset}{0mm} -\includegraphics[width=1\linewidth,]{E:/Projects/Fertiglab/projectR/vignettes/projectR_files/figure-latex/unnamed-chunk-5-1} \end{adjustwidth} - -\hypertarget{projecting-correlater-objects.}{% -\subsection{Projecting correlateR objects.}\label{projecting-correlater-objects.}} - -\begin{Shaded} -\begin{Highlighting}[] -\CommentTok{\# data to project into from RNAseq6l3c3t expression data} -\KeywordTok{data}\NormalTok{(p.ESepiGen4c1l)} - -\KeywordTok{library}\NormalTok{(projectR)} -\NormalTok{cor2ESepi <{-}}\StringTok{ }\KeywordTok{projectR}\NormalTok{(p.ESepiGen4c1l}\OperatorTok{$}\NormalTok{mRNA.Seq,}\DataTypeTok{loadings=}\NormalTok{cor2T[[}\DecValTok{1}\NormalTok{]],}\DataTypeTok{full=}\OtherTok{FALSE}\NormalTok{,} - \DataTypeTok{dataNames=}\NormalTok{map.ESepiGen4c1l}\OperatorTok{$}\NormalTok{GeneSymbols)} -\CommentTok{\#\# [1] "9 row names matched between data and loadings"} -\CommentTok{\#\# [1] "Updated dimension of data: 9 9"} -\end{Highlighting} -\end{Shaded} - -\hypertarget{differential-features-identification.}{% -\section{Differential features identification.}\label{differential-features-identification.}} - -\hypertarget{projectiondriver}{% -\subsection{projectionDriveR}\label{projectiondriver}} - -Given loadings that define the weight of features (genes) in a given latent space (e.g.~PCA, NMF), and the use of these patterns in samples, it is of interest to look at differential usage of these features between conditions. These conditions may be defined by user-defined annotations of cell type or by differential usage of a (projected) pattern. By examining differences in gene expression, weighted by the loadings that define their importance in a specific latent space, a unique understanding of differential expression in that context can be gained. This approach was originally proposed and developed in (Baraban et al, 2021), which demonstrates its utility in cross-celltype and cross-species interpretation of pattern usages. - -\begin{verbatim} -library(projectR) -projectionDriveR(cellgroup1, cellgroup2, loadings, loadingsNames = NULL, - pvalue, pattern_name, display = T, normalize_pattern = T) -\end{verbatim} - -\hypertarget{input-arguments-5}{% -\subsubsection{Input Arguments}\label{input-arguments-5}} - -The required inputs are two feature by sample (e.g.~gene by cell) matrices to be compared, the loadings that define the feature weights, and the name of the pattern (column of feature loadings). If applicable, the expression matrices should already be corrected for variables such as sequencing depth. - -The arguments for projectionDriveR are: - -\begin{description} -\item[cellgroup1]{Matrix 1 with features as rows, samples as columns.} -\item[cellgroup2]{Matrix 2 with features as rows, samples as columns.} -\item[loadings]{Matrix or dataframe with features as rows, columns as patterns. Values define feature weights in that space} -\item[loadingsNames]{Vector of names corresponding to rows of loadings. By default the rownames of loadings will be used} -\item[pattern\_name]{the column name of the loadings by which the features will be weighted} -\item[pvalue]{Determines the significance of the confidence interval to be calculated between the difference of means} -\item[display]{Boolean. Whether or not to plot the estimates of significant features. Default = T} -\item[normalize\_pattern]{Boolean. Whether or not to normalize the average feature weight. Default = T} -\end{description} - -\hypertarget{output-5}{% -\subsubsection{Output}\label{output-5}} - -The output of \texttt{projectionDriveR} is a list of length five \texttt{mean\_ci} holds the confidence intervals for the difference in means for all features, \texttt{weighted\_ci} holds the confidence intervals for the weighted difference in means for all features, and normalized\_weights are the weights themselves. In addition, \texttt{significant\_genes} is a vector of gene names that are significantly different at the threshold provided. \texttt{plotted\_ci} returns the ggplot figure of the confidence intervals, see \texttt{plotConfidenceIntervals} for documentation. - -\hypertarget{identifying-differential-features-associated-with-learned-patterns}{% -\subsubsection{Identifying differential features associated with learned patterns}\label{identifying-differential-features-associated-with-learned-patterns}} - -\begin{Shaded} -\begin{Highlighting}[] -\KeywordTok{options}\NormalTok{(}\DataTypeTok{width =} \DecValTok{60}\NormalTok{)} -\KeywordTok{library}\NormalTok{(projectR)} -\KeywordTok{library}\NormalTok{(dplyr, }\DataTypeTok{warn.conflicts =}\NormalTok{ F)} -\CommentTok{\#\# Warning: package \textquotesingle{}dplyr\textquotesingle{} was built under R version 4.0.5} - -\CommentTok{\#gene weights x pattern} -\KeywordTok{data}\NormalTok{(}\StringTok{"retinal\_patterns"}\NormalTok{)} - -\CommentTok{\#size{-}normed, log expression} -\KeywordTok{data}\NormalTok{(}\StringTok{"microglial\_counts"}\NormalTok{)} - -\CommentTok{\#size{-}normed, log expression} -\KeywordTok{data}\NormalTok{(}\StringTok{"glial\_counts"}\NormalTok{)} - -\CommentTok{\#the features by which to weight the difference in expression } -\NormalTok{pattern\_to\_weight <{-}}\StringTok{ "Pattern.24"} -\NormalTok{drivers <{-}}\StringTok{ }\KeywordTok{projectionDriveR}\NormalTok{(microglial\_counts, }\CommentTok{\#expression matrix} -\NormalTok{ glial\_counts, }\CommentTok{\#expression matrix} - \DataTypeTok{loadings =}\NormalTok{ retinal\_patterns, }\CommentTok{\#feature x pattern dataframe} - \DataTypeTok{loadingsNames =} \OtherTok{NULL}\NormalTok{,} - \DataTypeTok{pattern\_name =}\NormalTok{ pattern\_to\_weight, }\CommentTok{\#column name} - \DataTypeTok{pvalue =} \FloatTok{1e{-}5}\NormalTok{, }\CommentTok{\#pvalue before bonferroni correction} - \DataTypeTok{display =}\NormalTok{ T,} - \DataTypeTok{normalize\_pattern =}\NormalTok{ T) }\CommentTok{\#normalize feature weights} -\CommentTok{\#\# [1] "2996 row names matched between datasets"} -\CommentTok{\#\# [1] "2996"} -\CommentTok{\#\# [1] "Updated dimension of data: 2996"} -\end{Highlighting} -\end{Shaded} - -\begin{adjustwidth}{\fltoffset}{0mm} -\includegraphics[width=1\linewidth,]{E:/Projects/Fertiglab/projectR/vignettes/projectR_files/figure-latex/projectionDriver-1} \end{adjustwidth} - -\begin{Shaded} -\begin{Highlighting}[] - -\NormalTok{conf\_intervals <{-}}\StringTok{ }\NormalTok{drivers}\OperatorTok{$}\NormalTok{mean\_ci[drivers}\OperatorTok{$}\NormalTok{significant\_genes,]} - -\KeywordTok{str}\NormalTok{(conf\_intervals)} -\CommentTok{\#\# \textquotesingle{}data.frame\textquotesingle{}: 253 obs. of 2 variables:} -\CommentTok{\#\# $ low : num 1.86 0.158 {-}0.562 {-}0.756 0.155 ...} -\CommentTok{\#\# $ high: num 2.03943 0.26729 {-}0.00197 {-}0.18521 0.23239 ...} -\end{Highlighting} -\end{Shaded} - -\hypertarget{plotconfidenceintervals}{% -\subsection{plotConfidenceIntervals}\label{plotconfidenceintervals}} - -\hypertarget{input}{% -\subsubsection{Input}\label{input}} - -The arguments for plotConfidenceIntervals are: - -\begin{description} -\item[confidence\_intervals]{A dataframe of features x estimates} -\item[interval\_name]{names of columns that contain the low and high estimates, respectively. -(default: c("low","high"))} -\item[pattern\_name]{string to use as the title for the plots} -\item[sort]{Boolean. Whether or not to sort genes by their estimates (default = T)} -\item[genes]{a vector with names of genes to include in plot. If sort=F, estimates will be plotted in this order (default = NULL will include all genes.)} -\item[weights]{weights of features to include as annotation (default = NULL will not include heatmap)} -\item[weights\_clip]{quantile of data to clip color scale for improved visualization (default: 0.99)} -\item[weights\_vis\_norm]{Which processed version of weights to visualize as a heatmap. One of c("none", "quantile"). default = "none"} -\end{description} - -\hypertarget{output-6}{% -\subsubsection{Output}\label{output-6}} - -A list of the length three that includes confidence interval plots and relevant info. \texttt{ci\_estimates\_plot} is the point-range plot for the provided estimates. If called from within \texttt{projectionDriveR}, the unweighted estimates are used. \texttt{feature\_order} is the vector of gene names in the order shown in the figure. \texttt{weights\_heatmap} is a heatmap annotation of the gene loadings, in the same order as above. - -\hypertarget{customize-plotting-of-confidence-intervals}{% -\subsubsection{Customize plotting of confidence intervals}\label{customize-plotting-of-confidence-intervals}} - -\begin{Shaded} -\begin{Highlighting}[] -\KeywordTok{library}\NormalTok{(cowplot)} -\CommentTok{\#\# Warning: package \textquotesingle{}cowplot\textquotesingle{} was built under R version 4.0.5} -\CommentTok{\#order in ascending order of estimates} -\NormalTok{conf\_intervals <{-}}\StringTok{ }\NormalTok{conf\_intervals }\OperatorTok{\%>\%}\StringTok{ }\KeywordTok{mutate}\NormalTok{(}\DataTypeTok{mid =}\NormalTok{ (high}\OperatorTok{+}\NormalTok{low)}\OperatorTok{/}\DecValTok{2}\NormalTok{) }\OperatorTok{\%>\%}\StringTok{ }\KeywordTok{arrange}\NormalTok{(mid)} -\NormalTok{gene\_order <{-}}\StringTok{ }\KeywordTok{rownames}\NormalTok{(conf\_intervals)} - -\CommentTok{\#add text labels for top and bottom n genes} -\NormalTok{conf\_intervals}\OperatorTok{$}\NormalTok{label\_name <{-}}\StringTok{ }\OtherTok{NA\_character\_} -\NormalTok{n <{-}}\StringTok{ }\DecValTok{2} -\NormalTok{idx <{-}}\StringTok{ }\KeywordTok{c}\NormalTok{(}\DecValTok{1}\OperatorTok{:}\NormalTok{n, (}\KeywordTok{dim}\NormalTok{(conf\_intervals)[}\DecValTok{1}\NormalTok{]}\OperatorTok{{-}}\NormalTok{(n}\DecValTok{{-}1}\NormalTok{))}\OperatorTok{:}\KeywordTok{dim}\NormalTok{(conf\_intervals)[}\DecValTok{1}\NormalTok{])} -\NormalTok{gene\_ids <{-}}\StringTok{ }\NormalTok{gene\_order[idx]} -\NormalTok{conf\_intervals}\OperatorTok{$}\NormalTok{label\_name[idx] <{-}}\StringTok{ }\NormalTok{gene\_ids} - -\CommentTok{\#the labels above can now be used as ggplot aesthetics} -\NormalTok{plots\_list <{-}}\StringTok{ }\KeywordTok{plotConfidenceIntervals}\NormalTok{(conf\_intervals, }\CommentTok{\#mean difference in expression confidence intervals} - \DataTypeTok{sort =}\NormalTok{ F, }\CommentTok{\#should genes be sorted by estimates} - \DataTypeTok{weights =}\NormalTok{ drivers}\OperatorTok{$}\NormalTok{normalized\_weights[}\KeywordTok{rownames}\NormalTok{(conf\_intervals)],} - \DataTypeTok{pattern\_name =}\NormalTok{ pattern\_to\_weight,} - \DataTypeTok{weights\_clip =} \FloatTok{0.99}\NormalTok{,} - \DataTypeTok{weights\_vis\_norm =} \StringTok{"none"}\NormalTok{)} - -\NormalTok{pl1 <{-}}\StringTok{ }\NormalTok{plots\_list[[}\StringTok{"ci\_estimates\_plot"}\NormalTok{]] }\OperatorTok{+} -\StringTok{ }\NormalTok{ggrepel}\OperatorTok{::}\KeywordTok{geom\_label\_repel}\NormalTok{(}\KeywordTok{aes}\NormalTok{(}\DataTypeTok{label =}\NormalTok{ label\_name), }\DataTypeTok{max.overlaps =} \DecValTok{20}\NormalTok{, }\DataTypeTok{force =} \DecValTok{50}\NormalTok{)} - -\NormalTok{pl2 <{-}}\StringTok{ }\NormalTok{plots\_list[[}\StringTok{"weights\_heatmap"}\NormalTok{]]} - -\CommentTok{\#now plot the weighted differences} -\NormalTok{weighted\_conf\_intervals <{-}}\StringTok{ }\NormalTok{drivers}\OperatorTok{$}\NormalTok{weighted\_mean\_ci[gene\_order,]} -\NormalTok{plots\_list\_weighted <{-}}\StringTok{ }\KeywordTok{plotConfidenceIntervals}\NormalTok{(weighted\_conf\_intervals,} - \DataTypeTok{sort =}\NormalTok{ F,} - \DataTypeTok{pattern\_name =}\NormalTok{ pattern\_to\_weight)} - -\NormalTok{pl3 <{-}}\StringTok{ }\NormalTok{plots\_list\_weighted[[}\StringTok{"ci\_estimates\_plot"}\NormalTok{]] }\OperatorTok{+} -\StringTok{ }\KeywordTok{xlab}\NormalTok{(}\StringTok{"Difference in weighted group means"}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{axis.title.y =} \KeywordTok{element\_blank}\NormalTok{(), }\DataTypeTok{axis.ticks.y =} \KeywordTok{element\_blank}\NormalTok{(), }\DataTypeTok{axis.text.y =} \KeywordTok{element\_blank}\NormalTok{())} - -\NormalTok{cowplot}\OperatorTok{::}\KeywordTok{plot\_grid}\NormalTok{(pl1, pl2, pl3, }\DataTypeTok{align =} \StringTok{"h"}\NormalTok{, }\DataTypeTok{rel\_widths =} \KeywordTok{c}\NormalTok{(}\DecValTok{1}\NormalTok{,.}\DecValTok{4}\NormalTok{, }\DecValTok{1}\NormalTok{), }\DataTypeTok{ncol =} \DecValTok{3}\NormalTok{)} -\CommentTok{\#\# Warning: Removed 249 rows containing missing values} -\CommentTok{\#\# (geom\_label\_repel).} -\end{Highlighting} -\end{Shaded} - -\begin{adjustwidth}{\fltoffset}{0mm} -\includegraphics[width=1\linewidth,]{E:/Projects/Fertiglab/projectR/vignettes/projectR_files/figure-latex/unnamed-chunk-7-1} \end{adjustwidth} - -\hypertarget{refs}{} -\begin{cslreferences} -\leavevmode\hypertarget{ref-Barbakh:2009bw}{}% -Barbakh, Wesam Ashour, Ying Wu, and Colin Fyfe. 2009. ``Review of Linear Projection Methods.'' In \emph{Non-Standard Parameter Adaptation for Exploratory Data Analysis}, 29--48. Berlin, Heidelberg: Springer Berlin Heidelberg. - -\leavevmode\hypertarget{ref-Sibisi1997}{}% -Sibisi, Sibusiso, and John Skilling. 1997. ``Prior Distributions on Measure Space.'' \emph{Journal of the Royal Statistical Society: Series B (Statistical Methodology)} 59 (1): 217--35. \url{https://doi.org/10.1111/1467-9868.00065}. - -\leavevmode\hypertarget{ref-Ochs2006}{}% -Wang, Guoli, Andrew V. Kossenkov, and Michael F. Ochs. 2006. ``LS-Nmf: A Modified Non-Negative Matrix Factorization Algorithm Utilizing Uncertainty Estimates.'' \emph{BMC Bioinformatics} 7 (1): 175. \url{https://doi.org/10.1186/1471-2105-7-175}. -\end{cslreferences} - - -\end{document} From 3f37b5342cf17bdd41c4139e8490e918e9c45aa9 Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Wed, 17 Jan 2024 14:57:18 -0500 Subject: [PATCH 19/33] update namespace --- NAMESPACE | 1 + R/projectionDriveRfun.R | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 43a7bea..0e4c860 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -30,6 +30,7 @@ import(scales, except = viridis_pal) import(tsne) import(umap) import(viridis) +importFrom(Matrix,as.matrix) importFrom(NMF,fcnnls) importFrom(ROCR,performance) importFrom(ROCR,prediction) diff --git a/R/projectionDriveRfun.R b/R/projectionDriveRfun.R index addcc4a..d4f907a 100644 --- a/R/projectionDriveRfun.R +++ b/R/projectionDriveRfun.R @@ -137,6 +137,7 @@ bonferroniCorrectedDifferences <- function( #' Calculate the weighted difference in expression between two groups (group1 - group2) #' #' @importFrom cowplot plot_grid +#' @importFrom Matrix as.matrix #' @param cellgroup1 gene x cell count matrix for cell group 1 #' @param cellgroup2 gene x cell count matrix for cell group 2 #' @param loadings A matrix of continuous values defining the features @@ -182,7 +183,7 @@ projectionDriveR<-function( #pattern weights must be formatted as a matrix for normalization if(pattern_name %in% colnames(loadings)){ pattern <- loadings[,pattern_name, drop = F] #data.frame - pattern <- as.matrix(pattern) + pattern <- Matrix::as.matrix(pattern) } else { stop(paste0("Provided pattern_name ",pattern_name, " is not a column in provided loadings")) } From 7f43d38f0301469aed88e4b5cb5fc1f7038e6446 Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Wed, 17 Jan 2024 15:35:05 -0500 Subject: [PATCH 20/33] Update plotting.R Removed plot print --- R/plotting.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/R/plotting.R b/R/plotting.R index 9fc616e..f7ba37c 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -275,9 +275,6 @@ pdVolcano <- function(result, axis.title=element_text(size=14), legend.text = element_text(size=12)) - plt <- cowplot::plot_grid(unweightedvolcano, weightedvolcano, ncol = 2, align = "h") - print(plt) - #return a list of genes that can be used as input to fgsea difexdf <- subset(mean_stats, Color == paste("Enriched in", metadata$reference_matrix) | Color == paste("Enriched in", metadata$test_matrix)) vec <- difexdf$estimate @@ -295,7 +292,8 @@ pdVolcano <- function(result, fgseavecs = list(unweightedvec = vec, weightedvec = weighted_vec), meta_data = metadata, - plt = plt) + plt = list(differential_expression = unweightedvolcano, + weighted_differential_expression = weightedvolcano)) return(vol_result) } From 1b9d5573ee3b7ea6a9e07056be80042b391577fa Mon Sep 17 00:00:00 2001 From: dimalvovs Date: Fri, 19 Jan 2024 16:49:12 -0500 Subject: [PATCH 21/33] refactor to match new bioc --- DESCRIPTION | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c509f99..8c6f9e4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,7 +4,14 @@ Title: Functions for the projection of weights from PCA, CoGAPS, NMF, correlation, and clustering Version: 1.19.01 Author: Gaurav Sharma, Charles Shin, Jared Slosberg, Loyal Goff, Genevieve Stein-O'Brien -Maintainer: Genevieve Stein-O'Brien +Authors@R: c( + person("Gaurav", "Sharma", role = c("aut")), + person("Charles", "Shin", role = c("aut")), + person("Jared", "Slosberg", role = c("aut")), + person("Loyal", "Goff", role = c("aut")), + person("Ryan", "Palaganas", role = c("aut")), + person("Genevieve", "Stein-O'Brien", role = c("aut", "cre" ), email = "gsteinobrien@gmail.com") + ) Description: Functions for the projection of data into the spaces defined by PCA, CoGAPS, NMF, correlation, and clustering. License: GPL (==2) Imports: From abe8c9d1785ffd5d784beaa467a5398a2254f79e Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Thu, 15 Feb 2024 19:24:03 -0500 Subject: [PATCH 22/33] Updated functions to reflect code review Improved style consistency, incorporated lintr Parameterized unweighted/weighted volcanos to one function Updated vignette with more projectionDriveR examples --- .lintr | 4 + DESCRIPTION | 4 +- NAMESPACE | 3 + R/package.R | 10 + R/plotting.R | 156 ++++--- R/projectionDriveRfun.R | 217 +++++----- data/cr_microglial.rda | Bin 0 -> 182969 bytes man/cr_microglial.Rd | 17 + man/pdVolcano.Rd | 7 +- man/plotVolcano.Rd | 22 + tests/testthat/test_projectR.R | 14 +- vignettes/projectR.Rmd | 108 ++++- vignettes/projectR.tex | 758 +++++++++++++++++++++++++++++++++ 13 files changed, 1123 insertions(+), 197 deletions(-) create mode 100644 .lintr create mode 100644 data/cr_microglial.rda create mode 100644 man/cr_microglial.Rd create mode 100644 man/plotVolcano.Rd create mode 100644 vignettes/projectR.tex diff --git a/.lintr b/.lintr new file mode 100644 index 0000000..d496296 --- /dev/null +++ b/.lintr @@ -0,0 +1,4 @@ +linters: all_linters() +exclusions: list("projectR/tests/testthat/test_projectR.R", + "projectR/tests/testthat.R") +encoding: "UTF-8" diff --git a/DESCRIPTION b/DESCRIPTION index 8c6f9e4..4805ad7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,11 +24,13 @@ Imports: ggalluvial, RColorBrewer, dplyr, + fgsea, reshape2, viridis, scales, Matrix, MatrixModels, + msigdbr, ggplot2, cowplot, ggrepel, @@ -48,7 +50,7 @@ Suggests: SeuratObject LazyData: TRUE LazyDataCompression: gzip -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Encoding: UTF-8 VignetteBuilder: knitr biocViews: FunctionalPrediction, GeneRegulation, BiologicalQuestion, Software diff --git a/NAMESPACE b/NAMESPACE index 0e4c860..273c371 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ export(intersectoR) export(multivariateAnalysisR) export(pdVolcano) export(plotConfidenceIntervals) +export(plotVolcano) export(projectR) export(projectionDriveR) export(rotatoR) @@ -22,9 +23,11 @@ import(MatrixModels) import(RColorBrewer) import(cluster) import(dplyr) +import(fgsea) import(ggalluvial) import(ggplot2) import(limma) +import(msigdbr) import(reshape2) import(scales, except = viridis_pal) import(tsne) diff --git a/R/package.R b/R/package.R index d9c261b..367306e 100644 --- a/R/package.R +++ b/R/package.R @@ -95,6 +95,16 @@ #' @format A CogapsResult object "CR.RNAseq6l3c3t" +#' CogapsResult object for microglial_counts +#' +#' cr_microglia contains the output of the CoGAPS function in the +#' CoGAPS package for data = microglial_counts +#' +#' @name cr_microglial +#' @docType data +#' @format A CogapsResult object +"cr_microglial" + #' CoGAPS patterns learned from the developing mouse retina. #' #' @references diff --git a/R/plotting.R b/R/plotting.R index f7ba37c..53c1118 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -1,14 +1,3 @@ -# Example plots to be functionalized -# devtools::install_github('rlbarter/superheat') -# -# test<-projectR(data=p.ESepiGen4c1l$mRNA.Seq,Patterns=AP.RNAseq6l3c3t$Amean,AnnotionObj=map.ESepiGen4c1l,IDcol="GeneSymbols",full=TRUE) -# -# tmp<-matrix("",nrow = 5,ncol=9) -# tmp[test$pval<0.01]<-"*" -# -# superheat(test$projection,row.dendrogram=TRUE, pretty.order.cols = TRUE, -# heat.pal.values = c(0, 0.5, 1),yt=colSums(test$projection),yt.plot.type='scatterline',yt.axis.name="Sum of\nProjections",X.text=tmp,X.text.size=8,bottom.label.text.angle = 90) -# ####################################################################################################################################### #' #' plotConfidenceIntervals @@ -132,20 +121,69 @@ plotConfidenceIntervals <- function( "feature_order" = rownames(confidence_intervals), "weights_heatmap" = wt_heatmap)) } +####################################################################################################################################### +#' plotVolcano +#' +#' Volcano plotting function +#' @param stats data frame with differential expression statistics +#' @param metadata #metadata from pdVolcano +#' @param FC Fold change threshold +#' @param pvalue p value threshold +#' @param title plot title +#' @export + +plotVolcano<-function( + stats, #pdVolcano stats dataframe + metadata, #metadata from pdVolcano + FC, + pvalue, + title +){ + + #set custom colors + myColors <- c("gray","red","dodgerblue") + names(myColors) <- levels(stats$Color) + custom_colors <- scale_color_manual(values = myColors, drop = FALSE) + + #plot + volcano <- ggplot(data = stats, aes(x = mean_diff, y = -log10(welch_padj), color = Color, label = stats$label)) + + geom_vline(xintercept = c(FC, -FC), lty = "dashed") + + geom_hline(yintercept = -log10(pvalue), lty = "dashed") + + geom_point(na.rm = TRUE) + + custom_colors + + coord_cartesian(ylim = c(0, 350), xlim = c(min(stats$mean_diff), max(stats$mean_diff))) + + ggrepel::geom_text_repel(size = 3, point.padding = 1, color = "black", + min.segment.length = .1, box.padding = 0.15, + max.overlaps = Inf, na.rm = TRUE) + + labs(x = "FC", + y = "Significance (-log10pval)", + color = NULL) + + ggtitle(paste(title)) + + theme_bw() + + theme(plot.title = element_text(size = 16), + legend.position = "bottom", + axis.title=element_text(size=14), + legend.text = element_text(size=12)) + return(volcano) +} + ####################################################################################################################################### #' pdVolcano #' #' Generate volcano plot and gate genes based on fold change and pvalue, includes vectors that can be used with fast gene set enrichment (fgsea) -#' @param result result output from projectionDriveR function with PI method selected +#' @param result result output from projectionDriveR function with PV mode selected #' @param FC fold change threshold, default at 0.2 #' @param pvalue significance threshold, default set to pvalue stored in projectionDriveR output #' @param subset vector of gene names to subset the plot by #' @param filter.inf remove genes that have pvalues below machine double minimum value #' @param label.num Number of genes to label on either side of the volcano plot, default 5 +#' @param display boolean. Whether or not to plot and display volcano plots #' @importFrom stats var #' @importFrom ggrepel geom_label_repel #' @importFrom ggrepel geom_text_repel +#' @import msigdbr +#' @import fgsea #' @import dplyr #' @return A list with weighted and unweighted differential expression metrics #' @export @@ -155,7 +193,8 @@ pdVolcano <- function(result, pvalue = NULL, subset = NULL, filter.inf = FALSE, - label.num = 5) { + label.num = 5, + display = T) { #if a genelist is provided, use them to subset the output of projectiondrivers if(!is.null(subset)){ @@ -179,7 +218,20 @@ pdVolcano <- function(result, } if(is.null(pvalue) == FALSE) { - pvalue = pvalue + message('Updating sig_genes...') + #update previously stored pvalue + pvalue <- pvalue + result$meta_data$pvalue <- pvalue + #update sig_genes with new pvalue + #recreate vector of significant genes from weighted and unweighted tests + weighted_PV_sig <- rownames(result$weighted_mean_stats[which(result$weighted_mean_stats$welch_padj <= pvalue),]) + PV_sig <- rownames(result$mean_stats[which(result$mean_stats$welch_padj <= pvalue),]) + #create vector of significant genes shared between weighted and unweighted tests + shared_genes_PV <- base::intersect( + PV_sig, weighted_PV_sig) + result$sig_genes <- list(PV_sig = PV_sig, + weighted_PV_sig = weighted_PV_sig, + PV_significant_shared_genes = shared_genes_PV) } else { pvalue <- result$meta_data$pvalue } @@ -191,11 +243,11 @@ pdVolcano <- function(result, #extract unweighted confidence intervals / statistics mean_stats <- result$mean_stats #fold change / significance calls - mean_stats$Color <- paste("NS or FC", FC) + mean_stats$Color <- paste("NS or FC <", FC) mean_stats$Color[mean_stats$welch_padj < pvalue & mean_stats$mean_diff > FC] <- paste("Enriched in", metadata$test_matrix) mean_stats$Color[mean_stats$welch_padj < pvalue & mean_stats$mean_diff < -FC] <- paste("Enriched in", metadata$reference_matrix) mean_stats$Color <- factor(mean_stats$Color, - levels = c(paste("NS or FC <", FC), paste("Enriched in", metadata$reference_matrix), paste("Enriched in", metadata$test_matrix))) + levels = c(paste("NS or FC <", FC), paste("Enriched in", metadata$reference_matrix), paste("Enriched in", metadata$test_matrix))) #label the most significant genes for enrichment mean_stats$invert_P <- (-log10(mean_stats$welch_padj)) * (mean_stats$mean_diff) @@ -207,32 +259,8 @@ pdVolcano <- function(result, mean_stats$label <- NA mean_stats$label[top_indices] <- paste(rownames(mean_stats)[top_indices]) mean_stats$label[bottom_indices] <- paste(rownames(mean_stats)[bottom_indices]) - - #set custom colors - myColors <- c("gray","red","dodgerblue") - names(myColors) <- levels(mean_stats$Color) - custom_colors <- scale_color_manual(values = myColors, drop = FALSE) - - #plot - unweightedvolcano = ggplot(data = mean_stats, aes(x = mean_diff, y = -log10(welch_padj), color = Color, label = label)) + - geom_vline(xintercept = c(FC, -FC), lty = "dashed") + - geom_hline(yintercept = -log10(pvalue), lty = "dashed") + - geom_point(na.rm = TRUE) + - custom_colors + - coord_cartesian(ylim = c(0, 350), xlim = c(-2, 2)) + - ggrepel::geom_text_repel(size = 3, point.padding = 1, color = "black", - min.segment.length = .1, box.padding = 0.15, - max.overlaps = Inf, na.rm = TRUE) + - labs(x = "FC", - y = "Significance (-log10pval)", - color = NULL) + - ggtitle("Differential Expression") + - theme_bw() + - theme(plot.title = element_text(size = 16), - legend.position = "bottom", - axis.title=element_text(size=14), - legend.text = element_text(size=12)) - + #unweighted volcano plot + unweightedvolcano <- plotVolcano(stats = mean_stats, metadata = metadata, FC = FC, pvalue = pvalue, title = "Differential Enrichment") #weighted volcano plot weighted_mean_stats <- result$weighted_mean_stats weighted_mean_stats$Color <- paste("NS or FC <", FC) @@ -244,37 +272,17 @@ pdVolcano <- function(result, weighted_mean_stats$invert_P <- (-log10(weighted_mean_stats$welch_padj)) * (weighted_mean_stats$mean_diff) - top_indices <- order(weighted_mean_stats$invert_P, decreasing = TRUE)[1:label.num] - bottom_indices <- order(weighted_mean_stats$invert_P)[1:label.num] + top_indices_w <- order(weighted_mean_stats$invert_P, decreasing = TRUE)[1:label.num] + bottom_indices_w <- order(weighted_mean_stats$invert_P)[1:label.num] # Add labels to the dataframe weighted_mean_stats$label <- NA - weighted_mean_stats$label[top_indices] <- paste(rownames(weighted_mean_stats)[top_indices]) - weighted_mean_stats$label[bottom_indices] <- paste(rownames(weighted_mean_stats)[bottom_indices]) - - myColors <- c("gray","red","dodgerblue") - names(myColors) <- levels(weighted_mean_stats$Color) - custom_colors <- scale_color_manual(values = myColors, drop = FALSE) - - weightedvolcano = ggplot(data = weighted_mean_stats, aes(x = mean_diff, y = -log10(welch_padj), color = Color, label = label)) + - geom_vline(xintercept = c(FC, -FC), lty = "dashed") + - geom_hline(yintercept = -log10(pvalue), lty = "dashed") + - geom_point(na.rm = TRUE) + - custom_colors + - coord_cartesian(ylim = c(0, 350), xlim = c(-2, 2)) + - ggrepel::geom_text_repel(size = 3, point.padding = 1, color = "black", - min.segment.length = .1, box.padding = 0.15, - max.overlaps = Inf, na.rm = TRUE) + - labs(x = "FC", - y = "Significance (-log10pval)", - color = NULL) + - ggtitle("Weighted Differential Expression") + - theme_bw() + - theme(plot.title = element_text(size = 16), - legend.position = "bottom", - axis.title=element_text(size=14), - legend.text = element_text(size=12)) + weighted_mean_stats$label[top_indices_w] <- paste(rownames(weighted_mean_stats)[top_indices_w]) + weighted_mean_stats$label[bottom_indices_w] <- paste(rownames(weighted_mean_stats)[bottom_indices_w]) + #weighted volcano plot + weightedvolcano <- plotVolcano(stats = weighted_mean_stats, FC = FC, pvalue = pvalue, title = "Weighted Differential Enrichment") + #return a list of genes that can be used as input to fgsea difexdf <- subset(mean_stats, Color == paste("Enriched in", metadata$reference_matrix) | Color == paste("Enriched in", metadata$test_matrix)) vec <- difexdf$estimate @@ -286,6 +294,7 @@ pdVolcano <- function(result, names(vec) <- rownames(difexdf) vol_result <- list(mean_stats = mean_stats, weighted_mean_stats = weighted_mean_stats, + normalized_weights = result$normalized_weights, sig_genes = result$sig_genes, difexpgenes = difexdf, weighted_difexpgenes = weighted_difexdf, @@ -294,6 +303,15 @@ pdVolcano <- function(result, meta_data = metadata, plt = list(differential_expression = unweightedvolcano, weighted_differential_expression = weightedvolcano)) + if(display == TRUE){ + #print volcano plots + pltgrid <- cowplot::plot_grid(vol_result$plt$differential_expression + theme(legend.position = "none"), + vol_result$plt$weighted_differential_expression + theme(legend.position = "none"), + ncol = 2, align = "h") + legend <- cowplot::get_legend(vol_result$plt$differential_expression + guides(color = guide_legend(nrow = 1)) +theme(legend.position = "bottom")) + plt <- cowplot::plot_grid(pltgrid, legend, ncol = 1, rel_heights = c(1, .1)) + print(plt) + } return(vol_result) } diff --git a/R/projectionDriveRfun.R b/R/projectionDriveRfun.R index d4f907a..717c0dd 100644 --- a/R/projectionDriveRfun.R +++ b/R/projectionDriveRfun.R @@ -1,153 +1,131 @@ -####################################################################################################################################### +################################################################################ #' bonferroniCorrectedDifferences #' -#' Calculate the weighted and unweighted difference in means for each measurement between two groups. +#' Calculate weighted/unweighted mean difference for each gene between 2 groups #' @param group1 count matrix 1 #' @param group2 count matrix 2 -#' @param diff_weights loadings to weight the differential expression between the groups #' @param pvalue significance value to threshold -#' @param mode user specified statistical approach, confidence intervals (CI) or pvalues (PV) - default to CI +#' @param diff_weights loadings to weight the differential expression +#' @param mode statistical approach, confidence intervals(CI) or pvalues(PV) #' @importFrom stats var #' @importFrom ggrepel geom_label_repel #' @import dplyr -bonferroniCorrectedDifferences <- function( +bonferronicorrecteddifferences <- function( group1, - group2, + group2, + pvalue, diff_weights = NULL, - mode = "CI", - pvalue) - - { - #if passed from projectionDrivers, cellgroup1 and cellgroup 1 will have the same rows (genes) - if(!(dim(group1)[1] == dim(group2)[1])){ + mode = "CI") { + if (!(dim(group1)[[1L]] == dim(group2)[[1L]])) { + #if passed from projectionDrivers, cellgroups will have the same rows stop("Rows of two cell group matrices are not identical") } - - if(any(is.na(group1)) | any(is.na(group2))){ + + if (anyNA(group1) || anyNA(group2)) { stop("NA values in count matrices not allowed") } ##Take means over all genes and calculate differences - group1_mean <- apply(group1, 1, mean) - group2_mean <- apply(group2, 1, mean) - mean_diff <- group1_mean - group2_mean #if this is log normalized counts the mean difference is actually log(group1/group2) - - + group1_mean <- rowMeans(group1) + group2_mean <- rowMeans(group2) + mean_diff <- group1_mean - group2_mean + + #if weights are provided, use them to weight the difference in means - if(!is.null(diff_weights)){ - + if (!is.null(diff_weights)) { + #check that genes exactly match between difference vector and weight vector - if(!(all(names(mean_diff) == names(diff_weights)))){ + if (!(all(names(mean_diff) == names(diff_weights)))) { stop("Names of loadings and counts do not match") } - + mean_diff <- mean_diff * diff_weights } - + ##Stats and corrections beginning here #calculate confidence intervals dimensionality <- length(mean_diff) #number of measurements (genes) - - n1_samples <- dim(group1)[2] #number of samples (cells) - n2_samples <- dim(group2)[2] - bon_correct <- pvalue / (2*dimensionality) #bonferroni correction - qval <- 1 - bon_correct - - tval <- qt(p = qval, df = n1_samples + n2_samples -2) #critical value - - group1_var <- apply(group1, 1, var) #variance of genes across group 1 - group2_var <- apply(group2, 1, var) #variance of genes across group 2 - + n1_samples <- dim(group1)[[2L]] #number of samples (cells) + n2_samples <- dim(group2)[[2L]] + bon_correct <- pvalue / (2L * dimensionality) #bonferroni correction + qval <- 1L - bon_correct - if(mode == "CI") { - + tval <- qt(p = qval, df = n1_samples + n2_samples - 2L) #critical value + + group1_var <- apply(group1, 1L, var) #variance of genes across group 1 + group2_var <- apply(group2, 1L, var) #variance of genes across group 2 + + + + if (mode == "CI") { #pooled standard deviation - pooled <- ((n1_samples-1)*group1_var + (n2_samples-1)*group2_var) / (n1_samples+n2_samples-2) - - #establish dataframe to populate in the following for loop - plusminus = data.frame(low = rep(NA_integer_, dimensionality), - high = rep(NA_integer_, dimensionality), - gene = rep(NA_integer_, dimensionality)) + pool <- ((n1_samples - 1L) * group1_var + (n2_samples - 1L) * group2_var) / + (n1_samples + n2_samples - 2L) + plusminus <- data.frame(low = mean_diff - tval * + sqrt(pool * (1L / n1_samples + 1L / n2_samples)), + high = mean_diff + tval * + sqrt(pool * (1L / n1_samples + 1L / n2_samples)), + gene = names(mean_diff)) rownames(plusminus) <- names(mean_diff) - - #for each gene, calculate confidence interval around mean - for(i in 1:dimensionality){ - - scale = tval * sqrt(pooled[i] * (1/n1_samples + 1/n2_samples)) - - plusminus[i, "low"] <- mean_diff[i] - scale #low estimate - plusminus[i, "high"] <- mean_diff[i] + scale #high estimate - plusminus[i, "gene"] <- names(mean_diff[i]) #gene names for easy sorting - } - + } else if (mode == "PV") { #welch t test - #vartest <- group1_var / group2_var #test to see if variance across groups is equal, often not equal #variance calculation - deltaS <- sqrt((group1_var / n1_samples) + (group2_var / n2_samples)) + delta_s <- sqrt((group1_var / n1_samples) + (group2_var / n2_samples)) #welch t statistic, rounded to 10 digits to avoid infinite decimals - welch_estimate <- round(mean_diff / deltaS, digits = 10) + welch_estimate <- round(mean_diff / delta_s, digits = 10L) #Welch-Satterthwaite equation for degrees of freedom - df <- (((group1_var / n1_samples) + (group2_var / n2_samples))^2) / ((((group1_var / n1_samples)^2) / (n1_samples - 1)) + (((group2_var / n2_samples)^2) / (n2_samples - 1))) + df <- (((group1_var / n1_samples) + (group2_var / n2_samples)) ^ 2L) / + ((((group1_var / n1_samples) ^ 2L) / (n1_samples - 1L)) + + (((group2_var / n2_samples) ^ 2L) / (n2_samples - 1L))) #calculate p value from estimate/tvalue - welch_pvalue <- 2*pt(-abs(welch_estimate), df=df) + welch_pvalue <- 2L * pt(-abs(welch_estimate), df = df) #bonferroni correction - welch_padj <- p.adjust(welch_pvalue, method = "bonferroni", n = dimensionality) - - # replace p values equal to zero with the smallest machine value possible - if (min(welch_padj, na.rm=TRUE) <= .Machine$double.xmin) { - zp <- length(which(welch_padj <= .Machine$double.xmin)) - warning(paste(zp,"P value(s) equal 0.", - "Converting any values less than", .Machine$double.xmin, "to minimum possible value..."), + welch_padj <- p.adjust(welch_pvalue, + method = "bonferroni", + n = dimensionality) + #replace p values equal to zero with the smallest machine value possible + if (min(welch_padj, na.rm=TRUE) <= .Machine[[double.xmin]]) { + zp <- length(which(welch_padj <= .Machine[[double.xmin]])) + warning(zp, " P value(s) equal 0. Converting values less than ", + .Machine[[double.xmin]], " to minimum possible value...", call. = FALSE) - welch_padj[welch_padj <= .Machine$double.xmin] <- .Machine$double.xmin - } - #establish dataframe to populate in the following for loop - plusminus = data.frame(ref_mean = rep(NA_integer_, dimensionality), - test_mean = rep(NA_integer_, dimensionality), - mean_diff = rep(NA_integer_, dimensionality), - estimate = rep(NA_integer_, dimensionality), - welch_pvalue = rep(NA_integer_, dimensionality), - welch_padj = rep(NA_integer_, dimensionality), - gene = rep(NA_integer_, dimensionality)) - rownames(plusminus) <- names(mean_diff) - - #input stats gene-wise - for(i in 1:dimensionality){ - plusminus[i, "ref_mean"] <- group2_mean[i] - plusminus[i, "test_mean"] <- group1_mean[i] - plusminus[i, "mean_diff"] <- mean_diff[i] - plusminus[i, "estimate"] <- welch_estimate[i] - plusminus[i, "welch_pvalue"] <- welch_pvalue[i] - plusminus[i, "welch_padj"] <- welch_padj[i] - plusminus[i, "gene"] <- names(mean_diff[i]) + welch_padj[welch_padj <= .Machine[[double.xmin]]] <- .Machine[[double.xmin]] } + plusminus <- data.frame( + ref_mean = group2_mean, + test_mean = group1_mean, + mean_diff = mean_diff, + estimate = welch_estimate, + welch_pvalue = welch_pvalue, + welch_padj = welch_padj, + gene = names(mean_diff) + ) } else { stop("Invalid mode selection") } return(plusminus) } - - -####################################################################################################################################### +################################################################################ #' projectionDriveR #' -#' Calculate the weighted difference in expression between two groups (group1 - group2) +#' Calculate weighted expression difference between two groups (group1 - group2) #' #' @importFrom cowplot plot_grid #' @importFrom Matrix as.matrix #' @param cellgroup1 gene x cell count matrix for cell group 1 #' @param cellgroup2 gene x cell count matrix for cell group 2 #' @param loadings A matrix of continuous values defining the features -#' @param pattern_name column of loadings for which drivers will be calculated. -#' @param pvalue confidence level for the bonferroni confidence intervals. Default 1e-5 -#' @param loadingsNames a vector with names of loading rows. Defaults to rownames. -#' @param display boolean. Whether or not to plot and display confidence intervals -#' @param normalize_pattern Boolean. Whether or not to normalize pattern weights. -#' @param mode user specified statistical approach, confidence intervals (CI) or pvalues (PV) - default to CI -#' @return A list with weighted mean differences, mean differences, and differential genes that meet the provided signficance threshold. +#' @param pattern_name column of loadings for which drivers will be calculated +#' @param pvalue confidence level. Default 1e-5 +#' @param loadingsNames a vector with names of loading rows defaults to rownames +#' @param display boolean. Whether or not to display confidence intervals +#' @param normalize_pattern Boolean. Whether or not to normalize pattern weights +#' @param mode statistical approach, confidence intervals or pvalues. default CI +#' @return A list with unweighted/weighted mean differences and differential +#' genes that meet the provided signficance threshold. #' @export #' #' @@ -187,7 +165,7 @@ projectionDriveR<-function( } else { stop(paste0("Provided pattern_name ",pattern_name, " is not a column in provided loadings")) } - print(paste("Mode:",mode)) + message("Mode: ",mode) #extract names of data objects group1name <- deparse(substitute(cellgroup1)) @@ -196,7 +174,7 @@ projectionDriveR<-function( #Filter the two count matrices and the pattern weights to include the intersection of their features #shared rows in two data matrices filtered_data <-geneMatchR(data1=cellgroup1, data2=cellgroup2, data1Names=NULL, data2Names=NULL, merge=FALSE) - print(paste(as.character(dim(filtered_data[[2]])[1]),'row names matched between datasets')) + message(as.character(dim(filtered_data[[2]])[1]),' row names matched between datasets') cellgroup1 <- filtered_data[[2]] #geneMatchR flips the indexes cellgroup2 <- filtered_data[[1]] @@ -206,7 +184,7 @@ projectionDriveR<-function( filtered_weights <- geneMatchR(data1 = cellgroup1, data2 = pattern, data1Names = NULL, data2Names = NULL, merge = F) dimensionality_final <- dim(filtered_weights[[2]])[1] - print(paste('Updated dimension of data:',as.character(paste(dimensionality_final, collapse = ' ')))) + message('Updated dimension of data: ',as.character(paste(dimensionality_final, collapse = ' '))) if(dimensionality_final == 0){ stop("No features matched by rownames of count matrix and rownames of loadings") @@ -231,13 +209,13 @@ projectionDriveR<-function( names(pattern_normalized_vec) <- rownames(pattern_filtered) #weighted confidence intervals of differences in cluster means - weighted_drivers_bonferroni <- bonferroniCorrectedDifferences(group1 = cellgroup1_filtered, + weighted_drivers_bonferroni <- bonferronicorrecteddifferences(group1 = cellgroup1_filtered, group2 = cellgroup2_filtered, diff_weights = pattern_normalized_vec, pvalue = pvalue, mode = mode) #unweighted confidence intervals of difference in cluster means - mean_bonferroni <- bonferroniCorrectedDifferences(group1 = cellgroup1_filtered, + mean_bonferroni <- bonferronicorrecteddifferences(group1 = cellgroup1_filtered, group2 = cellgroup2_filtered, diff_weights = NULL, pvalue = pvalue, @@ -284,35 +262,34 @@ projectionDriveR<-function( ncol = 2, align = "h", rel_widths = c(1,.3))) - #print(pl1_u) pl2_w <- (cowplot::plot_grid(pl_w[["ci_estimates_plot"]], pl_w[["weights_heatmap"]], ncol = 2, align = "h", rel_widths = c(1,.3))) - #print(pl2_w) plt <- cowplot::plot_grid(pl1_u, pl2_w, ncol = 2, align = "h") print(plt) } if(length(shared_genes) == 0){ #no genes were significant. Return info we have and skip plotting. - warning("No features (and weighted features) were significantly differentially used between the two groups") + warning("No features (and weighted features) were significantly differentially used between the two groups", call. = FALSE) result <- list(mean_ci = mean_bonferroni, weighted_mean_ci = weighted_drivers_bonferroni, + normalized_weights = pattern_normalized_vec, significant_shared_genes = shared_genes, plotted_ci = NULL, - sig_genes = list(unweighted_sig_genes = rownames(unweighted_sig_genes), - weighted_sig_genes = rownames(weighted_sig_genes)), meta_data = list(reference_matrix = paste0(group2name), test_matrix = paste0(group1name)) ) + return(result) } result <- list( mean_ci = mean_bonferroni, weighted_mean_ci = weighted_drivers_bonferroni, + normalized_weights = pattern_normalized_vec, sig_genes = list(unweighted_sig_genes = rownames(unweighted_sig_genes), weighted_sig_genes = rownames(weighted_sig_genes), significant_shared_genes = shared_genes), @@ -327,8 +304,21 @@ projectionDriveR<-function( #create vector of significant genes shared between weighted and unweighted tests shared_genes_PV <- base::intersect( PV_sig, weighted_PV_sig) + if(length(shared_genes_PV) == 0){ + #no genes were significant. Return info we have and skip plotting. + warning("No features (and weighted features) were significantly differentially used between the two groups", call. = FALSE) + result <- list(mean_stats = mean_bonferroni, + weighted_mean_stats = weighted_drivers_bonferroni, + normalized_weights = pattern_normalized_vec, + meta_data = list(reference_matrix = paste0(group2name), + test_matrix = paste0(group1name), + pvalue = pvalue) + ) + return(result) + } result <- list(mean_stats = mean_bonferroni, weighted_mean_stats = weighted_drivers_bonferroni, + normalized_weights = pattern_normalized_vec, sig_genes = list(PV_sig = PV_sig, weighted_PV_sig = weighted_PV_sig, PV_significant_shared_genes = shared_genes_PV), @@ -336,6 +326,17 @@ projectionDriveR<-function( test_matrix = paste0(group1name), pvalue = pvalue) ) + #apply pdVolcano function to result + result <- pdVolcano(result, display = FALSE) + if(display){ + #print volcano plots + pltgrid <- cowplot::plot_grid(result$plt$differential_expression + theme(legend.position = "none"), + result$plt$weighted_differential_expression + theme(legend.position = "none"), + ncol = 2, align = "h") + legend <- cowplot::get_legend(result$plt$differential_expression + guides(color = guide_legend(nrow = 1)) +theme(legend.position = "bottom")) + plt <- cowplot::plot_grid(pltgrid, legend, ncol = 1, rel_heights = c(1, .1)) + print(plt) + } } else { stop("Invalid mode selection") } diff --git a/data/cr_microglial.rda b/data/cr_microglial.rda new file mode 100644 index 0000000000000000000000000000000000000000..6d7f94b60142021417b4de4bff94a88fa5476475 GIT binary patch literal 182969 zcmW(+XH-*7*L?&P5dj6I7wIBW1Sz2^s39Vtp!6zDdXo-`D2fPD1EEI*1f=(#ARrK0 zl-@%LgdR#LA?@S+?w|W-)~tJG_TK00du9YM;iu_ zPLYK{EQI5FTJ{c6QxB8w~?u(Qp518 zbEArQ06Juuq+)0;SGMutXTauIi7l@1>}Ye$dvBXI;VNgS;n}gS8esG5 zI+N|OoXp8&2$!iV`Q0u!U~>Y03a1-tpVy!>;kKqzFABaiUSf<-RSnqN(oGzVQ6i4L>^OjHpY>s>SHqmL&K(&XxJcTlXk$Y`)| zK-BCEh-;a1+Pc5KEDgN;v`u`!`xlChwZ-d=dVJ+G((fe=S4e>6%=D~HKZCNS)XFa5M+0A_AY}%(vKHi2jh{Ga#dBZRdBnIaWH8vFQ5QV;`EH4_ z*NT&?#OV_Ku`EtyYpSw-46k^Y-Cdu*Q%D^&#)-upTy?_42fVTQU3vv#ar@|l;S=5V zzF1ih(T|1hkA|2B@-Q~z!(vrnHPKm46IDJEb-Ylu@W2+FI25q{4m z=iWSFJyf+EZm}XiK8r!j0hePi1P26nKOVW{J0q9}Bm8~R3vGhON!{5EI2PGUD`1$P z5#G5dt5=Y-q+kYrDg73oGQq}b8(^~hJL5N0ix5Q+3OD?%d6(PeZ3+`XIg)r8<2iJO z=9gMwnQ!?TI18&XD95CQCea@j(n7`BQRrErRflV;j3Pn%VL|vqz}5l(;e>u)D)r;p zyYKCP@k$*H~cycdZF86w&@t$Vd1>#M}u0#t)wVsydJR%%B#4KgRp-TZjncH zzUVRjE=DeDuHy`lwAzgRiQhRlv#sqG>K#4-g*yD~lb!jn_ zSgNY_grul}#NidqU*gE#3m{g5Np_mXqY0t)_IHw#&e_>;c52B$5Z}l~9+Wj=rKKA; zlrC*w-1U4y;@CK*mv3Em^@nV5Em*?V1rg#(XcS93lAWm05_wuhZDcS;cIX|1VG%ML zLhLZPU7g8-oC39ee8ae83~+7%eMbIk-2Wtp5f+R^<3W%|yY7w}4q-A*>PPKPIWvD0 z-|)p~f!ew{VvS^WsRJY6(`cTU-qX-(Fr+@w?@>uW2G9~rLqp)yk2XbCQ><)HDLNqu zrVB~rztWfLAX~P_zQcE87$h3(b64j`OALn3+6(r6$|Dj7pAI?%Gn6RXZ6_GXwr%LF zQ6^O;_SbLEka97xp#%3zv?30loMG-in<-Bj)my%AP-~u0Y-6Ok6kIT`YudhjNUY!_70l@@A^M54U-qNVOcZ2_q(&fp zUv1)$+pk@u7U@rR_Zdy!x! zqIxog>s5aXu+bU(5@#l1y4$`;=)s1~CCc5Y6?eVFh$PF-w;kBXNv zf(#?)a7(WtFNcxLNAuDD&^gocyF#`2XYK2G__1yY@*(QQSpsW{oa2g~k><~wMnV>O z!T4COdGdqWJG<;7S-q+6{y-F@kg9|V3V!fvhE**h`yfS4PpuNK|9w7x&ixVhkz+WM z|KNzhI@@kx;*M=C^5f<}xNwEkJCLQRq*mUL6A0ciExP|b_+vNFIxab|%XHg$K1&<^ z)FTvhmcVmVQMYpJ2rVdee#H5pq)B3>+;&5pSdstzW7pTXx)g?-wKi_mJIkG;lgPNc_Bq&C@IiZs!ft2+Uu3Le|GUf5}^&u=@h=9aVWMMTwXe+ zSM7($%Gm^kZW7~-v|PY!c{GU;$SJrr0SzHsZ1^HO&4T_&=C`onh_E} zv+55zb-GT;j>-jn^X?87$adx*k~ES5)Flv7s9x1$h9MiTC1LCo<`uPi^AV=NL;j+` zQrT4z+po^>WtRS6BMAtK+H;tT+GzlGwsp-HW|Qd4v5jV115NmoM51WxPu=kDKi=l> z*^H5e@9a%04HB4mRaHL&DS8glOIepz+O@#y6iQ0TfBPqly& zVz$hkKo+e-(xMA+$Nedxk~&ul+Fx<*Di3|jlHc79oSbf4nwv!ZRAKbrl1V*7K?Y%R zhrh-8)ea2w=G8w3g!u!GvX%9kx1F=I4o6jJDN6ekQAohIqif@O;b37><{<-~Hqzx@ zU^5E|X288}ySLwYb+>n}5ON=#?;vWG)S&@yTYMc;l&gvkpUh4?7Ox|LW(-THFe;P8 zGjNKjNq?v#<}5Qn>1_cedm^l`{YPP=nAXNVBM9B#LQAHd7ifo=dC858)_7ea9c;dv zKQGL=Rlq?*b+3I|Fu1?zSC;(ne0V~M&QrZ~`MZ{zq*r^b=p6;q{#AAUJNw!R7YL+x znRT#(-yFxkR|?`3JQpmtjV0RaQk0gHyP3p0AgHi;;Z>$+q|DBd4D$AVnUIzdUj5x` zw_T1N|Ia~gOLXffsFEPa?A^}_tlBG|c+Q&u5M(JJwuFXa>jwfr%@4d6m<}~d-%xhw z3l_c_AfKfl9)wFO?*|}S$$MOq%3*3aXPa1XzvihPag0k#5=Rvx*T;F)iXyAfy=)De zv~SQuzY*PLZu%ln=bxSW#VGdM~ zGldHN948CJ$sEaBeg3;JKa`wnV=HD)m054Y9>Mk6vl|!Eh=lC(a;!w^SP!DWqLg@$ zBXF8XRl)#!-s8oq!5RgWAF6x%MkLx0xPDu9)!%r&AZH9Kx`wm(t)9FTeXDuI;J|(z zwNznTkW-TSw-#h7RcJG^WVPY4G$X=>p803qkMsJ7Pa&P@XBZY~NVs=08rC$^QciMV zg!>CSV$U&Q$vzBtYF)%@HaCGs436A`KMs}9Iz$!Xnqo_m7;9xXVGf7Sn znN6vgm08K07dO+k4MWsolP+^lZZT04gE#--o%jkPAZfuZrga2%cazwSUJIu8_`ol2jphX1{X1XaSuJ#{*0^3%7z?Wg4w4=ePU}cc?*50beia7x2Iwy#u@SSc zI&Z0O-|7n@w+{kSm80;?Ey{FiNqfCA)pAx0NG z!Hzhr6U=kC3b_UiUOJj|oD-Ackk3V8oZz7hptmmV5-TVVW-T>xY6%+}+0kxw^R&F% zINC=Bnnu*Kn%tLmm0eZ%26Yd-9x6oq_|{b!7kwf+cveyp74tJ94$r?U2)p0rA4^j= zQlK(IoEB0-q`A)W1|{rolWXZT-Z}0D&1to>{E?^i?4{J&@H~VxGP9xU!9qlU*c_=D zF;ocFo&Zg_Pv2myT}yqI?*3>vIYd6;tf&D*YIqcPGO(^6sg4e-~_ z7-`>#%=|+8R@RwNbo*H$6y2oP1hXJzPc>ODFI)920NY&j`_X2i zU66kJtGmpnl$SI)(-~#V2tS*h&m7*GQ85BHqB23`9WcIDPTVE@!`qKz{@A* zR2S5U*P6(&!q$<3k&Mn#CHB@}GT4tEO0m8usMbqCK+$M~@KK8J*~l*2@JV*-yjjAC ztSf%<4#<$VpO_hz6=Yb3tc)V+Q_l_hv+t%%VZsT0S?T9lVgQmK;eDzxT_Rg8$bpU8 zplK$N2jPh+j-~3N8u&eJSFPCZv!T-GFux3lHm>_KSae5xW9wOZ*hPewNs7UUm&Hm=CRltff_QN&IVdxP{QI)UQ2w#O+#VN+vW8-=O&3Pe&lAD-_`F-S1Bs+8a)Y6GaVDbD^PF2H-uYlsxc)& zEEfYPX4z4+;}>_<4Q)YNl;FA4ZF@3mi~WNp3S*#X!3z)}1?~`AuiFQs*&4A0IajvS z84n-KDPMw9`$Drn8^Xgh(+?`gQwcca>|hH=hWGJ6R)N)S%zUNb=aA9m0Zl1m_%n(- zDDOm@r^|5g$ks68R`{aIYS>+~k&J-YHL&zm4=MZId#5MUlTAVLunvj-I_=3-8NQ*@ z%y%&IHF(g5%Wq8-S|D+HK2ROtF8gnH^`0brv|VCHpjmRYRyDy1^YbCnWt`rO@$>w2Tsdw?F1o2mIzO+3o&C+I&p{p*d$>A`2R??Nr6V8hG}49 zwx!uW?iq$C)wPUydu~#|>8-WVfUA^yqoCGOWf8$){?nfYF0v0Xd4GpN2cr}qd*zfq zWn$Ylm&xz92VA)TNodY5%ptS0yH;x9zn-cvOsH+#J&KOfRZ-(0IwL0UqYP0AdF_7~ z%qBhOoe+SN1=zup0ZU!*+DmLB!1gu}6|b{kF?Z-RBbYBy_Rw1_NLPW|@b{PC3_}|= zPFJng3~le@CkD{gE1FI{Hk^`hVvNF;X(SbOND`l{wdf3IJv{^5U$1w}*%msw3IW?r z!@G=RToSZpuf1PD@vFYR-c!c=}e>6ot;(zD@b`?P+c{?l?E9Wa8ZEZ=ZM@<3&zWnmxe<$a}*Z zuM|S{&5ID>#IZ5g0PY(F#9YAc-tMr^2Y`(-P* zEbd9!k}@ERU*ZP|h#v9&(7-fsh))vEnYUeuD1{Dee*!<+6}-UdF+0TlbB;--PvdH? zzz>Q*xGg|4Ma<>_J6O;Dx2%z-EW4)1arON6%4TB60nsvq{EX}SWX%%&Fq(eFsy>m@a(Hr15JqJpSbGU&s z9B!(OgIpz8@9Hg#*!+_ncw3YmWCC6_JIl!gsyjE-!Cwb`gCcm6x z%_c-b9^ktONSM@W+=$I`B~w98cPc&w%&1YC?rgsuw!$hFXXpn5?`Wnly;*kK2`+(= zRl{2g;HJ8v%TMZ|=+G$geh8P#r^=+i1k;#Gudj0&B~19%g2b5Q3cn}AZ1{w12?Td| zWj9fx$pnTL)azZ8en0{Ei*1;r?P(FYObys3Xp1*`70`b8_(ItG>U7J+;O(i zO9r!7D(n&x3_+nSoVd5zatF`xk{TPB6^<1^f|dAjYL1cCs?+ek=pjpnsUILT;*4XG z3!Q*5rj=`>44FYLS~fvoKJ<#6_?+sPW zg8{Zy4X2~E*Gbqd-U6#%^Uf$kPq_4sQ>aMq^UdCnbn+5s89s^C(ModYpCw2+AD9u& zqiGpxNJwKdMM&*9O$DSJR3p2J9(ZH}o4MO!Ybt)gdBW%@`Hz*?{fZd2rh5NnbNVu< zi(Na3G)$z(V?wqErboOobMbeGQtP;Gz^ajEwYMMyHh`o3ho*Frh|}N) zT8k=Q=z}HcQya;n5J8CwTbb22@o~waG5!*I>Froql;NhP-T`}{K&r2(CWl5Eqv6lc zYBc0q3VDoviS6fjBpUW_-1uNr%l)xdSkj8Pv_9%>PXMxCwLFSpetFm-K^H{<(Jv?Oh!FmQ1orhKW3N!~>fN*`*MDuK6f0L{RPuAJlYf;l{NQHs>^*|zsejB8 zzJe46A zcbDwVzkwV&YEB-QVHnnnil{%rCoBEETT~|U$d|EN0Mt=o&Xjk{O9KY}T(G5>{dUm_ zcdwwEIjrn{+w92vu`c+T2dAs7yN$xJJ4LW&7=^3s3zrxCVVgdU`#$DQK4y(O9CV%7 z7eZFz-kB}`m6WfDLPyJML9pB<{D?eE%$^i`!id30dt{kjhgnZ^SjCv3&Cz925_3dS zp^TF)t1zc-zh7z~yYP9_0a`ATjNgi*n#{>Yn=!~D@(aXou^BcTAPk0eAa}hgipI{n z3ke0dHj({qd;bEP<8K0N6GM>|Uk|;+k7~n&1wo~OseviNi?&5`E4hYb*EpzNTdF?f z<6??vOPkbAQ48q?jk{e_`Ia97`yA_8$|uoxD!;&YZ|4uBd`_)#i9sBxniF#LXn~Wq z;C$>V5uOs4(Z^{t^Scqm{SD!O6DsAx|7{e5EIG%paEBh)1;cL`qM zseRp39eSt%NQTUg4P;?JA}{q2>woVZaxgH0#TNQA{&pp%#ZyvRdUvx76p^hYYC-AxXJ>qQ{VQl z8<%;9o#_u6=H(uHn!~#<@5eRbjF11_Abo;*2b&gVm)nu0wcp}QPi0ZDR-#%VY$WLD zhoGP}{Q61(crZ9cHvF#rve}b(2=q{z=(LtI#|t+9p@7dP1ZKk}dt%{9Cm7Rc~Y$`nj}lZtu}KV!R;2 z@GJ9?nlG_J>#^gitc;jF>#CG31qRCGjLP*!TpFNTjttpX@16NA$0s%P*ELwq4~Z27)+raxcicA ziC_jJM56c*oxHH2jJ432Jcb2)(X4%RAoB5_|F`E%P+uk2(?p8ugU6k|x$6PIax=-D zLBB@{;(lEC*rv&nqR zm;c>i0XSTU0N>a{ZkL++mtcOy9p0AbXoXESbYN_ZI@BI)-VQXqZyX{BAi5XkZpxG6 z*0;`^Lw%3>yk{ZRPy3I^sbQ*jkH2Eowh|e7d1?y53&|#IpND_arZ#R3%Cfh-(`CRm z-?y!b-`C4SMu_V}2~!$h5hSNwNCJICA-Gdb7@fI?-|>?~o*H5=OKY4wQ>o>|ub#dm z=(0C_$8B4D$$e+_sF*N$NKdFy=lEgECGV8oZH?r=x~#(ys~l?ajUB zm>QTLwv!|$eyuqST5GWh^dO`Khl`!OdUcJI>^LX0QKJ8^H|FgpKJ}jhr0Qb2X7EJO z`s?1C9r)l@Tq#?^DHFn9n*#@PW`8&HA!$zQpEq}XDe)7 zgQnXajP{w>a3^f#M{6_m-usezI+m4E5dNysaHBUO3EN9^mDmZ|N(gV7Rr7S>=TOqA zxJepety6RPQ~-p@rVux5f+RGqHyd@t7R~@5(VEglo&oNmZpe$%<&KnJsyO^npOu2X;WIN&)PX~ z#^#M1eF6E#3u&RlPuAhU&@S0;ndFqQ1WIZcM&Wv>>;VU!mhKBHAa9-o%RX(tZ2PDF zHMCBvj`M6LX|iDwnzgrg|Jd$6%amAm+*y+d#I#c?WiMJ-dhhb4cX|RJTR-q)>P^g^Gwca`*kN)LBy)= zDae;becr@~@W&3SB@5>3Lr;^V*F`Gh-EXfxUC@Dxx_yhS>awnfn~jj5{VL+>8IEb>rs^TT!|nJzop#K8A8x;gd{U$}{{CWM~;lZatDZ}w=fYD_&M97x1#pfz|X zH$#Or1*yimYU|WITnMgvY6y1lZ260dmwq#7DDGlX|RS)8htC|_|vJgQXtwVhP<(cdGPz-h@ynHv6(t+uceS%)cf zICrB0J!I##{K=fZS)?&AU3hfyg})p39SlIBZ#D5_sZR;M4%uK>a4Rx^&70Fb)wlZy z2q}f=A#e+VsCK~8!e#95{gZ{n(EU$BBVL3EZ%KS!Bz$otTX>^#crAOIM75rv7@qc( zf;aB05@2^YjX5=Hs{hO>r$7?%A)V!&x7dPq(+8T!|1j}>rn^ZQaa#TSh-dFU_Kp|~ z@(~_tx6f2V1~_!pj!W9qYygoqR}zK zZfG_=l2+22BF4AR6lm#?@-Ha6<`N)q@X0pe%^@VoPo}h~G7S+!R%_u6J^i<1B#JmY zF)V1C|L{x_8Jr*8ozEWPJ>mO!L48#L+v3H$2zd%-@6c>~j1D50FL>X0I-K9cvDYe$ zjF#1a$Q)0yaLyG{GC=E|Pq6vQf(|F`W3hq04iT~<$QTqwzf)J_xc6T!Tw&O3mAv0U zEkfh|7FJ94LFAbvsvsDH+Z0c!!xN-|nnV)=4*HPx_=M33B4EeB78YDspvHL{7^nIX z@vqo;kI_VMU1)1h(<+CGbvOnfT>}0!0vY>lb_sRgXH`Lu*ojZ?kMd@ptxlYYx#aYMJ%Zjz7-0+VV8>H@%h#sqBiUY)L%nYFBhvF@`K1#gE8IsFf z>ilxYox?W$QC_mc z4U^yF1SmQcW@N6pl^f_cD(VBq$Y0JQRw{CSuGq|?$UjZU-_EcA|3u5Xua1$u+?A?+ zYRyhKE;-JRvD60+=dIyPMl%q2w!r5GeqLrUR8!)Xtd%d=FA^6@e! zefNXWlGjB)qca<@t+hKqX~71*uZppr{-4LnvdHiFutDl}>#=+c01>kW3xGUOb3D_R zCIJMS9Jp6irmoz!{d!HuS_%lht%$K20#EiQ{l2T^9yZH2{4aCb?=G@0DWO38wKVhj zM}`yp636P?Q;bI={d&gvxQ}T_zE!n|q?Oyz@qC<$*3)antLkf~jkP<`DL;Nn0@7)Q z@!4yNv1h_XYh7yPOP~Bp2%$Y;mFFYDaOs z>PQ?}+i|Ymf6VVvKetDUv41e?`Km*`r#|t?fOo~uu5<9Dk0k9>J_G1bgKm>0fX!L_ zZj?Z*F?8Y->gksYmkA6%{U0rA=)=S7{jkweuXKjOJ^fU^I?nUCmDgKvSezvZ#_ojN zhDUo?4nZ$ptJTmMLnBKw!*-vb>9eO1 zpFvahwE!%E&m0sPa`fY`d(W*a3Vzj(rx4_?l9@>s7S1#Rd-4g~L>lw83d|0Gq zony_YoKGlt8#=I{#Lzh`AV2g|k*GM((~lk+Rsw3ezb*ZLW$Fk0NoX>OzqSi`^;$x{QweAz>ZdSvt<5B!=L1j^IICLoR$efAMy&`t z_Pk`xpJ>ScNxQnToyo86%Fdl4j~!KQYZ{(fjY{H-rTF)6(XdzaU;7WAhw7ByY&cfG zS)6*p2V10}A_P&O@l>q3C#|%sNrzsK)cl9+>ECAXqjONB$lcnLJvMOBXOgWIZ!`7eOIOAD< z;%Vycadk)#IzWt_~>AYi9Zz66_R5iMZEFL7Pu13HLDme1=R0OHoz%4opT8GW-rf>8Oyj6nV}6^GA8sBPo^n1YFfl^+#`$Wg z`<{;ZFxp_RSaumKNm3<%ENe87lpC%i0~^?Yx{;Ph@sIiH882*8EOc|*BgnwOw5irD zdPd=s!`gSBAkVFbmcpAQ?Ots+p)Q{1jF76!x$ersa<0By5xIKYnZ zgO}wKxq?+S%nkV_jwUSE4^j9kt~L2A#={NyW{!qw+6sLtNmbo;G;YYv~@HP zSKwR!cVVy6(?2m&qiDp#FPLNMaJP{^%V(V1Qva?0T_$C|__)D$W9Qj<^_7Vzovfcq zd(@T?E?%ESy@-aYwZW=%NWFZ_*ia6#ddutxI8P5tZHb>|uzLr|FuU?RV)|UA!<;+`O@gL6skm2K# zpKU|A@Pl*3*i%UQFw^i%T*#&V1&21)bS}fn{dm6lZ zR2)dL8jVyuwT;(6DsjWwI6eHa5keOmKJFCXijDI@_33r54E}Ce9=CI*cHiL^ze@|i)N{A910QgeuW6l;*-bf zln^;=uzmKQUOW|jp#+e-u5!b3a_GYH&ZV4F4BtySAD>fi{wH~zdB@bf-A7ahTlXio zh7;(+>`7`bnxtM_*gRL>)N4@KRxBUZpSEH%v*u$-PS~s)S{mxkAN{Qq-6+_Y(eB9) z2oAnAHE!{8*3$TtTJLw;1L{AvrgqDV|KiIbp15k%5^w#=6PK)fb=dV-8YY`F9Ki|b zw>+k$9BP3yrCd@($SkXzrcyxS8sAOTKMP8@=0J9>R9Z^@np#ax$&UoWa&B0X2uAHy zrgi!FjZVz)XXK%9^rWQLXl7-%cyFE(twS6*g!3xWUA4tdwK?&*CF~G??Ek3s)TfYv zf3n?IeHc2&>QN!Ty;`?z6rv zi_+G_xd@ItXHhagzQd0Ad)}T{{Z3xTmKMKM2uu0{`_Z?$L(M9@ zB60mid!II-d#$YswY+>?9;9iSRfh9$JcS-BnfJdWeUPh18A(0+bfv|U_f@VT3;IcI zgZc6CRkaI`^vla|&z)HmXUlL_&T7yRHz%R77n(LDVl(*xJa?XCsx&iP4CXeK5j?;| z<@FK` zqRAsJSf!QlDcsw#9_!U-#_PTfiQ|4Bpy|`~d8b@7jyu}&`@!$xRv09*x!IRnj_GW? zT(s`-U~1qs>lZ~8Z+&!Tq>LA<*f=hnI1Z*u5S@Vc4=VyqKxe;5sy(T3s#Qy<{|?X) z$-L8`CD2*E?pQzaC2$3XUdMkHjMlPXT~5~UPddYedopJ%GPHvYoqPaU)1!;tfnC?95&G{RRK64_UF{ z8gjfnl(I%D`(VFSnyctMS;0p=&1mjds&vL3Vx*@W`|KXa?(|g#I$P(qKPsi0%g+4= zygmyTC_Y*a{A#)f%=hcO!*>eC4mYlN1g(nL$v+=@Unn^!nMQu{=up(^5U>|tisMqI_YOaKNcL)Pz44y*o&Y=E{jpBJ+@zAve)7dy7Qs|XCur7wsg`$TM%PKhr#6-R z)t+s&2Sl6MX_5kYe6hT_rM~E;q2a5?HHu~5(Arc$;J+Rj7xJ&L53177Pfv~;$jHZk ze=WI45)ib$qZeMF`u!QJ%YI{${dI{qEUz zzO~jw`wUa?TAO)?xH&p@HY4UAu_?c1myUWrh}rAIVB_dIgyjTiHL5;iebGv{gSqzn z1gwm6uee4P^cL&=#+tj%jZT;C?C;*1((ih`NWRDWwUBafXEFR*seuuMVbA}m!>_om z1OBR@GyJls7r|PD5;}*Y{=>HmAN2z-G&)(-zU(*u!|OameveZgl1&WsxA2$cW~DE3 zn4-J)$)432z%C_x(TQMs_N55 zJ9M~|sw>w<>3*zTaBV=AG;}+=?YxVB{szJ0?RY| z3X2K4^e1}4n8Qrwzr3llk@bz@gN~4Zj;1J5_H76#!nHR%n{xG?NQCu(6?g9p1P=;J zLv0oE8*}V7-F<5J*~R)OFY2uRWpeImnDPiK-GKLeTk?y@Sfz>%GNMf6NZt9ceKV*I z8vKxgG&99hOd?;`{1330oYaJJWThnL#_f$XZ@Dn# zo(sUYd`#BdHEcn!TTg-xzTMLa73c`m+tIHtnc5Eh^ivn`W0K+g#-`MMb zN(2^`s@>c}MI;>gDi?Jo?=@X{q-j3gD|`;NIZVvXc@_fyR8_1$hCA&RibX znggN)ed%*=)urH`ulj-@t+SkT8dCXkW7BI(nPn2X3!(p@)^?_B1OE#G1q&3*-gu9zdwmO zz#r+tNMS!o?8#yfvk7#JhSu6F2DKj0+ll<5=Xut??VX3zvuT&y?*NS!B8V=Fc1GeC zq+Hi8%vITYBjPY^@Dqn88Vjh;gXVQBTCY0;*ty}f>+4nh4INyk2LDZTlezcgog$m& z>&O?$AtAA7u{x7IQYAGT6ZxBU%5z{E)y+ze#B&>FK_1-zP-P)H5qrr#dt4p z9I7RzK7;=e^;xsbbh)yF`!nK?8bY1>6cr)jyXDSBr(S!s(vJWQ#PrpMRV`Hv+108mc&Wg zJ9qiDK46-Ya6Ovn5;9E%MjS1?Z^VS$94CeCtD2j-O*>AmkU?Zr$j`KoozZ{_Te zw;@GP{@u<~#Cy$a6TUL9oatg6-u9?gz4ViB%;+g^=k{~!98|vWP8}F@%(cV*dyap~ zL@3$dUGPSO%aa916MW5E!gRsKbU3%N#&Q1hWiO{~{EeQfRLhDdqa%!{r1l=sYcN{gE8=Pul9bYidi^r#k~d^hMn$aS?# z>DdS6ZF++)zR{cfyZ4ULhUhO3{leweiu1t;+(++u6(~#|5useW@VKMkgw4ha(XS@x z5c)^A7L~49EZ&U!_&)D1KKL%h(w_*vC#W6NLwX`t>D%#_0sVSi8#rf^dSuuBe6q_d zA-^f6R^S8PP9x$~kNphGwIHV`I`>-JIW5Bn`ezpXraX(LW_3+nS3KzM<x|AD{Q^Jfh?AQ~pJ51Bo3heg$ zRj=ck0EVdV3Ojp1yZ9Q9*xs(of}~HvF31wQ}DMX6~^*?m^4~6g<%5i)3KNw%)H; zK`1&xgqJE{-*rr)I*Urr{-Z`8@t@LxaUsZ}j3Q7gQG+{r>A$M8&(?d~7;A>OF%ONs zwhQI&%Ne&WM!fYrjILu}?8gs=C!AC)@p+{&hiTSAVw;{3_z_>oe<= zltJn@P4QJJQv)aX=E^SWg-Ucl<(-tkAL##TIfLmmt0eymE%3NlwRS0hn$%RU_O5lc z%zT8odeFs2v&!jnSFDr9y{i$K#4A+DSkYcgS(kI99umVIdit;Vy1^18^6#;f_T z!rn&D%C?8d4ZlRrs8-zFrXRk0P(09-@O+*qL~7&>?#-@zf28zhN{{4pzk{2*vI{85 zYYK}9=LY{DL+9a7_5a23BxJc#T z>1OXe!nIxVUR>_Je*S{b=ka(y9`E;go%4Duyowv~3F7(8-1ebJ^}*`KR7b7#zl*!? zG$Ql0+^^r&WvE@5>1H}^KQV{I9|o}8tiCG-NaR2Efwx*m+kW{#vlB`^Trr%lRWaQ$ zN{UW(4A^Y*xa%!$eHQmb8OD$@^eIxbpGN@I>a|{=%F!)Gd#~avIKW3-d>Q`c*pteC>D-NGn!{bG)?D^_43z1Q z`mqo#x`S3+Upy|7Cy`;|_2_t?f!8mR@`at;Slp`o3y$DN7j8~qBH*0v9E8>u4NtTq zHKIc8SJaa}wN8Rk^P2sHYH(y`I8-;!#rKRQ7HR|pETglzQ1Wq|6Q(;^&i56WhI?w` zHrN-kuhy*ucD#zx9v$E1-LdP0#Pz}JGhtWRKDcM9U`L`9<4vy8{RF=WW^w^h%Nq~0 ziaHj>-lu{)3&4nxmk*^_?oMq$K71e1u4(gjb!`z)AZ2eh=l=*k0;ll8IMoOWp2A6M z%Of^ulHRP2@l2I_g z@+v*ce`4sa@r;Ju8{O0umY(4IEHzh_>E_MWx7X}El1qxw+HVoV!V88ro-a+c_~8*A z?5fv+Uv>BI>_7bMy=%323F>Qo!&AF>%rM=)=mHv!0N#Bcs^T3LmkxRh`T1ly8>Rj; zCg3&w{(Sw17wa8!y_Szr-YD1o5x9fdinpv)i+5V1(-7*)e?C$KNN?D|ViL3&(ul9l z#0z;zo{=!HSDTheOA#mFwnhtxwEb$Sm^Y(+Kta$$>xqUzFWw)d?%R0ABzT-L&;rDg z{c}fZLmW8vxZj#9`BU?T6J9k*Wf=eqjz5|XX*O0ZToeC&KrTFyy^uZZdj)`uW3?$S zJSytg&szGMT-bN9kY~9e&Bxt{k2qPpfn*pIJs_SgKFRVaxAa1=MGQo7+@U&c9kJcT_rVk3hG!ohIht$k+tx) zkCQg1*RlM&&cu!^hvvCj+c+&DS^)7zwkaY}&Jr=f!8_`tJk>cAweq3#?}^zb5zp!< zbGvD#*@dWo35dT(P32YX$I&q%&+7M3_n_$P{brH7k2y1s$akdA2K^4;y-S%^H56Y;L6A^ZQvx7{!(yxr08!9fu@6Ircczo!ED{Zpj=z)Q~EgWz`lZCKLy41X? z=5ST|r8{jTqlNxU9F$BtRv|OYPlR|B7mZNy`>v1d3tJT&rZTN^x7da$Q!O2-c`TA3)el|riQAGtn#kw! z{`Kmp6YCO(ZLJzJz-9Ur^RJe(FPmJMi;zKyj!_y04Rt|~6R(dVj-}WnPFls1pGL`v z@DbGAIj=k)zUtD$6Z~v`h`OP@mdB(6SH2LG^s=kXH`r@Qs#~|?icR*MEM!eG zcd$iw1)5!AzE!;0&veGS51_rWB$sNrATNJ8{b zM|wFHMzmYT1i5eidlQLoIK7Iu-qCZW=|n^iS@+YdelXF0K>zZ(p?x`9l}Pvt=b!@$ zaQumni$pgq`A@uXoCx;-FNW(_d02iTgt!owJ8I{&6lOVp+R4d*sC#Nls(x2c4GJov8by>nK8 z(Sm!9ndOps`07j5Wy=M}+7PiI%qDYejgucHXxD>SlfBYBbW0^D^Yn^P@3VnyJ)>6- z4M5CrgtX6u^+MLwKR!86Ejbro*oS@$l^X?{vZ_8J?Oj_E6ZZ?rdD-^w{6hGgY#&XM zC*JyKkzWsU?j?O^_Q*K25vhAql;b1lz;g;oM#N%&?pYF9$(uFE>zu9KxGh=M=#etm zTDEcPnJ8v*92|7Ro)#%-^$kUOEj9r8T_b|MHNbY{EG+x){IH8PqAX z(|ORJ6Yq1DLtuJ2FC1*XWBIt7mbslNiQf5Xv}0NLP_dy#0^LMvmt>)EG<8+#x;K&lUWI!=}v8oiep|HWmd&F9bVB zqd7l{fL0-nclXi*lVA)P_S5=l(a9Ty*3evvT!^aYrT*cP9M>?$pgYrM&Bnhtmm#hn z%ZjkiySK5kFUQT0KJ8Zl-v#&i{q@XM-ilBx z9IDcMcbR81mY9OrZ}MZ$JvA6B77-sn4kRn6aKmWbIh#P@G}$>n$Y~?R5$Yw~)9-9h zK%C)1u?SDZt|B;^kh`IR&#!GG+6l{?Gn8r?&Z$w^EOu9~G9g<+bqOu9%AZb(L}Yx1 ziYPpQ#}*68K=Mc8Eh40n;0QZdv}lZ+%@9m&P2Pl(eda~;CL2kUH4NM>Ny^q#OO2QX4W%Q0aG&pXKqe-}$yL|aP{p91RXakfj%0y-T>o4xFQ zO=SdYB?HCn15ZSJ4O&0Pny~hyy2d#M?{&+HNNF1z0RW<~mEr&84-$KQo{xyeQYYTT zi!L4s9GMItn|pp7bjnAcxG%VnUB0-w%!cHg>Wre4V9OA%uhGzgm8)T^-$ewkqIG0g zb-31`9sm0a|9bSsYFaJbN65Ss`ytb*@n#{m>?ha3)BZU95t|XLV|Y-z{9&^PPA2JN za~Ul3Qn>TeTL&Dqj4Nw-iD_s{`&dOWkL9ATxcC%Rd{^y*IV0T1A+crP)z>E(M{!zG zZ|&pSAm#QC8iJ>>^bcfYb0TS)HM_auTkZ+h1M@!IWm40M+;G?vQMIX_W9^U#{jkCHUH~R1U-X+M}ALHRKgYe@?(POFK5dBz`I^lOY z3;xR6vnPjz0zp)h)hyw{zsdXkl{KQ|EflVoKR-@6@!b4iE)K|a$(n2NPEVjH{lV5jg|yh$tQO*} zxi(EmNjR%=h!cpoJ-2uyT|*S4+aF?k2ioZOQ*XUpFqZY&PPCC4aQxNiv{RCU|7q5o zf4h}cLe8l7)OFwd;n@H}_7myeFQ>C74uXv&?t}k+|Bmu#tB}UM4wv)xp110&XLmpTW3O=UU)ZXaC+Hoa!19qim1kw#s zI#6?~a;#7Nn`|~mP}@F8f&-8SO`TyN7pVic69W2E=yWBjq5yosH#-4|&N-FW;WBA` z1~(Yb_=!$Cirn|95{C;?QX^=i&(gNtE9{(<$Sm11hDm-=k~vx`nITB+mq%uB#0}3- zucV}Z-V9nPf0de@lJBzW63x5W%vAhE4ow4BpYKc`S=tC_sQq;DsQEFWY2zV+?elKbS5h8h2L=>mAO@IBTG88 z(vZN%d-u57Du03k9K2jG5n}PBSLqoi+@U66_tor1kUtm|r93IYZA;M%I-o(8jdJz0fQqU;rS7 zRbd9s7$CXoCGmKtR|KKmv$-p*sQJ~0lLNd>VPtK*)ouIDcC`M4G6r}?0(pLr!9rdY@j7d6>MzLFYTO~MQ%qXYsQKFGO9jlx-ppj%4yqbf&7f*y z5Wr2jl?Wi)#5wNy4y0orEhzdT8=WBRfKqo3L{Go{3JoM0o$s`E7eCy;&!x&zvMF#{tOcnEHz&-a5*ou*1-W%OcF3fZNE}f*}}?9NNv_ z+$6Uh4}Be_g0|~|pY^LVELDpLwokmY{k-dlF<#;VngkoozU;T5(ioaTYs#Ul-XOX4;_}iZ4|H6 z4QmR~W_A?N1BijWhe|~ZY~eHo@~7`n#V$Tm`>7G#*TD?2mLy{!o&GoBySi{ zDFsuV@88VJ)r(T-@l62FDYLgBKhHG_soZdy@f@t65xIMZIJ{8tss&v-Kj^xIk`rd% zwuD>jwUz#R*bMs+>($M4E^F+w1+D9lW!Jx_C~){hb+Iet+2K6jrVT#kRQZQivU=1KB$msUV~5ttVM2zCSrOoz8lqMJAuA{O)# z;3T15uW5zu;hG(t_5a05EVH~ySV0E!8-!mXq9pn?B;-!yISazOYkP8=#K^go%=xpa z_I5&q)eo!_?fTFUdm~ymezS)TPW#rzFyy2iv*4i?{aN876ggKnJE3AIFCMXR6Uz`S zNd4esj{n91bj0Ro;G}jxrMCHQyAdD9y`0_7otMZ6C0|>Z9BF1i&mGkwV>H}ntt=)7 zk_d8myW2M(J0SM!6@v=?>WBqXW6G@{95fH_`@mKN>#586AIrUY1f%=J#i1E9-K3-N}y4=d!5%=e4h_dI$9`u$jd+@&1 zsk~oHV^jC%>!d!ky2-mi+it|Uj{lX?rCYUg_dH`C*sD`(y!2dQM~qmpDZ2i2v%spn z$n8uq-Lu8M3wokwJ}UYxoh~;LCVxmYz)*70&+%LLI;4=Ed zR#;-Vx;#6bL=Q%4cTdN?G<_HM@z;)SD;lf5)cH`G86s^l5FU{ovDQhG!Bbf^P;Nh&z5bZ~JD# z3r}@-hKQnRGFQ>NvLvA@4fB^=>esikC071fz3ssE6-NL2_Y?4mMVMz4DQ6dDJ6yoj zR;suywZVb)I}nOm$rtIyElqVvHPy7nLZ8sp@b_!rA?8yP`vk2Fmq@MJ+e_p%wyW{O@Tv}WGc5rFW_JqQcU&7Gs za?q$qgZP^66v1LWu#>Zqz`A8J`I>kc%7mEs?$0di+RlW2xBm=Jt(AB$ea9|FH_1VR z+JE#5Cp0s-MMZmV*tr#Cc7_IY4lj8#g(`WE^Ro_^uRNK;Tn?XX7SSR6tm{H=H#r|a7= zhKWwgN1tnP5ZC)imy`8&(1dk`Qi6j^ZfjT6huYZgxJ%)q-|$YSTHk-ddJ_$(d7#^p^E4vpmGe zr3xi1TFf!-eIDpvmJ>QjpySijYVu##uZ4${tlOydI`u>U*4??A3!eCt(C*NQY_?JR z`)0;b{i1GE2s_~c>E7Pvz#pMh7j^%*F>bB&BTdZNr5)0uHMj&IJjthhqim-jscg@K z2+X2S?HM2!m5WC;Fl|DcjzEhLVR~-;2&-FFs}{j@3>=2e=FN~+1w|^*BZo@NJDGpi2}&A`nw9CF^KVt|&!#{BWntH&n$26Q*?xEh%>zfkcFf>3)r_#ag$ zv?U8zW_5DtvA~}5x^(7*(Ff$gVN~pOJZ4dY{S?p$X9wS2^T)u{RYFfUANIp{Qqv}p zoWQ@zS2d`SE{Kt2@q&@*x!d2YekD=LZbcn;d8a{^md%<;P8*A&f8gKVzl_9NSZ}f9 zo{xaTP3U&v+T8Pg#$#L2idW~9=}#(H%ej-t5Uwhw^IZYx>Mt6}xq z-y#Myb1i-dPMm#QA}ff65{lgOAGMYAM)QEuvZXeX+fT}&Ul@Jsk8Ec9a=;OOj^YzZ zCkmY@O+-uL_34#f!J+-muF<9L;a~GywgA+Xb^Ut9PQqP2NtwoS`84uou{G>cPG~x) zULZ_jLK(mGcs?;*oUaS~W*~^`)l7R44|8BbrmwSo?~0^-tk+u;y`{J6|LH&;X=CTt zu`9cl1situEu!%0i2Brzim_a87vc)6d82he6_O)&K@r8zr?HidHX*EG0s~ix&Ez_+ ziUMg?Eml+(KMzcO)_%TPy$&IVj5@f2D{r6Tx6SQ1osB*pb36nNEhDXqBn6d)rg#7a z7+IfUo^|dlezb9`?4^aQ1T*u?U~kTR!^Pb>gr|e6fM9Z4*)PtptfY80i8uCg-N(1d zHI;|C%0)W@o;8HBG@hogCG(>z+_PfPnuX)dMrXX!{GE-L2dJVaXIJ5KVMf!}k3b_Vs}JHR}mZh*Sd`<^6y^Zs*Qf&0F1;S^ig`XHZ2()LWg$ zSVs`=(kC=zngg#?3n!)Yf7vvKZ(Hb86gWtP*0^hyvb^D}c}wr<7`c1rYi4Mx&+JF5 zVXNGRdo9ac>x&FJJ(7e}a|1$f z$$w{_Y%F@%Coi<=PCKdybZ3hQw=&5uExNHZ7Ed zb%J&S)(7US_}ZEX)5a}XWxCnaW&5UzFp_L62*E=^%qlE<#07!PyFJe7)b|v>K=qn4 zGG{pd(sh*hTUD1OgKu;{D>l|~a%w-0fy!7Df_6;dmB+A0Z9CDFS5+pMYfhJstWXlY zwfK*i$k$?0?}7yVvo>V#|6L4xlIBy}65UpEB1h!zZhZXB6{BLdW4YHeJgDy6vTpPH zjjOei*~ktvwlKFCyz3PcmvC&9xrIHmy>)~jcK4^-sJ!e- z0b`tI0^RH~A+l7Os8g*F*^oYUnZ`L0jT*Q2KBo!q3;V|X+{9Ef*BCu%JBQ=w3ntSo zs21zedhVnm1M}N~as;B7&E`>|Ta$O4@FWJE-~IrmKy=3c^c|=(MTE8&$2eP!D%KWV zfVfvr)IIGXFR8JR1H+t^KUs0`opdT#GFRg{a^7XG;mc+C9mKsK_Gr@5UC%( z9+oU>c$S~>cK~)!vM4e-^H6cvY2Y~yBo`X({tOyJDgQ9(^?U>;k1w}ml$gq6YRmj| zsW1<2(b;8}s@=7w8M^8|UxIxt331YoPIpyt#mc!9IcBl&E31Q3t_E=l7 zw1pcB@#7|dx7mn9hmb^PyQDQu(@;Y}b2phvVU6xQvx4Yn!1D=Wha`S*un2d30KAYE zhzE5v-AY{6oe3YNIq_Ys7uEotYR3gBSJ1zkcRJU3Z_z9QSQm26Arax#OQ%Jv_cJj#A!%RaCKlB}rh#wp{7-(+4#bRy&Dw(a~ zGHmC;v4?tIKOPks#xkNGVk3(Dp7t>Ec(!saS_Lvfy;hnG%5CQ+I+J!)@ca0o(QsK! zW{K)O>0uTW9COkIiy7hbKlYHdqNxRL&40uV3|e#+d%t7S&2{TFS5~n2@Lv+@e`EUd z-^gJ2WCN#xg2!uG<+=_1@9;HGz|r7cE^k(|@G|U<&X1x@f_((EJ>af?`Qi0ow`?;^ zzA!J`G~_q{$8u%26F6}eDLUyZN;iG8*uCePI{h4|0r{HAb?y6N9xL9Dsrv8!H zTwS`S4hlRv%B?k50O-7;JZVUm_~emJ?FWqI15{&*`A?+61b5$d*PeX>qm{YgPuFBH zXSpZr()LG6!Tscg%5l#P6Fl__w7{uSd9S-a50&#P@Bw;YUbNk@CET2KAVmuHjrc$9 z*q_hDpX&y2`nUDPo;*>H5DT^D+Pir1Vp`(*J`48uUYuR6sM(>fC2Dyzh}Q12?UVwem;>`4!m$%1^N2r9lD3&^!B?#Go%l zTtp?N{j?^&cHP&jOSu~3NKz1fJ7K3I^^Uv|m&wG%0&zQ)J#GOIBht*zqJEd%aQGD> zmCy>6R@ZEO>^*Lh^j;R}qjuUgzk-uaurUza>8DEHDH_(X`VKHv58yk8ndI!4Xtj>m zn0%ErtgD2hTI_@`Kq|C4Ak_i!?bG7-&otNK70c1o@D{Jz!zh?+!0k$VZS^m@id!aa z)H)o0Lg<%pK8QdF^iXP#qtUAbhFF&(7Iit5@J9108f(SfA3mW5;P_RgmRZ1NEAYN> zuCPgMvC!x3(AhB-tqAyXh2fcnig!MKt$LKj81}_V2y`jM0I$0wU!-ePAy&+Ufy8k# zC_0mz*MVJW|8i4R$O)ZTO{D!Em!Kz5_Vq^Y~>KW&n>|i%8oc&RCiO6xZqG|a22-f7)J)w5YoH}Hp_Z<7pbM?r(~3ZH_L_`!RaN|JIB6+WxRw?ut4KNV0(W2#4ZnW~)r z07{f{S^B)aL-UA&m7b%966$eDNi$|##j{m1#1!eXk{r8Hz5=FN=ForX(FGc}xOGu~ zuFrxDhx9pDc$0KWOBq6m$G4ja%BIXFZ9MW@XP@I`ZY)-vh`Y(SVDAdKcKYJ$4S1kA z%`F$92;EEYhA;o*Dj;_E)EnFXHA|dq%tGF^o}@t&en-T^&OSnOnmWbjvAnp>7m)=S zT{rg*_qC8&fkX6#c?Jtg@oV`Jh{{78CEdD!}Myh)hnHNwo4aYplu zAcC^kgNnzglOr=hIXv>DF4`yb?mY*V)r}NrtKId}31{9CkmrZ+7M=G1Ps zy}SCK?!y%Bf(TOTrymX1YP4JO?@-K$|J{(6kk&iAohJ2C?qofn5ILyVN< zyuc@)Ke{^I;4+-z`g^&YC`RFX$w%&6QeHR&Tlnpub3s>Zpv0p{E+j-BNa2l-*-7>W zr=4EM$qd+Ys1eoP>}nvJ8;~pAhlyAU#O81mRvn|B01!Bkv7*go#6m75|S-Vx5 zO#&hAZ%%)*7tIEGc&3C^Iba=%o*OsUV+&Q!Ya32->JI1w*!L{W?TS+K6S8~b{jp_M z=}YWLxp@ynapT!S2A(QQwO316Fop-jmG5fmx&Hml1Q_;`YkqMMw8H2rv)QY;`w%rV z(FV*rZ2nSJI+r90pA^&$=!ar}#fQCyY?c^aK_-I{0Wm#^nJUg;cc%Da6K%RQekkF! zLe6O;eYEs^xNJ-tHxju4nOlS_LsQW@LPl(-ktdg8^v0v3Be6)ZZT8CJ9ea<|#J37{h(nfYWv>{T`-v!=qcGr=J}mQ+%9ivOi{;u>Jna z+05^L>OFuSy=CnRQIym^isqiFG6ly?|M-yP=2#&_wC4*3*+=}D&@Gxr7AgZ1aR76wSpn8VU?%9J7SaZ?^e_3Spb>x*$) zIl`PdFlo*rL?-jJNM>Jaqs*&pH%|0LLzvd?I+ud$AtPprajpQ_`e5%PudYG(Bwr`i zIKwXf+5M4>W&L%YIVNT@RXz6EQvb!@~agB)@U@kO?V+ zHCoR0c>MsLX!k?$6*MenvME%**gasD`_eR%Vt18v*h0xg%m?hmHqVF%PL`G;a>oa` zhA|ZwjO6rVS8zmv%UnxBxI#V#xv%@>xf7jzg4PSmuPmOo1l=)Z*{bhvZ!=0rxy~d% z@an9(s2d%qi7ZZc)|{s4U@V7Zsoi`h^^X?zxFt6d_v`cJ5rbIW1Ha*%FY&Y1?_DCi z+T9MhS=IUyXt|!TG18zZ*98e}m%_lv-4!pECoO?*qJEm=>{M4qWe@hvjlly>V+L}z z9Clx6S0U3>)bdB29C!YphhC5kJ7Sb)yx7$<3-9t@AwJ_!V@+!Ps^ii`o)8;wYf$go zmfwYt78^T!0z|&y2GI8oR-8))|1!A<;R?}B%A&K6+(nYKf+YlL9K{v>mruirw7N~c z!a0gt@AANxsey>%wy4hI$75eWE((#rO_hjaU1+MY{=OL#(B-uOD|>r14EAkZr#!K9 zBrmlxF{WA#xdTK)CB_WX#=m8I_#$GvK{wo{YV z(?kDxnBc8_apRX%K7J>e246mPF5xyu!8T!%aZkzHzM0HJNL{dCNWEKX!=TiSG_Z*c zw!B;>sZee|{>fi-G8&#>Z|c_tx_>k5;LkgJ5QJ_?9Z@}JBpDkayQu^Pq4*hxbqjH) zP6v4+FLd)yM9z&npr;_ncB=^4zI)znVIA6lu&lW7Kng7?=T7kDA!mOML_= z0++qJ9P00B5ey{nU4V7ga_Br9@?AgcKAb&y7xbrT+Ljy{(APCQ^KbhyrC^xgS(_@h zq@{^_C%0f7^w|7^o2 zu^72bZ9|}PmG~(()_8erm-7pkxUB@eHIU>Xu(Q~hzG0P{%AnElo#UV? z>bj|OSS#14!AC8Q!ZJ1nuiMBq37$IDQkHLSk@VD4Ieb2TgRKr5N?m>B^7;lOOD^y~ z2ma*??f^Y58wHfiD6@Uek$ot>I34@OXiKN1KB?>t)7DRdFix-0%Lhol7ZuK<%1Ad) zVrqH0_uh(?)_VM;2&d4TaPSSYD^T~h$b+Oy96sJHMQ*~yT4lFI3`e{Q?B@>)lW0N4 zB1@E5yH8Hi}j_Xa}VLRh>y2wu{pqJd+Kk8(t~? z!BrY2lQ%qSl(JQlYSQ|LOO1e1K$Ksi($hDWhuxaiuyR8zI=qFFDu?+;D13~fT16wmm;7Z<&j(}NLA#s6d&+R_k48qb%=*N^I`YPkI#job7- zQDWhaO*HJOzbLKrhvWLrA-V+l>c{1UGRuH-4{zuRmz(0RfA3_Q{`^<;a^=Ht3qm$S zn!`RyochxQ{~^Kt{ZKn!XLy1KQ(f^Xqc?wIUX;q_Mkjp4gONC>$$KhmXq9TB(`61F;;Bvf>gKC{iLstaszT>2gPu654Q;ey1x7_Lp#wVvgUs>9Y7kvJWzJs$jyY0&vjKxiz+;7)>( zCl+uOZek{acfKbY0ekx65)JlK6bCV3kqVlIL=|WAD_&2%b;tm;QOOIcBUo``txa6eBwq<904$e*-Vn&wT(*QA zs~e7?nUC=ecc#mS0iKm&vbqqd62;W7nl(M{h%Gfi;vz+HIq0A%jbPg;H4G{g3CmC7 zB^o`#Ml1|fshrA6V`ojFy-3uJN!MtJ3NYf;=CixM3mdVtoBmEk_A5a){B#TUQzS+P z6X3zS{$r^v1e+b>w?qO4emY%b?0`%TGoY);76+;0_M10@F5g192s6V7e}*L zfR8#0bK%n%N6sYo`Iiple2`tEC_OR$wz>tI=Zcj;Je(wZ#J6wl?WGzWoEmDh)}&8R z3gwb>joyW6L=^2TSmO3oGt%7MCYLF}L9wZ2#Z?R80f+1*iwTPePdf-J3Q&@>2Vq}; zB0~@o*}7k7Lo^l>sPQH0LDDxfbZI_ChmSOM75i{7S zL7yeIszZ-U;w z3VhhVMXddVzbHrK+?K^iW!5a~UOxDvp(1s;&5Xd;9<=z#`_d6_ef&K+b>`OLSq46U zW|t;MBt6-N_3WBDT_zWFWQ1sX>2HXvJV!FVHJhyF0g1-bp_Vg*x7t~`M5ghAE>4x@ z9vfxPVhFSNWx=TrntGqIF#hSJ*hW?7<)MhNYI2MR>U)gV;rE^%%DzS+K)@ zCW!wgT^wPhLag?tV?sDRf2@O723dq$I^Tsmi`tE(l;+2Y{Z`=BYNi4^8*CO?TA-uP zsI8bXig8E-udk0p)BA5$;?ZrWem_h`^G%<#cbX`cQEfvBz;H778RhF^{Y6ED)*#YJ zke?_7K<4r$Pd1j_kqj_oNk1E$t2L&T8K3d zpj#EGX^h*Vo%=X;mY`Q#eOfj)XLU(31Or?!9^A=#qRZy_$oMi@dr6!KtE|;;BmoOi%TWkcxfi6Dr4i{*R7+5>z!awm>CbL3xFNRw+NC5va z4TW&MLo#EZ=MT5S=s&UsgQk@5A0!LvZAR^wyxto&fu!yHa{c<7c#m@R0mK9nuZC*? zs0^K^^*?%Op9y|W=MO#msH=VyLf>DJ6F5tP_|M+Pe~9julz3#$G;^QA9OmBitbmfe zXka~(aFJDQg*u$*8^Yo)M)Yi>2jAJ}t2tk$c4a%h9d?gL*JteA`b|S!3@Mc3uF2P9 zF>^{HZmV<%b>=Y7ONFdwJBC~E{nP*R3-;%XeW!=sz2J)O|7&|PN7^5vVas~#o8NMi z?@34EXlnbxXYYc_uvs%%ev)+6(BH3ln{^GtSBhoB3nb6ZXUc{vGZ=$z$V@V1N|w%} zaIN?xl%@TFUd}(BnVOBlOeANU9<6vZe;&L%{(hnpd|bp?XoXLB*qt9Fpf$oTgpqj+ z=Parkd5F*9^)g6h0d^G6tonery|lea(nd`t$vErb#lO_z_Lr`i&9YW*;QNJMbl?{Ykb~ zX;GZ0s#)d+64HZAz)!6&_TKuZ;HLC>m*MJD4A^e$4YF73OfbRC-cYfo{Ick8YW(QL zIeJl#i^2m*o3jZ)_WZtR{?6dWR-GpcHsz1|vMte;S<#RZ>c^@!L!yVZ3rmqo13H}# z-zT(xuha=k>L?ySibnhxz}~jth8r!II<@vJk{?v zzAB#Ql#+MC`ikkmZ28(Ar~kh+ub#ZLXwt$T^Dj;8TW`}y-|!-sPWU)qs>Bns2lsX} zQWn%JC7LHfCk(rbBemOK^&rC1>$UnkE}@5Hs&mVC{{#!VTRFs+^!e`WD%pJFdnMg)kmS#f{HOrCj@u;_= zc@_mu14{;Hh*?hB$ZS(^Dj&~G#T$)s#v$lUgy zzTcNjMes>X43u*j{~r0rQ?ukU&CrhRd|%zUvDVGneBB#5H!77UOWYWLao;=_xZZw8 zoE7*7ei3M0A00Z-+&}Xthrp_@usLtAk~lB7(#QYYa~qwYU2>`A*$4N%wzhd6#jMl# zk?M({Pd=D?9r23K@cfO~j+s+Bf2iW$zd=G9*xNoc-M#!YnlYD6(!%!=soy zHKNxTl=18tferhi_`^DPNY1h#Q8i-3I=B!!S+)Cw)eIZSQYp7)&so44uC(6mVcev8 zN%4`&MID|YHGE@yfCIfkYc!`pxD=}I_Sodyk&bXA9p|%lus06*Q)d_54V*v5T-P2ss1u%n&xHkCY|N2ZE_0 zl>Nx|{L?E-cC9aoiMMIFXoD45{P3@g4%Dl~ZtGQDztQ{&U1B1njr|GqDeX=lNGDWk z?KzdDIXq&~=zv0$S7Y`@-gy_%_Q2t8SkR&?l{0(&4`43vNoVa|QoC*UW$M$LVJW&x zJ9qJ%zs@b5a*!GGetu{lxb2-J^mc$HQ74x?AoCpBm3s5kY5EPTj#X5KSZEI6N2Lz6 zm6f&mqR@VOJ50o$dxzO&Fs)bmS3yjvn1PZ{l zXsX`t<)>iAXVMJITJJTNWq#g?@-xcV9tu56lUW<(d}d6a+?~z6=@BJ<>q!i(Yz}HY zZ+Q~q_4@grur&ohGz3BL7Zhw4lRc1s}QM&B&+QY1mDOV{V-5x$X=rD9RiYyCeV z;2KQ%{e@<}_CpLf-@H?>si@Ol?XD=KB;tgKeiU{&B`k(3P}xwf-e62TS8?N)Oo!{( zl>XI z0Uyoiy|ph>ZDe-e&8I@&LvIm(7QMt_1<-Cc9i9U4 zx&GCi#R-rYAXBl@vgjQ!KV|=m=d1&9VKYpD%t=pnUwO2R-oHY`x7IIHhJxn1b0LRHtV19Z2S%}n+wAs2DUuUYkCg^tF$^lyTWrgNM+WQ24 zjjvyubhW~fk23IIS`sAgsRaIg9lnQelLMHvKJ2UrG+kc2+QePJjvT`+HVWZjjgdsT zaq=}UrQcmkf49`|&HgP1>7WG;AlUEbDokr#DJ-v}d+^^6KDZn@{1928^<1Ckd!Vxa zd~da~=~9rRp_q3;;#LP>P&`@pGxh;-`5^LNgIF% ziF3G~pn#F_NJC}g48@@XW~p7>u}PH0Thx!hQkjx%<-CD0hHrkA1@16t63;rfTh%}ogDA*7%97G=D(reG4 z(u&VFiMZj*8YfCk!~QvrJe|A}V%zy>6xrJ{s6#jvwu@Nf zHeFPQYytLT`|HCOdjlEh7>SHCfl!&Hvj~+HP@H&qpf*(Zm=ULVRXjo5*jr>NZ65T* zn9^?40X}t$5OZ-ch(%wGM{Ah{Cg3wBgsDo!t+N5()Kx0%X(UU@esp0uVITOpi3Q8t z5EXB6dL9nhpz5;*IAehBJ5E6uh@hq&T150+vE*JFwe6F^yQMYl(YVx1-}YT1Z6_ZE zIt&rfnanZ>apjZWEZo)Mqbcs&c$5Pdn_HhRz2?%g#Xeu}sCHQ%c?l>0Yy1!er+%Mn zlBw4{x8mOO=62fbD4x{0aE4qp$73$8UEz5zb1Z)BUYC7}FI;Bldl4C5CBk%OTh)tF zv-;6s`}Du|qIh2Qj#!rZR1SEz#wv+7ICsvpQ$n_2G(7|}ipFckfZ7q_o}C7&HC`q|DU2Wafh;d<9H>6GH(gVR+c2oypb$} zDc+LA6iKp7rIKWivCd>yLP)ZcEh@5<-Pm`=+E`}>!;EzXWA^3e_YXYRbDirv=RD^= zpZoLmLk3%RA|^WcrVp#{IjAiUF)ljnF0A_Gu48WG`27^nB$ zl=dZ_eN8G}YlN&z#jv!J)ahy6oeibGMaIDBm8XzLe`EdslDc@;sn_UN8rFy>E+7)P z%@fDG-!E8w&C73yk*N$+qh4wHyOdW&H{N>X`qhh&G48N#`6gC#y#6crA>~uZfbYA2 zbns#e?hhXaZVA32|MetEEmaYl5a288W@aZDB3ubv>?Kro!53TFiPwQ&J_EPbJQhl@2O^Xc!q-E$POuaXMIh5<9xCN3DXOIZENjH%|n&7i1 zy?;je^q(ak=H3Zar+nMiW2OIjST*4g7xutPivU?IDjjcKRHfdU^@Cb4%nPRFm7PuH z*03jGhl`MV?_41bg+2%FviH^sXll2fo3fD zVc;nvD^A~Sw>n&l*GrNb5$q((*7!_ph<%eMnnK+#o_-=&TnM{c9k=&DHy-BXCib`Z zo6&CKIJEt;^YjT`lK${`F?@Ia8g*F^e}1+!6pT?MlL*Eq5i(~l69TVdOmUVH2<+XZ zlThksUr#id??Al7PIU)Ic&+Xk2kyc@LzHsShnk=s|L%5uE5@lXhJ-&e9eUrA9L5De z8QI81T|L*)3Qt3}WDbZuhbtjQ{(LzMddSlC{ipLKzJ2U0#k5eZO|mRd8XVXS7%dzh zGR?*9==KI+#07ZmT;iv;fesrILH#@LYuc+4Mq#<+)R(t{Ak#mLv+dK;S~ z4evFH!fzbR+>pjldW22vQYpkH36}EW?N`amDLxxw?bsUb+t&0fapPC-gp41o1<@a! z$6iB}0(!PBm!ssdOV6jB0(osl$>?K2Z&?>Me^U;V#Y2-KD{-kwPFb`mA1#2T9Yt>$ ziw|{P>;O5?gpd>S{PfN%Y(BZj5|v%5E$OXU@d3sDo2VVxwMX?$Qm@Jfl+%u)OTBmR zn$zavLI;q*K&Qsrl$*8O^de3Itx7UzL@;q`H)I0RF|we0rXc~XK>2#-5nD(6SpmUr zN(!k{cCZ%tpp-Wq!l=Kzq{659d-4cjhRP>FOC76y@%|O$)Lm4*>-N3@{lCo0pgcAN zVtA^hv;I7>N;&Mv$V>C%Y~s9eGtl1vAH{rtb8KunUkTa0FvJl6Eu}4d#EQ2JlBUdd zX!q$r_%&f_0b<+#NQkM&G3tK!$?1J@jjoZZ|N3@Zk1ykfj89irS)g>;0A zH4Zu^Hs|*V1%2N)8!5vdqM}t!w4B`%W-ChwU=><)e1Cu4AQl?;x(icHlkT%Fh3@}S z73S1muk!Lb*7`Jt->~?@7~KgX^%h=%Ql*PWhg6BF2l9tJ(-nt*qe!>YCTx8wf+~AR z%U^`aDOWmAH;vt-?>n_UT>?3S_Y!5=GLxuObaYE_$Kb zTfIb~S6&LeA4n%En3%HWkOvw)qQCMF@0O|$ACU2}igb-e`{t}S_V1k+o|0GJ2v8hz z3_4(apu?EtRad(1?72;Xh(eWxPIN?Rk$s$Ol9x&}mo3bD$jF%i&|t}e>rZh=_vTsc z#6^{Z*Rq(GKcK=zoVn3YNz5>TElynXf4Z_w0trAL6o6s8@cBIK@6;2=*+0|D^m)8f zs`RMIxJc{xttKh#V*nBMk3o;_k)Ye=bC>#o&c$TK1FxS3Teq;*ZvX0=k1sU0B;3B( z^AJZcv5z=E^u)F*x0^)cZE zT)N>s!--w~VLQet79HgM^c1SY>=LFN2WWY3^m|(eAI3CbZ|}Ldnj=34lXDUfkG&o> zf=!mf7Z&pxJ}dV(hn<#@lSjbwo*ZD>c2!nY?#Kk*yQPTN=DKnbBYXSp*qDu@-Uxr> z+Ymbgnbzu64~ow3s+}R0Ecu!+t3a?&HR)4;u;!ms=4B+gViI-~P4UYsq zsjH)2U`82NhNl*KBsjlA*Ae|*)tsq>7_x7YwCL}mZRi;Pa60fJT#OrQ_q79zSoV;D zj>igb4=9tPbPQT++QggFbu4#cpEK^X*I= zz`2$F_D|XUJ2gFVq4|TnY~CVKnz?Wz&%OT&`PclZ{M4|dh2#Kx<`Yp8{Zhi2uU+kB!U z?+w0VyXhh-dW=d)Z8)E(XdY-HJqfTG2`HvbQuxZB{WfqS(<8q93*M~2J zkY-i@ev{i=^Bz`6yZ_6nDk=W0+!Xd`(gofX_V*MQvi6mY_PRgny_LRI$$nq)a%>6Y zg;5KP%1+47{%eS~kTZlPlA`xNg^ebej255JNjaHz=t7V0Rz=j5RgCS|3%6 zeyUV4+_vpKb(o?TD$r@SFBc*4&l5!-Vv?UwJ}QM8SIHzI&e| zX)Qx=O6!>sk}lICVP!rM*yRZT!~9x%uXW|OV+Pf4M!eU6ZDr}7w9_JOZ0ZF;_B`f1 zpR)N-i0w({Ip<(FxN$~!n=nXRdl^#*ZSp-seLhPapzRsK<=&i}w_`*WphH*N2_vMp zp(vO(Y3}RVPo)LBWR+J2%U+7x=R(~`&k4ZzIHmvQKUg=$Pm0}|a$QilrSAvscCGv!aKerVPI@?` zo#U`+CUVvV(25tNd|zs8Pnw@b6n=t*@aexF;cGdS_te`PhUSC*=@JiCAGA>{=*fWH z-}!fY=egpJ+E0Bz%X*Dq%fB37e53PzXHhd%XZ!cl5Q>(tAm*`?OPkb~1exUo7HY0M zkxTyhRpol#g9W9Qf2G1$w*J3Tt2CEGsoQ91N1qqjeYsASGsiwk--i3(UcP2K19v*o z+hJrW%(a2&ymoo)`$U}XqqC8vi((?s=Awn9&ADfr%C`DA>6P9H)BLr8pz<|Q=yktD zKW8*P%p9_o=oC=~syAwt-bpH%OXBn{?c-VkvF&-AnTt48>3AHPNIgf=e76eU-8#Mv z59jA-EU+$_qZnt6^efX3))<2mSY6JkfQv!fefnX)n^P^)+wv~a_nYw>tZ7LYY zT6{xF8(tb#C6=-GrYBCznHaaePzV+GlM8SQM=D?|^#q|0bcI<7D+nLYA4UM^<0qW( zAv3{)M8PLmJNq%@xPj9r%3CZW#~m8M9Y(~--De!%%t;s_r?aij=N$*mwYAd!K6gyV zp(J=mwzFU8Q7_E~<5Y>v$4OiIwIG%@AgHKBJGfGRVh43H?$Mh>Uv`F=NtWX_S&!{xE_Yk?+#BWRwKYnQN=-e$KX?a;y+z~*vfQ10 z6MPo$^L zHOY$>>`{vyVglGD%nYMiY{ua9HmL?%4z|0CY{t%m8-Yx5wag__)nmY$$K2a+LgsaH zv2au9rM=PPYwgB&x-<*umxI&c;n~vBq5r zH}|VNB~D;ae&u3~Ol#3|gW!wN8;q7!ZiC}GiX+kTsY8Wm_KLYmVkNCixQ& zhn?nHy!|vmy9v0$u*ElC&S{g}35nh7K+7>2Tot{KJdkVOMtzS6dEg+}iYhPY+c1GH zI=RS2vUQ%E+Ys{>L0rnTn1+@#x|D9qC$yIS(Wx(+eK0Mx2ECC7l}vOrtK;KDbtsUL zwMIMg6`UVUpLDr>O=GMpl%35`O2?Z@HQF^3fKZ`ic=|yA9|=V_7&4=s51eQ$GEd19 z_stHQ=4;6$bp@R@NTX#lJT@cuXxQX{acZQ<{eK0GbE-rr+Z~suvFo4n>&`Ha1$X-Y zcMudirU)@d$-``yFwpj-aqhd^IJ6XH?~U3=F=oAw)y>+Q+^_D(JRRE8o0O2xbT&?D z3xhb*rMQ_dNw{=n(x6rup-CFia6ajD!^6Lq%XHtI+l_8soHuPOq3gzta2RQt8aHDnWX*taOjn= zNf1yx&AQ#5 zA{v3rQ`_ot{HDSX;3H8Ya;DXW!Ey69K?yJ%`#Vl1V>$o(-fZt}u)E_&-B*bbFxe}o zvkP!sdiZ5_0%GkeH5967W^fxmsj$5>?l;PB5t{G*o_aB$Jg*R3RE7#G*Z4NbjM}rJ zZO4TDqRDN>)g}^Q#n8$W$`{(kVEuuypX2@+sj*D{J*Th|ni!5M_gltHyq8+5@rVDI zHoeo1{Tfsdk*+P;s89l+t2Gu9`G=1aoD;*ohkv5AbTEptVhz7de?XMC_5WD@%)mwW z!tts;yu4q2yTQs|;Ja<#sXJZHlUg!OA;Z82J^E)1E0&T+RWePpRX)c~`#-#zmt0RU zpf|X6(|0cZXfwvB$-vzlWkRi!9@9I^OZvusJ1s(jw*?V5U1M8v7B$$9UQ8o_J6#Us zLJ^5VaQ!Y|Q~o{n5Ll5%c(aC`7ivVaFV&x$eR`uZ5P&s(BH*iFc^NP8ygLv%A zZl~euyWne@5Gj9}(L3h_ztobtnC|Xh2ken33Iv z1U9EyXy?%6y+d}7SbWTKwce+8i_mV`GE0Q%%g8IFEw3izx?-MDEcqbk)74&~P5*ZO z1O*O6SCCzQ5044H2d-cyUO>{|%U80LDx?yj)})p1MJ@Q-(eg8AjWqJo4(b!Euac{* zI`fWiE7K>5%#QST$9v%+Q|jcbc+`q{Lo}jL^1knhrh8j<2PZQd0#H8R@J&xF0+Iq3eZ$9FYh(WWf!MjX4&X1nF(rlQwOSx|YL`mUB45ew3MC|S zb51R)=?y>TG7`qJe5Cl`*!QW0#g4nG-roqa;o#(Df4hO=R#D!6yp!44&#QRq_mLLb@_s}@`&@xw||qjGkJ<{20Iks zGzOq53+|UUYL4PvFo5yN`(l#j#nk4~R zj)$N*Ix9$WM0Pwrj3FI7b*0y=;|z@6;3t12jk&(sjF35~_XcelDlNc{aZjYQ9+sR4 zGYWO1>Emp_Ux236+jt*Z&W@6K6L2+nOusaL*(RG@SLPXqFqqO~JC?x@)%g!#$*DF^ z#}NK=hn(GBf${eGP%~>RUu$fQA{L2YCXGWdcegn!7C63dt+AK~s=n_w;W%;-=7t<8 zK%8@jW;T>bFV?C1&Z-tH3(=EBFkKkUuf7J$ z+{4`DiGX(HEHX@(S<`TtxEONWl%#3b&ayty1<10gGuEn5cTcGWHE9GcS?!I{vR8$S zPU(xWz_voZv|7|6is_ZBzM@jM8kT>F53pb^tFWJvLbG5-S&z&--%l2M&gGL2ZRHQ; z%y1TA<=gEjIYbt0MA&SzcY;D}YD(vZ#w2^g(KvgdT=QxbN~_;hy)$bU@NX-` z$UsAmT)^FOp?R;G*#cB*E5Ha-I|epywa|Zad&h<@Hj~q)1KGWnSX6=i0AGjSZEx(l z#h?^Shm6QaHqS5==tC2qUBKU84Uqh2e7yN3;0UvxO0ACAE1@y6xlDJwqaP+h{xkJ7 z_nl3w!u`v2JDt=0vvgt^>gU_hBgXNJZcAc;b{XFN-L7MrGNbx)io`7RV*2$gIUX zJ8j>?-&QkBrs;9xxg~qfn)C=wYz^)-$(qo~McMhCh>grBL90tp3qq#hvf@ z2GS}!%_i(hLhA{PSpZA@9Yo_gWWXF1qJJfe*8bjTzNTLcryY`HhML(Cq}R+&Z58ES z3a%TP@Ok^*pTo~Nm41haV=Iq_T@ncxx3X++f3w?@cys>*=Nop_F_+L8GFi50W{qL! z?LGV$!)akkuq{fdHA+L$olF$-Oc*8Ae-Y z@`457x;cX9)9)qVS+~@W0=v3nNf4T70Jhv<)Mn%bSy4gyysQ1)x1pCB`X$`VIK}~v zHG`N>-poLDtp1j1<+$>R#`b0T5m{TCZA57mF{|O(Tj<)PJQ4sm3xY9zAHqJTdJyjs zYKRHJoVN&DLJmDC;H(hRr-yITI`V~Y-pC08*SosI%ttDL({M@3xaO5`{l{{F4k&^} zKaK&9XyfYioUUPAt;|2v=J|<-noaeNl2kRu+%p{$p#_sqng^{R;e^x~S^b4g^kUOc zT$g4?kh0!c?fF2GKEIzlGuSt=sc5I-PR9FI-&berZLMgd=!b74j6VE`88vS_L_J^m zu-#)jT=vg`&*uGc>Rau!tr%{BgQEBAg*gSuT&&Uk4wz&wE%X5!7 zHcZ5VKi%aVYpAgSY~GwQ{Y1h$(B*^d zju3Sv7?ywS*_VpFyp*3<>*>)(_>uwjPNv%h2AL`xe)%@;lX;7Dl%>~T&mR=8PBQqa3G2h-~r5(FJV z@?$&Dw}Rrp0fZRhoW7TUeAx60{JBLhS%nrL>F&NoXl%orK4JVtfy&lWQZD`zgJ`q@ zKLPK$BUWFvUN~)3bfOd0@CtjuO@+1Ueku_wq&fN(G<_14(+_%WZ?*}&1Ja9?&}@U2 zUJ}(0xD`hE=0`2C%bWOiU=q;xD8jr^3;b)9flD1ZlgNEQ|Bk!9TJ{Ty%8Xu!Y1N>g zu?qj;H=fZt1RaZ|h_-nK@5*l8&yKCvk~gCKWmj!hZu5D!3J&hEW?)9uiCug9L_nGD zBKWzL3-Q$caSG`E&)(bf+Xj>h&QGsrGGzyh;eAdwC+}(&jV`Im&dp(+NcP=Z%}b{Q zXj+D;3+3WU0=lNJp zx@vmmWu0wez7EIXZtJXU5vzN+XRd-SNJBrPP@kzMln$H z=8rVR1>L~;80cRH*OlU)3S{Dv1pk;CgJ>a`{&Ez~%M3Xbq|NeY`q;-bd-c2_dzG+b zoJNOk&VRQuzzl100#!6a-#RzBixO(rwU{>;2d8C^rl=Nw%zqT^UQJZWz7LqM710OZ z{;kZ{e#1AoGPicaQ5YOHqD2FH3pYg<@XqE(G4Yg$ZK+FCO7^GFDbXPsmJ)a&);opwB8=9ef&cixK-4^2Nz zMiDA{AgRlO)2=9%Fz0?O9QUYRGa7r=7hrVcKv$XJdbwUid)CCRHai-#{b38Y`20>3 zZ+Ew8>0CfZB;b=DP8e=`JgbfLhT01(pTI4gLa9e4Qn6smS^w=Qqw)y_hAiaGSz(0w zio>R-ox?{ODCcOM%OeQg|6MVe;UwQl8srLOw?$Vvf8wqNm;l^@&Qr_F#K|@%=Bn&0 zD*@P>9N_M$sdQY;fAw!s#=aMr#e0HZX}R+NBg%EVe1f$GP7}n7;FS8Q?&8brl1pEx z-;_V+aur_wz_Ax=vJOH&OakKeqW*So�bm_%xw5?RH{1@Mce84gfi6zJp$@7S*3I zgpwU4j{4+*?D7#CyC;MB%GHGPXhKx9)X8Qu!j&`1JCoA%RN-T*P5sB(u=y@>^_Fg_ zqlDa)#D(`0Ud`y$ZMG@XPCrax%MGUBY7RuD)jOEB2L-Vn{TuAX=p2i5Aj{Ps6x4%C z1zY&!3f;pf3ON$tUYC0l8K}GMh|D?x$9?mLssmB54D&_q$)Gyc&};L&iEz@nDM9^T zdza~S3x!{)xmY>y?XnCau>44M@q!yDe+(sX=~(~PZ{)SSw~@v?5&YbsNj=Oa%NJYv=$Dwq z+Aa(O02K_;@r>>gw@VItxew!gL8 zD7`QmNunDBPprQn_G`-W%WemnE=DHY08lw)ef9wfZCqBQ5*+2G@vF(R znI7-v$F?r&-Qng{V@1Z1MO;Dm8C}=Se;%|R&z_G25qZ3{x(Y2LKIKEm(pdF+)7`K~LkqriI4Pmp{|6{j8ClOnQy{#}AT zE|8~WRsgxmt3U4%2{UB#$);bWxpP86*H-UP+Oh=-N-tkJ#kBf;sI_5Tf>yn(2R9q( z&nm)ZR9qh?&69A@hBrsjZ~Fh!U`M&nZtTLgZ4ddV)KjcVaem5My`H8VC&8W|fN@az zxgCprw%1ptAze(w?qV<=@(4(F{&lZMoKoX)XA%7Hc%r*(*v^jv-iNnZ9ls$xV88j|IOmm!W^> z=2JQ37+_6kqymz96phj5HyHNm6!_~VMkt?{H|dAbOaUS9aEUp zli3%x0PwLo(chK$SN}M3xBYFkre~PK~W2QyaBULSkpNfqo(RX9JVxUCvwtH zy&e03ptCtdC@YY5LUzhMe{zQA6lYtIp3evW;0+Se`sz=_}le zit>{rr%0^2B>l!63r_XR!C^;`<8k{lZ(A$pj^Md_o$Wve|I;lre%R^Ok&ATxGW|ZE z)wIZ@H0^X4YiR}d;Pe&db(4h)-d*X(`O!-IhuQAw%3$*l)nz_qBM*|YIm|c~6F-qR z?WJc|D7Z1cE&HyCdc#w@<|F6JU*%|DV@hhH7^lBhfpY5p!LB>9J{bJHA|Ft8_Cr|W zE7<@UXNUX{YhyD6@|hmpdVPW3aEm1zR#E$1-Xe5Hmke6ExV6d8ZnNCheKg(wWq;G(QE7Hva~-fxFT1gw0LwhCSvDI<%d44i zc~{Dd&eqeAH7?k*WgjCrvQpW%DDLT^AqtE0&xs9sL8hf$W*vxO{p*I$g|Oy5?gtvw zSM1x=WsB_wh7TEXp@}?FH`wkxU-l99!$qLpQnPf_t`GTc!7=F>-fYBaYyx6Tt)tXz zDsNt$tXNh?Hp_?F+rroIk$sQJyh(40>7LKTF#Q%R6%>JAGE%*F8$xNtG;J#ceSKS*G=hSdt8A$bDjN| zA{Ebe(VnV(QEI25$#_^}^>+EoZ70_ggijNoQTGwCQ8m9u01OgE8x>Dn4gy262Wa;e z0NK>%auH^r#Vp15p-y8pLmxa<8PS$)YTg;gjgpByO#{AX*|xR^je}x2yD{crX_>UR zOY$I|kt|(y)#E0_1OdpFEbW|Dq&_^Qy7={>cXo;h4pK7jq0RvTt?_h~Aee#sDCAM2 zn9;-=reU zJ};Q)e%NXKA-`rtjdTEV@5AVMn~+CJxgc>z$NfMu@_tg4Vs6X)rRCwKXt}4eH`yS4 z(RTQ7v9WK$u2i3Rb_;F;bT!uNyKfA#Xg$PgHlsx_Omr%A^B~bQCh_j@4bc=%$*%Lg zMP%=ceADmEXOy*U3a!YAaFM;N{!IYe=WLC%>nx7ws4(u}y+(}Y>k zqu(EKZbsxB83lP`PE;VxmQAfk{1*pLl}NnpOmVbUT}Ed&b-{bWJ8NnMeKXP|M`>0X zhw4nlN3$3p698rY9dGf2%H&{n9PixJHH_?F>w%cbE!?@IcCJr%JnJi=ky%`jk{5(% zsMW5~re27-gF*JY)_Xb0=i3#mAK}JNp!%{TfQP;28Niw-1|NC(o3T2D(e>%xVQb)T zV|L$=iT9E0%4I=I|9_e~f0W@y^_iin`>O%nNY~k$-uKeCpDoBDw^aAF<#IkT<0JJ; z=h2bhjl4uj%WE{#Py1$=-@ndemtD^xJES)rZSG+N;PP7J;*mM*Dq=s1*KKP86r?-1 z%1zS*0)OSMANWTi65lDTcTJ8mPnF0VC5`(jy+JF9k!I?kCKWI1g^__=WjZOb6j)8l zF|hbzZ%1q(AjGDe8X%TFIAp-;cf|<(d?ZD99DS{CRbjnrNjKsH;q~+rEq-1I8D~dF8KSjc=ys z5T}$#k;a&rnHx)T&&Rmne^SA5X3_C{v@%#m4Z_NEP)1zSq7w~QFyxHZJc+BH1NV*vy!=T~&#f>xq*8}G9*^8R^O$kDFrJ6Vh91v3*b5yIbUVBUbC1U!Q6 zH%6-7{{KAziD7R-dWFAJhOA5GO7^$_J_bzNtZo>{8mET*%S0v~>SX``hc)Jb_n5_k;L|JTHv}bCEU-rt2LzED!MhP8{+mih&_W3duxOm2qj;k zbdph*LrzlGz8*=d{odFaR(+|*c^wGok?UML$n9Zqaa=b^A0BWn5qz08B36W$Izqwi z(mJ4b7mh%D7~bp$>MbY^F;t2X|4N))LV1Ocvs%H8M|DLGu1#(lvnKulWn{#+r;Zi> z{P^!>0d`2F9X|)!UHS2qMj*P}XmnZgz@F9cO$uHoNq-A%`BxBmq}&8=XjOwN{iZ&h z(O~%iU;NXqO4LpdF)peoA6z3y*RUXce;LK3-gQR=@^1O|ax}_p8p~qe6 z3i88z=ycd7&)UleuB7}Gup|>ByZZxp(A=TJZ?rl2;D_0^5Cy3XlTk8~=DK^H^0g-9 z@c%GX_pNS7s6zT>M9LP`2pbpe0NL7lPSL!zj6|pR%d&XCPOIa|zGan<5r)g3tXmiT z-=X%Ga~{*>g5d>EQilc^=eSeZ6N~8Lya?o#=X}BWiL3)5(%<9k`5__KBJh+R@l{s&KbN3-TogZ5~zvERr_yJD?9588_ zNz99^Xk+3hecXb6z)jB_y_`Mxz)3RHcCRhm`vJQ|KTNolb4nKePK&Y!A_F5p|``ur_N@#vzrvP+oWA*p_Y}NVqAYbYuTev+15|=z!m|laOSpFmpMM z_Y?w=8w#@l=A5JLcpQ+Mc9R2DbbCw)RtG1hwc8%4lxGUIvWW5ewtS2|-xAy0LC1b< zLj_z&`b*Yb?F-7x;pqJqCw*6sC)~nx1gD3-_LL%1-rNZfK#y~$Or=|_Ar{FOTiR6oa6_V`CHf6;PG&6*U8D&D(7=6?>246)qib5?4Q(+kTJ zG%op6`VJ8PJJPkh^EkZ#bc3u?GHik0@@-Ge=3SuT@T_FMRKVA~(veOp%vWQ^w>j=% z$QXsxbStP1(FQYQfr2i@48@{6*PXhSOAQ4p4EK))T^P4oLaJSi0gia`)iDNOLLi{7~0qDQjBrJ@^!l2(~z&$z+#<^rvn;Yi^GODi*JU4uJ)-kE;gS<@B#CQL2hdr^}Zl@wtYhq&W@92&Z=Mi#l@ z*I!wOzZqCKd!chqi})n4OoX(`aHz}Z34vTlGR>0ghklvQGc?LEAcv_S(x!W3w5*Eb z1Gm?%((&3Ibm>%}pkQ+DeFXwAw>NQO_4zKJ66q!ih*?TwZoGI(BSRyxS~_AzEodcg zr=Om^&KUu6A#WBB48&h@3?Bm#G2p_Dua$w^J-|RvuUR{)RQE9L@32Mli*+rwRGC;` zDY`U;1^ss`kmeDZY?96aKj7aZN)_&i44(+=bfQA>I#Ua;Xi;CAVZ##W}d|-`zB2 zLfEC132?9HRKt7vM1K=5cAb8_evf#Avid0lr_1@-zc;p};4?F3H$h?`d)-!lzhiJU zcMjVC+Dm1f)PrVtNzhUUu}wifPcxoX)^Ug}tB0HebG*s+N_W9SZyVjpc3l)BPV@uX zOtr{WNn8Y}40ni;+B@^EE~EhjuZZvvLTqU+s#*qXjU{!M(?KbMov}{!@AZ(A_I-8> zKr>G(sG~Ys+)xl+zHI2NlOvIWZG4Q8(}B^x6D6j^w}BwrU#!jRAT(r;BCfDP#>Z~Nb4nBK!?AD72tQeuDZUGzqmom|=|f$3e? zpUCWVN&fpWu=-QO5)HEcC6If~f!)i$a8(-ved2X%D{(=$TX3XuVPz;b-H@E~#;uR} zv0AjDIz)w9c+aVADxC^C*P9~QCJw_2Z2w;PMZ-6W%2s)m$XMXet(NroQ#cCv1n0rB7GsgDVVYc;F|QUgO)`=9KTrmD>=!{u$pBhI%>kLrEERAHh1smjigB zuqu)g_$Vr)Qr^(8Y#5t)C_)9;5Gme&@2R z`$PG!z*Dg)v(KVr;(HYAXl^&;>dq4m=*^E<=YdY4*4nbBe!$0-C_w!L3;**D>nOXU z!()0`WQ&|ALFgpSEd$t$GDwQ9!M^&Aj+5O5JoyOO#804AnJ~e9TL)iGq~WI-$C)9k z&by-o4@G0oSy!B?fRVX#6JCV1@+J8f@I}QUyr1(cU7T}U?tSe|T(nkd8`2V-WTe!1 zweNDf+La(9_DC70-J{OyPVU-R8ZLICCs|%A=u-33%fMi_(vW^>8*gRC#oLu<&#m;^ z6U+MN`VmI{+QDY+FX(Sxe0nOs6$IpV*-?4twwm9(=RA1`nD4-1n zRq5W?)_p2czx}CXc*RHw?E=4pkOSH5dQ)mB3%IC4Lca*wi?9mRRtR$>!M!uu|eqv27%NdeN6`t2%LN=RjzUor@%Xx0ute zD1$8OTJ=ZBYZa%|FGQr`ft$ASO<}Xf?34Dci|yWjuA?;E`7jl${FFMYkYq0vo=`=z z9m@hwn-*dZcB7ZlGj+1qLaH>C-GHBmO2^lf0~JFS_yI1xzMBSN+X`^ymD2%zllJt6 zROs9#R9+@CE*s@2xLuJY7F6OP#NBlBdq+|&d%+=QTEKH_OGmvLe{@6d1xiv^6Z`2# zHVb6S;@5AXUXOr(7d)=xJGke)S3B)oRC2jn{??cZzMyM1U+el9wpnfbX;2~0T6b+~ zP9pEH%H_~gK5APv?=~#-MIp0za35Cd1z*s`RGQVty$=0G3pWK#8HXc36fY7y+LNZYU$Mg_bw-DCihaI^=N z%jxmI59uaT&u=_U3MS)WPKQQTl2zn$2thHh`H{|4pv&6}=@hREMh>3T;70W?8BQ zS|=t`i5rz4j^yD6Ps_~ll0zD0lz^{hhePeka@xXST%Gf=R}`QQ>uqPH^7k4j6v(yGJqcL&2@V+3T>H6V$P&WX44jxQeDS-3gjoyN#UGcsp4&*o z6An}b!FGdHP%}2NKNfH+2p!&aepvVv8ua5Zy*A=Lg|)<2cZ)$(`30=a>7u0tXi(o@ z&P$Gc@v0_uH)P#$$;;-wy?oOFf{_>|%(}PuVi(3@hmtB~LTv(Ys*2*VyfR-0qdog|3M^xqT8+f%H zLE7vfOsrmx+vJw?6)QOsiFgrgMsBUD=yESHijfUlcccetHz_SO*>&oE_Wjs=UR4yI z@o8rIp2$Y~#FZe$0CMe%E;PkZWLs^^Igg^^GJ3(yt49$h+O*IG`;?vF{3TRaZsEh= zucSYmSi=xSkRsxO%1Hovx1ZnIxJCw^cu(?6tWilm7FK^+CiscXl0+$_-PwQx*)1&M zm$=m;IwfRW-xKz1zA^Wx9rn*s{-nji+uX5W!(B@XU@2eZHZ)}eR9XWrp6qmfJQLzP z8*hBG^>=mFJ5u0^-p*2)dmF6?6P0t``xyJ$PF7#KlcZvv3gUgJO|=}t6dOhtN}+Y| z_LqRD+lX0>qqO-tRi>S9a%!+BS*`|i0qnkcg^R}PkgJMjtyfyh<`5|^10MLowai4P zpM*p0=1Ms-QO)yZ0_>YlPBcF&TV9l<@FIDQYxD!o%v3CQ#${oQ!Qz0I~@2d5?Got@@G&u}wnsZk|4Mz6lVJFFN0It7eej+87Jb2l(X!7c6!M=k zYiI(Hn6fdR6b`vWew*ZBcNplF&L>!m(9Ts#x_kR5!g8R_65m*RF`(!PtRi)zv2~C; zy#q#=hj7@qP6Lb;Ci0UAeQHb1X*YzR`wKbLd=f#$%XLZ3r|~CUZ{+8eeUo>Nan6cM z&$us&*!^JsgO{VA8B{^}my1o|^x5(AD9IZN%F~jmKUqaNBXg(dlY<{q<-MBi*GuCv zW**w5^a&bvpg7jN_~xwHXf>i+3X)h>NF;sE56$E>JOtHgycBa$JJ@(@*%JV>BH4USjXh8TJhF6|e3FJ8Iib3*DDRFC> zqJz)`i6uz`=fKZ2cm(HrC;xU{vk;Or82P;jnIrJF^3{&)jNM=_G<2 zA8qV8m>QLcdW_U$NIDZlf8t8rQiU`ZS0-@3LhBT|3*k=r zYY(bSgBM-p+)Z2?-a!sMeAnXpDvT}TWF&-DBM@k&Tep^_Y?q*X+bkg-Z76z}pbNw!#0A7u_#quUq0UBMa3%Yv-u%X>o6y*SkGN@P{Mn~kQ76e&# zVTvo?>;jdIOa(MY09sdHjpLe+IackPdizF>p8M`o_y!ss`@$}X;!a!ka?z^4_C>Mj z{C|djeA?==f;i_T8&tG3>I(fL}{U5SP)a{Bt z)g2DxzS_`Hi^i%MJ%NqGsy~k7$bRuf+)!?HPAtSZ688MQd3MGto8s0 zM7vI=I=*wak#G#Fg3qC=x=~=(++#wFnO*PG>#V(H8_sK8(O!drF9#%(hIK=@CWr#+ zzu+FL3h&V^x0blS7n&-5vEri5lZgI<{&Y#fnl?!2+2(t4S}n}CbOR#7l%0}@&&QWv zY*M({5wTdEK!pA8s9oCAM4D#Y!(?3$I1Th~L%vY(CyX9=wfQn)3hRr20~exuVVbUl zt6oAUq5Qdw9idh!SbZne9_mJCuw!@5l=m%mS`uGahN&*qir-jTYU zLMtI}S62=UVQz9--jC)-NBz0LNIwj3b7|n1OXli68MuJAV&*>98|8{-l)pab1$c&D zdil5*{MAUX{#4oNPSLAc>hq9pjD3U#YKillrkk(XN*(f7m3`WLh9x%Sw@nnrB8r|y z$0y3>0w;~f)R`|As+F5~taR4@vgI{|KSA9`DfR`br+U^Es0XT%zp`hW-1YmLV{5Ht ziOqTx(*mlZEmdsIaIaXtaIs|}P@sMast74vzC%B-ol{eny_9D3B3u(`Z>Z!H0+FL7 zRf`&orNf3bUXGl-i1{o{cERP!6tdO#&CL<1Z&{1FGDSJN4wnnH`qgq~6}m%j{|fhB zqQSH0PaqCQg&IP~n7IOu_zRqE8Tm31{l@clqQS^g;=BbSSy159$F|hHQ$Vl}Z1W8_ z5T2M>OVW=kWu9amOOA%Q08ketj;@4T7bz*8Z9&In`@;LQQA4kC5WTlUIcJYolp zP8wnMDfc7jM;1Ms9w3fw$FyrcjXElfxGGA}@eu9+-F6ESoV@yt`JC#2!rrKdK5?-c zNiY89&I8%w^3@+2)q9TMcanm4-6CpoPqWwGIa=N+S&rW^mU-0l2{2os66+FD^j)LH{NdiP8;(tFsyAN{ ze3l~BF6GZ;ZB{{!mQp6XUeV@P*t=zePk9Bi{1R2#d_Z+|{#umtATaLH8-QXwiJCY1 zv>`#Gpk!D+YsI=_c`F+8)3dSFN$M6M*m)Igf`g2za72Rdt${@Lw>Xelqw|%SQ@rtd>YFjK!E-1F zae+%^uqMavQH&m8Fd>Q}FaM*AhJM$NIaa1!R0LO!s`T-it~HaW#zm|Kaf6otov)*? zuJ{j&YkA5|K>AHpuWA3qL4C)Xkz)wy_sLXf_-RwS!6apEyRLc<*BEm*qvD@RCVFxc zlI?w1ZskZ0+v&TE)a)8Jb!1&3yJw9>S7*4(KK1G0(rM85gx>Fh;j^sHWTpwmWoh2J ztc6{;&pR^WZDG_hvyke*==Q{N?1W9-MaAy&@4!|0CSHtO;`-7Q zxza?C*Q8R1J|I35nX`e~m*4xTurPm^nCp4<+YU>6*mls(iuhXQPW%=$^$TXKciqsX z4hO7nohlHifLbaFcX--J;q(%(PqdU%#ogGcK&xizVb9NyovW8g%V2smF5DyZ_)!-W zhVY>^+Y$N0oIZBy!Pdg7oQwOUE1L(7#W+UMJ}a^Q^&odcX5wx-y?Aai0Yuc4&ZhtW zxwoauMSM}}t)Em*@pqQRnxwFqFOpH9We;v@YceflOW{}{>JelM#41<4$|hcYITuY{ zxk^UcHq_p5LPS%q2xNWy$lR!_!`^*E-mebwU+Z*wXvM`o{({?`gd61VWDm(?v{&OT zPt8`|St^5XaLgM7^d@ay*FF|d%rBzJ;0X$HKwP%mif#eZVqDXs76id^j(RQ?dY-}O zIGt_oYr$pD#EjFauDBt;oPy^t+nm5-1msL>lfY;t4*_8ShUVEs7xX?O$CLRDEu?;u zjtrXK#J`;CazJFo&km$fE&O6G2N>wgtRELga5H}0n(y$S612jF3F(xIjL&;4sh|0C zPA$+RZ--NA(l3WD8AZy&5g(O`&R3a>m5bt@WFs&o2YA`{_F!MMEwWB(?D-Kp9%RG` z6&Z0{iT`Wui|%<$1@0!wY#Y@e2}6ZR-X7ImI68V;v-fQ)(1Y67FUcbg34NKet#~xA zpEWG>Oei2BRngK;CW*MGQZd!B!ZyZG5}g#xH*5v_N)Ci=@6xX+bP2i*=9Kx&$^yM! z7S`UR-?qZrvN zFwK#ZN&#>NK?ZD`;@ig}!j1~kJnBytMm4SPA0S9B3x4&EC3sz~K8yd+pazC=%{bJ@ zex1zD&x3H$(Kg(kxWtBcSsU@Tz~3*(wJx>Fide;YfPSX5554V=OSmJiwpP~E=;<<> zPQS%knp;*TG4c0lz&_ds(vxZk^N zj#0gN(07h<#wPNbF)y>EGR=}tP}A(BU0?9ur_X!en#>C9a%1_5N7BH3ODebvLw5y@ z-WnY^2r5Hbfi#XP{t7iN*JuW_ji4#e(ph&-MBx_~3w~L9gfR4QScl%m5~`pxZk=r0 z1}cHuanj%lv1@OJD8Gi8XV5FrIsA!ruh90yjbNe-HZo_&d5G}HC}H%;e5b3qzHRX6 zmS+w99`jv>A`7ETkvCiW#mcxFu#m2gOx*;y0k?@A4>)BE%l$PUA#>t*_O4` zRA8IQo)#RUh4XLdWtm^+nlPLcVGy@h`Ui(IC6wFxqeSpT+setpPCsQ{BGpZ983Rmx zZ~^sda_dwoC3AM4%1$TqF->G$-7TnOuZ;iAp{HqQa66YJ&V1{@6w1}u6#UgGc$oWa=8Y!w9I9XnR-6`+gzG|E z#s-~eQ{82#ohNdRoi5)K7N9i4+9F&lL7NZ+|FQc-RAZ;0>lU(e)U)^*di3@(L%H*U zLXUSoriX9J{vz{DZ%X^yX1xoc0?GrYN_%l*zM`=iH*U)h9l?BNy+7Y0U2;M_^=0|9J#0$$2%mH50f!WN&PZqt*Ary_|+(GkS zAKQVpV=bJ!Pgt(pc&$q(Qu!wWs}00xWy}brnr-$Z*fQu(H+w4HGJt<%Wneu;bAP$0 zfaZ{6K`NvM32P%Tix*Mb2>Oo4q|%;j!^rg z)Ai1uPD#3B#vdbbLj1=D)mYyznyC6JYi;(E88qF#Dp`JHhSN9e^!6<+`@*k;Xfsg~ z{}xiOxoa-4ZVrhE=Bz%C=VgP?U+fVX^a0BGI{2IeQ>xGV@gm}l1RI?uD4UKOs#nHs zhQ1K$p}xF&D7a$g$uFHG4!AlbLT39jWa=0M;|%??#JR>YhmqlT^xH0@Dhill348b( zI$Zs@`n$k5-1z}0##EAA6TCj5Y($CPE6K9NNL->$dkAR1cMmm7-_2_S$lJN8<8#J3oW7(Yp0El_c+J@a_*`opW$XdNz` zI@PyGty%lSKS@vQg|tEKRtF-&jYuyoi4=z{-v^tbitjl}Hrp4cV{lx1H&sf;vlepm8&UTI1&RpWyvoJ*GC>smHl+m*$sTQ5QLBd4Za?-YmJx@hq?I!aATQt-pA8 ziB6TMvkSZGK`vkrZ9RT$Xo|Hwt!eoBg1pql2aa@ZTOWumY>+>4Kn~Vk|LBhQWBu@4 z+5)NODu+hZhEbxeol@kbaqe{E04j{|<08!hw#B+)EWEpEmZ@L=474NgC50@pW(B)Y zFSGi7PZjdE15N|haF0_1ug9@II4Rq`Iqt-e)s@Q*t_PS*NA`c8+qU9J6hzk@eFl47 zj3mtU|JJ>WR)JJV^6=cc9`Pm9rxfZ@9zfgxf)9RG#v>kN1wb#jU;UN8N zhJ^y0)|7(W+;Pe?Z)TWTG4Fd?(((8L?X}go7LY04B#x`pC)`GkPaat5#3F9wEKa0Z z9>QnE$MMm*^VUXfRRv*=;lCHQhx(eDupFLJx%M*%6PfTHfgK)C*y#1;i(>Uw2s%%f zi9eG7SuGgcuy?4((mMQ!)RLeXMlU^O4A&HGMqZ-M(UgW27`~>Telw9lZDBmgX7tS% z3*oi9MqBv0eHUkaE+#{pXAg<0!36?BmU~U|b9q}FCdVJLTioM*eb5LZYK>(1KPf?0 zS0uB>ax7A9O?c)ryTwKEsL^6dv;?!?Ta29t4QkpSFu8>N&JFC;dyaGkwMw>!mIpSq zMMS=0m4N1HacQcD9`?rf-4aa3uYU{0888sZNvsp^Fn4i`$X&6pANx2_!zpIIy^-W? zYQrzeGpEn2IkY_#%H5jlFdvEP?k)>fU|yJ0Pxyp~WH!p3g8Xz8TEN&+E91FG3}~rK8L| z@%dPo^1#DvfN^f3WJkl^ss7XM-PW-=k>lt*5Ph+nmbdL3Z9^$2Oi}YuKl%c3t!vL; zx=*Ei%%gnm&Q68zpQ$>wB70F2401NgyLVn^(fgw{Z}bWkTLwLvYqF-1r))jr&TT!t z$>EL=LHD{F?{Vw$qn*6(G8h6PzbZgV$u#nM@jlJ|kLG{le)L4!9akEtfyPO+Wui`W z;mQ+c58nha+xib(#eM3a{N{BuLspypaYSa4>K)QYJcEg!Ba$8Qq%G#8hVyYM)W6`=YVsIESA4-Kx{dX2E@^ zTSx_VsVs+L0-Nk{dG#w9baGLbG;#v9H|DkEa;@_PCT4_cq`rr6n58zQ^91e`rPI*L z71166@o50$IR(bk9!ntov~>+N+k@4kK9#e<;ZPR8{H2JhHufiT98+X3o++%p4OXV# z3W!D>n1}V@^29=-fAx*pYxpgf%M^+~DwlmbWtwn}qgx1^)1ChP@jB3&(FBX{$tk4yfwag|$}J^$&3 zu~IGo&W60gZO|7cG*LuAJ9L+Rs-xX8-qSB$A}6Zv1MB4w+I4ybF8+m}y|uh4W*`LT zCd^1pFU`rhpqaLwLVYjM6<~+KbkWDBo&5as1}mgx3S6Suu0R*337$3vO_v{yNnqB^ z9_83}dbCH|1P6lc?;83SDb-ibHf~RIXF29uj8&5_7(sD^m7|ua)%S9B2y1VsxlAji zqj_sTgn~jT^~W91srGWC2}SVL(S5w*=>3-SBrF9ct@f|Jgmb}KQkOfg37}i4SLr?h`<_(doKOd{6C)xd;J zT9%M;03jT8IK1x-iWMlFT0S^-o_af;Z9lERc#uK}?`Geuz}+Q6_P&lnmp~VUjTJ0> zc~HCi7(rb~E%75M%+{6Y4ur`Ltvm-P5^Onxmf>o*iVx0Mhwh+0gNOe+ewgWkjB4#` zeqpx-^_w2l)-QDCTAL_BqU6y&;n+DBlm+$@YO$5T8r-d^7(FL8LKndm!YR4N4Gz@u zg+|Az?e30HD~TdLnRFblAXq$j(ljJW@F+A9U-iQTJ+!6K>BwS9miS}`)S9+8^b+~R zU724zK%!_J1@m5*k~Ds+RDQJ}#+s8-6|4Rk@&2$k-ugQ`Dg zbQUdt9ca)E@3ef~a!`x-5AzhZ6exJSd82=Ydna! zx1ap0Y^xZ-uonbu!R&|*Zd|pr2dq6(xL-dKsX|!LWX0W;sE}Q+ABhb-vGXmVfR#y3 zE79e?%7KpY^+1*zF;Gv=F+M46S1#*q&H%aVDuWQ5i_*^n7;d%~4NO#cl;wRHw z)^!gEPGV0}F~S(x$7zP`+EjPQKd7A!?#!lKdA$ED@rQNiTKV;4@U{m1i&9tgXQ=44 zKQQA-deD(m>3*mOAg`skdXrQ#O53T^hnkOW8cQh)moK2(u08+iPgtA84n}~LgP71w z1m@3eF|MLw9Z zUgP>FmKD1zjD(j0_^?98xS4Tvw})U#it}V% zq7mnlBZ_ynVr=dW@5-71vUYdfv@GC!zQt+k3Q_X35!%x6QcWGq3DkBZvvvbh>NY$G z9G;LI9#h_pH(wX)=8=zt#-;zned9G7xD+LLm&U2i3Kuq@gEElhp%m*zkGZ?SLMn@) z-3c(9{>s60hS;!N7*#JFhL{|*0yA&uITI+dS-Ug4C3HNo^o+e_uc@qr8HMLh6SUx~ zz6GqlPaT%NvBoKE`?IJvSsbmJ|Fs<4!F0xR;?Kw`e!T7M$3NN)@_ID9fs=i6lyf>1 z-ALpVY~4Kw1n<&iE{X;J;Oh7l;uUdkc@VxavPtz z7$pE(bwg=9^aRzN{mXe?T}mWMk)73E3d!oYDN<^!;Pdb8@8f1f`QZh zEbsT47dt044l;dc_Yv7rg|sG~kmO)(ra1McVy(yWy3=NHmfT2?ge2?--~D*TV?Ex* z;Ea!j*t@_(5F|mjQg23S$r|W-oh{FBiSQqU=XR5=t)gM_KIb&&L(%-d!rghEk%9fu z;S6*{JBn*8{4Fh~0S=u)HQl0$O-6-R(Av6r`0Wm5XbrnQdR$!3ana@#!QTsw)Q-;R zNo!Td|Iv4Z{#X+y1+VKStYtbXP64G`g(sEeitu#i++%#0GJ9A-xipoR6+q+`n`kc0 z#tQDkPR@GEQ_geZLNx@rwe^W3X7-Oa;8fv)9Rc-ieH$otzy05BrhyVYi99p9&O$(F zIWKaI)0MeHwGab;MrKVoU^UD;W}w8mmP}I3V**2Lzi-CxNAGQLrxHn~c)uTJP@Pds zp5_n==R$b4)@n$@DY?+%x!02T>tHwZh^qj}j1(5pqHj?XJW zoReEe+jMTsCV87(za)uUG!Qfd#A#aLBiLrqjxLgZ#`e(Xc-tP$#W*Le!bl^7K+!P{ zJLLWrxz7U=WVJApcAf*?Ty<2WE&J5^^f6&F4l9PlBy&yTjiU_Y|4JZ<4z!rX-|PN* zg%=$J`H&l3aYyW59v}{(_gCL}YK(gahv7w)dQ1(&blf4-KU;@PRoC`O5A#y1TGgqZZdVKcGM(S%1YtI9j%?RRjPl5|9U#- z{7hR!Vbxs&g=@6(3%H&_vEXh(Im>~i`besT-Gz3Z{m-ZJKRvJ}f-#I@4GOR72$IHV z;6owqvTUxL`=(_#>w!`gY;wc7bwjNwrsApQArlSr@glpv&sdsvp8_rT;GZW&^a$U@ z`G023K_+-{w^)KR$ zr9l;g;XH4WZe2kd%!B%v5V3GZ@_PDrXQF^$98%HjavFunX`9;Hf|Z)Xz*N}$O~E~@ zfq=p_fH5X|e!ix%@Q;$FyK&8vd4XoL$66&JSG=1{w7iORh@!tu0ltGgI3d_AKhKCV zK^?%kG4|L!r#O-aS6yb=m$1c_EAPgVoQs4r`|WvDWo!iN7~+fQ!gPPeHMZk<=3E?O z!aUD5;#Al-lh?;rC&js`?bJQkmy{V=RVi^gW(gG;5HD1nO7$r;uFfH-6?t)YBSq+W+-dqiarM#W{O(os$OZ7m66AKgda0$d%gio zFu!VySb)N+eIZ>q4{_}$P%86K6dSeg?@W=Isvd5Ic3*kre-_dsZfC$o6Ao-gS(;-T z^8opwxiWH$WY|pnkYbm?>F8_|2B#B_pl}H8=bMr-Q}X$Zc}{%~e@t#9xn395)~B>yD2o2OEI~2ZGT2+G`IhzrIs<< zATKGFq%%XWe{1%|?k9uSEn`nF>T}WmW=@nre!OapjwF)zOp>GRB*#!2*GdAH*!Lat zOyA2`RrztQz8sFkC4e5+%vz&z(?;=nTJa==FMc8f~+6Oy*O3I z)}QWo^_QDXoY~ge^U!GhvIkTBDRyzvm)d^oY2W}WVUA@&v zWW?yI4W4iZyLpsGUltfb`m;8%(Q{NHRLLwZ17m7f)J z$e$=mZr-mN@)%&N@Nd%- zgeFt~{^FYj0R;~!H8L0U5me`a4$J3LuLX+K3!E^))fzOfS{Z#^w419!9d~VJ8t@1f zuSq(3GF}i?{GZDlGJHCYJF{MmAS`Py$Nt*1TIg9MY`+}z4l@4nF*Y_!){j@UZ>2tX zXOIf6yWVRZ)n30V?uCFtf33|cZ&4wT=4Ksp;x|pT>UP4JnMv{$$LDtJtk*QlvcG3hh77_PkWuvL?7P|HB1!QC=+V zMq`m}waPSa#y8dGhYZzGSHd%aIy6FZS>{IX{+y8>%zf5YSQzN+WES_*Zk>Xr#czK+ zU7z_i!FK;B2IM;QnK_GJB((E&Tet$DHb=~<4?iCRlZrM0>gXfJVgnCio_+IltJ zWzkFe^%mIL-mA(CzdJ50>^Kq{JX$h#A#Nt<$41DoX9i%5G95R#^-2CPRQU4YZ{~1t z&Dd^ufF#!Z+!UZDl|Sv9>yFmnZz)i13oFC9cGjchKvJ`moCY$%)6#)`)JHzu zCh7cs*$mSYc`wYF9+>I!$8Urim6Xr*LklFHuNz)0(9?&KHaTNnjGzpF;OvkXwa|@3~DGJV@ zBP30-rdHf8`SE=A{Y~&sa09zkN*rrdh(caj9f{v5&V}nmQC=*2i+s`vA16?}wUTw( ze;H9bs&(S-RXxEUyqw}6B`99 z3gqHEo{LQK!GNf)OkHZ)f{H{d&<~e(fS-y#6&FyHsdnZoYRx<4A&d}?1#kR@#TY+U zDK7l>ZT-W{!N-wxkG9EnrMI*=bl%d+Fd2ZA~iXQ=JF$BD;IrIHK?XOo{d&FM- zv`N&@<%LL@;d~jo&Rc#_j@w6m3(hcrGK6PV!S}7iSxf+;7wg6%3ay5i3i9(+JTldI z#lt77r3acN@2gKie4du-Aif!g5r*J{INW58VQ7LT0m)suBXlf1aJfp~&-K=1njuYR zi_LNwVGh=a=e||t#JO6;7BI~l^6&lf9S#v|Ha98QkLC;klrnA=A5Ib93q0q+8JwdALM$9Izb8&Q2HfKBFmo&q)fhj8I!X z+dYwPCv54=%=zKGn%>VmLgc!q>nx4sgWP5F(_#y>x`8kEP(umDbk=9j9!2JHTH%I( zvf!?3t&R3F9jkyC-%&miod}%%wYLr{4PA(3?J=!a&lPt;OpNxkY-T7ns^T~Bqs^OS zebei`GF8XVCvGq`G!iI9AxcN-fh8j!Y;JSqiCJO%z0M|z*gAEJVGz+ZA#Q!`_ygp2ewCMM!m~9 z+mLDu6d5JE^uQAfD<-$mZLE%V{@&&NIY)zmZLJ$yeL8Pp(e5c9o}`WUjT*X^7psSx zqtvji+!$KL#w9ZJi#N#M`@4dp>GyT-Mkmfz;j*NBs1oV-qOj~?3}6z{Iob8?Fw31RddjEXlZi-@w4)y7B#^3hJztJ$h(a$2j)sht} z`n)x(wELF8D?pb5hH2{P9_tsKoQbz-_%aInO%7NocWt@f@*v#&by1q`!^e|Rx63fR zSFj*UL!XU@4w1TK%4vLz-xEix=ZUx)#AX@Z-nNub4&uAoNn~jA!?yMX}wFwCn!UfY{Vp_HhiM=&#E%M>=0k5C-gWh^#LuPp`F%->zDm1 zyuUp|bZ}ubpZFG%?@I8Al3h$?`S}4Y#=p|y=$@Fn_W}0Ip@bOUqBB#~DXQrNH+eCL zFjPhByHGYvZnQio0Vhv~9dS}a0sq0Ck;gS#tYj^yp&wpvna=81o?T40NuX{dalz;} zTFu)Q<8^qf3M8)9lcaj>X8)IK<$ z>ONMqOoR34Sx*6yUj=lGY%e_8-U%Us!bcuohq zNqm*>3VubfKD(s&%Ca!FF}1sQO;e&TNhH`W!+Q*8&!lCq9-OPSlKk!1n-KkKq*&0v zMa{lVb8L9ZE-pwz`=!dX3yxinxTW1tKqLWAaZKs(UYsZGU{wbc8{*oTF3*WaM>*M5 zP$pU3=6_O)@6XjWQ#tz7=TW8EZTjl2semK9J0P~V9eA&~He>FDjO*VK@V>N6tfM0M zz9^RSFmNmFC63j%Pbf{f@`J$EozC_o*mzbg^G-HM=r_4PdOocoRw2kSlT!(n>(Ry# z;6qzpAVJ3{i~e2!MtW(ufsygko|gcqeA%_H`O69$On1(L9mCM)Iz35R4+Caml>x89 zRKfxkM*o4dsi;+Fp*taq<||#J@rUQSyn!5_ z=C1F04OxQug`)Qy@jJ}47WSIzl%Y)zA?PxHg^&0{et zO;gq+G=3tj(0oN1-vTAbdzo{eS}S)(r)nNP`BaeFdf{*JM&Li-pr14TorHJ;Sdv=n z-9O;Qnm}*!D6>;WzLuwV;-hWs3KPB3QrVV|>xZam%->9)WfR;6~ae9y9sQB3X)8y4u6QFU%k9p?`9)BR z(chSEHR9^3Lzc|XDxYaA3fy1C28J5r8(EbyjA6+mJ@%czDnBd8;-rqaZWsFQZ6&*& zGnU%6pz;oNZiE7P#dka82u}qe{;k+I$1mA9^IR@<%XY171}rTYvKmC&Us98ejH?I~ zO^qYGIg{p8Hfv64^-MMUYdeNP*lU09k!t8}>8X9>KWr!VSWIS{R`l)v-6NCYNN$RpeHeDJ6|r(lz3rD?l=C&JhVbQ*kSUoLi{bMw}#)0~S{?$UPF!#?6u%NY)cu_p3UyIho955oG@8pA+q+gwJntO7f zyqf$LM>%xWMk(wh}caithNbzj^D(~oVG0Mi|nF;j?Rsc z2Xn-j`dNNkurMURfmToyyKq?O7&UR)o(E_{h%NZ8EmKS@W`-zIw(2!@!VT=;ldm3Q)LYwvA{mrsBR{`O{Zj88!!sF?JvRwD(>{sO(E&uxEWDIjc)b& z7xYZh-w*J)Pj zo0j*83=LL3{^AnBFA!Om#iR(W;6hqoGggAvv0`_SV?pjBFOEsnaq8{Ajm)ugpl$$7 zF2az@&}2*rxcVS#my;k;8Vxzhcb9)iDZ&tR(Z~xM6duBeEomsxmu9;1vS4rwk^l|& z5I!6xUwNi8E8DEfCU!;dT*KFqGlVIA6VtbFct8D zlSi>H)9Uenm=;FwV<|hN`X}bRj%25$XI?CxI9+T(9e>P!*)99}L5u3Vh(ZKgS#*%= zdljlW<(JNlFaOF}uZ`;e$Ns%q8&fZM;ssCB1iZU`zvja?TGGR>&VnGc?VPf6sWa8) zQHh0VtG*^Hn2RZxs^-0*o|>4C;2~9tDeMfRdvRs@DC?oA{7(1L&Y?%K33emgdUOGG zgj@^d;cYS?kswc5%CPj-B#&QtLDsl3(H^lHw-vxJvd`$2rm z0~WQC9Z~bGY7)9CuNK1FlR<6A<;ZpgzZ&#Hw_EJGQD{!2RGcefXVQ35CM+o9daaYd z>!!OS+Kc0@zRbf&rohThUn+p)-dcodm}PAR#Xlpi!+nvdP-Vk95Y9U{-WvqB2%yQqNTGk;L43bSmLkDFXx?!5tAKd7e2oCRDq zN;vI|U=HhMm$cN8d}N~8N-laMzegBV0aL>yJ&FI$zTU@w$`aR{lPt+Q6=ca+1JUkV zzY~bHCQoe-!0-QX!{(!4jPUpkzkxvz+%(6;q#(=?dZ8$s`&)lbv;f~Qnj}*5!Di*# z?Q0%bfKp8GCJ?ab!dduA7Se&U%`WXrcIVs9BTuX#eKarbSE9y(FCai--8S>iGG`40 zhiSeLZV%IgJz*3^!Q$8jy9wGpd9hG`;yK_F|FO-&gOOaq~UspAl-Ph0UK{z!Q5|7d3L8lOju_-;#xp zv*`P4NNs7X!L0}8g$+5J8PX?w(N-)&iB9U9GKV3n=j}6bE=)dE9qjVa2&DD9uz+wt zI`}nQeq&?aQHS?2nR70m(d9^UdZO77)@NRRUX)GG#6vh zCVwe*RlOfHpv^k42wY?~Jm8$>1&2J7Wr;1t&Z6yqCnBtAYklSlmfF5pnm$1PjdTWC zFJphi{q6$}%D=LzM7#HC3b*Ste+9Gf@yl(x>l3&a{D%~%OVrvGl;Luc9L_kOxZD~e zaLL0QLq2atMes{7cW2MdeXsD~h$EmwnAw_`g*`-p{L>HINs??@;`4 z;@lp)P4F88y&v)qyIPQ^cQ=q07Pn#^+)qMtqUl)WFJ16|CH}AthsGAj5Py8Vne0rN zfz|7{&@0=pR&QmKPoW(Vf}6L!nZv0KTCHR}jl@-Dl8yyc+Fo9ka*v2w0j z7He|6LX~2C0>B4YKD=AljY63JK3nk$wKAs*vn>+9mV9l=!`g?eHgR+k?8tZr-|Dkc zyg4sqJ2}P>g=4!yQ|TfOlkPIZ)uyu$s;4MMjRyiQaop466gRq-3j zAdJ0|VQD?R9Ajf}M7sDH;1J&H%ZgNt0v#oCR+{BQ7Py~guePIOXjf(p&@hg!%j*+= z?dyT`aX<&bcQ+8))u3B5GxxW~L!e3DC^y(H4(r}Ux?);ND%am=CW>yqFzY6tcbr4c zVmwEf4!?t9aVg60Z+wd%soDlVBl;>}XKrC3mBw3%KUebRhCMz4c#2eiiI40$<&(&9 zyG0vc5oB9C+4+@^Ry0~!5JJM8W}@f;0q!vazZE^~HEt-O?0J`UL}5GQ4@mce9!7uy zzpU-!^~vJ$np`d^D(g1HXWp{?pvM#6;9U;KUDjo#!nF|pYkBTVO}3q~T&ilEYO6Fb zN~KyeX5y>AUqnJfE!cO2sw`&e3hmT>z77Bqqt*DHcZjZE()OU*fL#uC= zZ&OSGxE16jeZGg9#RNjUY|WUbjlN>1@% z?v|)UL=qf~REe2y2R<31CND(V)D3|vQCoqA*qW#F@R{dp6@@jL=8Z{6^WP)meK8+) z$ktc^q2r}@bI~Onui38l^Q@Am5)W+QMR&&H^nuwgq-eqVXwd<9oWxX6TNw?5l41aI z`H~6C_&Ve^RsHJ)%^w>TrIsjlkm~dXHRC<7)%NkpJYv`rlf2Xi+jyUQI%sjX@LP;V zLwCd=X2)o&z*!x#qosmyfBhb5(E3-B{TVaR=3i#5^2^1VnEOa|@!G5ZagIFE$g4i^ zfEc3tEx$Z?sh_0#EIN;Wax@!}b-e2&7JaPQWEwZ^jGKlzJzmQR#~BKss;G_}Lwii| zv0I5C$V8^Zjtyaj3{>9%YgpcH{$HGy@;_f+H3(Rd^?l67*DsiD^b_|9@Apw-=F|ji z7h-FVd0)#@_OcqZ=T#l)p_W6;0T)R!B|Q{hAdcD zqV^4#*V~3^?wWuWMm=$w{I`nilPul8bT&k@x*HEFIhe}H-Pev1R}a5&{oxMp!S$H>htLwuZ-zr zNUnm@iK_tF#DACQ0?u=7pcNb3Id3libYZ+&mKguGue{UrQtU3N*&l6(nb8M-r&<3K zkSzgKJn;<6^89KyK5tU5 zaX)`&%wl*U+b>(PBj%9PHha^5RR=5=oIP&H4%+=;8Hc`o>G+#y_Jp1aiGOoFXG6z~ zO<$QSa`5aCJ~pD+bz{O0PML%UNv?3NT+<9R?57TV*M(=}nwI_$IiP8`DVI=i*q0SF z(X%jOtE@F_avJ(WsBZt^vq{uXR^aTqhGR-gFHwr!mel^T6TUp{;1wO@ufeL{PC&`B~+gt77@D zz*u-~4V*$gg;aA&A9A6o7yZd?SPx7wjKO5LC8sR&r*Ejp{}sR_)G}w1_m23)hVEDs9YbBmd_ zqEP`xDK3lhS4k0XLnw|%@a>vAFm!;5mn_>>ooV7#@^?!qEp_BCqA;%tm}_K5iKXW@!HMZo}xs<{&=(7c=n=(6&lDdiEqk z>AZW}$m3*+k4apb?~i_Q3cE*0p0%(Bh21kfNw8<@7B@_49OK^7Sh0V!`YI( znlwwp`do?o@1~bjFVC4EU!!wmm3|YaiiyjVT{CE7%##Qo-T$5>kqy5o$9$qA0&v1a zsd2cLIC}ah>E$m#4HbPu-RQXF=X&_$(?%MpTjMrBttAV$Pkiykd}F=J|50?^;cR|w zxUH(HqE>B+s#VmC5wulQ6>W)GNm09YZIYt)2u0QY*)>ypMPl#0cM@XMj8TH%^Zl9Y zT)E!!zSpzv=bY!hR}=`<6T^!kDpkC#stjDA0X1tMx=JePfZv(8|HG_fTF$ayTe`sR zk0e92_HY%-i~EqJ;^h6kWFQr{ac3Iw{G~g~bj)}KJmmOWS`jHXkd)~MIM-C`euWCr z7y;aArqtaGU$02h{knoKQt0!|NN`T}T&v-w)vxCP8pVqfR6^fi9R@B~F57#wi7D>r zM_|t(+R{R%keNC@PyBI@_CQwpB;g`t(|j6Hj`L~bIf~hg+F&Zt2y8DQX4EfZy@?DW z27j`?@2VnF8d(}dprZb!tu-As9dHInx*yk4qYT7cJQf}j<}vkrnPWiPO{Pc&b6bn-$ z9f;sfbbtJM7>hs>6{XD>5U_WdhI#XC&-hlwv^gTaQywxC?8Np?U zdjhq*tSiH_An5|22UhroSahHJoqodJE9fUkdANnwjAS}VwA0MEn@T<2OTgRiMl$eb2%?VAy01c;>9~B8>H-*uDfJcEGJK3fL@iXk zj@8@R`?>WpID$Fm^FR{x?b$v5jmE2QsxjUzavM@3Ak4)TyFUb%`a z=5wdPWe;)e@kuEpb-^tSY_ zK6p18#)gDFE(AmV>h^%)m*pMeDg``eeH~7O*tKvUjFEV9oH#%J);b-EO6e(y5t$C! zTJO84D-B%#T?;t>(s|9c;(X?}VYpw8%{_QYYQHc})iB`J@ScM#X&}CpF!g;;K*t&Hu9u&YqK6%(?!*;>{lwh} zsbz$|vXnpRXWBm{c4J0V4ELT7M@p+-8u?M4yYB9E@R$AlhPEHG>k?le8Jq@Ob5O>0 z2hehV^4RLnU(~h?PZLIlUdsawKcJOfl{{!zMMW$=Dxtglwr%19VZsC}7W>d#86Mjn z+z2z>U(nOx<&HjQK*JtW!OSn=GDx&DwkUeNtvgQO&+S|kHMrh|io_ESz=1N(hF>q4 z(i}&V4)K9tGiouJvAa#jzgr_b{)1}|6@I)iZR){ys8G)Z^951X7rSs1ikKK${L~v; zsOZdKMI2S#35dJ{)@_FscAk@N!$md4n}qlKUk)fwn&K~VmY)#vaB8Jl*rYdFn%u3E6ZV`?32ElI6COCEU1DG*Q8GP2uz9GpWM=+*SB! zHK8`!$?=~_mAFUV1${n>_>;cJ%W3zD!~2Z^G_d_2)*TuePT#mc+o2D~c7HoysnX7b zU7xycO;%S;4Eq7!phiEf2Ol-bL)#*idH|L-=7^HB*gd3VO8wvBP6FtAhDQDc^83a0 zY}{2(&;7MoqJz&+QFD(Ui(O#A=Y2y0!?0j4R$hhtgnK*6tmxB%fLnl$@Nthfi@eyh z2GV6i)g_{2PnBjkgLVmULcbi}Zup3~TP4UDvHXDm$_~Xc<~p~wv+dr5{O!4k9a@Hc z3p2jpf(9G^>n*9b4o^OjiLTE|&1(J2d~y>bxudgOCbFkJW9i(PDRcH1S#p}y-e08< zPtn{Mz>Bbe5DEFGwA@ijJFVbIY(SWd@*+|C7t$B3D#02zIz+z&S61 zMl>^C!i!)D2-gEi{DF`-1GiC%#G5m^r*q=}o*=&(KoKUC(5hkq&y9CCI6Q}fXBJht zbN0w}C5QP6w3EL9upP@UDU>!yd)z893{g|G>Qx)PW6m-H+2%3D@EX~L}RC&O(eQB8m z9=M2gO5SS!OUS$p3x)`un;kqfNall-U68}fdyVlqnpydpKE%V5)p{R3|JrzFEOYqB z$cv4^Ov31HIghgEwv<`kRn^ih@tQTGz$5VkX&JYV?f}&{6t^q07fTNvcF9%f&N;4Ly|{Ndjm!dEOC&097|S>9 zYvFIN_j;_*;>07RwR#+?5>}x&vYzoIEw?=GRrXPIyBY4VOr9pogv7cW{xT{qxqTi7 z`nAW;t&Z~`e5P(7gYJ`51v>0pW^9W_!{*qJdsYD(q-O+gm{O>jb=bs(Fkx=)0yp2zr!n@Ybjat9pKm3L{uP)$XAL-8=uyYBXC1M+Ij;JX zfbky=;wMX2l)}&*G#sU&=*Q>gr2j%7@y^)?Zp$xAz7W{dksDjNfZ!FHn420+XY$Jh z_Mvea?(f^TevZJ72;12`hfKNjmvx@i%%ood(0`AK6J6Q@sB&M$Q|TO-J9v_j1BUm) zSIG>vxQC)UFf6{sio;DYr?rg?B)bsNeH+p9zc)_?_4`}be1ntx1oiLz6?~Io&z&+4!XVILzq1D`-dvgOo1BiGPYZ3acGRMCTs@kVoEu?q;c&IFQF>*4a z78wM4-IQG&I;P`tCM=(2bidsEAHW!i@#|HTb^0i@x2jB<&pySzGkmh#dm#P|Fvv=d zdP1c4DQM3pEZ%AQu`X4e28{9v7=Zx9$!IGZwJk2FTE-PtW4C;kl(68J^Rw$y+no?^ zEIYnxVc2`vp-C3o*j|drc0v~R-M+H-mpk7ISC7`{bjZLrGEMzM!p%BbL!E*Q3o6Nv zkAzkT6a%c(SI79>rUvjvbM(2_jp15Pdheu-Eg~d%>vZS|EIPwqU7WZW3}Yx{i7RvG$aIMrEk3)N{(c5rhZo=NlQKtJYK{gD>uUd6M6{-IK zCm!2^`!MWitDuZ^7@hBm=T%K4&~<8%2Pie1Av@!!ABijG83%rM`}c|tab zn#?N|O|WcGrsvaj)^^|Kkax#1N5Y1Y*K;QItJ|=y9v>p}c9Nwh)~nz3Klrd8Q~E@; z2=?^1Q2_TeY=9I4yOl-Cvp5(9eYpvfLn$9OjJO_zYl~5+Bv1co;^?k9H9Au~k)JB5 zCd2L?bUi?Wk~9`RVcrmp4~CCcQCZ94u~+Iy5Mxd9g+j&@x9 zwjY$BAL$g42JfO_6o`v$vklj5?w0A?ot3){25@_RG|*C@7w}eI-L}d-ovtzFfQ<@%QbZg zPUJ2k)JstR4vlMRu=iSA6!KOApv98OI5e}){@$q@uXErz73MbFeD%}vx&2AY`~xQn z)bF$GpNW`2Ajp&qEF}H#98@3eFIS=$-d(yAp5_0(O{{KBd@AA{8L0qg6F#4!91eW% zO6dhSecs|so}k$CuNB?{VEuBR*2A9{dMa-@N*kPnr3y>$N!aRIc>8hoN8Z-znFqQp ziizf7&mMU4$Lq;M~%@7}#5t?@MhjG%^n77Is^C zD*v$g#<1hkr)6dXuPz;kjHFpP{2Y;RZnJlD&-8DHvZ4I#h3(W=(0+OJ8NM+B6SRI; zMs=tJ)pN-z#)FbL_rvcJAz14Jsb0&fP5Gl=F5A(GVJGp1kI*+U8hG<%{p;J3 zZlO&jt$}jAW;y&L)h;0OFrAbMNd0H?$&fU#5{bLM5KnO_QFwc)o*+*6{?=Aj_l)6` zLeZ#mk4R^)0p>sHhu&_|*^qP@dcnOPFE7G^psXRJO^~#ZPTO8C@3pLXvUK;T!TiK& z%`U%~uw!L&u24fhFN5+&y8=yKe6iXrCs;*Fm0{J5d?tO!zb4 z0M7)tac#{`bajp`LVrXn6hack;)X#gE)PcUC)ArocDFWcGlrZcH)z z)7>k~^eZ8-JYMpTuKqInhvNziUS=}){O7v*;{=|3c|`ld^PVeHLx1nI{yD2F7jFAD z^wfNL)ev&WsqonD2-8dZAep|r6$1H6>B<}bkXYA^>bQIeMYDI~A2*=J;0t@s8Vjl} zqCm5KKLX5fE-p%#>suXQfd{9O+2PB@y@x$5AqX&)*1YiVZSM6Otm`zH_oup5f<(;C(*cBgOkiZG-{sT`H{p1P%H z)}(Hkv@t^1;d$OEMg{ zr2P`NZ2F-hD9L=`{q@0uWhVTe^|7?KdAk&Di$+P_xSaNaB6B;kdFDm1giBg0-8GaD zhqk(sK}YR+OS72jR`$|mLx7^7x`b(dNgA|CCyR^BgSbQy`F$*I!@n>F1Wu2Q=E$nB%Q;3bm4-68tc77pg7UgTd-0(^qO7h<4n3JgED z_-Og+z8gxQ;%n^2pp!ZC__NYF0r_Shzio5XTbJY{B%-=7sj?zZx0r)=A=L3;6s zU^G^|CnJQ*WE}Z&7f|{Pr;@G(^pF#%fDAuIJpnRYc8^Fd7bkZUVCYmzcyHoEkQ)Yx z7%2>bQRjn?xR$2j9q!+g+ceCK6MjJ&{1N+C%H4Y%5My%o@T zo`u^GSVEkcFQcg3|0INpQ(!oS%$IZ-73@|nCM@dyHr_*pj06*=)^xkj^s_He{m7UZ z2J32#Sn$>Jk7sFq^Z}v>R?lCP!Pk}S6<4A!6ZcqFeQu&FIuc2l(1fMUaOL-|GU6k6 zGO1V!+;t|iX;=ts_VAVWUh6|CuF84HUH%}P6V6J(y~-6EjIMX2jvr(x;%;{FZ+t5R zkRI!LcjiC%zZsIaQk&^U5^i0)7 z>HL*v8%kL$@z2)rV=lkr!Jdm*zdvC*jS?xI$;*h|ru&)61XX`$d9v`s-}d#Uoz+U z{Z1T@?AwZ`sG9!<=r6j%{NWsXqX@kwNgQnFY{~yl1LoZUk%E>?8#q*qSD{2IcYkp$ zm3H`UqUB{-2Vcm1?p{|~kmB6PEv|?$MuMyeS0{?inJm`vjB@#gER6t6-!g{=QKK|*V98z`26GL zMv3J3(bROqhCK=IZ3ph}SuTUr8`SaK1a>vI8MK4-V-k%YRvs>Gfznv3#FT6DXMR}# z;G0T(Dg7-UN0G=u%(y_xo9p6wBUv4w7q899W9XvRLKyF490z?K$eG z5^LQq$;BU1l6br4V({#}Dfg9|Y%>YO>ia`oD(sp4%Iv<1X)6}X_uK!qdVD`_A(K%0 z%UphS7Z&iF^6c+~#9_l#Lx(ar!$=p+BL-Yz2WNQ)?d96jKvVuGilK*zcfs?Y`hDHd z3(Q?wXjSnq*%yX?4%O6oC@2t5E@ep3$y;FzUeA0SnB|8Rqq(b+Lp-F{+3|Vlr!%eY z!c?nJrb%{hXG9=Xipgyn6(9qu^7Imsf4Nz>uW>spP=wknG(!jwMa=&s>{~P4=_TE) zc#Ix#BUZIZVa=#v!=%V{$*VSzQ9xTej_0m!H$sxce|%EmSD=Y5zP!n;cuLg$0Cv=H z4u=FU$N?*KwEK$0r?u^;Kyz@kHtLs?VYAr!q}q&L_uebz;%fqXb0V(~+uCf4ZrG9) zIBaY`nB1$aPT9>|f76QFPaR5oabvex@;*fWnTWZelf33!Q98xdyWgz9_mjggkqern zn)oe0uEsal=9@^oAcBa)EAW+Jwne&zl7pptrvVm>9C~?Nz_t>24!Z2@enaCK1wm5} zaT9H9w)l2PV?D@R1#w_P@7vA$PR<8ZJp$Pm5gzUwmtEd~YwUmp|oJBPFnLQMB<8A#XY`{JqrZNhQQ$(ahsU`q%dg z^&Qx(86gU;$I4|nx29(>OXuPZ2SZ>fq3r2O5?0T?nR*sLB*Tm_Vo{d zcmkv~H`Mt{i+4J|v-clrXig85m>3iwiHBwzhHu?Rza+Wm@pyL8jT>`^shlStIZxd^ zA{4_|llwln=wz$TG?c)|DHwb>&%OL_Z&?pE^_W(e5c%8>;Au!b=?S8%;mbiiG36=d`Xde60Mmac30W522XQ%>A^h{+_1 zq46J=7by45nDGjM|KPLh)lH7ZWrg;ORsbF}Oy&Vo#SP z%&d-d+qE<)%qh;&Ns%OrJ9@RS^2r6ezXWYiMl%5y;)`z&nbW$GHJC3yh*D@N)F(Xg z-dy;JhqF(!E56yFB6#5r@tniwzqSMdQhC>LLbPL9mIN&g@N@A#Zi*`dTN=-_zFosZ zB#(C;Z!h*pz4V5m-X!BvUnFqZF31mC0hXim5w)3d65^(N1m-`GI{5r2j#!>L8Lc-7 zxiSYHe~dfWm%)zm);&PWJf&@TQXfQe%w~_%Q}+siNcNy;59KfFKN9)<%*g?9ola6W}M{I)ka;HlbZaABHvtn?SDlJrSzc{&k zqRaOB7UoXU+fQGRzw1KeV;pe0zw4A8k`mk@nv_dcnzb8Vsux>l$VQt z__LEU@0`4se@4f>K&~94BZGA_*xTh_&29x6dMkT&1**`&Y7f@W+{{3o{MV(qVoQMT zj78EG1!YFmDAaNaL+ABJ2h@CfD4-@y?yA^?beS~owZAY+R0+~qX6TBx#H1!-Z3dgE zF8+8bljswiREPRbWz8Xj;RN6gAsJMA6P}*+Jrm``2aMZt5)gajf7kCbl76A3o{I1$ z`jP?oDv$k`K=30D7SMR~?WJ5R8zdcy1C3fm{-9U4h2qZ*>v&-#Jc8lakDZ9Ld$G;5 zA3y+?&&WcCFf2dfN`BCvkED5?25fF&zpcQ6lb?Q^db0~L)AEOIn7WiXq?+r}?5#Jx zBR_3=sZ{EgC6jD~2YP$wriaxc`A(=*!WV&+)QWE><%k3J;x<5GE?b@Ro!)WAEDB}O zF`DyF-b~QaBBfv4pi}(fJta-xJ2_Tm^mf9G&@yMwdc~D58rYp!+I!%KT6I2cuKY>F zZqEk$s8IL1*PaOzxu+44F>hipu&5x9T+yx`w5mi#)K;w6^cBlT!M#~ zvtWJHf@xgtlcnb*BY#DtN4)j+JPttUfBP#qK#?R5EhIpCY@+7rU<@Qm<%0!|qu`N; z-lu$?E?c-WLgA_z1(&q91g?;kI#!~dqe*+m*>d%)vXj``X4W&FKL6lU?%+R-O+&L_ zl791z24Pg#zmoqLxqj%xM4SxR-QLZyU|li2U~wDXoUcghNVcR1zu)DQ&cG)Q@do&M z{X2rMp188_*WD>RTzct{6RpdBl=j{f`=Yl<7r2-B`8ewR;XZ@D4le|E#2wxbzxtLX zvT&GK&8^^`RjoHFLx(whQIm-HqIy|OD)gacGFa9rSo$Dz!Y<2z_QjLq(FCg*fQT$l z*4y-m3!Zf)(uc{_j_jw-r*Rw_!(m=^_V?YOgvY*Z=`XNiZm37_FD_3{qh1E@x9eOn zq`SM&?YS?DPQONXd39Z8!DvZN`$gCZfu>f6)sxo*735v#(Y#|UDhTrIDk#fqRsN3S z&;7efFS%mVcd)Oob5VkOs<}oPlG6XfXZ>%7ide;z!~q2sHcE(^+U<)+OVM&0BVN`HN;6|eyKEkhW2bHVjef4Q}A`&vV4IhUZJ`{j*)%O;fI z)}_KfY^ELd9XWeT$<>7HK*l409q3D7SmtnxWjOgT&v~_$=qj>mbn5f&jM=WkzieYh zXAX`GNoV$S+jUk0H+^ym;po1Yf48}w{z%t^ROFX`ZPN!&K$=G3qeJkSNjJnD#LsD> zyB*@d_bA#$oCJzznn@A3TDMv{R69=g2H>-bReQ26zjQP+`$sE@POx!>(4m96Q(PQw zIr~x6hmGLAUqa9Kj!vA32ZqhzoDxuhgRgnCK<#lh{;5{A9`kYQ{MSCDMxU{)qN`5j zpMH3#Jc8hubPgXtYL#=JOJLNNL?iwksN)k`WA@>OJ8C)+DxHVX0k=@3J~mR%zWw&| z{Y#>&22@6Sj%qO}oQm@m#~bJ^42MZbfc$kI#v6K0J%)(CXeli7h)p_nGptEm8Bii9 zS=XsTINz%+csq8lxn0_?(w$9z_g)==%v`iB8if_mMD_(tb3B6U5yiKwTQxhVK$)w4 zZY)=|=Yy|_b6>AMA!)^b zA@->1kaA>HuAc1ROFL*}CSP14h;H7^_Lwx0?w-3urYCPL&3Y7jlo?PInQeqo(>x$a z5p&%VInF{lpOzZ{x~@K6-1W>QG|=Ktdb|cO1PoTN-LJOa!+9ahtlXYbu79MnjwOqg z7{R-OP3m(EI_%pG%mnuA;w?0~i&wknMS1y}=y4 z0+&M7--&bUTLF)D^ks?Ku^=+feveiz!Y#Z?H;L-_1@vA9#WmlS@DXwjwXHSk7njiP zMdI%c%JGwfQJw;h#fk z*w8rQe2SeOIYF0yuj5Y^l(E*!c44UUuw z*+>5|fQmcc;;M9O*51@M(nTF35u@&H3G|+Q`b*B8+rz7J4V*}TPbVRCJvqKx#y=YA zh2t8Sb~zJl5R+xOoPKwUTX|1n>T5 z>^;K@^VG3*%c?F+C5q;lnI-Z}c)9~&J!A{r>Q1sr;lOabUa+hO>?hzH_Q^tYMw_~m92 zxB=w?K<=?eF0Cpd9xaJTGI8kDY2wN5U;GU3h76xb>_E4t=)~nRKDoQhO5I_AKo0L7 z*gQ@k{q*4^jIWBeaa9#mhH>z%L@1WZ-FJHAR{3_QZqM?nlt`f*?c$fINaWjh#93m+ zd4sVw9P?p&He=)43`{@2bX`4mJ!!>Z?Nt#iS`9w8woE&J>jC!)^N|>ChO>oStkOzK zN!~@@4py+lq17yU&nCfgg9g75$(fM9NH+}Txri(aWG-CZ@Nff9Mj+w1rm5cgx4(IG zAsdv-#(eO>gfsatT;TBh9UAu)>w_nuX_@(1(+_k3=Q1uaJ0=e_I3p~*e#;H+fy`#F z9Guj-D38@q_q-^&AxU>&YIvP(M{?!uEg7j6jM~GMy5iHo`p%y3YSoZ_KXAZ(EN11bJiUR2V$N?|PQFz=&G{XjJgJE%lJ^ z!XBYVoBbZ-`J^`X;R*!-P67XPSx?520@2nuI-l+#Yy5 z$-AJ9JswsPmQeV}Xy~$X^W}20Supog=*`RTLCLK}It%w9n*U@EK7qE|FPTw4>;DiN z+sD98hMO_FH?HjPJ|qZ}?vJqC%FdDx;EXUXC8yr|MRms}+g5bq{{AxMMM(G}7APDR zbIn^?e(`)$bnYfi<6PvG#h}D8-X8T$#pS?Kx9{JnBR02uPMw4JOJ(0sL-{45j1({T zE5*6h;YU>7(;cud+-;uO(L+-RCs(SVWG@88< zH^oHM|86^Z^5N>ma)1b=lNNO(MWlU#X;Db=qXY)*OAuM@$x6B7gR53gK#)oZKGu$Z zM?+jq?EYf^W#cA=%Y*#&QA9W7&1FFZ94W!+*L4cbb?-v|7abmqBwlY?cAyaV+lwZDx~qz){KqrcO%C z##8fxICLAm9@XTXxi>5RMog0s_z|fZI|*Ru2KzCQsv2y zB$alwRYsq@CNv0X*KvAZJP+(ae!6vb@|>HxlfgSn@J#g?b7m6OtcN-N1@=w*Db~Lo zo+IAjht()*yaT-=^?l4tslSM)^fy2{cfa7f4v$2Gz3cruD`)D zA3EKvQ*os5LGvymKcM!HY~>M9CfI476<+49%!{iGraRO6|9JNQ$m{YI-sbLr()r$i zy~O#rG)ASh^2w}PE^l5>t1*{9qtP|$Ji+%1b83?ui7bcm-fXDm5IXNEK0_X_HRiTq z7AJ+^^*8E>2$(K%Y`bfQnQzd2wK39}<*H1hRUkwZwx3zY(9QnzNx5vgDoMX1=;G1Q zqm{}pH0Q35?`7E&b%@;s@>6|2*V9LEAtmTq<1T@`Yp+E|#pjui*fmrX@1C0`#tO)) z!%Lxb9IjnxzC57KCEZ`$?Wx-;AD$c&kts73Q_?)UqF+x9{=#he{1v6jgKZ@I)ii|m z+)dk@8p8bdl4kw)aIcoSQ_Zkq0=u2N%zIQ$GRV8!`8t&bBwfP5B9xA7rQuE3vltf~!1z zdaCa*H1r#LCX$lFcIeX)k`Q})2_*`hB?@4>M2?r9ti=0qUAIH zI?g2)KbVWmnXOZMq0f>dKB*T>#qFabj?Y_3nHd+^{pdzJOqSKVX-y~(Rc|ZgFxJQn zCukhv&sdppnuzFNhVoL~kE2KTh%_aJ?eL!}Oqm&@@`V1unb+Xz%}cCzJFJ6cSpT4s zDV>+g+(3Z9wm;R=#85^wD_#l#NPX7y*JWr43^Qztl0JPPmsR}v_<}Sz17RkHGk|M0 z)2kV1s<^+aw`isRySZZ^tspS0=qKU&2}eHM6YwUC-vyW4>ddhOORi36zKe7`7Dh!y zqQ&|G3b!9apNw&G^i)gl*4f_oxjNX&UjL4=fUZH+NH)*7&rMdNMOIIG_dfTz{^Q0z z=7Z(zt%(a{yBhk)a3hbMDhN}%X`Q6!J-c4^`~1_rU}%TGQy=U=;?v2O-j3N__rHe4-@G^(1M57~*E;D^_a zOrF|Dlnv2G4dRZ9J<>gX)-P~*EqWlqQO&#OEM>-KCJs6d8T>TxY0$}`Q976IVj1g6 z#pI$x)1V_LYr?>bHk!W4pnJ!&q-@e`*~4!qwlQmB({1`R&mgLEr~KUKi&)8c?$3z; z?%a;-qdCDMzxxZGb&@&ioTeGht@f@)iKo_671L(jYo3#4-MgMfo6rwYi^b8u`5B?>Q1LvYSiK|D9If{8H*Y;H4mLw4v?jR8sPmbPBpI( zxzRPps@ph~7>jI7Ym_>_ZY z$A^OkUKweeA#1wGw^`ei?lJAEkRi6?P;I+6@e6eQOU{hOY9U(f?M0B1eMIFhi(j;< zpS$e8lDZ=Ii30z5&nDO0#_l=me-maFbv{6kVei7xzm0y5{^;_?bL*4(M3g)yxUM$M zuB38)%caP2=Yzq$LM?~E-OB4Xcl`R1bE|P0EHt_`u}GFI^vKqu+HBlP&y#h9(|glt zkOk0GIo%XJUH0>Oxv-*3)LYz3O&`-CDjynlx3~7zp@YKzK)}5YWy&hBtG->{bR*sLt`PN6I{cz zWZPq}^`Yg&X6AI4@;b;+B1~;oBAR|xxl8)DUl)5$WhI4Tw3Yt(!)R@-c-aLVtL{Gv zGP9Wa1j!ZZisD@^7V^GwyCdzbz`+8fJbb*obh=9+d%z)T+Pv&sL}G#-F$Y*xUQ?Xx z8qANbnV$K)$pt`kTaPZK*D5w`iv=7D`$deem76{R9P7a)OSW}Nc%xXeiVCEPwrbkf zJRLB#s*_e6PHH00j#;(U-2@#FJgOpc)$f zpO!S{bu%KX&HrzImaF5aPkL#iT`gud5!s(utV}r7LOoa#G)8G0=e+D+GtQxQJOrDn zM#&t(e%p~#Ye)4=oFh5q6b6N6v!gNeRus}l+-jI(HVecUS)E!uf%-z@w3hnxdAxoM zYmO~Bjq1gyfmYf9IeW?PF~Ev)B!@-f@8bt>Lt~8?^OAE-C9$TQzdi$lU{*lgSn9Yr z$k%b>t;eviYW{KYhia|a(ZNZz6vc<2;RE|0!SfGPt1CQ(#v12$f1l6)REQkH9|Zcl=k^z4}={M<2#^jF{i@ug3`V& zx3ZYurlWR)Hz&7wz>|&3Q>ESNnv0g$0~4A4+WwMiYp;e3drq~3S_RQm#!s*7QmeC# zi(&$FPOp8`1J>%9ti}qk9RF}EpVpr{z*#q4gw(TaEUMM1t8{2=rqNwIY_WXnpjPng zb6BI_r|@PEt&FupTkvfJs3zY>zi2#nz&rOpO0PEIY9V)xvjism`1$W{nplUqZ>POPHUcXOpCJ*ZZV$6 zcK%K zqv{coQ#w({gRH3~mpU?v4HC4BX&DU;Li}Q#aGcCz3$TxCUYhb^$g6!kv|$oIK_Z1y3a2 zNDIx4+?$wUv>S1_7vHzJr(Z2wxc^^drd7CLWaq4f;kH<)em+t@xii;J-l@bQU-q_7 zer$Kq>CGLTlFD&5OwQ#)X~iG&<)2&!el%i&I=wm68f7+ZrIyM|@dfcFqbVGoR$2xoL{m$*pQY(p zhg-v>ify?B>r`vF97KLB=H{>hQ#~-!&+MFs@@MMrbDH^lNH(qd%qm$Lx#A}D^J$$% z)f(%Eq|nay9{0n>i~bwFudO?)6@4yfiIq(>aZGl-{;(!`Q-DIARw3W!{-V-8nlLR> zhDEl^mQ_!C1Ft&0D!4DQ?~o-eI!B!4n#}N@{TbXi2T5M2Wvsd#+LyUSP8!GpB+6_ENwToUM2lZIMrll0?gP)(1V%y8T|?gNS* zVO$U$XzKGpmClef6uMbD`$*=fs;Ex5B>%mJ@#$09LGY?ealNgj<-2P8FzZQ=dC_}L zi!2UZWzl3Ye+hxeYgH$U_Wugn(SH?DHY8D5aK>N-#)G0dzg(UHQ z;%i1oZ2N6Tk*dDy2aW>N3_Mx1k}PN|W~Hy-pelLq`{baSMu)~Ky>S?afn$xdH6j#Q zX>V)3qRb{XM|_xgZX_Bx-Iv}RsoQ1^T1uC2>Vs}RQ`cd4Xeto@ovA4G=+!?~JINm) z)no4kS*x;xdd4p$71h6q4t07J-1T2f-bOBN89Zjpm7#9_>nSYqV>)-^n?>OWo|Bq& zT_RGN6lgl1vMu({NBaEtVoBLgbntHB^-`H#IU<8_613M&O)=gp^mIu628Z{Ajbu8nw6b~3>k z^w}y56s6`eqklV7}&rG88QV>VvHs5Z%mA6BVkI{?&LpX!9G`gTWi#0GrI0tyY(5P*W3 z%7a;@)GmueK}^y695pkR$XshB&uiZvwNr})sH!o-3wd&WOTCK6q&x9HoF_r`9qO$a zB#-GQr;3aPIJ?4&%j^_GRjUkH$4yf9J=2C@|IL*EX;wvlna`z$nW>5?ENn7w8@LK` zX#=FTek(Sv^?EgcxY1@6whHNDYBa9CN_}T*vOle~cR?qgRL$O5hmYsXnK!3Kf1bvJ z-S?-DJgO( ztdW@y3)kOSOq3QpFP(cNCbzYgSmE}x@bygI^Rmioec_A=J#9b9g~Mm z7VJ=kV#{7<&i8{W)4A5HzRLxW`4%EjSTggj1F~A#9KGHE`MyAqNL3%NaU+rrJ^cp8e7IgSy zF@mG`2f2}(xRGa)T^oW9NqUE`8b8jRSs&VN*sOS|ElIpd6raJ>*h|oIQIFe(`?H;Y z(`f%J4@?~#y3JsFPy?Ezktq$k_RqJ~CBDt`Ih|yUJ8bsDj-$UCp9)>&n)J+T zB}Z!_WBR?{b>8_~ATKiMY`If2Q^pg&lFMt|{J}8rjVreU@HBpe> zLhS?DHeaj#o8O6~9@GEMMA~$|Z5AuS=IC+Ry(Hlq}^Tw*?y0 zq^(Iw23AILec3){Gw3T%loMsMenIT3_9~v!Z@!M?QxB=%Z+P0JW@@NDXDPFTKG6#; z_0z5{!qoPIPn@i4y!GGy>CW~4=ULYV`*fzo7|{CHqvS`N#-VeQjAGWlozQtu=jMm& z)UG;>tPqR2*;B5LNXev7>wd;BqSjU?;+9llU{`Be7|AS!d)^ly4W`X~6*YZiyAZ2B zt-olT)=U$n71b|M5Z)**NRP5RF+{qE#Q@rDwN+dFq%S6juceP~BCKaUjcNpB^uUd~$wG}z%-81azdFTtJ!Qs*2D+n#)YT?>%rhz#wO^ew{)Eu_|N9Tksu z>BRVT?S7~+L#R~=_?F%C9LrZnrt~#EB+LKXsvZ$Kd))XB?BJ=DT#%K@MZ>9878`hw z=akU)>hukgtv%W_isz^}zDURNhw%Mar=*ja9Rn2kiYAk3^`8h~=lKGpUIL3wgofch zW2kabnKO9M&n{6=v9Z6PqPWSCO}S%tjCc#^6z0iY*JW{yy(%bKBkQdf0fD-29E78y zrvHz@LbEF}y=8ibwmb#5YUIm2hPc!m+`Sq7!nIfGCA{uaORekg;K9^-&OdFB$znYE zEq`2I!(&y?cYC?B;W1o-^m(R-S!^EC)UyuL+ZGDYiC+fcc+Gc$+CpE)U^cdrvzlr*Odz}Svakv%JJrY*sMdwqS0aGjI?e=+}Y$sp76EdL#0Jxr}O7T z7b8h))%N@DG+_^YENtCPtQjn4M+uBvRvr=|9-`Afw34P|mViy0?jAnUMO~j6r*~IC zxqa1wnPL*k<874XY?oQhCHS!gMY zK?9+vCzHSU?WHqApUH;l#0y=wS)8tcp|!#@wGvO;(jd>Mt|P3gy`XoK+P^#_X#>;T zSECXixuSAxSUvQ=WU97%nAB|Ou4vXQ{E!uWIArdz;hSqMP-pwL-a2A!$KBr3Dhf2a zwO9;#Gpx33AT|8@_oqcnt=YU*x{eIZY|gCy4wtA_uCYc2sPA0j@jb_oe?~P#`a-=F ziYF|f**Z}J>Xk2}Ig*3)8LT=1^Vg~I=y_CtYmvLgc-&FrMYW%!Snig4QhwtaucQhSx;nH{~K8fdaq%5>1E4NO4Fn< zyJc1|*s#0qI?KqOJia^m_;pFw`whbNHlDK4hVDB$G(~Hr@z5kc?6m{N{T!cK)nBG5 z1D6~m)ldA7qwfr4d;k9b-rM2!*0`;r_H7j{LXA)}w^vo|+MB8xp`@brNGGitwPM65 zL2O0P2y#^gvG}y%B z={#q6aR#T|8!G=#Y+b5aM1cd~ao^IIP~kw0e%0W{OjDESzxf-B)6c5$^qNL-R}I)_ z(F2sz?Hzu=WKoy8vt4j77N5OyP_6V{Zq6{Tli%k^N69o*VGZCU;?Nv2HJDqJy=no6n`{{tHkc?h-2{%CU zguxa!JQ^0#mAJIyhNtaOG!`HIc%yrIY*Y+79z3o_$vf2^-fgHuhJUzzE|-jGf*`^) zKy~;l;5PqDNPg7O^rF5L!26X^3`$PAYva<`!w%o~+ib^q^W~%~K|^PJTCVK-`9#b- zmfBJHeH3iJ^LV2j*|g&yDKhz>4t!2I2=crg9TFN~u8D6)`S;7M+l}8W3^!`kxHKs? z;pWBBz&)ZUhE(fy3;Qgq&fWtKrnwq|5G#f)_h|osGphGKEkr+hY&>0e^({Bj;UQ+&~I<)FOSqKbV%_Yu0(qA z**pBNIkgpm(a2ju>Q^nVdrz4jIG2a4>vXuF8!a+xlG{Fc^#H_FDxFLF_oN#3fnrEx z{by4LL&4ulwo>>3*T=JdM~B(b-sa0Ln0h%D8+)Hs5C7gyEWkKyd4Jkn^azs+IMJ0; ze9_AB=WX%*U$NM^+TQDTxI3~fQ_hUu94|dARFLkn{h*~j{!&bBq~tg+S$T!InSaa#(L8gNDr;_I#mMMe7)lJ|S_x!Yp zve$N{s7|pp>}=V(BU%7}O1Kskm^azE(Df+emVX@WX3YoZQlH;L{JEO9dfD;onec!0A1P^Jp^NO7Mi zFL_=o*5zob@g}KM_o6uRx~Hq8slbxda8K|9es1MzRJ1|&RDpx|fvH4u!^*7}Vw}{; zQX`=FTE);`&G!)Y+!MXl@l)7>@1@_klXjsL0)An5u{u7g&>MT{>S> z5bU#Le({rU=veJ1ek{CgY1a-pvdk4&oB;NNX-WTu23!?wFzR`dN4GY!yO)R;;pS&- z^YV|G<9E|~Z-Rxk{F?nS*LWHK(Bkg6=WljbIbY~d$Vr{7*|)5&`7m6Xf-vmWzB-Cj zz2t%}=n2n(>wohde{P}v?w-3bu;N;dq4PjpK!s_8k#~>U^M_%x2MX%n=B%=(dv98I z?tAoxlhJ{VbN2%BZ)de#0Ub-48B;s$^1a$iaBndl%FgD1#TIJKv= zZr1nxDhOC)JP+hEG8-=p+qKB14^kfP=Uk;(thH$X@~h?QjTUt5Jv326Jix)r;XkNU?jfYyK}$Qs8s{{--ou z`#y_BvyO$`r*MBRWC<##MMd{YzE;$#jO{}CfA!81P5Gx#6YLkv&i=Mk>;BKg&(e)p4`x)!(uO@g*C@ljZU$ zqTTgSWgRX*F6?v(4CY=Oy(%%kvXE}^RRk9sO|QJNZPC!tGPGyJzCWlF$B{m z@BNX+ii(h7AiBb--0o>jNi#U=Qf+=vkMEAT*m=Xje81-CY{?$4rz3>aA5&wz{D_2$ zx0w|ro7*>Z0oSc*>*9(SIhwP*=6$U2-QX$faM1?W;a}hu{~axdlyoB5#<@Nv%x-OZ zdVeV|>JmEkve;{6h;)keQZ=~5L%qFWS8}YJgi!Mjn>LII7O&3Nla7qQ-k#FM^v7k1Wy! zQ)ZDrGi`9caA6?1C80Oui$+cTxE5cC_hc;A_q}l9&UjP*9J>E+>|~Yy zbi#vr@4G;}yLZXh=N|zcmk{=GHhetb72_a_wLhELDmCT%!}A3T3*^=Xjr+niW}<=W zA({AhwW>pN1zclh$Emv5r1x%gOKgxOHsEeVRr0C>z$)k3kH8gJ_N`v>j1pQIvGL<` zC4)SitIVx%A?AyPVghx`=6n$vvn zhUmz~L~*S>J4d+5plNB zk0w9PD+?Jv?SBwZ_GwprLWUZ&jhLK!pR9jBvtij1;PB1Yapjfh^O6o*?6zprJi6`C zq}mIFteC@WO-XGKzmbaSM*TL%HqSeVd!DK~`R97dNiAyI{xc@Xd*asTME!tZpxx;` zKF~HgP>L%d8HPs+!6j=<1}i#)!s?)4UW@yZK*Pih1hmq;HyrVm>~$X*C{}Ld-Kw_o zQb8YFaKS@AEyaDUK+6~5V(gs|>ZG?VRjc}AVc7+5l5q_YYZ3nP)PdT{ zUK9UXF^@#Ul&514-|^Q7dqQT$Z=-aK6OFsa@+Iz#tVS>hOtN1>I<0lK#(L8-2S4_twIUvrBt2HLfz)`S>cguSY|(Xe z2UstRO0s%Bhn9HYp&!3k(+d}`E_xjLxf(O8RJ|Ugqaw8NXPU6$<1qjImn8uTTiG%)jBi=8|4yPPj!1s_}T5jh%^I%#W|0uW|D|Nl^<9287RVj6X3cYmtZlWV+V|Z2ZI3_G-}m=bP^}o~_Xq?+8yBpZ?h_ z7v!XI_DkcSzfFo!i?)--L0(Vkz+| zjM@j}nn&cfJeYJFmgvkczxUx`I7bBo(QtjSl|JL}XG+e0V;|7w0IBJJ$>TeF6<0nz z3)F7uXw=l#nccEpDOmKjb+}<$Zok1DsIlW8e3YcLG4t_4!q0|%`GuB-@!0D@eAeiI zOnxVZ>5`aq@=_s~pZC2m;p@eKDSW8r6JN_{s&!OVgZo+&qH5swv*;3kb?Y@Vb^F@@8^c9livIoH%s@=%l1Ib-hFkb>>JzTu;^w4~ zI=9=zbtY=05M?GGecv~!V>Bx4k*Nu*VFA2f`P1;}f$=E97tv-%Z<+C2@080a=VmJQ zZ13p1NHq0;?VqS3>@y9AF7q{0ra#+w02;E#U6yD5sL_VHtRpu9Lv=LVPy;sQy=9f= zH3>Tnv9b<9l0bv|!_C_h2j?a;%AtNgy+7fv1QzJ~N&;p~o1f%)UEX#qt%ZBk;3t1J zfuD+4*7zQBwYH7H#=zuh%}|2keyVMX-E8h$ij=?9zcqMa@7NoE!vD*?mj84M4ZieP z!7;a|#7iv2BM|95JtpT{>exH7Io@QO8ABWj#C(2;U$CP0th4Z)LlZU6@seeE$V zydido&iuDCO4h$RgFAs1XVlFOlH*-skuAYNvqOZ65~f9yzES-?H!Wv3_f<5xaam!( z&xsBd1)ie35&K@(fQfd3gRh*eeFw<_!E6QkdK#8;L$p?5uyVTG$Z5gNMX#>J5*&M+ zKehF%`leP5TzvEicC6`grTVz;F8{BMt<1yCmTM9$3nq)lPjF!3@ zhUP5}fGY{+R6Lf+8gGnlNuHM*ZN)=BH955_iLG+Uc|#ip9XDc*bf(7@EVKnZcUfC} z=HJ=s+%i2u4*wa&x*|IB+&t8-EIq+Jm`6p8*Ua_-R;jgK9yD*je*wAm{`tg#BoEC~%#uav0se?5;&j0Szw^aQ2wXAHh zp)O=so+^AHXvZwcr~8WJ6@hwt@4)c6gS<+wu;Kp%Q{w(BW#mb4ZQL(u*TeFq{w3AW zxs4PkqqPn-hrsM+<3VA{bN@MS!Q8sWg8)dk@8ro<{AxoSDwxmP*VoJD@uwyJI9~K+ z6kK(LH+=>jfNJv7R*F805#HQ>)(*V2;5)hQfxur4NNpK8t(y085Af9i7Qd0Q-PjA`=yjOOtauXEx@j$#eNpc*2t}+`ZjfHzH z1azDQ6b)3>@+5SW<3Gm;#f=13UR`h<>+t5X@(zx(Z2PoZS!PQMm zrjpEeLjppAG!u0qG&~}5jC;J6W}%<9I0};cU&%{0+Ual`0E&Lpxlu#TtOyoaH=Ji8Of~p5oW7G``WY zu^1Mq0Y8ISF0Ca5Bk&>KQ{!Bbh09Xc!oXLbPgiV*#a!Xa4mYmYXj~8y_nN(W^#;9J z&6tbp#`E#KC|`!^HrG%wn}j7d+yh(ar7lQJJD4RB2wT%nZXojhGEXnkAeSxI?Dgk* zB3pR^MgJSV9JFjrQuu%$55Bw~=!t~K{CCqjR@yhKb;Dxf;Trz0O8qf4h|Z2!(Wu5$ zVC`M5a+g0g-BuUn5C%V!rc$G7bfg+T4bb(h^5cluYt zCHUE7HnK#aMIu%zCqYEr#r7x&D`pHfNYgR2gIlL1M7Mxhty}=Di>?eQ(VTAm@et6> zEs4nIfw5NiB>`#Z;gW!8mnR>sOBqg2t7m5`N)+^ig>=DkIP2)T{U_SMm~5^p)%!Hx zbMnW;obr&)pYeUjRW5J4SZqN$2(3h}L7nX5?)JosJ}ymQG=eea!X8;EUAvIQDxV_NNr#((@7!Gt%^k!# zfx~@NZZZsS(-EXd^}2G4HdlzZ7!(BCHkWgWswjUB+^0LJ@+9ebAUFTIEx3@vqO+$b z&IW?_Tg2pp=?akUA5$kN-xZ%AVt8N-(!yKmcFI{oQrU(a^iHi9zo!vecgba_dU(rW z9zEUvcMg|Sl`xXcy^9h|9YhzOa@&qd@RD3IkQ!*%XMJ=z&6WmPOcn=^BKO9`WCP?P zEg%}t{}JjVO??T}&kq_4o?NM#t=MnZMVs4M@^8fiIIs{0|i?i#IJViF9``lB2h`w0(cBtpWPvJ}~GF_gBj>9@gF& zd-ym06xrC}3=}p?Wbi32B;sA4L_}%CbPQJQ$l@~RUAaP{;TD zCvN{;>FPtJ04ZRDq2U7^jroblsw55n4(Z{e^|4&m4nJSYwq0m?ZZ|W)K?2C{93y48 zJM(|=rfHgBTbQRkL^KpVVna;UXr?~#9ck227#)7iMW4hv*enn*cC1nRW*S2U{T z-@FI#Y}shgI#P14aVVDu7$z=rn?Y}$L#(2J_9>orXjVbT&=*+`-i&OmB2S9O%<^qW$wq6P}uCiKk<%TQ1C?Kodf*U zOa-3u|No41ETF+#r>Y}%A78&GC2oHuXts_kY3(8h9$Qi_YR>2nCr}^e4qA|t5>}@} zj`@e%A>1r-38@@rA07agPJ`>t?!5H9(CWWXV%v<Wj6{3{h*1z%|-r zJIt)_1Mq_RRHVGcY<+;OeS`~Tk*m#l1n&LB>(s}hq`bd^V}Iix-mW*TmMU!SE2&#g zkg82i1rx_>FLB+)j}mtA(3HCQ<0kY~aAu}aj1(W=6Gz$uS0E^p>qHQgvwiIPPMlgc zLu$s~%4xN=W?+F-oZ(vcGu;*?U(Lh~*6o!v;pHC>!=GP@uuJ_7;8lucp1kIQ?eR6iRyDyU{T;-*7#Rbpc$SNX)rJoK(0+_bL7VFTPqS>`- zCRsph(wC6PD`gn%^J2&vxes|oSJ;4?r%Dan$`{ctq)~V+G~iS^al@i1#=ZJzcj~q* zKTxH>*jtp_XgQ;jC>HXF+Y`n8y{82XG2%Ys%d;+YDR|>ts`@m*d3pHqp@GZkIg}V_ zZECZ{tXVNeNb#lp^^~l`;ZjCnq02Xfb1Bme$Vs!sah=<|oij1tfe)IJ+8JlfIc8GvvJ|$#%)>%kY8`LYC)b zb%)nBI_1kwkgf)qM8)gERT1r0c?CEoMcr)3d&`hIaH!s=Qp#?1G-cH1;7%v>f_GRS zr)t_+VT{hB)`;?DdlV>(s1?5RH>K9h6i(FQcfEyEDNr>T$&u2PJFFHt$e`KMck2A4 zMo$=tCtW++UzRwBpwfL-^rqfIC(QsH@$%4Bn>+nPVRl$_*=S@I8R*J>H@U2s zIO=lqaDzQQs$|8C++*3SpotD(&h!Chf_j`te-N{1tl!OeVQT}ijQo3)MRn80F96iY z^6Y8vw^I#yM|F%?6Jsf+%dV#jtt^Yfq9FEJ3?7BDWmhvZABg~dpMfwVTp58;BZ<>k zG+AKJIAMy}bFBRsA6khoC?h^v#wcF;uR8eA#uE}DPP$9gqR4MPh_9TzP#;$YOVRSY z#l|(zY5S~tJ|QUkeZ*-NF>s~pem#QBb|nx$dtU21rxlUIsx7*bNp^X%3|KoDKwp|D zzA(5r$Vj}%Z^jX4whd5eh?ug|RbTb;*kz(_S+V>s$8Z5mXQH5NNecIkA`*7L9Ms&* zdcdysbu{V3n0SHxroWmq*pK#9t8fRQZ6_YK*$SJ3YM`umydxb>W7raN_I>vH$_smo zN7I>#mbkzBd}ceZ&mQJ;-bHcBWVPC|X~v$qx7c@u%oQ*-SI#d4)}q<2RDQv`m^=%w*th2JIL_UhjC^{TtYi zyZFyWWu>9$#31H@6(*RauVtpI*ospmxl*Y&XGinC+)te*dEFw~a9nSB=IWbm*A=MU zAIb46%%KHAcC+If^ujEQU?#6QrtH@ckiUVW&F~tI5QcDTDMjqlzn@N(HVJs6YS^BxGCBOOmn|QNoyxLY7=jV)}kHb z0%6*?o%iZCUH|pQ7Q|%G8-wgU!EFI;wOA8G@ceL0QO}9Le%NO>Efo0~hDtUrPG_xf zg5)v5g)nuh>d^2fA9E6WW~;9T)+I-3(0nAxs*<9HF#Z#oSa~JlV9FpEaF@+G>Mxnqf6)BS zFtZYVz{n3gZw3qK!dY!o0-0Fuu=mk=I9UWl$|P&H1l(NPoB77wlG=WubDjCfD-X@t zusS!fn}JeEUwdV1KQWr`s`Y?nW=#2bTGv6l(Cjg>TS8gCvsrBcWmoSdmKCN5=--&t zhaV*^`B6@c*A0XUxto5xJ-bwJu#Q7*MqCe0q08fh1`r8U@=UK0LdiQGT(`mo&78!w%GR4utl`ob)}Wuu~#5T|%* znR;SCv0Q)I66k`U$WmftZr~aaQ7p?5PDXT}$35-=3xgJWKZI74R zB!(Ru7wPW$NPV-43-Bm@J`-v^;wPZE%I2F>p#X*-NmgF%^4JFL_eXG(&bF{+rq>K>E> zBSqzypWIz=ZzEk@+cz&c>7r#+7Ub|XnR0}WoE0}?JRZ5b&<}}Fx4eBh>e~17VA!(2 zD1n_b>O6eM-57R0wK|*j+N<~=X7d;ld8)#E0MjIXu}i&Z)g?=J4NnM*3Qde5wYRvo zndpv#jQgW%_r$c7#RAf!7pr|AVAIJSAJ?*79TA6_9&sOuHMLt%PLk*70}u`(Lz4xE zHV04aE-H?j%&;dOK;?vkat*hpEg?pDaEc5z)JYpie(TxJknzFRqO@ zyD>4hYyK(n+l8e)V0`C$JsfR<;~cz1hgykaPYjqEELMknv?;q>>$c?}e#NUb^Z9A3 zI+XpgMP!JaDQpRI+6MeO%<}F%!+;il{jP+eMojXUva>0EW{iv&X*ki$pu>0<*J!OM zI%?6t{XPikW2Yi{CaYy%Pn^R$5Y|VO`kNB(aGmTmn6J!TYu}2?jR*`)FzK`ad1OVC zt(LyVSoP}*b*9HC>-XryuMPW^ubix-%Gog2vn0$$U&UbDOlPCVh_U6Vdp#)fy>UGR z!+v#g`d=|nO-dX$a(gs009GV-SnF7v!wSN@IYgBj31YhDH4z0?TaD)R9Lmz-gvb0u z@BWEAIU<|*Aw&8jQChprXu)~A4WcI&q{ptVD;!;yN7i~>-BEZJr-u5fmc zQL~Kmj`fVa$Lv%O?Qp{d@0&-}cS0Z;Om1J2b9(yrXQ`G6^;v}H6Cy_Lb-Y7j9G~ z>>|xw%;!pZ((zc)Y`qI{nk&~gDw#rJlCG^w5!bm>T1?FHP1wW6(%DR*OanM89^;*^ zw>c1<{DFLmE%$8E|EPj$yWh2JKt4}vFO&AbIBZ+MTFTubn7$k2z;N6YEbKQLyQL*& zh-o6?YLyvZ+eL~3`z_y_2W76T9DJB^g=#UWdx{M?;c@nD`$e7H(DG{-$&RL`LA!W19Ho=OT2TxnY)0yV-jQ$4{1t^4?eyS_h~%$5bALhqplqsMB_rtS)U$q2)9Ue zJ`2cS+Kd!Lp(A$EW}wUR+R`Q-Oky}huw+_V!g?7G)g9atVakSPm2 zb*`aIG(hy`)s&0F7XdgN+uslq+*1?__8SeXHLNLel3_D9T6WQ#lK=)A;c`GAXI!61 zvo&-zWgkqY4IVB^Gt8LQEP3X3!o@iuDZNCglZD;7j<%_n(D>ss&O90MxGL1iX>Vq) z4QGSC3XNY^N19Sul`tQq@ok(w2P`B5LS-rf+tZp!$fvd+HI*(uCgt`FARG)9f*7)0}HKi<4@ zFv&*ca$Y#d zfDR~ox+j+|4ztYhp3adTJWLD2+^YxWcAp;qNJ5dAJ%?!~AR@EEX`zZSb?vaVj$<+c zyU+q>L1N_<*o$qP3ssIRIVx-3=qF@QDp+ZBkNL*YXKCjAWk2cB#v6ZnRJ0YvzR{Ee?`y74zmx)X3hr5;jrZ!vvFYqulL<* zf~Ef^l?zB_>@k#3$O6cnqYni+Af^4N3EiAWsFqf&^)Bv!e?2Si5KA~5+|UDT3QFhQ zq_{)?Giil9JJ9%AKk*_uPHn^lVsKn`zed`(D2VnU0PLSh@#}u(IZLlsz~Z@^^f=n85}3|LT+etkrNBjljW!Qw_Fg8*^H8A8FwkqvLg?SF&wgdJ)Fj(5^ro0 zJ5*S%6CEX^W}S%@GKo72$(>ZhUE(0y05c0|%{e$6=4Y_`VZWMVBZbJcoaOj(TtnR7 zoAML<{d?=FpWWyymv(A+{uE>_{GdQ_p_1}8uy1@DtU$*6;%}H!@}g%Z*>YsF6SpGo zN^tbV#~h>gjaHDG0@?!eiKd;7>Yxm=r7Ti~gF1EFnw94)DbcY#Nn{hb6z@@{#TE^})u0fZz;#>&Si<4qztiBq3D6v81E+lf);TEnK%MTizwY|zw zanxRq+*fW}qWk+kOpPcwc3xoZoQ3vb!XQviv;I2WNjG zCqJQCK{7isiIj_F%j&Zp$`pw>hBIdOSaqEx*mqHs)61PnAHjxJp<^mQEXq;Z=!4D% zzf|~I9<7ITyoHn=2=nB0azmM>fQg^gsX?>CCaWeIRGOpv_kLnW2lsxB^&DW${IrWa zF!sCHhCe%xX^LaUAPnGZ%n(tDf&&gJ;~>(nBtvGa<2#Daxo?fO<0nq{N*keFI343u zp6K1I_&Dxh7h;|~e2gf+;-j}{Z`NtWZ1#w4UN_m*RWUAF>!)yf ziBd7KPt6y!*?+}?5h6~~@PQR70f1+eB6qLs5hr!(QWoL@iMa<>^JLRf}-^}4LD5NJHJyL^IU>#m#`&M)z= z3L9$Ja&l@BnJR&^^eyDvS01~TFs?2ya^5}ZvErkP*tv5~pXQdAm*e!JcT$np z=TiQ7oMGyel~ng`d6_wI9F&G&MR88Q6tBh3?rr~?-7`30(UNY*F8f2QTcl85#gbBSLUmemBprtWNh#f4$V=?;^;;Cu|TkAff6YjmNpn>%X1C4QrBph z+CvP5HD%W-Ml2M{e-^pN1X#bI_}mlOUyc#swDr*O2fZ^h*P!!6td%hO^HJ&o@a z(Mx=AqOnf$hT;9Qh)S&Gj5kt%qHv&!GsMyBcO8X{dwE*7#WmeFau-Ckpz1}Vlf5b!B9?RWS z(>Eu`2Na@pk@t!R>wBZ0Z`q05k{0L@Xl)M{2g86>a5UEMrT#{dBKdo73TXXgt7FZl zqlQN!a+fdCYM0SFz(_=O4RkN`y**+lMdd>*4-5d> z`6fZ1bSI7_R0X@UBTGL-AB)>@x17X4VxKIhIwSCRq_02msCv1RrWJwhkG!>9O^6)M zZ$2gRc}u4Se2`w9(ObtJtZ0!=NXGp?!=asB!_DK2ICAjd{^F;M{2E14ri~Kwdy|U5LZbsB zRUIZcr+0Lq_3CEz|vGs7JkSsa1$}mo+-+K-XDF&$+R~ z656@UcMs=aG_?gF<;9niqTi>bBZ_WZhZ&r%*b%<`ICdW`0^cO96s=0zx)13P^lCqz zDa_RA4?`B=(wXAa#DPd2^4}uCR2_Q)I;Ld8AjHE63=0#DFZ(bIA`KZbCV5~pJ=Xn~ zwDdBr(z!s(IVfq0>OAuRZ=$-B!$3Lo#7DRDpLVch?wc~cE8^r^ubd9c2zHrq{%SK9 zrJJ57k7ERWUK8;a2qhMbUEECz%~bp9o9re}(~Yke@@qH6+NRCHrp`GM4kOOKwn{3n z{Rhtn8tw`|>AQDpl5-h0rP6suyMgqui{pLir;MN4d|xVRm&CL_hW+cRnZA~6+8;FV zHjcyu`}p=p8lP9nCQCoBl=E>3Nh{`4KqzV6@7hoMQu8_X+1JjjMZ_@ zKO5|w*&Ao|n-%R7D7@=4`xuo0dMy*8oCdqL^?S5i>?mwKy0hR_d91`}v}C~>H-?6# z;+oN^(;!=gCwOni?c!E3@G;ztb85XZYP@-tUecjinS_p!f;x^+4^)&WX<5pEOGiqbt;bFfaE#@ObYJXf#q8m?vi`1n_{927#H6wl~LWlZ&boPG6 zs7xL;C30mnW?%+dn5!pP9&e?xo}hd5J>EmkAf0*ad>m#a!b7C}ymOS%C#UBPeRxqj z{6mKlh5Dr31d9~XPz(Nd;ty@u`ZXf!eGL1iwBE-kgPIs~`~}YZ!;Ku~U)a;02fxHc zMxSR8bLzfh2lW)=d8boxd*0GBg_&eM?ZFsfc6jN56~guI#-1A9BemErDTiFHVxUOL zJo6t$Qg6j&LEmZVUh1gvDlBd%4Mgb-2)U3myQ< z(o--lNQXL*_x5es>CnTS|1QmbPF%T_BzbMb>xG1;csD(!ygv%FJ63;eTr2)cJ~e4k zp7MKpN0yrUA5lI;XgnnEkDc_8<$1bPn$nH6QH9033$mcQ#l(ohL)}tcx@?FY1ndWT zcZi;_dv-Y;Xt;jdFWLxFj41vBV5b^+YgOO=f=SM4ee`~*;Tuq-+&q_0N|D5U2Pe|n zf<5q@e;MMf{)>9!`Q21%!8xhpC&FJ^6kqrChfRn*^0Z9GOBEeNRh{h4IsN(^f=q8- zMO?`Vw^F=LttC5};&B=Dyn_!OeqSM$8*%TPBAgb85x&JbwEVCg%m{G*z z86k%hC4~a@$CR8bm42S;J9@;9UiJA%R<RP|AB7&aA&YgU$sC`*D_Vk5P_)7gVynnoiWvogmn zN{>DPU5s}vVyPQg5H4CM^dFZ9HPVW3Ku3Jc)h6d{>e&ljMq$jWEMgDVqAYpK?(T?Z z>seyUDKC@S*`B98YrUJFdwsT3)cpaTC_c(Md^!ULYf)~~qoO5PF2_kLYAe&cyOR{t zpCOIi$C5pLpGR-Nc_L!b{n4V-e|=6!^5MRwm`2{c-*2VT*6##MpZ2Z|iadyV*7HA1 z*J!SsjZ{pcG$mnI<(7AjLHhoV+B4@wk*v%$w#=A#<}5CqSge{thvmpd)8Nvj5sHqe zga>R(x&gOnWQa{r@!#v!8s42AJK8G#=rc9>jU(Jy7JhlZ1yEShJNw* zqAm7=r)3`Va~fLLp)%pbl8&7Nf5pCsnS_nLr!wEkgwMaJKQ7cw?S?c(s2weSyC4OX z09rj0uO@eb%&=xEV*QcnD|54H9opT;hjde{;Ok%$Mx+ny7I~PGqYfkK=5+4szC!p8 zG8R0$aW|yS&BDXqlXo;zi_>kDJ(qGf7t+Fj15WlM(xGPr9JI6PfNPPRXx(S$+$^eN zK1~&a5~Rc-A0)Zcp~7>%^c~BVc9l>i=JCucF^(#;+S5gGlvgvhchG}9EwE-DR$0lCPja+yZpY!f4iYS+O0Q=os!r-AL85GKtNG9`aJl! z*wFYYMqscTbVK^a>5uQ9e^VQIPsuJ{&b$8|ueJNFtKcM$q2vj&VD`D%!Q-(>`x1g9 z7nGe_0>w|WQX0l#;kr#-JI+JYxr}FK8(in@%lMZ##5#S`^_-d-5gDGmwPUHq_Zef>nBq$WBl!{H_l~VC}wf6CA2J#6}OaQi04;J`;>BQ)h2X~ z{+MLxHq!NU#}2FW^0l^I&2=2M*9QaJ^ld7J+K~9I}B(0 zZuhgUDH#lHmW)c!7ddkI{=ub#oVNnqYhf|d_Do~UJAF^lMfYcqs_lhWwd_2WUg=}+ zYLY$rXNC1faUO$rvbT@Yn@mNzk=5b3JHz7M=OXHos}zo|lMZ$6PQbn;UQ}yPh!Ilb z0MB)gc06Dxt?{~9bbQMvWtO`wTm4cXI5s!ZDFNR?ogLwa9Sorz0C}(>`*iVlGt;V~ zH-)}?$=mz7txD3en`A4&j!d-^byinO{&CTi*v}f844n`OOa*3=hyQRb4E2aAceSme zTgCx=a5~370?#RB9n}l1mB$@BpYR~sen?+PY0Nx@T>0K~)QOSzvAaneXgzmq^y>Vo ze9>}OQI@OLh2V!R8j9w4T*pxfdeQ$M1EN;?9i21>x{nk+!;!$&&p22Aw`X`#R*C4b zR?fV4+ONj*L~A-2wVGVBXH5n5v3jLK$C@< z%V+x#UD-tGhRIx^&7=M#H^eGJ#1PZLW^IG zUJ~$*Ij8@#|0}=-j=keR0KBX4-GH@5X-~HD|M6^_AbtvJ^o-+W3zy#U`k&-gn;uQHqWBSq{>JGHbOS@{tOpDwm&% zx}$FgB8?4RU#wpJGg!M);&jqtb1cT8>kMR5lI4&ZapN_4fgu$8XkvL&Z6twzt~nq@ zIL?Kku=7rWr`PnP0@_*s2c38x+gF76IT|C^NQ5h$tFPGGwopqOOF!m;JAf1q*ET-y zp4aN}m|6Utq0x^m&JuS?V!K6wMee+b#ggcX0@=9$T7B11R~AA!HnZuc=)w43u6vG( z-r;;%A@G1k&QSGhj@){jB6sOFIxTteyIlNWSj;-hE-K{2Esr z^7_ll_oF$WNY}p}gCN)&H;8fIDh(doH#$wv-9Zo{P>O_*Tx4Q0NaqDhLeK6NZVfN7 zL%18}gL9}*PRS1G32#gd9<5fG1vD;OjHH?Wug(5WX@c8md3iseIvTNH!b0P^Uj`MV z68-!{pW*GRP5v$mm`)0ZE`PJ_m#?YyfO_tCWQNBOu4&D?2zu^Y1wQ*!E&$17rhs?` z@3Lp-+cu}>S<7cF5H3?UDAa^r{%iy zsvMX`y0ngoCk3yUlc7Y7wj{Ip)&7TIn1bmEjj$8kOk$SAEE1^xw2IClBpdwyD7q4N zDBms|70Fg9Lbim;mVGA`LMXB?Lk8JH)?p@O%R2U5k^JmqUj{>DH?r@B>31HsR?gxF3@Rk8Un%{{tg<1~S@w z)Ov|#Pzw64mZ%hu_(fThGei`3IbT>8_saAunB-61g50aULvq?pi!|KLxaIlm!y^pH z&gq|o$@CwaY|nc~(!y-U=~xGY&nbj6=f_??flZ<*WQX51D7_QKM_VPMUEDUOs$R{E zHifu|7$sr`abdM6RYFuAP-q7`sh)}5YVnQe&l}7W2E3+7dR|BUI-b0vh4@29*%`R; z&mP=*Tb#?}HoUt`I-igqPwDsCXJ`D0NaIk0S(od?u`qaen{rLZzf~^7icd#2w_lH_eLty;7a+2@yj2c4GMY_cWY)mSHLVTv~ zLu96}k?{sO6M}!+de2=&irz&^xzx9~g7YNE*%?POpEjk=W&Ds|8$Qb*kk!8wu%s#M z%$Lv>7iu4{F0w5SA?WUx2T)VpdS#1bfG|1^Tc=EjI*lV}hODuBbD$pRWnIq062Az@IbxqL?-^Cja{rOZwswL;sB9*o!FXt!Cw3fQFN>fS!(IbUAWjo zEy{Rn`6!a{+Kw^`qgyle-X3MLGoU2wkdKECa_Xwj=*o<|g4(MTpC4AE?{lz9J|0#` z(m;qmcupU6%+I!hHj4)0QwXddhfmkz6sj$eAHA`06eE_$lnL@E!7){d ze8?*4x|>zXUw(4q7&puR2#inaxw+y(iF*-qo~*MKU1> zgv*F)W~FrhsZ6#@+%K^f&7qbIg`$D-lyU>)J@4LR43elAdTcjTnCVkDGM$u55Ufsk zx4bsb4RML9)%>*JJ=O2IZZwjGqfGX@3V4Rgn#RBDxSOSETg4Hm?%mOnyOme2Dd-wh z*{~^7QKW!KH@+~44}PTJbk)HYo1L3z!|UQE3mI9-C)U4B{&`735A0^pN-`4Zr?Ko5n-^g`8^SAU7*CHBIs< zmC(eDv0+H5^Agmlj8u^Q{EWxsq?1xQmRW??{npJTA?tadPSfn+}W6SRqU6*MRn?W?ZFdNa5Y9BjB zZdg6@k6(P#l5}C|VDXY{d5c`qTQA4y)2?*rwZbXB^{BVfvSG!ZK38Q9bW>0(^;43OJURo?pv+Kv%m=xinr(JG#VV?0WU##7mV&&hYdm|v+Zl55vM?>jW7a;(;KqY8@A`TEl;WPM7)1#&6xlu=0R>%UWg zoyuvaJ?^a|lU$79WJ;UTF9Us7n{fF|AJ(-F&j*zD)#LDCOn zNtfs?Jc9e_TOV-zlm0qWaa13VA6Xm#~Q|0 zeCtW(sNPqGqSsfB?~*6`vB}2bTD<3w>H8bHgU0;SiM{o6IWxSo&bKFqHB*|*xhvW{ zIItWS70^nsan8(rJ>h%eUR4}fix9&v=6c9U?@fW<@qs+)Uinr?3kQ>wI==p@C$SNM z@gE=6O0B<9Hr3;$P7F@i3|ap*fRWe)d;iMLEl9<-5Dj=zz7~#>%v$klYZ&@LP{?_xlZmc1#!^z!0ulmex{R^~shEOWZW~VF^_{>~8Zr>UnBdR3rYt_qMSd9?AUnS2q zjkr)lVnyc{J)@QHUTg;FY|037-P(AbQm(nqQaSL&)~5TWon+hkZnwoF$v0JL)L8>3 zj2l5W1&>IH8$$jgm=@;|)ELEuJgUkT9h?~7nF(h$ES~<^n=?gBXNmt{KTb4YG|Ubb zY;k$@nWaW>X2BKznr!xL*fp+@{TbG3=Jo~0s=(V6R}aA~OVz&1`lSO}%59QssU=cA z4aIL7sK?16{&2Y;aJ-Q`j=21moKd`TM6S4iDWg)%B3=9$W%9nn$wqd=*rOTer1%k6 z*!3FvEQIGn=unWuIAqu*&Z2{6_|7Fr_s8mQgtWMLt7`JYjjG!7-N)*)DHhBe#L_Xz zWY$sze@2k@_kR0L8FZEfgO5cW$J~J4Z~ugQeHEI5y(;}R&q$@}FYE>pnq)N@$oig| z%*TJfNbg-`vmc)vN7&X&l#f01qLhRXOVvku@=Ik38cH>PQgTaNZMq9tqOatZC*HWC zIV`Y=xRj#_yQ(8;?QOz)iX>Dztz>SLahQ*in;^i zGao>bZ2@HgRVfEW|k+JCQ0tYkt3Tddye zTozyomx)^wg<#|Q=ASomkt4gks6U1yC1RkhH~P9p)i0Uy3sFE_D(8snYz>e!pu&Oq z$g9~5@Sm*EOjoJ6p)gvqTF}H+w*o==9dOvl$5$9%cI$TuWT$CCka7-=aw4%mQaj^v znX^RyHSf>Q>o_JdidS=nFH2e--5q;aLbDq4N2pKdutam1gCj-nl8b}=g;a((s|1JUXu5JC0$0)r?E;bvE6Wpr`$SjFUhix$@OL)=>UuIA7|k6tidE)={S9CckQl z>Mp_i|7pm|mP=%#X*Cj6QBd)+d6WP;HE7T%HH3n7gaKMowCx8p;42G7!VwEbF(Du{p$gq-evwtYY z=`&-CCATYWl}W^;9mizN^nBHR@$ zt-xx26@V<@u*p^Hcw5IIEk1V5XJll(BrVVU(cLePZ_`~gh4g(?DHtb1GRpFwG z3Ozr{^Gav+XVn*#T(Jdzf~6~Wz}b$O@ZS693#v}!j;zmhsPeYJ_}d^so6lId{hcPA7+0+jyiB9XtFGLHja|JgKH_(|c-|2R_MCkFCFevGG-2>w0sa~*N17Ze8b4%G&Z zcp6khV=q}K{Q05eCb#I{pBdj5a;9vfNNZ~pBYTS#3N|2n>Ka*7@e?z-eC)V!pzRgl zS@mtwi)(KGKO~bu_AWsJvrV_hQNg6MO*r!FxU+P%I?i5hA{z&dpsjqKvX_vJ$^L*FH4eA6x6m5t zFshm6yS1?xF6bx|T*M)*h~-FW{c1_1OcQIcq*95UT1sLzMfqY;p^l@*KXbX+O6J#P zJd=&;H)RlGdTmcH1Dk~DzeM%$HRP;CP3E|rzu4J(iR(5|#_=rgk`+tZeklNA%;P1R zNSzgeNu$>G$Agqoi3x|emaQ|wuODMsiLB>@;(u~mWp>Ox|GAoA`xb4J{fAw_gq6Uc)zN>Np}<1M{n(%#9tb$L{BHY5q3fIK{VLbCLDT%xVMAgNaY#fvHEfe2QKuh z({dE)BvJ5kEc#pPm#~nm0MF>>~%33h~;DJ$BIWW-o%#DPcmAool$P z{tF#tWt9A$T~9nO_@2VEIyK|qCxVE!O-T70B;AAekg_XLnU!pg#%>QD3Jz?!n4B)e z#pM~gnmAavP;UHkG!7Mf%a`DRrPA9?Icjp9Zqq53$zu65xn72M2Rgo!s>ZuJOkcU2 zaUzVmPmXk|DFr?OX$Lb+TbVUQcv+t1y3tNMlAd{6HuBv?^Y6%Yr6Yi^zBwwp!lYFj z*8WQFYvsWC&2(i{>np?`68z=-#d&2ao__0zeL~9hmy}zqn_|Xiui+JUJ)hJ;es6w| zuAz6qp);XIHg!=N>ZsL}RCZBE9g6UEm5pYIQG7jahdy@Z0`3k}9oTs#5s(r7hks4k zoR7giW_j+S?N=^9`bJ~tmJrWtf@RR(1@PD_yFEm~rAXz58Eb4sM0}ro?0+-`9^YGh z>!J&l0;0BS(Vgz&;8U}|(^q-YGO(%_F{)FH5nKn0tZGjz6;D&oO0U~eT9lW=FT@^Y zlXpBW_r@mfy|TYhel%aYAy%k{LGilGJN2n6bObclO=3uNoWG;{V7l?K>w)Z;uHUK4 z@QRb1MxHYS2+)ko0@f;66QUmfQom|D#t7#hGkV%xsH*Jg5IqTK(|hl2(ZpzoX%VtW zXaew;hhp@EpM)r5UK37ufKxH5x+YX3x@Lxq7G_`cA$-E|7$Ci~)36S7tuEG<(7eEG zvO$&;uu|W!40Gdd{c-WL?Qp86P@XY;>@ZK=+YNpGx>vzEdb?LxS5uqRZ1L((LlFPm z&G(vs**zoXE*&5D@>na}U^#fa3d{9&i)~sWacTgzcCK?L4={>hc{k~Goo&c#^bRb4 za#wQ_j^Pt#H|2UQ+i=bT)7}Q%68OHSdG|GH!`dO&C>D-gw78SUiorq&pL73VcOx3&F` zz;A~x`fGvO-#9eho0iPKDxp1FRas)!skg3t(Uw@p5v09ibQXOw)0<2hnD5c~v@E1| zu+|XuN@1BPRIlM;{gVRaA{X~P@zXNUS7LTSMp(OzU%E;GGOQmk8vw$f5FDtwaa z8P)kApmhWwssHCVa6N!X-CXG;sFctEdM9k zM=0z3-O$4B#)KbymFBwD30CfiuDlfa96X%Cx?JVddKGJ?f-JHIRw@&v-hh0`wyBiIi>t={nR7A=e-843`*l)WQB?WlT@|j#nFs@ z%#dKK@7Ck9c6Q9E;V(Jf*76Buv+*jCo5^d=t`@qaomwFDFhtL9Xjw^sGjwWldODzM#ZP8+#*)fUwpaD_E5TH|GLp6pGN))|-C zmmy?Ft`1@-=AHi!KAe+*>ka_n(m1T;4-Jv_vnb2O&FwvQaNH>{L^UXx%f+SGapjmX zpIx<&dGzE|bGG=DX13f=a%(8Sy6DbEfIiqPk|r_Ief(2pQoX@hz@T!h`C!O1Fhy)% zlcaa|pj9%YWZUbu&hqke*kr-} ztGhUUMlaARR6uvF$eyV=Op{_np@ z0P*vsn2qn%khr)dAR*qK(0__&+EmdKe{s_I1LIdJ_n2WZ%DaO8i4yc`d|g)h)744V>g1bB4RpZ8e4_>mM*WKgCsHp2(!YOoXd%L(-_>gxxN{UM^bL<}ul80gl}1a=D{U2nZRJNVh8YVp z){J49jv)g0=}!S+^}72ytUAoD9gudVcgG#7^y{r1%P@caxtDi?2}1_OHfl5qU7kER zC&xKAXP&)Og9JvZ{o*$G+u29&ouguQ6Y$i8rK8WY#|JiF1de*HoVC+MF&)C^nKxQW zbWKdB77||#S$oidU^t6e4nxQ@3RdyP+R1&&G;%m#(oVad`7 zQTw|G7PVs)J+P!JeLTwb~GAERyE6+RF}V zL)B6S-IB%{pr|36o)!;?bewJl!Q<~qc94C()W-^Uk}LOU=yhVR=dCL;&+z)QV=L1o z18?pHy)JCS?FNHfhvd17TdJ2@S-3)GIQ*pCpNU*=ePb4LqLkHSz=}t;mkduFniu{y z?zM(*vF_MocH4BmV(T0uOIbBy&!Fhs{ufpP%bOsts2NN zrz4r#AI?TzdU{H`Tm#s-TJ{>ADKoP%3(K#VEYs_+WF8+-tw4Q#iDwu3T_#~GDq>=bBBeu49GWkR%t`!45csx)iglzM!M=v8VvR!R($r6ZdR8?#nzZ>_vXTn)w|wB(|6AP&6;D^ z{jc47Nmi!Fp*Ksi`4Qv?)RsN&Y-ZbHQ=9G5zfuIGzDv1jRX24d|5|wC=T;;z=NJED z(J5R8-|DGrCeGv;#yH~1X+AgK5oUd!+byP0E#~)PGjPD6ez>oRcCPJ|-frY`Q^%t% zxis>_Enk)x6F_5mTwGE9bJ<#`m&1C8tjCM`phOL>b>gWV&~!qeyRmVF8ujDAkb_m*iC5$j0G0Om?0nw9J7FqSR4J|?z;JWSxTPSl{pA6@ z8^h2q@clXKgor!Vl>b!R=tK)_*^UVbxeX7gm4==ev$L0&a}R8mxTqxK9yX{E%KB_) zOX2wuTuYe)ghGQb8N2WKBLA_Obaymuwga<_8t^)@6|hra_XO#s$-p_LKL_6i>^t)6 z_FyySoDMEnowXZJI7s#aA-NxIGz9mrR=U8%B4onbe{A_93;z0$*5Gz(|Ecl7txum+ zK)kqAwJ)}9b5DfjKZidcAisnJOv)?+LO6oHR9zLJb!xxdvcYKnaM}$XkhF9AzSWX0 zz^O<42yo7@{c^7YLvxZ353RtGS#N+N=HelKMLeGMSZJUPUe&zUS(WAD@r0lmzQD!kRl<-KU*GoVZ}2}?TKFKg zdRvzu-kG0ue6!JXL>C?Y1U}%s>+?$Zb28qpZc+OoZOhW*-3{CB68>MR=*Ld3{=nm_a*Om_0Rhj@$82{ zjctF{_V3*v&HQnyx6-AIgV@lsvAUh(_x|Qr9APNE+%2C3_Krx&vl8>eg2F@LPsJn= z9f61;jCMj~#C8yeTMU^0FEgXB?S!Wc6}+?Mc`Z*dLRf^vp(?Ovuol{1=RE(y+ZsZ^ z8}b}7ktPps5`J^hn>u?kaPw_RC68q!hPXVzLga7DzU?k99A6e<3^Zse`BFdQ|J8SR z+fE#tiMbiOlXoAzxReGN@U@pp@(+oHIuve6)b?Lf4i){Yrsf?tYGZcJoYuj&W$B>_ z36K?dsVqlS(Njt`)ILgJla<@)U+r$xx_NM#?Iig&HNC)OdL zW$Z#A48`WA?q@Wj++t8Rop^Rh%;CKQ%Gm}{q>EV;%ytjaz3i~q-2p})={nDpO$4#e z+^324abh&rjf<@r;_OJ)m@Sc+i;uh7*MD)l;n7}+%fEdCLlp8J$TKGqrd)iID>5^d z@d#88SiA46b`I3rLAc+VOImJP91KZZZwIR+U{)t@SZ-Pp+CkU| zirfXRjXTyFzPYK<@w_?i>pb>eEc~p!ld*ek!F^G7dZ0cA zxF+9{QvqO$P0kw@X9f9U?yl1s-#L35&O-j$Qf}u!?;pu{1ln|F-}>+ftW?kj!#?u- zB2$s4RDulo zLw|>Gy^BZlHmPQ!m)Mn~IO!{ktrX6BivJp4%(HvAZunQqnY|AA zNv<0fF4HmC-C9MlH!*|kG%agI?*G2f=fOFu%@7LvoFZ+7h)Vk$qwaL0kX1(+Y{v-r zJ0wMn#8NpcRJa9ofVohI^w~vIS<~L7Ykn2GSMNm4hoNg(Ygx z`*r&>PU}JTCn@!4CDDTqatN-gL$HHeqci76&|yp_quZ3KMt}aAPsY|~!2=jEL0`Q> z*vI%hc7g4A!U?3#Vt#KFszS1Ig|ba*fa>)1v>&>D!!-*nW~k4m1XB;arqCBBxqis5 z^RV*l&>Nh%fJvROtEUfd-Ue#O<*(xe?tV}?Q$5iiWa5LtO5~T)jzYA$4rH38kH@EP zdj4#yy#|?XeSCHuHPQwk>+=A?9aH_SoMWKSRJ?rc%+vYT>7S-XFn)%+QL5>+r_A|2 z-XGkMr<2zwGAUUqur-dn8chBEJ^r;yktb-kvc1?OU0O3c#E3<06ahd5F&cvZ`~JR| z^KZr}9jrJ1;>{U%h*S#s?cLztvN+x>NFwCrdce}>W!xtAkR@oB|Nho}Z{3*_MQ<{h z>+m`$ZMGPstSXFc`LB%+E3!akvUFksd+TWWyvp@mB!Q(B^W(&ma`(c=UvC|Ea#gdX zgHOE{QGSrmpEKJqa{lDRDEXXgPS9^iG?=!R*K8Go28eEn%Nc3I_hx3bLago@_6ox| zgLA2e_I`A&&_5IbY}n;idw2NWBrIlHpRejC3dTC!h8*pc4P8%7-s2YgMlN`NGtgl# zYRQm>+a9v4kl|*&WkCn8+I3+*5hnr6mxiecMqDVf6mM}vB9sVV{aL1eI zNL3R2b;yS+;NNnA%?1)%g4!X;EPeuO4{HPW8T`|*x20MKY6O|iES3z#*-de`WgXWn ziB!8nFsiSGtOH3fGh!$$s!LDMf!hLjM$P+gJKqO_FU*?{zz2H1u9F^Y5Fh}9J!FVq1bUI@Dn11+Vu{Wcl&15*Ib(SKB&$bdW#Xml|Y zIHmnqce+PGY0g0Zo?ZJlqe6tBpiut>%e^^NVVM8m1(4AXRYb8Bzchpu363N$CZ5E! zv9(GFw)-plP0?e-2INx+$$65~x2;kS#O7$?GzL@`;lgtpH0dv^-?Yapot@Kvy3(5t ze3wU&n26jpPQ?J?m z35#Tby@6L~O>uZqh=m*!Cr)BAm#xD6S*`-nE+7&`gF?IVMBJBa`j7lolVy&GH;Cgt zOv*`Kr#st6{KSE5ReM_UxbDpZ-AU@&S@t_M-5;agr|+?gaA;E|o6ih`79&rayJX{s zT}?lH&&9R&fEI-X6LXn!an_qf1<}@r^X4@y!jQ$UT;Q(P=*_L^RdF>^vYSN{SCS{g z0(2rId4PKbd+ovaMq<613mtr~H3Bxh`v5k??jGO?-@i5eZe!-Rd!S9$tkkIaN&&+% z6ZeQ2Tl@C$`;H+;S{RNq#ce$pT~8S1I#hZ+=rTm-mNW*;;no`xSt}y_I=zHvgc)C9i<}F7aO$DpL#lu`B(3X86JE0+4 zW1Q&uqh--``4)+aIqgbl_y^EoaBn4_QmSXkFUcxLdGMv>kP5J^vFhB2@p>QvX|+z$ z@#c3O!!rsluHIgE-q$%d3ui-H>eEl^Nz5t5kjX<8Sf ziO#4QwUtUhMLwvY?^4Z7O32%^QmY+juRUV68Mxvp=ON|2P~{5S#=)HOJld!b_%_S< z;^n4i=Awl>tN!{AsAIk0v>eWCQhMlxvIV*Rak*ORx$0!lF}&o>7rj6vK$!<4RPB)4 zY&Y=}{Mw=<7d)tC4s4(Z@yJ&aG-~P`1KBdjN4iN$A0GaFNurh;I6bj{WSM?knc&9i zF)@c=aP2a+d2@@^-nb!e(f3>UedT{smeZ9a#oU7F+EWd`*`*|I|e9y2OrgT=G6@3XWk z8S`m<38u~{OJ|7k17%d&*tvm_Ld%fvrOlVa-rTj0r+XKy3W)hE+yw5iNMGb>UoqHI z;ZTWgtz__GJv7MKDLAryxfxSbj~&o5feu_qOBOU=hozM0mkVz36a5B#>jc*@?h47i ze$eCmT!h74#Pjk7pt%Z>#cHmr`nPxGE|D8DP^rl69o$P5Y;?@{$uiQSX?+$)HJ3^_ z(I|K(|H0EuDhthm`9>HZGKEAIqcA+R5j&UmE1W}mDX4OTD2@3~VulITC5UwUwa+I| zOPzHn-_Tf}$s+ZrYo?0k;Q46DI{86Mh|zqWPk$14Fd0~60uUH#^fZyE@@H5~F`zl# ztND0RmeRXyhzklZ%A+hHDxt zoZbZvw0^>~lAqci-C&nHSd-Y9sS-ba5ql)F5rDcf(DB`6lS*vD?17qDHwZ{IzQpT# zTFY}5`v;&5mp9?CoB8#?-fJ zvk_Bik%PV=Ja)KaZbyr17Pg^_IHvQ{WSW|42e}tKYV9{Jy0sXW_YP%gjIO>v zCaZ+@pe&Kwrcbxh)?%kuzTS;e^+K@zh`OJUWc{JkTHyfF?PpbTuQ`e<5JvCx%Oquu zhLSv@=btRXV8^#jI9dv1>UF9x?ftg$RDUu_gul$!x8EWJ$I$#GmiQ)rZs2#- zo@G%{Dql_E)>F^4(~Kj7MY?pXm)4ljGSiiUXprPR;aXA{Y^z4R;9x4^>gh4i>?%`8c zNirP(u{9py*XfDZglapv0*C#*MY2U6w0>Ns57AmifH?&R<$FmZKdtL3t3|_eT>8jmoPsIK1ngX|$zK}O>zYcj#l90!6 zX1_t+N*|-goMfD)i<> z23UXI3}7JtnE_=FYgyj`*@a#8o}6E*@Kj}i$w_lwqes-Ro}skG52H92Yr?l{1DEUI zFtu&P4+V$TRLi(ykidTHRGQaGgU5h;$!Iv+Us1;bohMN$0;{*kR7*m&K#z~%OQ=um zO7rI~z^gd(a4@$^l-fkVZe_jey95!F$UEm#TbBP2H*0A&O05&M`Uit#is;vWZ3p8HF3c~g*@heLM)Ju`MyQKw(0#}uOXi3+Ffeq z0nL2*h^-ydR1x>y#uYje=p^Z^2Ge=U(IN8#5<5lgxV*F-g&bZ zv)!b^AgmI7vacS6?>!P@eJQ#D)%xS!J-W2ZGhNr$v67IG@FH=dfIkNpHv(kTn^-O3xkwS| z$|)MSsY;?FkI-NT8l4Y;B$V+LN?2k|#e+mREfO+};&D)o)UOz_M|eG0zDFUc$vQZ2 zpSphUtLJ7;Q1s0#uh*5n#kL}K1=KzD<#TC4y;Je9&p$R6GKmIUK6}-=xa~8a=KPK> z(+E2t4^q0)H5E7*#;@{25c#Nv&$|T7ZJykV?5Ykt6eHJiKg_3aK$;s)uyg&Gc(~T| z{v`15w2w2qZaFZp^+f4Zv-VZFVV!-kD)^zFvi-86H*tmic<9s@2$g&Ah#r^UwPJo^ z)HO31kf3rl>H2TA{$OQbtvYO|ypBs{Cz>8Qa?Pxu`9AzRP z#J{p=hb_37HbEEOHPuiZlP6bpOCEP91O#bYg(CW&1%(e|@Q)6uo?Obyb_$ZG(6h#9 zI@Ka`|8cj8b$QsPheAg)Phfvi1_9*1@LoY5Dacf2J+5YG;~`~ZTWs0v-}0rfhg8lpimVE2(G*aY)}r1S)*jOdk*khs!&tIX ztSn|)jzK34uG-GQ>JPqlJUYX*=|bfuC}*i&DLKiNat9CR6hg|iWvc<+G|dlR-up-0 zh900s`{c+MhBlZtF*X82_U?W~HMx7%ss!DAj+qo-NFm&@NkIojx>x~tVz`#8AA@aQ zg3+sy-YYQ&?;jHWvs1#4F6#=msb4!XPv;8MPr>qQ?TSMkE>?lWhew)+T^VY8w4TCY z!T1xiyNESDbB&x0eqEz&zgh?f$633 z896=T79z>c6*0zs(?tj`^=X&he|}cnOoek;tW>Faxop|#uZ&7Qf7pG$)^q6%W$;`N z)|9|NOQaAq*mFE6%mNcm_#TXH}+*DV$M zc#OCfu0{ggI+7qERuC$+DIz)}v9v>8VT^M>^mBm)2G$87B=3Qn-aPXd&5py6J3?!m zCEM%gYe+G}Wk`H}&fT*(KaKpM(`@)ZgzG|zuZ&}+Arg=ix^$>Rf3`3UC>T7QK-zf% zl9-s3mA{CznSztA*Xdqb(I!`{=r7w>2_e&pD_q`Gq%G&%lh?viKk%Ix%K_MPH36zrIZv5jA=j24eU_8 zFy0lH_l06>N|LZ~GKcgWH@_QvrW0^=s?+ar_J`G_1_o~LsDEA*KsaB2pp>~SfDSrZSOr|C$K{#~>Y!bcCD@oDseX4Bmq z@F6`7oWH`wCFSj#=oSUN3tLy@Yyaq3vk2_dE?@N|tco5xZ+j8VgBXurTa40rTX_}% z_lz#`_;vT!5r;V?h!>;BEx>OE;(EDpM>o7sO~~-kNj1miqVYR{Vu1eMd47hr5BTj^ zNia5lHk^p(#cFghZo;L^(%FOuqI&m<1D z-?v$JEMPg0Z3j)I;Hf2gx_>TsQ-lzGkOQ0?K2hQ(qMPJCt6-qDnc?@xus>+N_6Zpq zecH3J%@j7Fy1aZ>D>oZ1Q7oL>UKXLKxLv{H*u>kpvg*PYSh?~GEx0)MDGY5XN^ihm78FoZ{*`1#^ zh`Map3mt@0aTe~oe-E=$Fj{zDF$(G~xleV(7QnuFH1$W zpeVq9Jg@T-xk1XM-yg`i##w(1=B|Zlw$YXV+bDVozKN@&fBic#A7-!Wk5pedt3TXK zl;`=@?&>E`{>{%dFdY=aMIKAZh?X)(u`3jk?a{VyMk>h!^;n)q^GwQ=S;w4WNKWO- zOtmi$)ToT!qy5QNNmJKo6~Mv}FBcRJfeZrr?XmD5QF*(Gkf)!nf`xF`4ym(r{@n|H z*wwBlq8{9xc6)Xy#4Rb&7j$T}ZwRw#nEiuivELThqO}1FuYs&HD*lue3wQ0|-)L}u ztD~x}guEe=YpQ4C<6IgZWT>5(;df|rQ;rP&9CKd`Ye6JL;KKVP8lQG--ytVQeweN- z+5R``LAB7#-cBfF)y*FLh*^3z@K;-_$d*CnL(XQqb*dKR?`dGk8xCFBcT4Zky!eEI zpEi{2ir;u;u8n5rdmVkSA1elqD2|&6+&Cvjl+UxM zr6_~LLWE>1%5y10vFC{?QZxNWY2ya#ibn;}3%W#_4&iMPlrehzruPGDi!-AQLRCMN z7xqxiRrio?$O)n?dG&?j%|Au@FZY$sp+s=??6RsKCpmKbyz-p%U(po zD(B#_mlL?Dv+@jL@@~`VzUHR!vi=lYZ@tD>_LLqdvuw~46&H#{l?nJGZ{*%}wsKG}y1dWd~S ztC7R5>UU9E6NUYZ8mvqic&+dj#2CrH|0wj`ou*PT^4TDt$F)TRR1hI&OtmY}&+)?!a?bhB6Ls@>I`z z1vLb#-_V=u$4)voj+WH*&)Jv->csDR-YyyE{hnKol3{?i^WBFOfqA`#{V(5{DKL_J zj|2d89=x?`+K@aDxQcJBF=uC|9ISkF&X8ndC|k1xK#=#LN(@;awoENw70$S} zhE~Tq5BmqYKf&YLa?lXbqHLe`=O-}X296;wq8o7ngb(BKkm&v@926FEc+8Kcf<*k2 zW9McjZ#@s`SSarDEMERDaU`P3#N)*cMFxow?Xjb2ycsHKNk z+8$CD>9}auZzRNQk0Aui+^x29iK~Y{h_ku78b=BoHu%(|1lQD7(tCEcPq);G=$)qB z@|ldl)mv+}om9)p>L7)jcoDMIeIdu@u{Ilf^aXR@Hh}SR*V;0?GUirIan3Q6z)z-! z{%Oi#Efk|J6_^!d9Y4j2S~@}+>c_*_L{x`kfep2(AfEpMc*?shEAzD$9f)_=hbHq^j95iz{gcNz1E02X zl8=AouXHzRgv&`!V?@@@wyzW)+?5;&aYBG0)_NQcmG zvIn#QQyMcQRdyBEqR<&@G;^VniP-DrGbyZgY9sxQ1vk9&gb@6zP0H`?C>sq*oq$7b z1k4{IV*0@23~CemQh=WwbDuvQmdAu9iT^%1|BfakN8yM)gXD6t)W!tks*4%pVMntWz;?}7nR zi_sv02=pv~b--K$Fr)lVKPqrH0l%FXSoPuW&RkVm{_lJEIP&ec<|-J+3RQYOKk|u#96o=UIj~Wi zmmF*Fy7EIIivbgX3Pgv~4%%kqt5QTp?VfpOtX?^)KN9CJ4q0~s1Kx&*j#r!85r?fo zHEKUi#h$LA|6?xDwt5-R!rl8jwh+K5hU=nbLA_A>7qYNKi@^}Cgjn+ALw#(lVUnY3 z+OqN^eFbCUPCK2r;oPsJntu{#O@m+Gg-Y_ext9W}dJ_Lbx_6?!TM~43PgPJuHFk}4 z_1H@IyM$9vXBO>xgwuB0P0$fO;NLV$S*R1fjU_i*HCFzJvl@(w!8u9H+oH?i6n|SWJx%4kO=R`{IL$!<*_0}>*f=t`p$?m;eO?&j4pwyl` ze!tNg$;o`T#G3Y>M*W`}taU{!X{_l!=4BakTbAgzTTyMe;UH(opqk!p$Wqe8=S0sT zqU?LARe#ohfY=?`cStSbh(E09zF%m+P~S$7k6P!|jQNJUGYY-9>~Y7mlZoQ*sOlXj z;bOoYoREW>_fV2N3uNTsIz8z_3YYKIy?=Sjz5)t$=Kc-hyL%3We0t!o4%MzS_M5tIK{Ex8{aW{+P&Ed z8y{8633!ewI%IF>G*H&_);+5%J?LTo$kT0b{83NtV07x1!kL$YTI=87$_G(uotTU~ zl^0b$_#LH9(<|-r2V9ud4ZZWL1T|i6Hy2^Fb)@=vhDy*El>Dc-^RJtGZa+~)Gv`Yo zwcvizd|jTr(i;BexM~fRa=yJwr*7cMT7W|JQfLQf2wj^u+Ge`zto!MJ*K^%7Xs3DM z)?+mfb<8AK{t=0(4K22&3AUvu|6^cYni)If${nQV#^mAeFi~N8phxTZp0Wb~tsqcOKVaM`{3}a)D z9pTJMa8$keIe4b2=U}_;+Zn#?(BOW@s{cYs7x-@zUGQ_RQ^8zdVv%=;Jtumkm722{ z;c+YYza>J}r}n1-AO@jM&o<|XyrT*BbE+2^-~Y$Zm4`F=!0`~0SQ2si8%+LSt^Stl#9^dEteopK|x%R|h z#T37HNxx(1#;n%gTc=8PzPg^CE{i_u{hUwu2Ao9{`?qOLxCU+3Atrb`ha2 z0S)SuCgl>N-iAX5noL6K+oguLkH-vVbcx9kY8O|^Kz*=$ZDab&J@9;yS-Jw-Tv#15 zAr{eGzsLgFefKPVZQZ(!^=fzy2OL+D) zlq2*%2LW0*t24mondLFOJcBfN5%_l2pj?!WAr=eHVr=DkfQZqRe-=kQT;=HRLQr!fV zoL{l9xf3kk&Cj=O2E}xOlI-HC4=lK6Ek~nVeQXPFb?1Pe9KF7bol*YxAz9wida#=d zJZljE?+P_w!3iEQ>%9MIP!S;ZFDx(S`T05OvN^#FRV<;7p&I$QpxnloiEgEgg?;2 zHwOAG`dQA%P~v!A5UPXwG?ERNjbkGwt02kEj^L}tHR2Hb#2;ia{-)84gX&q{vLXZ7x5G$wynIf)25|KP(A#-b1Ue_x}49BoVXVkkIJ|Jz{Ww`GC|6N3`_dC zgCYfdgxk1;hn7sS(G>64XZDBi9<{@daNJ#M$_dbZNI~05&XstEAo_if-#HldRwDkz zeKVS9-0W}O+H8Yu1!-WeV>v5^YnY zBC>0LqYo-JhI~b5TS$N0tT5CeP*%!#(q)2p2zp}+K)ItoKXifPuQ6r^dOaI*CFdKT zPIq_&{6RF7N{81eAwKx@JRbFIg#&+1%C8|a*mI*_O8{kpFhj$jDt>x}q`rd!wVZkI z@Q);6CBRZdWC?XF+da9dqeNt^8NMtt$mhfp(sD{?NmBZ~L;3sx`*krsw!uT! zwtO+q8WUGc7GwF0hZL09tG7hIp2_Ka%5YBydt!zn_>-+NRpm?p2 zU$$gm!x4%>H?=5t5o7#J^}j+9aI?xcjUvXshM$#BX`Y|GuxYdBW++1&(&Xy&s0`I^ zh;Xub4uAc&6dLc8|3_C6Z1-@(}zoNCXJwCu4x4e0B!aVxAe@@Fe|X7 zp?TQDRn#jbV+(sA%K6STD3zLGTKQF-;;!)VQM2OXl6=gv7HQY#cktVRRjluBGjB*c?eq5t_kVhba0pfP#b{xCJ{$6*nCW%jP@ae z596;U;@lJ{LwRL?m0!N!?ss$D2678@VsT}r!fPv`JZPZbVBn`_LZQ-a(m z1SU$wmXDu)tbeiQ)vd=56<%rYJZ)9xu{oy`GjbEV6m%0iBm>y`00z6n&h1Szi#OPC z(l7Us%9b?Cnyw;hr=vqP-|^9lf`86q#VdKIt#&y%l#I-oi22p$ujZjDZe$L2#&LBF zxHPpZZ%oB+Nd4JQhk4_+5!tzhI=p%r^|mrTicmWGAR0%_IH%+5ylU?wN_uO&g_PUC z<*TGaxhGcH%YHoX_5}Wx-xT#E$oYIkK4{5q=n)E{R5C#GZdsp=Y*QU|h}Q1qrv*r_ z7ow|29}Gc41)6PY1bki$zabcn8rUs1DW|S1s&{-`aoG&lzRRZzxzhQZDAsSZJzLh4 zIJYM+dTkIzLw zF(=e0+tBu*mbP~#$( zFOoMABQ7@*Em}~^Gi~3lF9N$!X@l!aOCI_MDfViP(vYAtbTl#DXk z$<$%E%r@#hCghNgMK9 z02bFSUSaHf**?kFZYjbauWx>hhG?+~J4Hd_G2Z*U#LUg9*4>gjFG%`p59nikOdXNn z5^{C7)C*&AREi(X_WDMeGY`Q7;=56>tR`@2<6ZxMe81&>_zqj>KmUPD3=sRdUs#mXyZI%#tYCK+pHG!;=x6l z=+b3q#aIq2J3l0bWHup4w|^n-#gXAcj%&2I4(SZmP|p>kj28;uc`@^5-gP)Q2k~j6 ztbz%?UJ8}l2dDkcY|y4h?zv^W=@a~k*39Z{GB{eYC+ys7ZaDa*^+ox!)% z+uHmizmR_FnPOTd&u{6Tx?%Lrqt0YFthNc+6P_NuZ6Cb&_e@LqJq{XDY>Zuv(87ym zw0KrHLp_~CpQ=8Xy+F#jUC=Gu%koXr?njDS(aJx$kfQa zVE(HOLQ8s{%VpEi$2AU2Ugb=BpQu`QUW%Fn34z=t*5=Ej-z6!WEy6xux1}kiI^3sW zwZizDeO1={GtqoIFze2z zCLwhqN_?gsw;n2H)EKuO$C!h2=QrZpl^kS1?q zDmb@$X$Q^BR9y74IJXm<$uV*5wRHZG}NoROOc!;s(&4pw-y9VZkaO>yO>J2 z5aN)Li2qXP^o7s@{xSA;y<2dA_IkB#a>!c$s)8m7<9 z_zd1?G_rM~OBzbWAK;D@Ka$+NIfz~C@|7f4Vr(xefd!uqQew&~{i(VAlw|`?tsfFd zrGRt7+m+1O>o)^0Y22-4ImshQ?HyQh4HxLVsgC=DYY-4iG}7OBU9{^9Qc8EgT8%zH zytzcxON;`9;W~UTQ0HIj(W`P=NWIWl6|!|u@Kd&?D*64J%$@-tDPY2^=h?XuI_2yB zY>UXopQLbd0?z0+7+8*Ct0HBjoZReQc2(R9n+^cHO5 zW@@0$&5d~*ioB66`t_vu%@ui808fRE;o$}%!YyMz%7(D z*n#j5yoF*8nEGDNw)x5qxTrs?H0lox?xqxd@L}hV_Kwr4N~IHgpg_S%gG`59;51vVcZKZ1=6%mybX^I%v1#_&mJ;Gi zG5+7b+E$spBW`Te>Gfcpb*hb22RT92M^B*<<}#H6`E<^?T!bj&gS_rMX5Ifhqb~l; zp7PRdFPZ+RBy2y159QCFwzL> zM;L6E2E>h2f;^ievb}LGP?J%=`M{z={1JMRZ@S<&0Coo1GzLs(#w)^H@pCQHQFL>` zr6gxW*$2ey2PwkTjVs<#oZ_=-j0D|ngPYZ@0@G_=CVh+7iHla<;nd7k0qKPVol%?R zJmC)*1G`Hi)<X;H|v>lTP_P?#^paLqGi;+ck#X7Qo68WGVjsgrep~oMA@5XC{=rW$ov4pJsDHtqSO|yu1a^y&NS4Kr@xVnlzX7xo=_fd; za0K@abAz)Lu&8pu_UTh;n&h#^SXt1%92Dj_q-%di21a_TNcht1_ul@2T^uwbZLniQ zHn-hT?XRl&VQ{Enx_`J#YptNcM$Si%w2o)4P96gNlWRMH$~j`F zn&F~%%~I&5A9cp}wK2b`c4(HMCM#A#NQ=KCyIWOdL5hwpx#Xi0LBQ&r(DlvHglITv zS^)GDca~t#!URq&(yGOfN`?U$J}NfcA%wJIjmM;O~FF zRS<-)o6+nw{aIv$$Ldr5IWMwAeHV+jUxTV+-8_Meorn6fq}S9+(1(WU)ZP8+q+}Jd zm<{C0boNocL}GD~GRY1lbqH-aShUf)P5xw0vVlRnvx~PyfDv>3G##sX)*X*{#vKWl zlz(afY5g3U8WI8li& z^rwXd(O`WFKL7;pgsNJ902UX&4=G;fVqBBw)By~D@Re1N2ywAX*AE+AyM?`=>$|Ip zWdgBt`W8Q#pr;Vkq8R2K*{?dt7mmOtuaspx<7x5jwj@6ew;XRXfasA*oSAY_uBr{I z`D5g4kG{JKRwpdn&?PW|@J~FZTlm!n@$WOhWyK~%%A5L;j*To63DB*&OCZx0VgDH6 zytZDD%1i9dFfe7KM&7xqJEu*DD9-rE*f#O8RbvANaRrRsThb-FJG4vyE?dwNe@wP1 zPu9%YH0F5h+G!-}=!<-@zDNIM6}V-#=uLR@&n6~7m@42?f7;ky)GA(@?kY!LTyMlT ze{SNP8*_4606kY}^f~+#CGe)d-Rl-ZuXH zA8QQn*qx8YOY4s)a}eE__6+kE5j7()$GI)@EWf8FpF=dIX7oHQR-UuL?rHE~$VK(! zJy+=;Cs4aidv~}n+J@AIXG$?>b?SzkMUFYCY{Ug<)680a;xYO~t2F#5K@$k__+ABRVLRX8{bUMH(a%uQ#SmHFK?h9pHW$dIU1eM)?}CrDq%63f)lCIWpwR~QN+@bh8Y zM8#m9zwP3$!S6@7lVALDR!H6k%DhiEE10*>xN%1-&>WuXCWn!HE92cAFkPWl6PTd= z%lY`mSFo1}FXKC zZv2l`=0^_qmQ<B3*K4|g-p6lw@oExnc8-_HHV?zjG}$uK!I?Dr#knH zO^D-C3$EfhDGOtrc*W;_R}QST-@_8>9pI$qb1k-^+!{0^C-PN6&=aVDJ%!rfIo^zf zzp#%Xvm(BWvFI32OjvYf^Ib>`?XV>_kL1PVHCRT{h=W|Mm}0v(g_Kt_#0oKO)Nd^> z&q>GC)Z%viHB#;LD+QZV5@~M0D6ysdJlgPJM`OpS@o*8oiu|2RDVK)o`Ek-5jqN1t z!w*O1cC=FD$(A?R!dQmPSwTY%YLt{N@fg|Nj6{(7Qx-;32?+Q`h*~vlq#YL$sQy(f##jbP+$2}ziJne)-L1w<2G%(rr@*Z<3Mnf9eV!?jY^;6St--u@;p9s>Rud} zTXc49;yQ7v$yga%<_(Oqe$}q^1y!b4zuO*-YcJzvD_fD{FcYHn(SP=7G{I$m_ zFD#uKRClOMZ(je1ALlcOW;ZpL+$yv=540~{|4B0lpvR@BIi_2V>-4x3hA3UiDSD$u zXjI^qguM!2rJW!374-bf#W*a)jbmDl@u?>2{OQ{%-Sf3pAxF2D_U-U|zc01UC|D|_ zIbykbq!`X^k$Xabxi4K^X6ur-+it4%a-vNX-&|eTx}J!tlc6dC`9ki^t%a;OyS9$h z_J{^oSIPUobP}Z4IEB(vq$uC)^UaYxq|C3^7erw9MgGm6Hn+~ElihxyqQRW%F_2%q zaTpDIyB*Q7#Ot1s{=AecdUTboZeA||*@T|-MyHtQ9keiv67nxpFn1StFb_h?x=)H8OL*v5gCV*D zsr9B0ORtGp&j`Xg!@iRX-q2}@lMf{81NN?h{x;p-c^u@|hADa`<;~K^E)!$ix5|>^ z60EUjjXa3Te}XYaRWj_+yVBTJFm>-m==9NN==^B*Hb7;9V+(r9clShjd?OtYI~b+b zVcB}~k3*3Yl}eDgvHnH66TmAJz27>(g@}l9-5KN^-l7c#`G|IM(zlE6lh3{qX@xcI zU7}qZ%}_I)D^#I@2V9^%7Sf?RuR-YU*1qaD0V+m^!CeISuN5|r1LO*=KLd9a^KJ=D zQ{cv3$CL-j!d6_{KM0s-Q%{riAu|)*((J!EwO*>x4$v}{Q$-;U`wu=1(3D?(_;G3EI;&vPxOKzRzLT zfT$Z>1#!g@ocA?PvCLoB=SJrz$bKs75!yb%A~@=ebBHGF%{>a76b_!O;01jdE-b8> z3e1KorfP?L(D!Di-*Oyh=X@xUn`q5_^2XYZSP=UbdZjeB4HHN{OTi}&j#q#D*(RfQ ze2orWzfM_`JwIl@AGk&XJ^a2(Nj@`+S=FF7h6g zW_!M+$YEP_8X?+)O%^f@@;Cgn!k5J%KHs1NF0j=UZ*+-OL+xs>I9QDqkK=G-54JFK z0LGH)(x=nXY{H9&XeSIw=i-6WMCi`TU+MKJ+vW~EyAXM@wMXMXP&wq3f&PuYKy4HW zN-gHbD7-wk@e!^?Y!=qk`Lt^y5Z%3xVudv2O2W!B(B$DKam0sO;prIw#av2{ zhHiapL^VZ=F+!H>SjhrKiF)=T?tZm@b8k!ld13EgGCPg?Xj^)uMbN~3an%|AY<>kCX5ZSz5IxkG8@y4!9+)X1nRJ#B*a77Z}wk8_`Y{3TqwhRp6aAXU|ef`R@rBHce=hPTN;480{nf;?230>Tk zC+8dlZ;K`pBSj1KH`$bsALGUYey{K-l=APw6OZ{t{MwoEhH5Ecg+K5&2;|i293wN3!=y1yLgP43RH7@Lv1+;DN7u?+vu@tL$(ktjsp=C;KUDX zeBMt(xp^?aAAZrc-53$|jh`ypD_25#Gyj9@`8C2GsL(tkUXrU_4(AJPYf{wNJnILC zJ$v5Lzm@O-6-cTf;J%zvWfMKTv0GM;rr&Z}VSFCry#0s6( zC-VX_hrmN~5_cXGUkHZs{^RRmQ4I*;Y4ljs+2P80#%i?qOp#D5JirE6UWyHcD1SgH zxt&OrzgiOYa)@QhEU-(K3UUefz9OVC8^n_H3_H=Wa6r*ga58rw|A9EQf@SSn#l7F$ zK00jVp|R%~9%O3XiK>6f7Z%zk1HWOtu$hbB^{dXNHy24T zHoq}1+V!uWX0eq=U}lv?K#w7XRJkzDE6YSu74jdQF|LjwKI8#-lIO(2ezy;p`^+@+kAIBD9p!C z8eI0xY4yQ#j@cvTX8;wTuc+JZ7x3OGqU0mjDMdhTA-Sx(Gzw&&IuP&+5~Y1VkQ$#- zu3B2nl|Z)5n(a9psugb7>RQ7`zHXYgioep`H1sasWO?nmpiTlHmh-?S z})H7dcbw)CnFbE$7Z^XJ2 z*_L4=IBV`G;vX$&(+Y~z_>6FPGBjms&tL(XZw0qy1{=0vc&eEprL=9D;Mkg4U&H!2 zA`=(a%Q+Lqc(<>62D9h>TDOo(ZX^3a>!w$6idf)K-epOA%dTL|NnVb1X|fI(KvQxV4}dDS1<0%JWPZU(xJBHYPVugiA_8`775reNfQMrh!5Jzt_W1m)u^%zYy1XLTe8px^Dba0{fAp6TuAtR9-=y2L8Vgj@R#O$(v2EH z)Wvmw@}DE?1$GykOkj_!m;-g_f+V(YnTMv09Tb<^J+H}L{(Jv<@m}V(Q;`T_t!!T7 z)O})E|4`gLD3vs!9i#4#%{y&>J+7=TEsE8ysRmFq>ctlml-@?xKN$@>o(k?guQB#8 z85dB_;_r-gFLl(e>P42qZqpw6h2JI$H{0;yly=>8HT+i-rwhvyPabG9!{1Du@ygqG z#bf_2?k5FdILRJ+lUC+1Fj9pas^FD2b zFFO#Faz8x{ocier-B(7KlJsz`gZ)<|WHo(y;v(pA4R39H@a-ck3Q_XHk2v|Yb6&~>#%dq>OCqQocb_f_3B>nC(;kV$Q(uy~SggX5uQ-h8 zubW}oz?#9AcM7csf|i2;2i>8k*&`_p>F{1jsk(l@{jwfmf#=umCXNQQ8pxzMW!rc; zMcCFq7THNq@_sMEe!D8&8;fO@@yolR{-KsP2I=nE3J}=40!uQ8Jf-S9xSoULx!V{Q zwa({I+~K>Th@0Vz?~r@+T!KyAzwX3`IhLe|6>kg8jq%ZbTJmK1eTXHx6EWJz0=&B( zPGkoce+L-ej0d?*^AcRUh+XzXcY|r8kb9JvNTbEaUT((d&eu{s*nDqx<56=NZ~Z>w zU90{;Bxy(~$N4tUAEMmoDN}N(aAPiu)I?cN<^{!L0;?^TH1bS^`(=Vmpo8(ve~@x-q%*D)>XkHL=cGf{lh()21=sRR~8kc zY>EA7-{OMaHiZNtavWCgXBC0P2v3^l?w8xD^P^&`co}X;wT_mcoO4MB_Ivjgth%-t z#a3U5mLqA3NWKm;m`fD;4e9}8v_~9;Nao*#jOMbSO^-d}_smqgjfNW)80Y<5g9W=? zuN?N!5yo%S;z-tIU7#y7F?1V-?mTE;h*hku`82p{LLXh7T+(I$y&VZK(*Awf*_@}* zj;DuS4+tx~%d4j&P=AJyrh;WRD9X?iv)b1un?q#qzMhM$q@tsmtF~+7AXX=aA z7!MLxV9K3#qLPVj`np6FO#pFl)?z*&cDNE$rnI>tAQ_EYgRnKf>~V+gTpk?f{gGQC z24OtWN%|MlW7Qi(SD1BbOZDJjPbbWrX46iP%E>K^>mhDJzh_Z%tpeGaLM>8$iTKwmw1zYb}N_q636ymp%n-SKB0fB!g` z7vAD@`RHEe{3=(fL2GWA&V4Zf!~dYoiIH26n`cVC781^E{Mirfsg$ej(Vnd*=QTZ` zCOPf{(d`?Z^IE1Nk@b3ff0_!2TnOCvUa}grr4=OLYAAA##6B*XAk5#}AcL(uQOkiC zCQfB{&yqKf%v_LYE|}%lbQ(iiTeY`xU7M8jdQaqQdSP#_2qVnTa)y#pqoth|is^mj zMJp)}CyA;2>4>wBf-++xQw!`jg7L9HtA-x$3O?^&?t81)0cT!zfyRvEia!0Z>>;gr zb~De8*_x9J2VNf$;4S436^PuKuA_f{oEmMZT`$NyH}&*;LQwsggl+4#+?De8^-mNX z#dark8RxMa9zN)m(l>&nJ!hhctePU`p=f#ULD*U1eb?j@((2&EA77mi>jd`-WQmaj z`$_#pAIp(qd{E+_NPu2Fw~kn6Z>G<*WMxWx^&MA)NQr=CKoRL`y2?~6uM` z@ce-~@m2R!JA5jzaqT!b0ygU_wau06ZV2tr>1h6C6fUqQ5-kpHWMh2Zo_Zdoc}H(+ zJ8?;aT~wE0D$;GNhXr=xzn zpn~6S+`fz=QYOi}P|U}bVz4`rBfy`nkG2wUk>sx;6pZ;Tm?P^1Ac{q)waONM9ey^c zve^w28duL=p(|{+9jM?3jBO)uRYd|jg)3@fEL86CLNQSmduS1ne+`$fz$;w23;jF4p28 zEp4~Z=0Aim`)p4Gd+2s`kzA{|Q1M*a#v9@`_0D6-69iJLA+l3Z%%_E^cF1w+Su3{C z;I_!S?hn5#>|+_z#2L2B}3y~d4k4||MsUC{7Y^nR+p#5VREU>AM&9X3ZI6qkqeW;jJutJ5E#4?Haypv*D{O zp+1SJMS9Ww8tw!G%^ba~Cvk$~_3r@N^#hnkVn0OfwhIH@r)2iA)vXEns_pU%2OOAq zE90HtoheMYWpszCW+RC7irtoH>OzAx>tcfifd0Ao9YNQcu9p?|^0xPjj81Y`tr+vk z+i4T8pnEsTpxQBT50t@6#)!%ph^cOd5^&ttKf(KDdOskr(&aX_6hVwyCjCsT2F+k^ zqqROrPE&qCj1(#I^ArNhtdJ)^3r@cWc=jI${4*6yOCH{7^DVV=h*fU9Ya4<|+RP6f zu@Z7Aw0$^ja-80iT}0`K6V*1H^}t%=ZyeT6N*||U+Y4H(n#jJg_UX(~$_|_jN28#& z0^9_*vF>+32xj4ak5$WDcZ!TBSCHi*_hDq}2bpKo8_y1n8y^5yM{X=4zI>I2g}~|W zL`e~Dq*OcWbql|vo)Ddne+X#RKZJwh55{mhA4RuTyFt!E{V=!b7EJ#bL*{d}l!)4k zIpaLNDFzH|O3ZfBIpQXZ5$(B*tVdHq*85W725WYl@K`V>k(|WU+B;@D^5~FD#5m;X zvIn$#JuUdwriHLkzuz6t3QwuBHh5#x&a*X;%YQzNI2hfrsGhjgK)L9}8T*0VXZA?G z!EVljEO1BvPA3v0$^FXDkO=JW+eRaWVnn_{9|6bAK;moEy=(qsIsW6KN)(3^$k?So z`xCb5gG57`o%Zpkh-Uk4G2~3!t`0pVguUrw&)NnlV%PR5@iYmza950w zDZAqhn!GBG)G#+*-tc2NY1q8(JZ@={UpHA{5bnK6=2T_s78yUo~)~Sz7i`w?5G}S ziPc`%1Farl&xaedsInXxXWk{eQci|4inq&_LT1)`Vx|s1Mg4RYVJy-P2Ao9Z2J!0g z!oA%;vf@v^`{^y-?Q~l_sz{~<*Xu~+zNq#i+iqx9!vppypEioA*N|d2!cfq_b(OB7 zv)D!nj-%#VOtZCY!VRQ^INtO{i)`rBgbpo|Sxb^?4g>@9cNAeiU+ZqKh+69($%A_g zhE631bzv6xZz`d!YAPE5tS&%{hLCvepn#+ClKgcO(7-CSjba?%Se#rWKF*x|qS}iK zG^@V9p9+-_?5Ehb+0XCT(oGH{6N}}F8Z&wVY8Q8bsF8d=Vu3s}aD3b>8T9<2AKfPa z;=0s;=9?=s7A1Dz4)Fk?Dkg@o-`XW5$VP$VQj@lHSaorIJl*0*XhALRutOcvi{~<` za|Ato<_j8SZ3_@P41+(-Op{vB6aBnz7`RsUSZ6ND>QIo~QQ>(>$KbT~d9+{px=^|BHaw8%pic;3)J8*O{Yv#u#+Vh$6RJ^xBP3?_25%17`~nW|nJuvex2Imww{BT zmG7Phw|A2%c`H0>BfH;X0yKD!Ql&*P&)QQv0)%(o=eYyDBloP>BJBSw4+B@Vdk_`c zMw<0C=x2cfW$6M*+o`OrfW$bZr49LT`2Fo0cIA0=k5At5Gd+lct>5ThYh3}=?}Yt1 zEQ&&u64zzaolk_~3UaG1`vkAS$BoTZdp2z9t7a(~kEcn?Z}zB64=D!wwqAGFx5jj&0y`VZdnRpInO$}2|s?iH#MOaqxni)g8z@77w6u=k^e z1uJM;|cuZ#{tUVylys5(C7D+-01f3WE$4$L357d*Xg%u_3%$Z*MJ( z1+Er4K*kko561cq96iTLRgj;3fu2@l`-da*GFo+?Nmd#Ws3JKww*Jbd|NLtoNCp;b zX}vezDGNjKq=$h}q+5i88a)|(EkmxP_7o(yBXWc9ZdTl(M9B3?zHx19ap;ZRRg=p= z3s(>^8{!+y&e7O^*tE@d2;M#5J1sQsZ|e+d=EY+C6}BG<6DC&vd7kz`ZAGn2jbEN^ z|Ktr?)PyvDa{i%vz|HM(o$+>`Yw?_)^!8WIk6cN+v2c8~i)z*ZR|C6oF$eUX(CZcUiH|$Gg5YG{w<4l0G58}0!c6%eV%F>1P;_y*WO_Fd0(GxIuAH8Y?$Y&)LFq7Ga!=w-LON>u1Cd*;ArHFBSvN4>`vtTy!& z+>g(n7U&j?8y~$UoQwqjRvTNqfjWQWNe)r*Y>g?k*PAX{Q6au5&f(0T_T|vHfBM+F zW*V7}0>&PZ*Ap`F;LN|=8WR3G*yt-c^Yx$=Ihvs!!cFfWZ!Unjqq^<9 z5x@(xx*xZJ;N2OuVtt_Lz;A_Dv$>Zl;?%@0BwE;Y=nvLo&M0`axc3y=C3F~+7>8lD z_&x%K>X;ZSadjP0@gYnKtMC1clsuZbI|6ebIf`yo2m&8G^c}N9?vNWlJZ~~+!5R_@ z*)63QJF5`FJ)z0Z;&fA|F;-(0hV&*U#eP(VZqVd0?Q9o!w=uEqCdT(W^Kq-BC!}$3 zPPBEOO`Z*rq`LR(PrEz2_uc;66}q=JW8i{Sh+`Ac=xpD=re38v^M?7ezS8t)$G5%4 z-?o}86IJYKq8S?a%$lVGMliY^oiE5Tiv%m0iD`O5J1XN8gN9O+3i#7bXstBrCANbFCp z$VHWeiH`eQur$Jcv4#|v^Wts1v?H}0blclB3U>N15{MIq*@Sjvcb+NG|JxxG%`eaY zCHCDp!g0MXtEWTDPzToFB5goiZJzWv4NdAWq-svf-`p5fW44p zxYnBsjQE|0w4T1(;VX;3HHb^8I43mE1db(tDeHb-FNU3Ec^JRy7oy(a&;xVTjnubWzUF*}<+pX(n8y6IH# zACZR-{Ry-0<+M1v?lIn8#{ZHOVEF5Lx3N6&w#_?R0@!GflC7kR(>eDHgYiBqqqJ1nx- z*GDN#1i|H8>PymBIr~V*r+(2bX3)EJ4)S$*HCSJz-n~VuqwhZgZh&6pvEz`OAm;}c z8^ZJfGCd@acNVQesHII=#TsO`EMCB@@cNXZ-zNqW$K~OYIO`7;IphAz!E#mr*kMX! z0X-S%Dp0`*%Bk#kUxK?ydeMC#Hq$J2USG2A5jxI5%6tuW9VgBt8V1HIsfbm z!7aTX=st^wmup2+f2?y+0GW2q1@PVVaX^-g?{SrKK(G2p7GAL4H4M{Ma0zvJFfw}g zfQUWb`xA6~rKIL!2&M_j3;p1Kw$+Aj3=v|ZTF~l7JV$6u4V7%*c6uYT$hv_hAWgQA z18BjC7@(S}WKZt%?`OQ1Mss+LP3n%#H9UQJYzL6r1ufa5{({(LKMR*RFZj(O1q;EJ zJ_m)3&oi9FPfj@^pLhAkUUt)t3x_RrWH9Z60g?ZjZGkDuIaMbwy=>yvD@Zu4v!v2d zMz0U?(;wA1yW=*?m{N!}V{2I?tU(h}v=UUtGf#P-+-sHp;-wLTN zpQkB*ZX)1(HRVv+eO9AMQ?`8snVJjuqhb`hR@KOPvXZj#4y@r_7i?5hI4eH;_o70> z$B=CVE7j3Mhrk{u-VSlmMnJzA?ZY0Ou<>*+(JEv}-XqUwWpW@)lJ}_UxQlykAz|6;*+9$EB8^b9*S#(DhhukP|G`HA10Eh#6bCpbQm| zJ-SA<2&ufcE@$;A;9UVCvZfkp7-AO#e`fzSBS~%Grl6Js=|8V1N%VW&Dn>M9;uq`A zuic=<%SY8@zj3ZkfcBvV>J??XqfoYvSH@`<07OtY$((+eNW1oZAaGG@hF)M-Fu3S; z|M7lu)1ufj_^@vOovQF_K?BdxZNx~!o- z;XvA%{(vHS@rp^F+Qzz{-R{V_LKA_T9S}N>CsDu4=b@LfqGq(n{i?k9yb71=(7?{@ zUb5HZ>yJvgcgpSC_-E&30eHpg?8FcsGOPO0{Y^YC(EB}fYatCqTJues>la&~rhR2Y z#M(*i&{H_>&((8Bo!Dpzoz#xppi_h0Kb6R&yb!G}W`{Hcsccy-Z~6F$`knD#UUKh= zTY&I|ylKPqrq_fDAJnKFT}X4RZ?!WemEXF)U=H)vT6tziD+z(G17%|kg8M1>k`nt) z*yQ1jofpseLpW7WegLL)N~9K!ErcL>6=#Ty!k}U7>~%n{Uz?FQMvPvz>rH>1Np0QO zcyXrQPBD!7$exraGL;qy{u3yJRVb4Qa*dSh;22NgL&fg2++XKIKEWzlM%cLr$8`@Q zw|&THvGi#0+sk2MLN&~@f`xP-Mo23rVlta~YoJalBy2OKqpZoI3uZpcNv{dLPO;a5 z)6z2RnH1)M&(JKj$*kV`i7f2iplm3*PHwqBFg+v$!MA%XSD>piIfWbe?9bQUHm6nA z3Nc+%Avr!*+qnQaPWz&an1dH{lRWiL^b;!bG&?RC3tO?yYBu|IIOe)`2i*Cd+@W<7 z)HPpNMPw?cH|L8ocJ1=*@4_kWOxTaU0>uB8x={^|{>LYixS4tB-NrBX7BGWJ=?_H-d^Bj>Z8s>nT|dU zoTH_$o4?Spkp7Jm$p}b^cLurD)YNNT^J&=$tQ1x{kvB{42|N-YAE{6MjtC-Y!wNdw z@mrhC8HAe5dGJKt+2HI>7}1lj|B|uB&Di}m)#zjYen4CE4S0>D{I}&d(2iSluEB<} znZse77g~WgN-UWR9nAh2m5+Ge^d_d~Z5zA0>xEcSH)rxOo!mPv*6s$M=bl%o!xGDb zc0g?VXL0|@emhO=hQCm#MnzyePnc=)#IDH46MF>(SqdSS;9)Zo#Jj4Uf4gsqu25YL z>-$e7ZsTeO>;3yqn8v(_?h1nc-r-L3UzzO5Co5>$ZVvyRw9cKAOJm*9v1GmRPX3ew z!@5ga%}qXjWg=6?k@epQ>U(ci(#fHaEH((KeB-r7x!2`sU5D>_$9@===>9Uh80fu1 zrp2i$3l5#fqcUe2CtceCE>`;r#AOe~(vioU(cpk%vq&Rw*|c;HRE?vk-YfL-T#(UHpv&n=7e0doPZlqcpx!g2bS8xbb13{c!8Y! z&*hj2dMX6g!cK~a0bvH_dn{J=^E}w1F=6tqf9C+eQ&_-$pjk1q| z>G)rP@sE0=3#Iick8o8pw>INYmr&hsTs{4t)#MbT71ER^L&+n9$8*b6!&W#V1c(@z z9{QqyYMlnKE=Ptul}&yjUOCGt|6d2+NuAS+ls|dk=g`K|C`nHZ2{1DP7{C~|(VV(byk-3SyZFn9Q%B1N)JBEZw`z34rGj^MWz z&n5R0jkO+~Br(Q0Hh)hq(J#2sjzsi%PVxh~=bn|xY-^1n{{r5ENa@4n(g!&|xg}2) z#t%D|tgQQdcUKJ=kH7#gRpUAyoY^1~Mqf&flJFw(;+>LPA-^FW$KS$I>hUziJsv&IUG0g~0ilr+V`)P$9Q*Hz)Yeg`$Y-3Q0;e4vZ}Cl>Zqz6GtZ8 zKaM9Mgi2Dmex)d9k(=#XDwR+q30o41n0sSvNkWPw4gL#Rg{-VqUbq;y@d)}Rz@?HQ7lu1w112dxMqI=-)( zoGx#BOy~&g8!8@<6~YevPbvI?$UTzixH&BlPz#s8?B?*Y02xy!;0ldpYOZ!xh}rC**R=cf`dn z4QOOuT2|jaXAP)4jCQ!0RtcJ#Qbs;9JdiM~av;>s<)C6mR9{?tz`7Ps?Qg+ts+?1H zGo&#A2b3@Au$j>pAe7y;>LmIqbW^m6S8sIdm{Zm5r|xlX1Jp76lX)b^0E{4^-|b3( zxsb!clDYo`R3Y(03eCH3zvKnb-#9PF3I#NkzDHb$iBi>I%JS-3U8vQM)mA3T=9iq0ls5$b+`b$@M8qBwnTsYH!rZTlOgY zL~o>g9w*(1Ib3MtoR~dObcK=s?Fk8?#y-02DI4Q5sJ5B%M;j=Dc9}fyS5=-U4!CvN z_ESXLrugm?m&W&5WdWiksvXShLpDAg$59>%=@#7+<>wqKRL<)O6Ve&*ZI8p~E~sM;y#|Mq{w-j8dL&4JjRTM63GbNuU3|Cst4>3Hzz)B zk_bP@AXcI#;5(@y`Yzp_rV3xE;Wi-xs?zl1&NUsv+o!oMEP(U+Gk#V5YsHl(2aPPo zh0VHykFsA4aMz!kc2rZ(%4b?}dKOUB3`gLA>PBLX8}}5A-t@s6D8eZoJ+M%wC=i&R zZLZ7GTHpSKoV-9C_;T9FVoxdY0&L^Qdd5c)=Yhdigly75FsgM(PDt z1%8aQ=p-}G4S8{x^78wg%IN~{3IEZ*u{Nb&mEh0yL|9=Db@zd*@0Bo?tN*0y+b)Tj zJgiS5v2+^#hjJ3?f#-);%WeaCbM;9snIHi_QSs;KCR6?>=bppV=)@L#Zn}%h6vAL-ZRfB_BCAHYQy<9 zE`}Nk9rGOBln$ML#cuBLmG0iV|J-=}O~jCaDG*F9!Z_6dqf_3_CQVeX9uDKGEn>AYr=O~!{Yl*D0Ow46z`V2&l{|0m{U@zaYwn7hwm1an@R(slmp`iYH~kjkK+;yV?Iri+4{PZ(3#rf zs_XZq5yW@K3x|4QRxUAkZJzj#E*)riGuF{tlZX2kLJQzNilZ(%YChKB>S!esTLlru zi&3_$w2iVV8$7;6ZUHUHDKJ;%I9fBQ4umsR&P#TaC)sIko~-sJ;n#2|&ef%17Nj zYwZqA`}?oI$Sk1ydmpQS?DdRN5%MMm_e+r1@3he@?HF_r>NzS?U$}H}ZZ%V-HS>Ay zuKitiF!_rl(}XrH_f1KkA_4sEZU&QUN;_(?z9*bB1(k^_pN1djN4ZPQO=qB6K%iAI z#I>~lF`(lrrp$zKergz8$Y1tjLJu?VUa^Q$(u@d07Uf7fDWB@ftW^{3pJE(0953p@ zIn`S$mtn$vFvxMy`JV*FQBJ|8T%1U`^U=n{N+66_O;P&bbMU8x`KK}sB z9jZn>h-ERB9z%yI_WxqV{CP)xl|2y!uwOoDm@lS}_H^nA`Lw5C{rS}6#k!>M_D3e2 zaM$ruP36oONIgPkh8(p-TQBKaYEG4k_RLjHq#1Lg_{IBWK8}_moqxNP%}v+^$Kvc|tA! zZqBDBD0Tj4nEodQqY=lR$?k)vE?^zIry@??X%q`gqQAO(eEG_=e^#5zm&m7L9F7?P zzv~!AWZF}ilheXdPK!bmFwT4->eBsqN^ipn|Aa?9R8`}&cUbRHTbk^p0WBfQ=HNzj zRwDj)3a`dvi(sU8{Kx50wk1_|;#8bTd`ZWS&vKlLoqhj1PEJT^k}la}PPHcSfBkKH z_f-=bTiGt&T48oDZO&;19v5eTw&tZvZd4Z6%@8gw+aTQdbU%R@Tm7f;ABMG)>N?h? z&P$C`fcAeKNG2gA0{mw?l8qb9L9plY-O|LhATbcyMWV7AUUE~*4DhB-t)n_y8bFQMc?a+wwa#M$*SxJ}Fq6dS3F@T(-L8yN-$pE5G*mnP={%1rnrUGUe#9 z14CxleGH2ONwcNGe!G{4=wP_En1Mv-q^EpXa7yTtaT2?7Gdg>wL3J$#sbqH8Hf5)%7j2CG;hQvYb>W|UD3@N*X zJngrPW15j&pRX7*s@$FTK((x z{L8CZf%n%6xuXYH7i_ZXL>obCPqsk?+MTkDWRj4jgn*P+{yU4(mTPn8JbEppN{rCh z?P}+H)7Q3+&fO{M+q}dz&7wKkqqiR2jXx)<1B_8?{fQhc+lo-r_KHHaGO zF1rVT-svaS8(i=1R>kG&DN0CQb6QExf(*|$WI^?t2d2lw>KAT1`T^kHTLe94VZ>$d z*&$R4@l74F&qUe+oG(AnvnI_5!~dFC^qo0Q7$hRq3IN}A5dQ%kQD0)bU&m@Q ze93fwiBHnkiEa?TC%{{K+77LGtKV=$r%KasF5fT;A6)l=-^WnkcvcV7s!je%W1q=+ zw%7-qC9Yypq@T;DkjU9=e6)7K# z;~Y)@j^-~iT5UFh4gl@!kuYtw?MR!_fXIKgSoOEz$1v-;UFay)hl_hE=#`CYi9; z^Tj5Ir-Qp3$tIhLvr<7A0mWKp6~r?U;P%y+2GaL4KEY{WmixgOlBJ*68_QILUVXHZ zUY%IDjdfFMx1oS9D^oJJ72q@f*lz05fE%T&`asjeWou{RUz3&0_YD*N%O;@&A*$093ZV2F_x+#F(}9IL^G< zZ#hQCr3*=;v^3L+zcCPHpLOeU8i4#f7>!HHv&RP0I zy>oJ$jdA*I(+GFX@S{5obCfE1;lwvPvSjZRO_D^#s?cV|x*qE);p72eiDuiL*NiB} zx*(VIGwURM=3&JRP))c9W^T^P?{b+#EZziuIHEe#lo$E?Bg@YFQHxemeHLt~=o+=P zerNUEc%dz~VZ><-N7=)Veh#x>p8JR82*K8Rn3bPH?7^|1Z6dewW#Ikp_bTw96l^HA zSaP2GvG6?tUTnl{^3tcROJ>KJn?i-SFVdB`J9e`Mw5~*@^69&GS17_|^&GBEk%xY2|EAkz7{a8*VXCl=#mMd>0*pD}hZiKY*CQYSiklF|29(^y^ zDj9e#%_Qqm7abjieiCX5t1IHS0<{HbX?6^IW z20w)>ID_2@*?97%W2?c<*@AJuP?mRzFCDLcp_{w9fbecWTnWu;bZ&Bpndx-ka_R9>!sFy z9P!U``PEg3$!OFvI-1+dI1zF(vyxQ*46Ho-**3{c_H?&?mFU-EluDWrR9#W-> ztPMWCt0ir6blh$9j9Ep1J=HBn1UKyT{S>Qp$|NZ>dzi%kLv+>bkYR+~M&gf>&7Nqk>E2Xi5ep?7@tQeF>553P-~Y3pyf(+-CBK35G&NT%1y&YBr!NMcMXj&P=M;xu#Olo+rgZzIg8 z4EBXNRdjZ}*2hRX2RVIU6}&4PBBr{p7;DLO4%_7cIG^mRaMAjHulco4d<+VS?(t3I zXHSR!nz5X_uCWno4sev6uCVjo65TcWIplkIy~@&vDw8<=nplArA+)z^n(sAse3t6> z5ZF!AlF?aR9d!g}dE-PDrimAKf>m0jO~P$K^*Q!0$CQfMWO2q~8>WyGDggX&Q;PM^ zTuSWqgT;*K2vw}YUeV?i*6Ro5acQ*;tFgsn?bteZ;K)wCTXUm6**^G=|P#lT);L&zG+)Y*eLp?YVh=ri?xQ z*}ePqW~b*zyGm_)*j=6>6Fn5*+)h6{KUE>+q}n=M{UV3DSs0IK2iQSG4b|XCmYvtD zHcaNS-uj)JN?Rccaj%J&121mD)5{lb{wCj$&l$eIHT1ZP!Ci04FgLYcEN_dNaQTmi z>Nx;;U~qlTbtL~w`~nXHzsdVimBfmMo{0Y(QkF@TPL+#R7-u9=-|biL8L0m3zpA8U zPnBDt>a&7Co){)1K5u0>wI_~##_mlWWF5fL zE*=rEF&;kN^y0HzIQtRmuXWJ}pEp^gheC+5yYR|1BcrF~_9uj)UcmZ>Kh?~)+INVD zxkFwm=pI**!Je!TD&g(^p$X)8=KPWJO5jhHovo8jRIqyvvPV1}$kx5)7sj6{peIZy zm;U-Bvi40Gy%VT*$Fv1mx2YD23oOc_L5;Qk0zil>yu0i5)Rww?Zyjc?#8b{Um>CKc zSC<&9K?VZM3cg|o8>jMHh_(zh)3^fOIp3L(ij~p3JCN{D725kp;_=Ud(yN$c6Q5wi{Iv8ul}&+*LN$Go?{8;Q;NHT5K8+n26Va+}_T zKh>|j^snq;qjdn;S(xJrgC{qkRcOK@cC!{2u?GRZR&YNrhIF&;Lubz|_@5iriM{5W z(s_riT;pk3M0m=K=}__)vwK)955K8{Xo=VUN4)8jxjq&p^3OvD&0QS-to^+=cy3q? zqucdBY3674`VsP$kJ@a(l$Yc6?!GJ3lXZZD#0Rd1>(8POY>iLsL|Xj)B~=%`KF>1u zNojH?T+^bCi~`~RO*tt=VR4L1s_!N2Mh9|H?=be}P4yurl3&;WkwwSqs3|c7*A>wg zJ>rppHK9nG1Zf?TZsk{DOy}k~beask2T;=M)&Ne8IA7UNGaWl`2h6-2aE5jtHXm3?ogN8I3ubkkQmof^^?8vQ;9bY#yNB@-d8 z>N}}Vs5Xy6adDw14koVGNoDUs0;G1ABLsAsLu*OI)Vsd7@6POARo&HZN+g=?ogE@5^ZBwwyJp`<3cc5x%i z1n0K(OMYOP1s=#J?yY?Q`Juw4J*)&o1vXT2@;wvN#s5y_Y)m>^SFs z%H0<1RNMO<-`{#6t9BMR|CjtH+8ZTSA?3G{V+W{Nqu1K2pS*Sn3ju63nV4y6xes@c zAzi~X2g23`MToC5_3x|zwQJH7Cj$C@IpnQ=Uq!3om}>t(gXq>S5mJdX9=|~V>@do* z!0CrKXSV8IDwwj%IXpA#g&MPmhvWy+nWi0UC#kAmY!0eA|2oIYh(Ajz)GWbyW@`u$ zz5nQRPZOHwC_3!B7bpJ z$#1%bhhkLZ;wZP7XU=Yf&vni}wO3$|$KjS2>qhjc&5PX1;>j1C9aKxH6q>Ubj=5c` z<-B3d6PXL{g(5_p35A>UREpg@sfR| z%C2h&^RKl-@u85vr3ScN$j(BOajr<#+YJHo7(48#h2IBN$fJ)BItyf`7@z3D!pgZ5 zyGfMR%HxbHnJ=lO6Qix5Y~Is9BMulbUjI3hhuyJ$+p+nrxYG`dH=ZN@!luttW-7)h zmtb~#PF^&-i5Z->w|AVGnZ^T*dQI?tU$p4#N)upw5^OCmf(0=&5rUb0`#RcbjwmKc zHF|cS*fwFOrAWbl6A*tsF|}y=J`1sNCURJQR1B1LX{t zmVZ|sA{j~BXewBL7gS{WIrvGs{t?c-nI+bu2L3SRPr%EooOH_#D$zl$ zn`>+@4t!8xO1+U<{2uK+lmNR@yt&emzRu{}(MZDq-RLsxNBLb;;c>FC^^}JItwrJo zDZg+=9*t`C!W8p9&Q5CJJfCpd1I*aT zUN4Er99g>aZRfTz(-nyw@8*Jl-?`tK`x@x+CJ?laE8tQ|bBmV<*;F8d%LK4b!%DeMvCMfkpFyIR0LOq6UHHYYdwn*CUO7s3# zV9(RZy$$EY%4lV7uQ5~_Yf4MhyG2w0wwd=>+GQtO6CMZah1#m=FLH+&A*np+h768n z>8IS%B{!+!E0B-9d68yp(gXE{8R;Uye*g_JUNK2AD}!bW^t_zuXXQqmg{Kqnza=NGglUzeqBE-N;bzA}1m&dh^~SQjtZ3#=`e(6YI1kn}H>flU zLlh0|tQX+)%2*$lT5d`XH?m(k$4uL>$|@bcuyrAYg<~U8QlV0A&W}5?=4WMUCbiIy zyVbJ!QgS<{1q# z#d(dY8d2LipPIPTR9c-#>G}mQ#i%E?0z)SyLP?-Qw24|*>fy<!hc zXlktkQUz;iR0Kfq-#0lzm}Ppj^0zJCAinT;ypmYiIf6LAA8*t)D+{!}J2km;fmquI zUuZy!QI*ukTg?j;2Qco#(4t<8!1v7F5ba$%IhX57{%~<*TAvZ)=K6yLBh;=|h~?sB z0c%}t^w9XT5AD8yXIDeqGTCTwtuL#lc$5TifTbnr<``3SaXxCpRaAqCpU1|Z)GG0| zcBSyQfiZ!kvhI@_s^i=&C4z3|v1>XQt$0d1A(hfSCRwi<^lQT8OSK11&JhhXy;SQn zd&xiNKhW)%LRPhN#`ke@xCYNNcs1?gLimUk6`S;OVt8rU6=;309rVP1+^E#4^!O@Kij+OUmpeL&lKQXXN+mr8W z-v~ZHJlXg2#2hqTER>*HegO;EB=x+4X`zw)>-Jf= zy`H+D`Ecqr<&Vyy87e*nrtDseeD7TF?h*!qbi8C1|w zladC`Nk2w*_Z*RslnrjLJe|M=(yq|LWD1UotUDiqr|q2Qyzot~=qT9lhPg+GG&-eN zaq}^E6_%C(AH$E20b_gEsS-JR5vTp4w1nb%BC-$Wc9SwWl8uw+v@dDryP>N0(@)S& zNX)wMwT1O<^1bP_)`;2D4`u7(XTsM%vu(kbgb_*w1YQ6p=gqQzxp+7;M2M<9f;udh7Hg=KKg~`jC6(U&mODYY(v_^#JUaE|dRfKSNy|NlBL@w#IwVc2XI^am2WxAw zcor(o`yeZYLpd}E1E#C8FT#psqWCHP-Lf|@YKB7Wp!&}fL(>HT#2)*_U2BQ6IX^4) z{GX)&Bkj~sr#V(>w>gJIfRrS}VoxjhQMuduta9o*O6$Ki$V-_w*uG!qw%W5zNy+l} z(9akqoycz)qCDM~=XvItX{|qV8>LzAq?;!Bx1gTLLenL_H(8TZQpDgP51Cy4bn>T= zV1Jo(TkoKYZeV}%|IUT<1NT3?duw%U}Tq=y{QuM6agM1bSvAAq^Al z|2RNqAi_zOIwfn+T6UT!6Y=%#g~8JD`>A!0@?EH9r3JIP&e#~3D|zgpSralz#KlAy zb2p{9Qr2srt-L%`28|MeB$rR=M4_!yh`5bZWNp$ed{-(n-KT!ww_D(Fhgn~!dO13q z8Njq$f@Viiv(;XapYK8EZ?I6KVwD!MeL@b$_M~Sfx(!;S>K2u^zo|^{ciSDImkp+dsacW?r`7!z@9)kv_Um=Ow1xq z+%qWS1~M~m+{slCHd2=63QhHmDSJuJG^8v^tR!IY+t29Conf3$iZQGHWx1a!WBa>E zJ8%fvpvQ1`wnn3jx|WK~-6ghd|L>WBUX*$w#=)g6hL+roF6NS%&W z^s2WUw0Vq9MX#g9agp|q4@=fe-3V&VQLi20lnrLmZ0F9iisg_SkSWVl0aecSki-)6 zhuTW@G)tTMU{|8hVCB`h0WhZT!7%Z?!8f;-6iZLJ`D6d*JfC(C8%{6I6Qz5lVw ze_?C%I?}KSreN}wpxFjaoYLTIHOPZM)kI?7YmfunY=<(_OW7Cqyw%rRqcou((0@xq zdrbmOj$@}dvMic7^7?*w#kIM<#RNOZ@2&NfxrZ0UtvT-$+7Bg^_ zB39(W?)T@D{&oXB*6iG!KZ%2O(Ki*c`v2mZtTzn*a?H}LIE#LVou5XJ4@MPLDDl5@ zLSbtV;N6+hG+*$8!a-}MGue2d(>Rp_Vjz( z@Sqg&+)Ohq_bPK}=?+WYD0go8)s%lb8C-h>R-#fPfzZmkai)Qt(wK9#!$sy=;!~QVTbf*-(HM?t!S z^2i`8?#fRyznq3HgQj>c%J&isyRP4?iS(iZBfmM)=mt4K-2`0!l9Vj@2aStxB8Ua#+$~ zT94XdC?WV)RZbaF-ubR$Oek{k!S;N&etwSYu7P9u`VkzWu;L82M`UZz8wh$m<3AHb zpj#!{L7yTnEsfrl7^>D9J~OkZ!>ld?5fa1R7<~E9pmF5l7R>-d`s(vUea&==rn0oB z=b4ODG?WHC+%=nU5V!oe$VEhE2YoxiExvyc__H@+<(9*A2d8Edr>Iq}(D>sCe9hDu zd|0vkx)!Wz?N}b<>jQsdbk!MklIkOuk7gM(VKnn*ew^&vU$1+LSb{LW0%2Y!L+LcL zCzsJnjQP?*l$(hOZuC-Qtw1qvu}_Mn4f}&#aMB>A^lrCTDNcRtq9hQIw-CQ%s=j8s zoKIFEZ@H=C{0SBFpLW?F2XHay*(#qU(-h*e(P)E&$)A5#ZKG&;zjGJBf}XP<&*j?c zz2z}8#8%!qN0=}ol4}a<%Q@s7ymHgSMnLK@?Jl`g zPWyj|g?EPF!b(!AH&_gdAs16?*0r8Dm5qK0duRbR_vB2)Or>L?-2&K%R#BF)j1*25 zA0bV@6>XcAi6~E~MnE`#zfeaZN&)BUSms^#IG}xrXs#RX)LJrO@D#Vy>tn39z4i_8 z?-p`+HL*x|;_qVG8?fT;V50|Ca%P1>)}i)Pr?v*FITdG3VzxyMf4+|x#9vJN&%meq z9;Z2f@uT&BVK4mstC4spDfA0m!K9>Jer~}A@p7*3^mwZNUj6CCOy|E0i=eF@&^6}} zW)m@^)Inz2x9%io(yJ@CH@GQq!R)$smo$tS?wbHD`wId2OQUbI5BdMQ6cdL{LGA|7 z@m(#Tu$oO!HJ>3!%2$WHhCJoI+k8#e44K)HZJif2bLl5Tw?&t_W}KS+^6%N(5rd4W&NBZ{VzGqnoF?ti?n7Z16Pp}?8o51 za5drrBB#i*Rti9+gTFb+Ub@NMxJf(?1y|bsTgVCzq^M@p{)2SOzxyr3$31?wT-vBqJF^YBId$PjxEu_oR?L=V7C zK5n`(6<2CXe6Mke*!9s%FwZOgmzNvIt=NQG5{Zz07vr-jJXgp#vYmuIg&!fK_&ThL zp)ryj9{o;fc3NUH6zsYEMi~biG|N79FdbicMK(L;6v|$%!kTzBmd*A!#M*XnYrABQOv1i@)UCVh6mXJvkCM`6 zRC`aQY;eVmoN`kkj(F2$+s`fWO}ruNa+vO>HuVQI_bqBss~xNX^evlg_dGg~)%bbu zgvI7-ZKGo4eHfEInHQLiTO|5PaNa?|Er5oN&c0vR{*T$c5Vi?S+^PLOrF3$DL|{t@ zfCJeA)K{D8y~T33zBlSKRp$Q3C|D`in*mJ!VLrK*o$V))htI&Q@6ngSt+fyVtw-j1 zGuuxSf^N}LO3HP;B9&nNX_W@LWr{b$&GIB!HB!0e;~q;wR8<>P7NmA;N5wiNWWm<& zchW1r^AR~h767vA+$Zao+qbgcuh{A-+~!<(m=gwgiITs<{e6Xhy&4SjiKxzHo{x-7MMwrB;7Jh=%`-5|0If^aNe<>NiMt zZs^eXHd&MHJMSaookFBx8SnlM1=j~EqnkebE%6=_XMUxZ&q!jfq_2&CqU2?HD$-}? z_U$@pH)!(>s;4a)ojzX1&bW?5Ib+UnxEYbUy}KE}o6`+5hsX-3^IWi>SxDyaN7GvP zxh^>DwXtf*r7eYMs5Qw*>E0rIQR?8j#M16yGs(g-UZSQ}n58W}C|V37*|{bW@LIh7S>+du3TX zZMLyI#Kd@}>olvfTD5tgx9^5FbUI~4!!Vv^H+_@p=ix3rUQW}Z^VyIJ>pH9Ev}n`0 z9wRumQz_i>xA;YzGgRrJ&E<46W#hV41A3p3Om3mgET}%IY;d0 zDYPuWs*y}+tSTWyn}O@-88lcyO8F=2>*5Np%Eba+lAY`dbmyyrRgj+_gd6Ypjhl9I zVOTS-{>DK(qzt=L*1u%hUy{~GU%h-1P0hJUJ(Bo%p~m1|JERn!)?lETSplLr{nsSv zDarbBYX)cEn1@rS^PgL_f!^T%gUev6XxbyWJdR0F%(^webVZh-g4{Tmu?e4AytPV#d^iTCcU`Hp#45m*FK5jN5Zxc73dV_OE=#^#*RL z1XU|G9nE>2ED4yLyqoiCP_L~Ok`Q#bKl#C$BDJv0GUxaBC8~zpJDiDkknDP5AQQR- zV|E`I&K(3#$P8c(^E9p~@d4#-AmZW@E2m^&6?8VdZ`2-_ z{jRKC(Iw+S^h!S zfBg@!-%jcr+hFc#E}+G9qvjw|+AO}T#v!xL{OVgkX4Ctdgy5{Xc9_VtH|hdu=uw)RLbg+8nB!_LDG_UODC^JYX@vxc zup1L!GYhPhi;*0e>LVqTV8=4%%^}GSK1OMKUnetvk7MSZ!B}G_ST6YL&iK(xUY?OJ zR2gAXF5R&nChMQ1E2S&7nSL$ns%cV#twGIS>xn=Grt}?*Z!P`3TRAcC!p?R;D$Xr@ z?YWo2s~gkwD5DX*z+3@~i1FnWerU>&gc3@)adPUt7I^Gu*b+bz*$YK29Gd^1KI2$5 z5&kl~V^rx^&c6#lTr1rBbO@{03Aflc^`9BxLVhgKak-gN%FEto&TcM;OAmTxa5epx z2Vt0XKl?cYD{U3UKbPAYPVC8nYmD$$l=ABqt8x@6TnLvTcHJIt-z_V?A@s*Nwzo({ZV^@l#kcB~k3hZ3Lq51B5DK7Rl zFevbx>SO4J+N~Hf?nKbs4!k4JjuD;2+~VToy40_hcc6LC#e*ohzO87lj%Fug%8!_t z1s`hGm}mMD1=V{FfDO#*iZ5s~9bHHtMnq(WZsyCy${Jqia)VnQQ8L;nu!!XwA#tsh zNK!qj%u2pHLX_7V1wW?H(~iI zv`Z|IU@-Ngz#})ZqmFWF=Z7`_7NiUO&8>P zdBxgRee1<`rXPGqG!Hwtb9v-vnD_{TH;H==MstUDI+Ai_+b7Ha(#80?#1M+{LJDDx$Ni_w zUXC=m1mOET@PSwA=$#fTLz1}y)}u~wfR;Epr%X0uuZ8YE6WHL57 zvlkp~oNv1P$rGfiRu%lfKVV(h`@VUZT$?^Vm-|Hd1Li?kPdUi}yDJ`i6n&aBix$c7 zQ?fC8x@T8&DwOmnhSZ9;i>o$Pk{<96L0#@B%{k0puGj3T>%TsC5TElF%%2X}dTbH) zJa8JY1yZ-klLPV3!!*4w1}g{b9ZL&RAPoPA=~Bw7|A-##aLC*>@$qz@Ybs{nS5h1K z8Cma}wzU$1N*5)54>y|owjS16+!KCu`ekw&PA(YvZ^huJf>KAgiO0F!qxj0c9l6c_ z4S)i*O*vB73q)z8G@(56f4t+XcX%)CEtY1g2JRDoP0gAunWD0;i@4vBX0f36o;sr& zWV5fLW=;iSxO#)*N6Zc(=rlNN-zR8J`DQCUATO)+t?YKSNrhk*GZq>q{u}y;FNmr3 zIOcbOuH7*JLZ88aXZIOun|{>fV#q;@=QYBfX?mYS@jSUE(v*STm5rC{&3#;2S0Ik^ zXSnGxc)FS--*VQkNcx@a_)ZMcJ+qX1E-ZfYiP_3`B_H0jT7)3YP!+e`#2 zUiMWCQeJ^4xNopo+j)LMU8?NG;_8JZp2OHrt(G3M!s2H{VW|s@^RU<`*@4U4l?tjM zrBu@Rg0Co5P!)3{jWY5acA^8T>I;PGuMb-^CS=m}Cd>h4UCu0=^=ew*7^0)CSf|X$ zb5nbMO;(~zbw0z1<(qNhh+c<5-#5vZ=M{3EM)^)a2(Q1m_{;9#q_C2bB@ZNEeV_|= z4wsu|mPsnphePic5BL4zTTAY$cmfP*!>Tk@@$%Q$!S74uOMsk-8_uxJ29?6rrj$wH8XAxG6E)V_dESBs=}5u(&h+NfjE_>b*b92%C0|XcpI>e9e8DXk9vN zm->}ks@tYeB;m=KVTX(HIimg*F2qj*e=IA?r0Px~0apd!YE6pt<>`6WC5EA&;&R|@ zVK`Tf@iOLxS2%L-laWnw!a{D=8ujY0rn}R6rrCRuGry`f0n-vua6!Gc+oy#tiMmji<@rl^~miI9OYl}v}@AIjFnxRRG z-j$XH;I#L{xw+o`c-WD;&r{f^c7r}oA{$GAT>FQa_Au;Nc`uW(5Z(`^J)z?x-t4yYeb?%J z_YXa=cZZX$UlYwBn&w&QjAu}`U(~1wwj7hW9$^zp{^op!Xo5*)N+mj>+@4Z@|8Wu&&UV!LBgMCg%j=w?x>9 zm+-K3UN7)gV{N6R-KaXGE8+RUL?tc%*YQO)=l}N({_IS5( zuPW$GW)YhtrzbNUhSJLY&$viIKMcj(Bc@$Lk2ap3ss4*~U5*+1j120i*hK3@QV5PB z3Aj>9`n)2CZCwp6RikFRF~1kD0Ip#+qOrF!ct-ZJbU+MMfwC$LQ2uvP@nP}s%KMot zvnhGLFMb4S_9lES?%zU6!>GygDRsi(#7V6i#2!R$Lt)Zo*in>5D7&|j1|1j?>=^Yj zp$)AxQlFjSB`8big$aQ8IS5^B#6%e6Nn_ZNvcn=exQ6Ux7N}5{+-xQz-kV zDure;j4_dv3LDN}&M--yrQkW!a_|pSkN-h zs5FaoC_Qu8dnrr**L{Z?HQl~I%P_^&B?PzRDiVZTBK<sj>#t?jX?5Md$-bPdzFYL!FTs9&*cBsLfKt@Lmp$X?^|BGFI@PYrs+ANHldnPTvT z%_?^zT7%yybWr&b`pi$I^=q8X+N*O9;1Eq;2Hm4sJKH5`r+y}yH17b2&ekcnRHd8z zkq1rAjsi1y8;BeP-p(ZuupY8jZsp%RZm5h~O_+?$9{*or-G*Rdi?k?KsNcsk!@VDM z$-i`x5PsaGim@@m4FQr6D)P@b`j2gyJEeSeC`vhGuoR5z4+Om(_tJg?Y(k5Mjy(9= zX>JXOidg5hhc9g(FgRtYn&X4KqmZoq>ua!{&-w#u@}@DTt$N@xQ9k=##3zF@-!hjl z?=jb5YQZuY%y+pu>v~(jK*_JX=U-q8-*(|sA#<4iz&5>^_|pz}M1kU1spytkw$np> z{9q%gXJ))J&!^;0_BRF8@o{K1E~Uw-Z$&s?hHyaf8y->uBzh!+1Js^^eLw<;mW((_ z^XlT?md`c+B5EM!sERH)$5u?aJsCP(`{!B=fL`1b|6wZw~}}v_p+bVo)hEfn&3pl zEK!DVmk_b?IdETxL#z~=1kT!+IJ8(om$oq1^GKQ@bz1q95K;7i9|v8Js0RKX@L(#J zEtH9f_}s)fOk_6W^)5zhJx1v7FtfVz@A0kmbP%?;7QxLjFsT_x0t_AHaUn6+loJziI?O{)`&!}-xyG|EbYp2kYK*ee3t4AhrJHOQIU4A6XMfik{N=hZ{0?Ag zrtYa_THc!28Ut0DH<@9IaOP_^aM-t)dCc@(Dt)Hh5#_rp&aSpOYuqQhvFTd|4>`C? z?|q&+39LE{uzzuB2Z5FwJgw>SAI1Zzx)oX))&kCGe80OO;IGtm7Q`OO}XAmtyx zcsz^PSa5_HC`f-&EE3mYfY?SG`F3^Kq;ow9?S|n#XK4#s4)>M*VuZtEvaNFd`96g~ zUaERkf+R~1fr5tv+oT;(76$8tn6A9H>=vxXN@sXdu&8m8Tw>Mc+f=J%fbc1xfseTg zY#0Jq;3nos*!&Dr=U@p^-`J-Bx~*9Sdylu!t;^q~YQ4_xMveA0d%o)@KC7h_+>55?zApDo=5k~+|VLf(SHk`nV2kUMWzst`gcFc&H7R*ho zKk{Dt8;(59!kliU7L*?J<7o=NDcH1Aa`9wF%=rEyk?Y+x$G#Snb4WJ5-;VDc?vsm3E*wwrf5ejD&?5d% z(YgOK`Tu{sle{HVD#>A$;`=Z0S7 zeMM>uQB>`co-lRglMEP271{W%EV-;lFRWBrkomj4maGBJUk-5|2s!2w&-0m3K0%lw zdW&Rd=422n`jVJzHPX$vmse&tN0xiWw~G}bNV$%S zrl^CJ%Hm__wJkw<%kMPXINZMe`_adiEpx%OzvZh6at%i0`KL#WdxW3ddRi6U%it?) zdwuIauAGUGo06N&MdzA1inqPOj|{{!NA@HiIcZde+Yil&&W?@vmZ&KoE#zNt^^|B8 z@S4qnM>mfLv#gf134pI+Zz3T7p^mZH9h;VZhxYy{-Jcn8hlZ~$o?n3T1Gs_x(9UiZ zAHf~7I;Uf{jR;-n#?==ZG1sPvXbqo=JL|=6cS?e*H)4!@LRaSXST(!;e)6fFiU|d1 zdq2WkOTSJ4N>_hpMlDwP&-BGSaMTasuAhY7Fq{%4rp(O$)Kw*MnVTetr~^OhudDaO z#|?CdkvyiW74BjoUOvQ+VyzwK-dkI|?_Jz$&D6wJSdQIv=ASQBeUb6h{(Dc5p7LgA zJ{%Sxaq0=YO9h~IlIC)=I+C2r)e7whbD9GokPPK6&x?d3Ub=+U6i#!c-OoReWZOJAEg@iA_m>Dg8jOqPVgevJy$Vr z@UYOOca*jis4wq%Y~GhiTy|f44`j3u0A_0G4YO;N-KNf9A#0f1BN_k*{>F)}w|vfu>IHFgwlAg}TdQg6yBsR+b!=Ilw?&I*?fURl zK(;K#EA%_c%G^8fsH)=td=pX#a!=6H!;p$*OND!CPJ=$ga7i)i#Um-;rVKXDO2&{F z$CfndN$92I4e7nQ)|7ebTpqcbH`7&)vFKQ|6vJioS%y|7 z@**M*g+1!H6sbCIppz>aMtws$2qzxLC^kW>Ca*>QW82+^xaIvu{@dB{st^@}EEknA zi*~b5kV%WPyTEICnAADVca^lp_M4^nzi-K`=wcU^tZWaC_d(*&OPdDfi(06jSSRmr z2h`$jr(dDUF4Wtp3HvkSZv*{8mlpIxSGiVS=|ArCGi?|zRU3=$z1nP2A9hIkF1@rr zEcn%Xmt5FpD6m8ioI977jIhJ*$OwPpa)PR+g^xHT@Pqs7u zP3}<0Us7hv6`I?0tPgE{CH&4vz>Kb6XUKbZ*`t}Bq!1)Bt6IK$}oZk2n;j+W-_ zAcBrv(}KsykG0Z}5EjAH1IISlth%Gn$qOmHHd$wyv5mc#vC;5l)E>gn6T5Gzcd;)h zsf3i?lo<7}G3hSbJQ2RS5X4pIyi;W!>i<^m{(>FuGrduW5*+0(6ww2b99`=8oh9V} zLnhfF6z>O|1hVpp4RUXDZSR^jG5F=6gNpp9f1nYf@X+s3iz&^Z$^<|l<$NnND{@jY z*CLz`YpYj;EiL6H z;fW+V6|D5`79!mLWB;9ZgqAd)Kw?D&{Hlt-OhTUj@dcyiPdeik+`U3{i9tU*_@y(6 zX6FR?o|fr0Mbe8ijW$^FpQ|(nzgx2V?ViW&IP{D&y=gd#eKgvIHYpikym5s7UVSb3 zf-;ZxSh~H2`7CUh)OUh8@3A2BoME(729iWiS`8_{Jx+0pE%%Bdd* zHLr<5q|)5_22~(-W4_mtwPa|omen+ISCiJ-X{&(*YR6*X|5G&61!#BG%2P$r55gq< zmM()Q*feVmd$O#-k41k+ar;~-Wk2E&N#^0lBQ8`i^Z$Ar}?M*WL5Rb zi7UOnEQ|v*eY$RFq7=(`ihNZ6N`7t3&I2zZCb-<|NgC@XQ)+Rw8P8Rt5r?2wnC$*5 zu87(^>Cjk9JVC66QA~7dcSlHPT;(@7o20+m+_6C-pKJQwXP2cj=RNY=I4*t>SX~er z)9n>ASZ(Q#tv5k4ZrY>k`t+E^mzj-Jt`)%F>OxMzHgkH$WJdF2zh<|vGu7j%V|Ip=(`JP zAVuD-!)OB9MF@0QXwjLuBgcZCg|N!MA5!F0b2aBL?5tXDOY5YC@?I7^(q) zo?`dAhAC9MKX5bH&N63DU7QBsoMVfVBh~~3#JG1IYw2WpJzALgb$D9CC?lcllf7{7 z004eM^g`&IJHkqvP#`@2Df|_|hvM_|ZYvhdWHc|(kG+cA^&B1A00Kh=&;tG4iS2Xau?1r@jEOT$J;@pt>EXdHJ82k2IV#`WSNP#$LzAT zs84Sg@Tk*)9ycOdQRKy0;P&4(&d zIF(8bNEQz3`JlQ;aR=F_n-%;oynLxQ8jK zS*zRVpPjz)5xu4I&O(7&M8Tog4O8Vgr)d-u&jfY;76Bz+XEdAJbUX}Cv&5Z{0cI0O z^LgY)j_S(}SuKhU_0yMo1Z#1U8+6D=8DlTWGQUm9HYfHsW+{D3*pBaCN7VyhjMb4ap%T)7gl_U59vcsYG*F7$hN)u zMVNm^VkCih5gBK{Z0 z39P=2#aF_a%sG?ITD)M7$55#tGOIqEWF3oE~3W+R^~EIE{i|SIktq| zs2?;z`tD^Uf%M5;M?gd48Bwap9^Yfh&>CDrX_yi2yVBULN5ONqsu_BAmHb^XjxkZJ zriz>DZ(dUyAL+TXRA=--Lkk*A;ie{X?s;9#Ted(MOW6i7mt*#g??g4zn`~#J@4&_f zHxM6AV?t32r}+mZ$89qC%Y6i} zKkh=D{0M3QMN(ceY)G35@?-Hk;B@E<3_h}PeLOXd2mX;Rm*(!+A5nN#ItcRc+?wD1 z{h#IU;~-{K`5vc&pCS`o2F1()h?1oZQp?;^1)QY=2&JfuudV|L%E8EwGnh-}W|Bf{ z4)tdT5P?lYKM=lTGV!!C^dH|gA{zE4VHSROwq#}9NTTv-4t!7rHDm&URR`iZwxKU{ zqRu2)NYydt@7oJr-x})Ff`B#iDSTiJ`2%l9QlR^rm36%(E!Z-{$6D+&Og-*{MAEZm zHNhU)I!STg)j~$n-+ro^Ql3=1Fs`SI_{R9`$Td5)=1w^wvGcaFJ(iZIm+w4EU;pQV zhR7Ux#tL>DvGgcg6h0%Fx2$IloJR&i0KiiHJHWB$VU5hd#VUq5oV7OfDL+ zs$u83XzK(H=gHGy)i>tQgHCB$;2@0a+<(eiPU>A2xXdsosCS@xi}e{qo0h*F-U`{U zkk)ATjIthBBimZ@DkY)=Xn|Qz8hDiIX8i(Wn-}A zXc^m`qX=ds8ZzPySJiAAbf{y7R^1ONUu)c``7rg(Iwe40OKS( zmU(j0QTQvNEQZ{U82PSCzma+NPsM`k^u%}>e`CRynI&sYsc6)E4-V(Ok#1_MXV5X; z%w!yx%FD(?=AYA74%K$2LhR3y$$^LG!k6Oyvp$nUM!Gng_m>3CKKCiV5zo@CBn3qP?!@SmCX+NVJK&cE;9VW5WsWQfjajLZXoDAj(d~ytPq}w`qXtISVycJA?(b!y8IWlwslMZ8T)q4o^QIO>ACuCET280U?wae zTf0jFd_vBMKa5}|f%O^1%@z;PZiSu!$a}i@x8{M4or%M(C9p2B+ za@pcO(s>qmFGfRxd^{El&3bbx>+rP=b`O4O@Ouk-XYP8}Zz*_Q=a~Y8+&XW7k8#|y zD*xf2*P-Q?e-{7t>mM!u`V}iTs#!dH2U5%Do^X6G=rM1Xr`vcCW!YrmC;Ks@n>P`2 znPP_4T}kiHoC$p5zn!Pyd`WsXu|sI-(J80DH&@*KWHi-{`48zM*}y-MmQ`FjsYrG? z&G_Kiy_{RH4%D~ghx|HV_txEVY47AO@7HJX=X{h zK?@*k%$LR_n8YqhEgIbr!!c_uLQVTE>E~7C`L09+8p%n0OZ7b%_;7aJ1>IKizdm=G z!l&T(cIN&3{evvt6;~L&Nv@dP4`AhRNM{PU#E;Czy?zffbYq69yo$o|uCUgjaj`mY zIR>B8%`Rnud``$HF`hZX{F$VpAh?w!*F|s*y;|&ZiToQ!q>WP?WuMxoZytsvLMLFh zv<|@IV%J=T&GiQM*~w|(mkK0xO=qiRdot?Qw~$)IBQZF_oO71_q9vYqM560-!}~ne z1&gS5TANGOGA~5Ji$iipZj^-=NM)bebaF6O9%nEozNM@KIMOh z;%WEUWUrdWiB+WH>ipOdRdIH3v*{;LeCR(@zf+E#Tz!VlC>XA>5LCw+HBw@IC7LFH z?2t218Ip&?x@kHQn^b+aqOH*wD=HOM0DX#+M6ggUK=DZmqE?eu9J6BFy^EV$TGs!}Qu`P5YjwcX=MKk&39m zE7bjP(b|4SPR=XsI8p3NL#J zM4xDpPsg8v3?NAH%|U@a0(CKD8&=dDHnY)M{ASX|ggF#MIpDn%_Gi%Me2nZ7<7V6@ zdCI^JucDY^D-{)A9;Uh|w~V)R@RET&0nYlt@Z=gvDevY*t>iGS6<$B&wIq&mOy+C$68cnPHqYdOq9*P$aMmITr zNTDpm?SQEUTJg9E11&(d{Fkig54ELvfP_^=HH@97{jy!Xl_#sheD+)UNnER-z)Bx%i)n(C-a`;e`!^wr*ccAtyn=ylwb zM3|V2t263fFC)-HBhW&np0Br!&QVv!@~XQH?eUPy%}DtcoOGv>{Ft2C2(d6Sm~T!k3~oh(D#StzYbr+ zO1D15PM4oj(pU8q$(CCoC^l@;x7TGdldi%v*h_L8?~HvR`5@kBtJVWO<4T=fvkq1y z`Aq!SM<3=KmHc@YFL}*VJTx3H+#qi~hu{IZ@nVkeAdF7f@N-F?Lc#N* za3kNvhIkJ|Rw?mQU$KGs)s7!YCf`HK%^ZkMF~{oucO8CPbr7bhMG}<}Yts)0{#B+| z8J=Buw-6C$SXT^i+2}eVxhk*JIJWjS+9oOM;%wq9(h%!)zQzFJ)~8a7QSA9gplN%_ zXDRW{j<~&||4BC20^UbzRF3;$#`GGCt^)&xUq1`1+)wi09FKzNc_kI-8ED`Fz4q5k zuDHYQ_3GJXDE`HHbd&XU9lnvX6$*Ts#nY++SaE{MR-bz-~d2J+@>-ndv|C->p^;R~=_nIOp%S_VVrP zld8CG^9Jw@oILZXTf+JY;Y^aXQ_4oJOeH^j^!Cz^8<59X+v@doF?KSaz)_z7O3Yx5 zy7WIk(0l&N3;c(}Cd~ETx@Wxav=Y=ll65yW9{Xv~iJsGNZ2u;>5UMT2ox5RpmaFG~IRDL3Ew-z# zRIx6s^?6hjpb#1k6pfh_ShO2E70WE1UF%5%8^_UkNS<}(g@niRzxnEe>v zojN06S()3uo$XxZN7s$3li><=W}CY9%-^6OM^dR!jsGoT^d!$vKXCyq&zoSOVRq&j z!A1YrTenZ5^rd>pPt#*)M56aZtud(>gTjp=F%;-#7aQ>niFaBRb1L$F6Z*urg9@0~ zu5YRcq(_s^pAPj4v}!7ufoznIz8jLnsP7n5aYgc_+8Nol?`KnJGyG-4-#uU|&i1=~ zx>o_Bk`aCw+@}pXNs6d!^8v_%){}X6y~jM+=OX9{ZjJD0NCG3vE2UCUXFSj&jfOd4 z4?)&3j!Hqx=O~!}{|aR1eZx;B+2yX*yPR`{%IY(^mqpM1)?1?uDD{3xxNZT(;51-v zB(r=0%SFVPw6I_sNGnGxMf|sf~9qwHtY(PIK-V!}OS0=4Y(mF+*3N@cxfw4<6 zm%>m>V|t{=A=loZ(xi~dy-QI$WPR^~JBz_HLrl&n#X+k{jq~SjxnmQe~h@U3eO!d<|@y zkBfY|LQjf+X$gRp+%pKh7D#XO@qlX9#|-H4Z^NIRQ?;Q!&Iq4BjC?pki9F6u6`xZ^ zFeBS!@A03L27f+Z0jp_;EaSr@-Gc8kJob3~*gW~e&KlzQ*%s>J8XL9O)?k(LZkM+O z7BP}jbB4Y(RP_VqKll7013AaXFpNca=yb;_)Z{6ScweR8@6Yd?1=-V(!KmKCjavRwA(8vP=jB+(k5(p;thluW z6D9sGPh3ahP2%>_=*(Cvsl6o@vJqf}(n4mJ3;#G#>ZeV#Avro6GnFFpvGq74eP@Yk zQK45V{mgf96$=a-J=7D%EnOo}0+p=Ds-D@+8NRYK<^w4>rsWZalmsU|FIM>!dm z^gQOz)feCk-}j@1-q;e*U~5yQb9CE6kZ|8h-7#KL5+Sfb&r9jQXkz2sA2AlHPZz9k zI@sV9BQ&|vaiYgBYdv%)baUY+;SKrB?h&S470xKV9;Zo2MeZ3tRkpchD1x#ru-?K1y+NLxl%z*g5X8vFCIV9nRN_8^9)Qmx>Tr0sMB{msxf!1vU||JR<| zn_`bmeO8KRzD?(s6kb=j@adhnsgK0J?9Xuo#7PDiwVhCB$V~|}@qC;3im;Jv2Hz-Z zg$WyZ88@5J14q)9O>$i158SNesY!G=?9!!k%ERzs7%R?{bG4{uxugZ28P(7lY5gCm zWud5%xOFCQ(RMZ>diXe&B7Yq=f<4NSw%AB~VCIcwQSrvtL6}2~{@e>_s^BnR#{}v` zw+aL<->rZ19+0xF?ny0Akgx2$jc$o)a!LU>W*G(kt2dw|g05u3$F2XarhnpqS~ZYKV93IH088fkZpw z?-^8aPaaEMRooB3fND39knLAy6i|;{WE=bH+L48uryxbHnzwcS4sIY@{nSGz6p9n| z_MjeGuZTRqF~~K#*bmK*nV%yLxglT3e``ImImK3l@(`wj+Eq|cYjJR=qtX|B(eJ+a&B&97{%Us|$c{_ng0 zofy{q8LaT0TAN!YJ}1V7_|&D9?(s)AnBMF|Ig(oF#YnBoAw%tU+wI%F-`i_JmnCcR zOde>5I)b>Ffej8H;xkr%Kr(p;@dp@)tgT4cM&MaqV9&rI`~0w2z4@;#n<5{mf7}KL zal;ml06h|G&fNI5K#A2Kcg$)5M|g?NnF#v=b==XBz%-i>+R;!EK<8oe$f_n%E|2SW zh~?bMo+<@S$7}i|YhEI3D;!t1YfK%N`d=tyoShl<2fWq%ReO<~y*ep&K8}T2szWPK zF_EwE-o3(3(r)tctIuluGdal-k)(9=kbriW_E*I1VF1wSH4fkZM0_L$?vCP3VjoTM z`rn{qL-x?p4U-yzGq8K~`E|SbDw&oN`b)^?#MeUxSWDUV`5()a?f49>Qp0O^4kOzG zcPEHPe0I<;5Btx*H7E!v1zc}@AvSEV{XXH&9}6?%$fjx)ASy))91B7&sHZ^v#a zAU#Xr_q6TIw^)TveGO@K9v$;>^?pSDF7+I4b#T{4xK3-Bk6nNLsrft4IP$+?u><07 zkHz9$?ac|PlvznhEia?AIFCehx-eiryE9CQ^pMNEZN{l}YF;22IJVP_!ymv?nzY7a zUJ(SJ+L^AJ5>51}1vhf8aQAqU%hD&88H;qO)S^-;a^_+^i!&y!i(0ZwQq z{Su*DdA+CSJ0*!l3mU+Un>HSv9^MSS{&j$9|GP@{xeo>M#OxGXZRRDW{#jg}%`EfN z=%tV8w|lq975QG1rahH+NxV=!1?5TljPUQ9W*5nqv_`6**U21Y(1O04Y#7ssnx${g zS1vd<#vJRexHlTH+s*NRCYzKeVm3<)r;L;q+|Dv)X?e}46?gTQf3C>PEg2PA(TTQ@ zZ0FaET@{Anqi1j-P*oSmh)TgnbM?mdA9d=AaLQBF9)>vWUowI64@+u}JE3*ZNeFKD zR%MviALbw?XL&umj~c(7u_C&Y*B9QaSXM3J+9TULoqC-Pgz$}w>u+A?78TO&^j4Lc zM+fF@jdZeuIFSx}&)=x(`I7D(C<2GB3NqFrrM}&n0Qa*LWp|1%i@;`;IED53(H4~3 zYVbsd6ozJ))osWqAOUSl75IqC?I#PMhK$2(t*_EM1( z2vxuwjy~hGL9WD(DcGUq2C8Rwm1m|D^2112#6uern>u{~RbZkN{iRMPiKi{C+PNN^ zcyqirc#YL2?&gHL&+$1vZeFOi!ztZK@45tA!b zJB>suIlOl=o4E*^pA!uKbO-m~O!bU#F8*^=_(t%PmZF(WB6M$rbRTy;DaS3 zsjNR?@Cw8uVHqQ{n6v-Wf8`^fXmznrit1&rC1)pr+SsSc(=>d`>a@@vyT^<9!h{*s zg1W*!7zmMmn6I5?=Z+clFwD(H1Jk?WbHRhCh>j9&U1Hs58g8G@bC4<)Dn`%|y|*UM zJcc%A+4p53l@DE?6$Y*FX8F{@BO5VRV#B!u&X}2v5k&utL@ICfsc$bCu4ZdrL^~so z<3#YSrVJ`UvQ|57lQA#A>bEzLiTVP?s}*#TE<}~7^zO$PNaZg8+o-Oj4T`Ff_!e!gsMD&^ zSE7(Ohpcy5HB1}69p|bbMSPN}MnAC)Tfy^bF@aC-z;@2|+73K0j=C?*dh02=4w`@Z z)0dwVh;4{*@7osNKBku-99v5@US0$y$&PGv4R284uaD7mM0^$-l;ic3XY+%ouLn7{ zvO~h?A}7tfskok!1@FeG8A`~nR!r}xw7k}()F)Y3PJ9#gqtd|o)qF|&B@}D_B2i!> z(4H#DR))L#Qv7)L6cw@NqPrQ3)7Y%*DwKXdfHZkC&Ic#ohFnNj6mFVt7u zy$h0Xt6+R1V1)x3BpcgM<%4fe4qPJW=C{qtV!cO9nA>AU5+JVyC&HS}>|Q>)8QdVD zN&Mc-I!;cM^+Z*_e#jV_+BG~P(c~LEPU_S71<>&3GOK1J6$ZcLg*Ayh?CN3G;~P`S zs6z#`AG~u(#)@?-ZU$}=+HzT=u#nP9Sp&1esIgX((ea@9WX7%B6{}gBu(?|%K$Ak` zN|#f2luOn-Gyefl!|GPVL&LdAU>}(h;Fw>-*dV!7OMcHPqc#d6_X_`z|90MA!- z38U#ljWAaJY4%j!|Dw+?UXko7tt>Sgh&TA=w!`ObS=fX1QO7~uXWi{RvX<@O+Z2+P zq_4ou@mlHle`Q?%F(Gj7@_gJ2>!#t|{J0&w?OE3Kn<2SpU`q%g$*GFTX~>70Ph2nt81+KY8n!+okFj}VxWN;j{@AT6GS3K)-mRbGiPNAJRq|j~3_z)#eB=qE7LJhE)dC}f&V0@UN ztzGR}Hwi_t6?{Hxwyawrh?-WOR2FN_Gq2FP1t{QYu|V>_$!eR-puPf=~bfg7teg(A|J zYu;)c(d+)oa=>0eyB*?Ror50h02b^U^E389enAfoTQve?|2ugw*1dSD2(0Q-;z~#P z*T-!Mw>#9a(=MeX z=+mX~I?qJ(Lg~8rsqTe8CLF^JNzgebQfag=A4sZyZtZZ#b*{+x0RJAlcRz2m9@ZgW zusK@90cHE8dkR&;SR%jvsi&?7?E4AZMVCb9=U;BgD#nj+OL#qs&fU!F-Hfnz(gL@2 z7^_ls#j2-XPLZ%W(dTAiv*bLw9i>m#A}*cL<=YBU`z?81YBEW)CG$=FcuZL>!wTL3 zJaog=TxR@nM+SPr9{G|OT}ATuc>>E@J|LG)-yFuz>)dqBH16w}mu_2?{#3F49-G$~ zY*KWG2(&&$D3|S{!xuF}&7lhbz4@ZB3DxPxb)<6!9V$hyr=;AoxFZVFn(+cBsiw#Z z2(K&(TL%ph+mI3u11HJT^bT-A{Bv|;>9PY0t>TxgN+;iWDy5kwR7Se^P4>_#Pn#eG zCd{|z(Ek=1$s*=_rzw6!Jvjtf(sFi?aenu4Wh7urdSn-uSu?u&EACZ%eoY)MoOxhH z-#j|*+&BN3IRgNKuc<)2HM>v$P}PH3g{}OQ9&QT|-{9-2L)&>>!T7FmcnD(BsN!5R zK5dPSsgrJt8hV+;)M3jv8FJFTNX)gC{n1+b7r=J0fT_E@E0cR`r~GqP<7%E{<%Ds; zg{~}EOPad<7kDpz5i+NS4Z82w`n$CEVC4_^)@7GPrv%we#%sB^;&$Z82$Sa-uz>{Y zE3D&l>Om`8Fdm%v!^*|L(4bn=3CEw6TebF=9a}axNuL78f4Oen9t`=F{4Q1)6@sS8kyg#eP+)xy~M zx4E>J{$%fWbu3%hmxL)Bw2u5|=4!vH)6yZ={Hl$1+-?T5WJRF@1j0{9Dr+_R$V8ll zwLBj+5txBn9|Dmcud9IF@(K~zeFfMZA8P`PNo|FfSm?R>ONeK^*d*3WiJyis^v(6H z^`;S9Vsm?rHJs3i)9%KMl!~&2!1^MzC;FIVq~~y9Ylh=z$FDr?rKTN+VT$Rv$?YkD zeeHn|)q=W~?epW_%M(+P6~AJwfbZ7?G_%)W?5i{Dg4ZU{{5spLaCz9|hWds70I|#? zd}!H?=eYEu1YnVTD~!@wlwf{ybz=?7(dmwRA^D~C1Nlz*hkF6YQyY&oqNd4BN3(oT;vhNgTHy~=qNc(>z7*FGr4=8EvTH2z zU-yQ3+p+cNt|wQ@+dV$pLW|@n&>wb1D~F(lQ!82o%XWRntTp;w%dW(pWp}M7TA9f5 z>;MY*0vOKNMoN-Do=)*L)KDJjf4Y)eyE_`L*u^)FSb+tLpyA0iF~tYt1_TJEiA&*N z>pZ~c#ZwCO5$4wLrAYL5;E_8b3Q1LVNO@GfBd;dcN6>eXPM zn^Qj};qg0bSORAz%w*W!uad#Cm$-hYD|UPi%c~3az&7|ggZ}{>djX}+$B?$vyi@?> zw|na=2Z;XUag~-lD{!h=EM>bUFB2h zLFsYyn--f%rA-XpoEsST&i&QM7CM~8w_?SGnOAI%wnUa0XWRz7_E7i!-xaRziWm%L z!Y&|lM>ZGw9HkY_bjNCECRdr*H6$hLIK)`MRqN_IF)iY%^tW#DZ;k8T3ex!dc3?B1 z)*fnvi=xXCW`U|UzI!A|;n;YV3ENzeB&G%$L{nU^dms$!pUHbVNBHntmIp&}_eip5 z5pu`KT5aOzOacNLR)to_1~6>re1a~9lOCzN#ZD+nx_KmWLnh4YrQrAd19537^*Qe0 zc@Ik^OR&WYF_G(%u7u_i5Ny>PAD^2S^?@-EG_lFOL63{*TCw1?kNT4hsT5ZOM=#{$ zPqgEjH-b(>smGR-pM>)B4Nqs>7nu$Ud%E&{6^b*)izc+;ZA$1e=AtshE_QGr>n$}q zUN1IFa$=Fi{?!N~bE4dusuo2l9#}OeJOg57PBb@G7;81Dhg9i|pX<$(^mo?xYmdk` zgcW$U-!LCTMl8jONHbWJwcOo~-Kn?(-Sh(*1MFJRf7^zE&H5KfU2rFNg<^-2IeKH*u{7)3K_y_jU z$He;~d365;aKv5p;h}N6uWp(Q6*D85;hg&+KneX`$bQ9xVR9cYxJXMAzPf)g)yedQ z<4nY|yT2#jB6cLr;9Hh>cY}`C@-|^)o^d$^=Kj1iDE7ouCERIpLCnbZ?Jbh;v;*6} zMBYa)?;lWH&|7*{EU=v8SKb40Y+X@iT5z{!`oyAVc-Yd2G*z73c~A&zvOgy6W&=#}el@_9fN>-9uC z`tonuxLbi_G5Ri$uiLqO`|TG0$GMsHg4zA@!la7L#{RKi(shIjS*UN6-W#XL6kW&h z=>WEQ1Wm`Yxc()%qDBf?+{oWyE;|nVyY|~YIb;4AEYQ&Ij*nET$Yw8Xk_V0+UtF-+ zJ{L7zZVb?2JjVFB7)OF%y53R^6r?Db?`d)^CiudXV_n3TO@+u7WL z&G1QJyr_C4uA1G&4~G?=!{2i|yQm@n_RYqvv~Oh9I6ir+S5>_U-Ly=-lTlHt8Q-+N zo%#46ZWFHnwNCbZVCuEAtokSO=Ml?~g&yE1-tmg9^Xy8aPeyq<=XBtG+gTrYo)2Z5 z8?Sk9Ws!^WcF>igdt$xjZb2SkA3gf+k}7KjVc4GDqkJXjK0nFFo`YSGKcH2u_AoMy z`<19lJx(0?bX)7Q07(=P^*;2JRwF7Kw(zWWVal`tr$OA|b*prHdD6(xgw8?I6zWLc zKGJj9?y!_{awpY~b}8QyzM-zUfNsM0E!-1FO6i{b7~f|zoRfc-*)pjzZCT~>NHzQV zLUJvo+PnibJ!oQvQhF`{yV?pla&5)s-V5>+LzA^>ARr$j3SIhCO-qGDv8!4yL7%WQ zRej>MFt;(rktn?38528MyXh>YVX!7Pb$KS`c%6X>^nEWqIgN>-wtX}Gj(rRt_Op}M zg_w6B#`!=u?7r2?4Rf*6KhMNW&$u!VZSFn``X4Dxg7x>QWMHgg|DO3rb}-C{6VG-G z%*GfGAIB_`TIe&G>+liNcJiSiBW7bcYKP%3x-s+m%VP~{<){L%+g+om?5eBM2IOAM zd}l86)=8WL84_qOt!VAyBiMx>7o5-_j?-Om7^8x*NEf4<2|rCW#)DWf%*QW3WM?hf zpN8)*ZCXn)K{A3)i~-krD)l0E8q(E`nQHoq_2{LCg_1JzrGpE(kX;xg_XD@5dMbRF zSnhK*`n1My?YwC}Zq{RmeIVf|!{PT>$f!E#t=l;301N+9l?nC3Bq_6Qc3Lx*tOOI~ z(3Gwg_@gOZ*&$K13nl52j00`->F%8m>dksR$%DtQ!&PvcXFkD88_T-5ZRl@RPLK!y zXEd?uJSJIFIj6z|yYkkPNBlO4LbHq|{|3%6f)#Y`k;VAhU9noQ;H8i(QB6aT-I}%B zjF3tQ2nk+-;GLT&=WiSb|9s|j$W3QT2uMK{DRU(3^easNHj1I|0_U{`Uo-C*IgL$v zgT6Jag)W8dewQWtmlK23{J944x?TqCUwAu}J+K2pm*55iQ?rWNR;j0fmZVqUa|{?S zN0eWHa#rpj>Sm|_zmoi*?Lt@kty{Ia|70@sK~It{(SX^IP7+2cjrrzryB?uB%SHyga4WkPe~t_!%kH`V z3%BEdmSjFMy#joQnOsTI=#S`}by&^6fSXLh8K}A*MQst@6dgcn(<(P-y@a)Q0|OXK z=Tvc6(@CyCweY0uul#9YNXW6pb*ghOaIX3Z;#{=BYH|xoLepKdH$KoSp|`ALe05h3 z`HaLRj z!>)=TQQl2G!rY_ynP|A?Dfr}j2gBQ*W{Mk&u1ml1^3#RLL9@A!2jSEcWY2SMv6PHH z9K`ZT7z!1}UnhW-drstaXc_L^qMW+7mmn;Mx}lR=nemRVKWauU9>rXtxaWXYVpQ6r z5j;dUFaO_}WQ&E)dTVG$z8`9o@?U9hZW>L-*P50sc!P-_xS|hD6Pmv<(=V zw|`hx#hV8iMU}W-Gp3x3x3D!52zs1l6OLaa!j=EC;MSu`e3piqnLivg7w(9<)<&ka zu(3WOdgag(q8IlcbYSW!#~DgTswmD(8bwMM@;v&-8I`c<>sHW9-nMpmgw}UEfz`L6 zVFUh_pYl$Su6qpN{u(rzz~h}rUz^2D%ogROos<42|LRiw!zA#^cFYy3Lrfl-H@l8x z%G=8-^>Ap&SR$#}BN$-T-)T?&!MxWAj*$|~9P%DEflmJ={lf)bYNISLw#zI$l4{(t zoU)afI@{QoVpGZE|IQk)+UXm$#-cXIF7}e&mSo2oM8kW(30dfZH}BYg#E7NK5%>?2 zDV$GP`85Vm2>Z(XpLv^dVV=tbCGvbx$l7udWgIcSQr2)ilJ=08jZC$^7_KK&+Jd^| z5S|YoUO#LUGE`;|OEP>tp{_;VYIs+9flsc3oU*SEcAK0Nw6oXlgB#P$LPHb7d>ak$ zweWvxnr2ZR?h!r>3!Qq&A?WBs)-_cziRM1A%GypBHQ(-u4^;y30yv0o8xi$6xjQg~}7)BsFTp`-l?d)>95Y}8WqN%F^n5bvB9x}QLZJ>_p-MU>J&mue!@fHoiACdc` zhknX zRSfftm&)uH&X8|S!ORQL%5Guj8X+N?CoHn$smc~$k4+MJq@x+f!I*Qxc*f=RQHayH zNDA%Qm}8}}%Q)uXvf7lC>Ax4`zsh@)#K=MF)E~hCqW+;0|NN1=Jaf#LSwWBG$Asy% zdjUz>H0tyf`P=)6r-iZ)hBE;=np3iwC6yYX%g`tI?$1vlRk-cT275s!P=O#+(#C6V z^dt zUCWQhfoEggH9%|+^lr?WD|GGL{nGZKU)?OL>apdw9V$IdY}%3DEBQ>WQC3C^q4*IRcy?G!myA%qA(Zrxd#V% zj#wOT|Kc50gfDULc+zy6hYAXysZu0FY#ffFw(HmJq7dcXDiD2A_oBn}c?xLuAk>E(rc^U5i=2ljvODP$K|whqv2 z$c2Q=2G-^!rhJFcHp%;$lY{03BOmlXJ;nykyQF~?M!ZS-6fT$aPttS zYsQJd_p;2yS#7_vZCgTvhOR`W`IZ~E&>PBUMy#f9YM1|bzeMyL43yq_UTegTrE#4k z#RwmgdmNvRhDD^}UxzhkJjVFV^E9Tfd8rV?@Fjz1iC@y-7n1dzjhL?Xfv^YcHDCvfNiid)t*a-49m8hjF7>q=d0W67Lh+cDi;y2G5^8S)`cH*Do-N zE%)3)@maIN*XCAck?eMv-sn#$a}9FH$4bjD@k_lMD9ZHq&n@Q5u3fR1COTQ@rph=^ z)WI#@WFG*5k4Wgf;D%8QQ`nEkmr6Vy$}bxQUm`FypUF9M{KUv=ykzGk-QuU^g_R=jlzID#+Hc$~Tg39oaqfd#MV}GuS3Apg zhk8A9?Y+)Y28LEQZU0t&+qN^cJBuO+)ffKmS5v z*k1v|2W`%ZjQ%S7oOnXU`&*R@>t@*Jk}gQZqDaWqn^SIVw@Z)M{N2OGh9Va zpNVLn$^~m#!e8U3pE0eFzCHz8NaAjL=^v^0*iCTX0qJiOYnoFr=7YPQRdh%|joL|ngm4ZV{@b=vq?qJNWRV6 zxF5JJ^hZYXs;qzKAmf@cj>~o8L~?q^l413W?fjpY6LL;IW;-j049U} zBRo6Kee2IF_CX2hMe@w|SDUbn?@IUmD|WSi4%@VbKKD4)y~=#=oqIXf_WbodGx0_H z2C0Gnax}ygdy9W9%?9OU3fKi=gjtq;W(K+s<08z3MsG&*dRY)mre}Y*x9-~P-$Kl8 zntgD-zTuweX~FFr%}aJTPEH5(b3G^n=azlO)-}T z8kublB={8f{ZPcqmC?V2h)uSToXlb}gwsN3w5DHtxxYnLZ|-qYbJ+JcSMvV~=i35X z&o2vER|-$}?`mVN-9GsCzj)1!DZ3go6)zI;kF+v-UWkw?CQeZc1TI$}lzoyFzZv!& zZmgv3^b?ec)##l zb9Qf?(oTv$SiH&ni#Q{h8peo=k}!Df6K50#NQE$8T)pyzXSk1%rc-lOw5;-taurDg zUj4VW^(wxc5`$G@J73UJ(UX{fPhP8CX|k3o!LH5%Eb?f-5QP3Q`)0n80}@2At3M1@ zG29klA>jD~iKua&D(1D#jsPv`N3PG#e0}~&nyrk!uOuP7Xk@sG@KN(gG`ddv;+@mc zzOX>o`u!5|biZy@voGjhLQuRw|W!!YNa~XezW^j3E zccu8IRAZXkYrgR1eOyHUj)xYnMUF|9KO$w~@GZ@cF11GU$Pwk7wJsrdKV6IGH;l*1 z^0kkNrBl6k-2K16E03~)N%1>w53ZKZgu2)Uh6#P9u6iWclpjKjvLAcERx9#3_Desn zWZYbA8kJ5e4csv2-h-Q@YP#thWCA4rD#))kspO7qNX>gJ?3oV(yWEC#ccK2 z(Z1Or#M{nQ&cl3dZ6&(WqQ?)Tlhv}nE$sn(Egc__=JTKQUYoG=m%^z?u2`)pE4$@%EJXI5T`}<_wJ20A>O!TnL@Eqr^h?^?fBv(W&+?+toP^PwVU_j#Rb3= zW>u2#F#g~t%|BRNi}s?=UP6#<#>w3sGcA`7;yWVR8Ec=Nkd-HINf93JU1l@Iy-%bz z{7rfdUbNHl8aB?fBH3Hl?i?A!YWyl)Yz+NwIHR}{(kvbS{KKgP_B4zKN#HGd!Om`| zu(;{LgSTw}FSiXcZcB>o$&Hw{4@!CjJ^^rzw0$By-{0N&YKP5>L|D6q*b&t9HaY@1 zcIWXV{)ozx+v3I_D31G^G&)!}q=PE_8fQ8lu^aYzi`esdMvDo(#p%YQa&pRUFE_3D zZM0X!Q^^@2_}zX#?~Y`+3S|Bida6zHMe~ZnkK6o-*=pCX!$-|UAq0~%nG2}OTgIiF zcoS+B(+^avDb}PO!k^Em?D&h3vh(T9NDZ&PwLsrSTUL;?YWBt&Zqjw$-z?kVK%}nW z?`Y(YKVcCvy_W-jBiqH_3Ej+h4ophgr%-Aa{x-ZS)9yLL{#aIO%@oHd%M}hP`55${ z$ERV=G$Z2Es^T*C234xR%$y;{>%9H`JK^}GLF(RCKtk~rabGs#`iHL$zkBPh@(LB@ zE|;7y-fUa$;CX!eZvxYYdZSp0(QRB;$c%v^gT(Uf%!^({$FJ8-;#|)cCVVpk+XH`I zJuhn_&Jgf>iu?Q9EX{R+VeJ~>MzLdX^iZO1bfFNdVP(WXnNsXnk@NdjW~bHHt&OaU zbf13lj1{vo{`=3r&lqD{W#0RBUR^JeZ5-=6O@BT1mbvBneo5^xRWVNY3#a*gLfw=R zHbkez55woCM}tdg|Hnk7~QG=P#j?%%9`D+erB{BoY>xx!0|2xrs& zV!&jflJPsot|!4_Pn2oVcgA)lPdxLUcZ1LAf^CNT!+_b&0X)u(o%gYfw9=wrAZpB6 zeB_p#2Zv8m#a*2*Fs1c5JDl~n4Z&{AObfgsE8q}`ut(Yn~$~8C3V@8>jLrlBKKfh!nm9sH> z{;49{HRQ{_@z?K~@y2+QfG>Fh*;48zvIcL#@Kl1<9*A|f4 zGDmcQkcwfy{`OmK#z*{z@iN{tqh@k<+SUU0#(wNfFy1T|$*gc)J9m)2t>X~-e8RQk zC!IYYC$17vFsQdh)@4G=`>Bxm`tL_8AZF?F=5Id^pTgaO6@7AF%NZ|8h3tR$J091* z0ZNNJ=E(OQSCmz}Qkvm}vngtyQcr_UslGNc>1$+I7qb`WnfVt3OXBjc%>U%Aos1=E zOme(`ReaYU|3`hha?yh3S+3HJ8TF1&6d(aFY%Nm}YbGCR1 zr&6XBb9Ta=rM{q$uQ|RA#_$Wb8_Ma%uJtY2MN!u4ZKlg(O+Tz!kM@kgPlBf$*~maM z^HL3-3a>!>JbVU!`0tnWnevu>{W-I&!V$%87iHAjSBQ3Z*<sjoB75A(gQlV-q(vew~t08*M9PLeqaFu-ICwVFC;rJ|7d;NfGr8X&V9JE`{+-5U<`w$^%^;3rZ2e}mCgDCbLv-jtN zZ84hGZ*M6(qW_A7U+Hy}PD)09Y5RI+812?98?@|L3vILfRlgG6Y(~x|x@mG6?;rKv z4r_jeeso^qPoVyAGM7g_d~ST@M0#eHdSUgI7l)x-N=0kvtv}1gTyG)jMuHi#O;s{I zr^@je&9Cz)=LfkSf3{Z({J6REd+6qeU!VKqOOk-v`tz9I6_mM~AAEV;>NB$`Gj?SB zsBbKeUU?QgZi}hdOaIEhRPKs-bv@j~6!0ZRZ!6ahfvkYdXid7Rna63tHngVCMi5NR zLV3G@t-AM&F>!VyJY6#z(>7*m%cohAh#AsIKM#H_)`4#)eH6Qlh69-gc>kl0>o3BT z<)3b`s0OBlYy_Ibz;9SDu6&O#%Wa1gf7pz5*jr>?jpdditsa%XY4>h5?Z4Xhd5H_s z3#m3(qwI_x*>@AZ4g>@=Uw!JD(c7P)OAK-+wj@ry!dp_M-tzLKr0L0yS=V|WggsU?FSM2+6G`D z@o|>2!_k1v)@3FC5*0CtIy|h)Qp;2`FxNouXe|1uBkNCK947rt_xR zR(eIaA|Sv&U$!9IEf}v_=@3F*u@22uhW`z=SA2>nJH?fyavYDI!oMXrS4TTPn5$q& z>h%FPgmR6mj`w!KIf}M+@OHJG+s@fA>uBcKu;{j&1cq4p05QUAuklu|T_n%Jc{}#MZ9Kr`maP#p1iV@pLHXmW(;~y1G>@%@ZMngx=&0D-r+&az2dwsGQ z^ewDZTlEl7KtRk+m>F*IN=-Ac12V_C!HS5DEEcyLtXxjDnxw}xtl=4e7e)*EbzAhk zamvwbxNOW{l;0kreHC@Nq4W#on#~+tJf%52keJ4tI{I4BmRzW8WFs^S6(5Ob{W99L z^<$=P%PMMLUg8DSq_E@E!v%N>4kKT;ReL@U-Hj_v7q1&?ycI0#;olRhn)G`4FJ8aziUnLR!l}j4ZF(xRszDA#6;{!b!dOgv z)l)N-5xO{ha0tEST$mNS5D7*>L=7atl^q;}5`av_wdeC^cEaM^`VZT|roHU;Jpm%+~EeUTs-FzTi zp(s29yb!=GeNNlW_^x%RHRVnbZO3}d@D-I~BIp<~nW0zeKKo#2Twt1JG>*2k*b z8UyUD4Fzo=3J2^yAJ;pnaBeSn6$q^pJZV1dAZG`6y=O#ec1k%mCM<1hF!W- zs&2}A+l0<>Deec{^VBc5$8(a}JcA*YjQs`o=G{T}uBiP+Bw*|e5!%jrJZwH5qN-|$ zZzFKExnN|eQqgh>#0R?Ak531=;7BfOi?=cmtd!~2d(tADOVIZf8;GJlgEU6PG0j}K4sCg+Gf-kB3(^O^J`Vr>@wrbwu?5J z%c+>6jU~$w^T`FW(IkzhAfwnWL14L%4@0_%dml72haad&e~PJU0SV=o-NH1%ro*Fj zFLyHWfx%W$H^EsSRHelc<1{;&tZ`A*w2_~%Sr@{YlFQdvwRsx~)9Jc+2#!%xdh%ok={#+{$_?{^aDLQ-hHGe&KIbQBf=XU9g@Ii6a zbqGBZ57o=EwK;5Lldy@u+0oIU{74)1Q>+w3h|{@xC5;Hr z1v;QO-~qqM5RQn}O>x3qdRGp3zXTX6&5GD(sEu}#f}frVFBev4#Rg!QF9nt^NdYQ z@nNk$Em$d2F%H>`*H7_rU)6zrlBCv9DhW_DXQ4fTvW?*ct0yins6Yx}9G%-edR}|} z3sC6Q0`+t`BIXG$F0DW0Y5n0>QD)ajy-wvQvS#t0F2(pDgqUSJ z*k%2$a-Gd)WC}kGMIM`Q zrX@|frvqvYSo5Ua5Q)?>hXIthho~TZc!?`JDqh(N8+Zj)##01FKgknbC)@*JK|Yqk5fW{K z`3dS3Rs8<720p%RZ=Wv*V>p#jgvCECYmu`qE3?tgpW6qJZmdc{~iR}PxePJ=Y& zL3vfVHwKlQMmNHpzhB1X`7AUrM&8URDbKqmp6i)Bjgiq+`NizW$&f;}EgScjngM@Iz6X z48g{$el}d_MR!@KNK?y^Ekq?@yEr=)aHEA|8PwWCs@27Zx8G(e%t?_mF`Q)3-8@C< zKjRf7rjV;-rKN+et^SxGfs45ZgJkzYU3&!Xe^xrh$G`cMY{}$K7R~25)R2o#~DaE1}0H6VJsl0_DaL4@<6EY4jphVk*$WdS}Aih_BDAHHN*>v;naVIDFeWTX!F)MT?K-nkJvm!~Z4@~Cj=@>fddF}|IYd%oVz?}3A zPljt*g&&(5faOCHvfe<{nt}Gzs*J>4wp)zdy8sQaozT(u=J0i;15V`p!unu>f(TB4 zMU`tCzhID6BxGF)u*1nXvn^F0YH>j6Bz!ChS+B-}uu>-`bUU5iEU6F~!OGx`u?Iu3g#$C4-+sG{`BOh|B-N{AkCY|@pJnzAG? z^A2N1#nna8FX5~?ry5t|7t(K0?w-m>Dj)rCc=F47J#r{}{D z7DQO;yPH;w8;lwJf0-C&RYX0xzX}mHI0tnGQj}f=GwV{nB{<(<-sfkF{+cr7uD9zY zp8rD5>sGoRCbJIYGTxXA9&+U{DYw0`Aua~!l2n|DK@H^n+oVsiIONtF)2lA}*#xtH zuYHKWrniZ}NAx|6=-i2b98pIR@6|kzel;%x0wmqw&E@X$2CLkT8dTZ)FQ) zsbFb>;%K;TW=mfXbDy*?&woU{2SUu4a zV~UaTJ!NUg_HNV2>nL<)=^lc@u`N|_rTD1P<=p06xlWfJ7i%JF(nPd0n0N(NHE8=9 zlF@pFjDn*3Y_~fjn(vxQTo6|UDJc1Kk);RWXIdb8z&zb}Cs`}isWcOED;W@2)jls! zmds_`lF%v0*eY_OWuqzjY&lr2kx$8zyG**ZSm6!cPVZCU&u~Fst-5t$wR@ELaG4Gi z9-IfkO-WIILs0$E2yI{K=A593LSK0)b)R2pYm`xN2ngB=V-yYMW~+2>edP87^$~M| zxvI)S!w$%r2XacGyVFTN)wdpVSLAuf5TQO319VYocbf3l&zpG^1`z6H=lp0!MIT6Y z#$fclY9jVIx^=1xJ$>Rf<;khuJ#mnM zd~hE3o#;XQipBO_+j*Pa;))7g$B zxgDi-PLhIvfw|B|RM3{|c)+m>=_ghI*QwuA!9L_b&P6XDrQ}BO_q3k)3oZzMl+>NK z3=e{}!SN8xx1@OwU?7#=ju%wl{m_nkyzxyItc65nUHGafukN99f$$1Dmu*xPLged0 z3LjMg!lf75ApHcHhqi(aEi{4voCA=UaUv}?kw^%p3808Cp zHzE&)O9r0Z)a4!0L4=#{^)#XD=~AbgCWhUuM?_g*`fqdjDk^A`%0#+#$?9?n?x6d8 zdKA?8F()d>quTt(E}HM;uRp197-khucs&d@B^PED@ZB~1X0Pg&ggTP)B$8I8WU)p> z5jNvkdQ}ZS`V+=xIFx#_i5duu=GU)frk#t*B~|~+A$YWPLkn}1&5j8KXC~BjOWPRC z8Ivj=3JYRdV6`>SGOu6)UyA$P7|YKv)UZT2ByrM3o{nYN*EX= zS^pVoum^<6^X4bBp|TY^q&U8+J>Z9#w1e@CtM;mRqW4OZu?aLSDwxf=$wJS6Zoqc_ zNG-iq2Jlky^DO3qZ7!t&29J&4Yn{E1oW6RGB?y zo;oq?XN-YGWS>wPeM+T)eeb)rRCEK}=16+Vi8mj=RUsFFwEynJPaj}te=RTynBepoRlsT$+w!BPplRXdk{C=acN2N>TE6S3qV1d%$`(<)~6CKwMIc0uA zvwFysCkkFpNh-XFI^cDVX8f|*l}zHmAFCk#OyD#$`EfYGr4;t8Oc}|nuc<`{hrV5? z;`NEpAEL2PGn@{K_KhmX-gC}I?R8C>9Jao@9*!K!x{WQh>YBv~cF#Vb_gH<| zJ((50heQu-FgpMnOFy7*&Ppd^6@lz52{%gje!0O4WLibS^2HkS>OQxoLDe4qWP|@} zB7Qg^c>@TF)T-_>!mrVf-L$daqkd5f$~o?Prc8uzZQP~hr_KWcvojDeTm~-z-4BfE z7&uT=KeDlTCfP@it*BvHqX+Wi7_X}7K?z+|>+(A|(dua_qKPdIDIFUn|FIPcEoglz zN9t z`$`V^WK?ETZ^Ml_hQluG4oONYyOLga5EaFH?x?aOTFBPsQ$Icv=zVn~^i3@K{OQlQ zKKg)|8W7Bb=UH`bkYko1qBAw>$;AvreFDB;AM-|c9FxhGcl(p3KLwfwMLRW)s_OIc zO`R}2j)xd3mXK4i3careo8n}8X#+7;v zMDG>~v+7UZ)>4|P^ybb3ZUwy=#nXPauH=^*KieSzN*4ZmDx)Fna8+VZq7e>I@72Yd z|3*;(JH5pmV>;U!Oo_rB)MhqdP=5FZqGC8+QPd)4aD)#L2^}CMF5FzTI{kX{nX>8_ zN;zeM%B9M{2^YZ?636qDwL;ms21QTw9*4SF#DTJ3L2>N23Lki(KMvx0qLHE z=P^d2Pv1Pf bJPUV#C5_$g9o#&1;O5{#l5sED^I!fjfHEuDOEH_HBn6G%JxM@9( z*JzU(uykgp!E~NTCarC04d)2!C$Hc#;YKO`B~g;8e=YT1=`Z!*>rm>*f3=Eeu~1j` z;>8FC{ja3j;0JJBjoxUrB{j+BS&oxM(@6Kj0~LPiaOkilZy7pbmSf%NJ~xeIJh0$f zm4NL>4SjS>Sn`w9gOCB(%V)~S_Ux#+Zr4Pk^EBj=ZyXkWDU)y%E(B*cbvZH0y==|4b|2=D6%F_GRY~wIBs?BDOK{hN1x~8u$@oKfUchuqmXyi``Wn zBZ9P7*ESV)ktrG6_<;^~Q{7%wVO?!iu>$I^g#jjnR`QD-P0^=zV<<^)^--yO$CxJc zvvj#aXdRy|#lplR0ocfY^B`js&7f_`ym?%K{(LUDMg;cgC=LCIJ@ak1x3!ExO27lg z8_wokb}r|mG$q@J&g`1C>(xR>j4IfAFe#;2>G>-pL}2ha>VrcnmsOWA(?LDAZAYT# zB@3YO;mS*_lLtUV3a_>9uIHqtSw12k-SoSv>=SD8CYT;3x~SWt<8XkN(HXMT5EkT0 zRY)i8&xbVe`LY;4}sCxpdV=ZR$7R@F%^Z5DO=ds83@)*_psDRuy2N9bSe=UR5hT~ zBj`{NmSSd;tBz&tkHXP8jwZ|Kmwxqk2Rbs=@loi)L=HjZk@Widu~z(lP=tH=&te32 zMjdw4__-u>-N%m~!6bdB<~V`B_Aw=YnD{|H7=OJXT8&l`ZRICR#k%Zd1LnY2Xt-!E zO5JXuo&BWY*ESR0Reqh%rXcTD5>HM&o9l!ZWNMnV|NOBrU5GY6?|EWbe;K!@>j-gU_lkIX8^Av7SrX-6lqMZZ3({T#beD6yLI>H z^NXw=Yv_z_JrAo)b!8u)Yh z@<+|NU`fvw-~Atm7(*L4wbANta{d!$4gRCvPTlqS9F>%c3bZ@JBv|K!J2^NH!CJug zo?qw{ebr@@`R!lmW#|hU@TnZ17Pl>T7<;{o(VQ$Db*b$T@lfV60WU2jOn{DpKZan9 zO3BS{F7lvVe~b@I9%F~Q1-s!79Z@A#f+;_a4Ia*kTk>91s;r9nM}Pw;gpF|O9H7oP z=u$1m{HqS`uJ4g1szO%$#B z$b1cfQ6@wfxvrKHPjvN2Ff>%mx%8om>U1Tggc-SA(W^pM?Z0EMWphj)%k1W zKWWkCS;sD^h8i=JC5&S5jGXSHa3PlN#!jgG6^wTHG}%_WE5%o|pNJT{kfCX?_8XkdkSK{4fykZyNoSQ^CcSpBy|cehzp^ z^Jwt(_lf~HX#lNE717iQx^=b1a(@keZI}P`tij)1J*&^cDmj1^M`zIG)n!7a%7D#! zE%MTOVH-4yzT-27s%jDrt7$M>{erhLu-(u9SXK^IAaYPnMoZWXkxznoujx;eNJK`+ ztwr+#QJwL=aRVP_ff2hsp#yP%zM7Io&?dS{*ZwFz5%8L52vXF$<3JuVcf^{G_!QOW z1`wnZzPjlTgw1+;AnC_9_DQ{bHWK1=DNa?oot~U%EK2SG`7jx9>k%|s=?3lJ>dH>` zC&j5GeeC9%PiT^!@vVSQ{&-MQ82e9TR1x znk{d~5+LJEE{#BV(I=V)-ItHTi_tI2XPs^w(SrWHkj#h#bivBmn;m7oFs6m1(|Rbd zA!WjBjMI)h@$!Rpt0Ex)5B1taG4KP1+A3-WLJW+ zOvAQ+bY!7ldvn+jLs7=HnoEG{8$OGdbdU>Ic`E4N3O-m}ae-svY)2HZ%%mW~6`axy zXnEbu7>lkquNi42{*mM(&)V4&Nj+?2)G_q5qfehePCX~}j6vC5yiE=0AiCdIduiCz z%ZhBIq+!jev8wU1{E;r7ffB%=>tjm5N^?3f+7RrM?c<(}e)flXywjJQyQpg+)yTQT zjJd^SJqPUrlUTtZH|ZgZHd1IuL-loG42|yj?}R^}jpGcNuiGMBM_eI&iO7jzia3aj zPD3=00OrOLRjDG*%ErorG%ZbtWl358j>(URzmAeu9OSl8uD4 z3VONpdm08+Wf3%=h!4)|R6O;6X^bywfWAW(JM!skR&+ohdM|twg9B*)1EvI&pYYRJ z^?n9SUwLX7;sY}-HcSiV)|BadU*_8JpVncYo78Bj-&*_3=wURYF5CygQ0bO_otS&v zpDusDh0}!Fz|#$uj_}(<6222Ahv_G zL{iCTDu|^Dug<9+FL6ssebB_RMjF8WD2}?#-Oy;cq0D|YBXwZGGld>0$Bq;da8{EW zj4A=frlQrXNKJ3ep*+rD`rH!A!>)1Y+svP*&N~Nkh_TH2iEy~OfM7o3pYbZ)o(4sr z05(jNpfZQMt*R2cT*WdoAp3#mH-8_X*dC&U`U?%1ui7gn3l|U={jM%B8-~&^$V+XjZfkseCFExWX+I*L}vdY?2q$2gOWtPb;EXW$slpn6Uxm9Ac&!=%w^!q-QU=|U;q|N! zKUp65d@iX-p1o!(e)z3m5`Go0^zOkGHOg6IRR5uH31Fj%{P)(Gu?(xmx3|Fxm7F6C zuqpmK%io^N#94nYo>i83s!Jb`mHS`oMIc?RRAHKY1JwQ~C$5eKxypP%(RI{4!EDx4K}!a}2fHRW;D2`luy0v$V?u=wyaYA?b#K8y0KU z7xV|(hmJPBOmUUZKqboqcY z`hxEa;7fN|9~#Mn*Ebidu&I8)XsTM#aJASSWZp;YZAo=>KR@N1t8Us}Ri{J*v|+Rs z@Jnuk4?w6=TD*70#U(c}sw+d}1OQo^l(Ji2o>fz0{~8Lzgr&yj^ns+h-6AZyNM?=c z%En)gSrh~I^Z~5e0SkdeDa+xdzAnVMTQOi$vqtUNsvw2j!+F85I-)yq2y3G*cl-w& z(Rnj%@X}coe^ODJ@SuT5fU$YP{ZXl-!c7Zrb`XQ!th9L9VdU*d^KuFxFI$phtvrib zD~SoNFkhs^Nxzlo_Q< zDNiN$bGE2}sqUo&VIMZ$emAqZ=CXK+jNtJcPOnG^P0sJ$8{DGx3X3 zK=Z?FJ#2)p_&?)104vd1-47}iDXl1I(J}cL6_BGQjORpu&fq$gN2(^L_i*)X9 zJCjGSKM=?e`k>P-9Q9zYQiHRGD(x|6h!cOk?aVY6#M1LFmKX*hE0Inrt&+I$44q=K zChIm}hcWR9<-e-DnlpAhEp{BwF0Jal{lcW=A5l_R0LHIs-)u|8^2HeGgsFYV<*W$2 zkM7n3tlf#KJ3`A;+y3Aferh0XFcZxuFyD}}&eY~$Hr=Ic8+e}*3M+-YPQO{Bmh~Ou{17sc@v##U^ymyu)9l*Z z*>n<7T&2P#Tp$2=$SR7qHq$lXj0vz)Z?JDxcYGV#_5Ja^bFjX!st`i}s|qZwffzT$ zYi6fW`uk~ooVDuAi9UZG?n?obqOU7fN2=qN*blIvP}Ty&J(Db8gtI`K=B)yHu}_nF zEn*8PRw{p+BJ#*-cM-CkvWJ^g-9{SA+_hAnt_2@JE#08Q_X$jV%>(-B#5VO)PRBDYMCV(y zi6hJwNs{V6j%g?Yj^48o=wiVZOLFTJjN9o+-lX-VN>NMFWEJ=6;|mMm;|pl5d{Ak@ zT+PRDW_=hvXWO7qnTdv@Q`nR#;1~{Xc=*$JpewxkVD;;-~!pUWA`@}cT6mp9(P84-eUphXpb33c4VlvY}UF@a^bb?B;Ejj2%vdvaZ z7?Bwhc~j@ug|>>NHA{itE<0z-60!`vW1%t!&%Ox0{-oV~X!gyNP04^R%3$r#46sCb z?;K$DO3=0S_MYAB^eY#I>=BEol?SaY}$DvEqX=!}%K9tU5mzjJh5d zf6oK-Kh7ckb?p8V5bi-vSU*9hJcgGaq`6bm6>U8?u@!8q+JDf)_J}yeR z*!hR6q-wy&g6Um^Q4|-eeNFKt#Qv$ubYcTEo8O+g;iDls#&IaVV4s(BE-Xl_F}h(|j|vG2zFbl)V`M|M^e7h8Xo=iE>#aa&U+nUMCmuRac& zgUdeyx057}M)7y+^hT4t%5rr_U&WW!sA&lbwt8z@s+$@yJV!&7udSf}22*10wuAlM zAq7}=A8hAZqqPlG8^;|?Ur(M~nzTVBnH2J2)qB}b&2rM`;W2WReWXpnuSG0=07W5mKn zefyw#64zeTVBy{hv?v^@{j#I~iN|wsWAc-%_VrCT2F7|?X)In(*ep4|PMtqkpFEk* zZ#dDrF4yv#8vN^;l2@`eT@Z-yxk4FO>#w=$monxHD=t?TC%p_Do5kt_1nZZw=i9R8M^s(p z@Vz%ldJEpV3xJZn(8*KT26os@Zy;e`dpl5e=&ehRn#+NfOJiZfj^4u85|+LFD9lD+XsFH6VfSM_E|^M%=j2brZEsC&$FQAxeQNt$YMVOGwA69a znxwXMQ`?Nx_TkjFU25ASwXL1nrpDj2%+z*LYCH5F$JI}5+oZP5{?Y#L->LpjE1ud$ z|8f3%QrmK=ZEBoMyE(PJIJK?zkN(yApZ%YjdVbUHNNrQ&Y+9>-+-Hr{{>M|lE2Or4 z|Iy!nKc|29x5YoM-{XJ$_UH4GY=il|gg?KVg|K+B|N8gW|G#}$5B}TViu~Pp={Jk- z{cpcB9-H>>_EP`n_J6PHmkmuB4kQ0_vg`kSuK&3J4{h!5mOpC1u>ZE}r|pW`_2T@& z!~5qq{`X$#4~{+2a`yFi7r9rb|Hoce4apzVazL+MBis9{@qbR4ky~UR|Hsaq{vSJE zII>Tl-v8OBd6D~EH*$FXh>?9d4j(mO$bhlt=Ka53{$KCpe?Fq#zL7IxK(EoGf)f{E zBxUs(^|Lk%@k-JSo3AVA-=|>E@L>an`aSW-@*Vku{~uSo z=D#1&V!+@&MV$58|JtYH=uu*)tNIQY)2H`;eQEz|himTaKcJw0`+R@+jA;?hy1$et7Ue z{DGSKT{<{Z>Zvy#IdVY1p@CD7YAluScg62$=h1ye_i5K>XunbY8|$WjU*gJ9{)aF) zKHT>l{D+=;dFF4KqlXUlKZ5K5zJI{q%l5f_^nk$wdNFHe4fP*>|6!y4(|6I_>7O6p u|2pu0xBUB8z{2GK literal 0 HcmV?d00001 diff --git a/man/cr_microglial.Rd b/man/cr_microglial.Rd new file mode 100644 index 0000000..80d80bb --- /dev/null +++ b/man/cr_microglial.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/package.R +\docType{data} +\name{cr_microglial} +\alias{cr_microglial} +\title{CogapsResult object for microglial_counts} +\format{ +A CogapsResult object +} +\usage{ +cr_microglial +} +\description{ +cr_microglia contains the output of the CoGAPS function in the +CoGAPS package for data = microglial_counts +} +\keyword{datasets} diff --git a/man/pdVolcano.Rd b/man/pdVolcano.Rd index 7a4967e..3127841 100644 --- a/man/pdVolcano.Rd +++ b/man/pdVolcano.Rd @@ -10,11 +10,12 @@ pdVolcano( pvalue = NULL, subset = NULL, filter.inf = FALSE, - label.num = 5 + label.num = 5, + display = T ) } \arguments{ -\item{result}{result output from projectionDriveR function with PI method selected} +\item{result}{result output from projectionDriveR function with PV mode selected} \item{FC}{fold change threshold, default at 0.2} @@ -25,6 +26,8 @@ pdVolcano( \item{filter.inf}{remove genes that have pvalues below machine double minimum value} \item{label.num}{Number of genes to label on either side of the volcano plot, default 5} + +\item{display}{boolean. Whether or not to plot and display volcano plots} } \value{ A list with weighted and unweighted differential expression metrics diff --git a/man/plotVolcano.Rd b/man/plotVolcano.Rd new file mode 100644 index 0000000..8af23e5 --- /dev/null +++ b/man/plotVolcano.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting.R +\name{plotVolcano} +\alias{plotVolcano} +\title{plotVolcano} +\usage{ +plotVolcano(stats, metadata, FC, pvalue, title) +} +\arguments{ +\item{stats}{data frame with differential expression statistics} + +\item{metadata}{#metadata from pdVolcano} + +\item{FC}{Fold change threshold} + +\item{pvalue}{p value threshold} + +\item{title}{plot title} +} +\description{ +Volcano plotting function +} diff --git a/tests/testthat/test_projectR.R b/tests/testthat/test_projectR.R index 99a18c1..3acd3d4 100644 --- a/tests/testthat/test_projectR.R +++ b/tests/testthat/test_projectR.R @@ -85,7 +85,7 @@ test_that("results are correctly formatted for confidence interval mode",{ expect_is(drivers, "list") #check length of dfs -expect_length(drivers, 5) +expect_length(drivers, 6) expect_length(drivers$mean_ci, 3) expect_length(drivers$weighted_mean_ci, 3) @@ -104,6 +104,9 @@ expect_is(drivers$mean_ci, "data.frame") expect_true("weighted_mean_ci" %in% names(drivers)) expect_is(drivers$mean_ci, "data.frame") +expect_true("normalized_weights" %in% names(drivers)) +expect_is(drivers$normalized_weights, "numeric") + expect_true("sig_genes" %in% names(drivers)) expect_is(drivers$sig_genes, "list") expect_length(drivers$sig_genes, 3) @@ -148,9 +151,9 @@ test_that("results are correctly formatted for P value mode",{ expect_is(drivers, "list") #check length of dfs - expect_length(drivers, 4) - expect_length(drivers$mean_stats, 7) - expect_length(drivers$weighted_mean_stats, 7) + expect_length(drivers, 9) + expect_length(drivers$mean_stats, 10) + expect_length(drivers$weighted_mean_stats, 10) #check that genes used for calculations overlap both datasets and loadings expect_true(unique(drivers$mean_stats$gene %in% rownames(microglial_counts))) @@ -167,6 +170,9 @@ test_that("results are correctly formatted for P value mode",{ expect_true("weighted_mean_stats" %in% names(drivers)) expect_is(drivers$mean_stats, "data.frame") + expect_true("normalized_weights" %in% names(drivers)) + expect_is(drivers$normalized_weights, "numeric") + expect_true("sig_genes" %in% names(drivers)) expect_is(drivers$sig_genes, "list") expect_length(drivers$sig_genes, 3) diff --git a/vignettes/projectR.Rmd b/vignettes/projectR.Rmd index 16410d5..dcc4b98 100644 --- a/vignettes/projectR.Rmd +++ b/vignettes/projectR.Rmd @@ -382,48 +382,63 @@ The arguments for projectionDriveR are: **`loadings`** Matrix or dataframe with features as rows, columns as patterns. Values define feature weights in that space **`loadingsNames`** Vector of names corresponding to rows of loadings. By default the rownames of loadings will be used **`pattern_name`** the column name of the loadings by which the features will be weighted -**`pvalue`** Determines the significance of the confidence interval to be calculated between the difference of means +**`pvalue`** Determines the significance of the confidence interval to be calculated between the difference of means. Default 1e-5 **`display`** Boolean. Whether or not to plot the estimates of significant features. Default = T **`normalize_pattern`** Boolean. Whether or not to normalize the average feature weight. Default = T **`mode`** 'CI' or 'PV'. Specifies whether to run projectionDriveR in confidence interval mode or to generate p values. Default = "CI" ### Output -The output of `projectionDriveR` is a list of length five `mean_ci` holds the confidence intervals for the difference in means for all features, `weighted_mean_ci` holds the confidence intervals for the weighted difference in means for all features, and normalized_weights are the weights themselves. In addition, `sig_genes` is a list of three vectors of gene names that are significantly different at the threshold provided generated from the mean confidence intervals (`unweighted_sig_genes`), the weighted mean confidence intervals (`weighted_sig_genes`) and genes shared between the two (`significant_shared_genes`) . `plotted_ci` returns the ggplot figure of the confidence intervals, see `plotConfidenceIntervals` for documentation. +The output of `projectionDriveR` in confidence interval mode ('CI') is a list of length six `mean_ci` holds the confidence intervals for the difference in means for all features, `weighted_mean_ci` holds the confidence intervals for the weighted difference in means for all features, and `normalized_weights` are the weights themselves. In addition, `sig_genes` is a list of three vectors of gene names that are significantly different at the threshold provided generated from the mean confidence intervals (`unweighted_sig_genes`), the weighted mean confidence intervals (`weighted_sig_genes`) and genes shared between the two (`significant_shared_genes`) . `plotted_ci` returns the ggplot figure of the confidence intervals, see `plotConfidenceIntervals` for documentation. `meta_data` contains matrix names and pvalue thresholds. The output of `projectionDriveR` in p value mode ('PV') is a list of length nine. `meta_data`, `sig_genes` and `normalized_weights` are similar between modes. `mean_stats` and `weighted_mean_stats` contains summary information for welch t-tests. `difexpgenes` and `weighted_difexpgenes` are filtered dataframes containing differentially expressed genes at a FC and pvalue cut off of 0.2 and 1e-5 respectively. `fgseavecs` contain unweighted and weighted named vectors of welch-t test estimates that can be used with fgsea. `plt` returns the volcano ggplot figure. See `pdVolcano` for documentation. FC and pvalue can be manually altered by calling pdVolcano on projectionDriveR result. ### Identifying differential features associated with learned patterns - ```{r projectionDriver, message = F, out.width="100%"} options(width = 60) library(projectR) library(dplyr, warn.conflicts = F) -#gene weights x pattern -data("retinal_patterns") - #size-normed, log expression data("microglial_counts") #size-normed, log expression data("glial_counts") +#5 pattern cogaps object generated on microglial_counts +data("cr_microglial") +microglial_fl <- cr_microglial@featureLoadings + #the features by which to weight the difference in expression -pattern_to_weight <- "Pattern.24" -drivers <- projectionDriveR(microglial_counts, #expression matrix +pattern_to_weight <- "Pattern_1" +drivers_ci <- projectionDriveR(microglial_counts, #expression matrix glial_counts, #expression matrix - loadings = retinal_patterns, #feature x pattern dataframe + loadings = microglial_fl, #feature x pattern dataframe loadingsNames = NULL, pattern_name = pattern_to_weight, #column name pvalue = 1e-5, #pvalue before bonferroni correction - display = F, + display = T, normalize_pattern = T, #normalize feature weights mode = "CI") #confidence interval mode -conf_intervals <- drivers$mean_ci[drivers$sig_genes$significant_shared_genes,] +conf_intervals <- drivers_ci$mean_ci[drivers_ci$sig_genes$significant_shared_genes,] + str(conf_intervals) +drivers_pv <- projectionDriveR(microglial_counts, #expression matrix + glial_counts, #expression matrix + loadings = microglial_fl, #feature x pattern dataframe + loadingsNames = NULL, + pattern_name = pattern_to_weight, #column name + pvalue = 1e-5, #pvalue before bonferroni correction + display = T, + normalize_pattern = T, #normalize feature weights + mode = "PV") #confidence interval mode + +difexp <- drivers_pv$difexpgenes +str(difexp) + + ``` ## plotConfidenceIntervals @@ -462,7 +477,7 @@ conf_intervals$label_name[idx] <- gene_ids #the labels above can now be used as ggplot aesthetics plots_list <- plotConfidenceIntervals(conf_intervals, #mean difference in expression confidence intervals sort = F, #should genes be sorted by estimates - weights = drivers$normalized_weights[rownames(conf_intervals)], + weights = drivers_ci$normalized_weights[rownames(conf_intervals)], pattern_name = pattern_to_weight, weights_clip = 0.99, weights_vis_norm = "none") @@ -473,7 +488,7 @@ pl1 <- plots_list[["ci_estimates_plot"]] + pl2 <- plots_list[["weights_heatmap"]] #now plot the weighted differences -weighted_conf_intervals <- drivers$weighted_mean_ci[gene_order,] +weighted_conf_intervals <- drivers_ci$weighted_mean_ci[gene_order,] plots_list_weighted <- plotConfidenceIntervals(weighted_conf_intervals, sort = F, pattern_name = pattern_to_weight, @@ -488,7 +503,74 @@ cowplot::plot_grid(pl1, pl2, pl3, align = "h", rel_widths = c(1,.4, 1), ncol = 3 ``` +## pdVolcano +### Input +The arguments for pdVolcano are: + +**`result`** Output from projectionDriveR function with PV mode selected +**`FC`** fold change threshold, default at 0.2 +**`pvalue`** significance threshold, default set to pvalue stored in projectionDriveR output +**`subset`** optional vector of gene names to subset the result by +**`filter.inf`** Boolean. If TRUE will remove genes that have pvalues below machine double minimum value +**`label.num`** number of genes to label on either end of volcano plot, default to 5 (10 total) +**`display`** Boolean. Default TRUE, will print volcano plots using cowplot grid_arrange + +### Output +Generates the same output as projectionDriveR. Allows manual updates to pvalue and FC thresholds and can accept gene lists of interest to subset results. + +### Customize plotting of confidence intervals + +```{r fig.width=10, fig.height=11} +suppressWarnings(library(cowplot)) +library(fgsea) +library(msigdbr) +#manually change FC and pvalue threshold from defaults 0.2, 1e-5 +drivers_pv_mod <- pdVolcano(drivers_pv, FC = 0.5, pvalue = 1e-7) + +difexp_mod <- drivers_pv_mod$difexpgenes +str(difexp_mod) + +#fgsea application + +#extract unweighted fgsea vector +fgseavec <- drivers_pv$fgseavecs$unweightedvec +#split into enrichment groups, negative estimates are enriched in the reference matrix (glial), positive are enriched in the test matrix (microglial), take abs value +glial_fgsea_vec <- abs(fgseavec[which(fgseavec < 0)]) +microglial_fgsea_vec <- abs(fgseavec[which(fgseavec > 0)]) + +#get FGSEA pathways - selecting subcategory C8 for cell types +msigdbr_list = msigdbr::msigdbr(species = "Mus musculus", category = "C8") +pathways = split(x = msigdbr_list$ensembl_gene, f = msigdbr_list$gs_name) + +#run fgsea scoreType positive, all values will be positive +glial_fgsea <- fgsea::fgsea(pathways, glial_fgsea_vec, scoreType = "pos") +#tidy +glial_fgseaResTidy <- glial_fgsea %>% subset(padj <= 0.05 & size >= 10 & size <= 500) %>% + as_tibble() %>% + dplyr::arrange(desc(size)) +#plot +glial_EnrichmentPlot <- ggplot2::ggplot(glial_fgseaResTidy, ggplot2::aes(reorder(pathway, NES), NES)) + ggplot2::geom_point(aes(size=size, color = padj)) + +coord_flip() + +labs(x="Pathway", y="Normalized Enrichment Score", title="Pathway NES from GSEA") + +theme_minimal() +glial_EnrichmentPlot + +microglial_fgsea <- fgsea::fgsea(pathways, microglial_fgsea_vec, scoreType = "pos") +#tidy +microglial_fgseaResTidy <- microglial_fgsea %>% subset(padj <= 0.05 & size >= 10 & size <= 500) %>% + as_tibble() %>% + dplyr::arrange(desc(size)) + +microglial_EnrichmentPlot <- ggplot2::ggplot(microglial_fgseaResTidy, ggplot2::aes(reorder(pathway, NES), NES)) + ggplot2::geom_point(aes(size=size, color = padj)) + +coord_flip() + +labs(x="Pathway", y="Normalized Enrichment Score", title="Pathway NES from GSEA") + +theme_minimal() +microglial_EnrichmentPlot + + + +``` ## multivariateAnalysisR This function performs multivariate analysis on different clusters within a dataset, which in this case is restricted to Seurat Object. Clusters are identified as those satisfying the conditions specified in their corresponding dictionaries. `multivariateAnalysisR` performs two tests: Analysis of Variance (ANOVA) and Confidence Interval (CI) evaluations. ANOVA is performed to understand the general differentiation between clusters through the lens of a specified pattern. CI is to visualize pair-wise differential expressions between two clusters for each pattern. Researchers can visually understand both the macroscopic and microscopic differential uses of each pattern across different clusters through this function. diff --git a/vignettes/projectR.tex b/vignettes/projectR.tex new file mode 100644 index 0000000..5789790 --- /dev/null +++ b/vignettes/projectR.tex @@ -0,0 +1,758 @@ +\documentclass[]{article} +\usepackage{lmodern} +\usepackage{amssymb,amsmath} +\usepackage{ifxetex,ifluatex} +\usepackage{fixltx2e} % provides \textsubscript +\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex + \usepackage[T1]{fontenc} + \usepackage[utf8]{inputenc} +\else % if luatex or xelatex + \ifxetex + \usepackage{mathspec} + \else + \usepackage{fontspec} + \fi + \defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase} +\fi +% use upquote if available, for straight quotes in verbatim environments +\IfFileExists{upquote.sty}{\usepackage{upquote}}{} +% use microtype if available +\IfFileExists{microtype.sty}{% +\usepackage{microtype} +\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts +}{} + + +\usepackage{longtable,booktabs} +\usepackage{graphicx} +% grffile has become a legacy package: https://ctan.org/pkg/grffile +\IfFileExists{grffile.sty}{% +\usepackage{grffile} +}{} +\makeatletter +\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi} +\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi} +\makeatother +% Scale images if necessary, so that they will not overflow the page +% margins by default, and it is still possible to overwrite the defaults +% using explicit options in \includegraphics[width, height, ...]{} +\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio} +\IfFileExists{parskip.sty}{% +\usepackage{parskip} +}{% else +\setlength{\parindent}{0pt} +\setlength{\parskip}{6pt plus 2pt minus 1pt} +} +\setlength{\emergencystretch}{3em} % prevent overfull lines +\providecommand{\tightlist}{% + \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} +\setcounter{secnumdepth}{5} + +%%% Use protect on footnotes to avoid problems with footnotes in titles +\let\rmarkdownfootnote\footnote% +\def\footnote{\protect\rmarkdownfootnote} + +%%% Change title format to be more compact +\usepackage{titling} + +% Create subtitle command for use in maketitle +\providecommand{\subtitle}[1]{ + \posttitle{ + \begin{center}\large#1\end{center} + } +} + +\setlength{\droptitle}{-2em} + +\RequirePackage[]{C:/Users/Gaurav/Documents/R/win-library/4.0/BiocStyle/resources/tex/Bioconductor} + +\bioctitle[]{projectR Vignette} + \pretitle{\vspace{\droptitle}\centering\huge} + \posttitle{\par} +\author{Gaurav Sharma, Charles Shin, Jared N. Slosberg, Loyal A. Goff and Genevieve L. Stein-O'Brien} + \preauthor{\centering\large\emph} + \postauthor{\par} + \predate{\centering\large\emph} + \postdate{\par} + \date{20 May 2022} + +% code highlighting +\definecolor{fgcolor}{rgb}{0.251, 0.251, 0.251} +\makeatletter +\@ifundefined{AddToHook}{}{\AddToHook{package/xcolor/after}{\definecolor{fgcolor}{rgb}{0.251, 0.251, 0.251}}} +\makeatother +\newcommand{\hlnum}[1]{\textcolor[rgb]{0.816,0.125,0.439}{#1}}% +\newcommand{\hlstr}[1]{\textcolor[rgb]{0.251,0.627,0.251}{#1}}% +\newcommand{\hlcom}[1]{\textcolor[rgb]{0.502,0.502,0.502}{\textit{#1}}}% +\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}% +\newcommand{\hlstd}[1]{\textcolor[rgb]{0.251,0.251,0.251}{#1}}% +\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.125,0.125,0.941}{#1}}% +\newcommand{\hlkwb}[1]{\textcolor[rgb]{0,0,0}{#1}}% +\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.251,0.251,0.251}{#1}}% +\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.878,0.439,0.125}{#1}}% +\let\hlipl\hlkwb +% +\usepackage{fancyvrb} +\newcommand{\VerbBar}{|} +\newcommand{\VERB}{\Verb[commandchars=\\\{\}]} +\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}} +% +\newenvironment{Shaded}{\begin{myshaded}}{\end{myshaded}} +% set background for result chunks +\let\oldverbatim\verbatim +\renewenvironment{verbatim}{\color{codecolor}\begin{myshaded}\begin{oldverbatim}}{\end{oldverbatim}\end{myshaded}} +% +\newcommand{\KeywordTok}[1]{\hlkwd{#1}} +\newcommand{\DataTypeTok}[1]{\hlkwc{#1}} +\newcommand{\DecValTok}[1]{\hlnum{#1}} +\newcommand{\BaseNTok}[1]{\hlnum{#1}} +\newcommand{\FloatTok}[1]{\hlnum{#1}} +\newcommand{\ConstantTok}[1]{\hlnum{#1}} +\newcommand{\CharTok}[1]{\hlstr{#1}} +\newcommand{\SpecialCharTok}[1]{\hlstr{#1}} +\newcommand{\StringTok}[1]{\hlstr{#1}} +\newcommand{\VerbatimStringTok}[1]{\hlstr{#1}} +\newcommand{\SpecialStringTok}[1]{\hlstr{#1}} +\newcommand{\ImportTok}[1]{{#1}} +\newcommand{\CommentTok}[1]{\hlcom{#1}} +\newcommand{\DocumentationTok}[1]{\hlcom{#1}} +\newcommand{\AnnotationTok}[1]{\hlcom{#1}} +\newcommand{\CommentVarTok}[1]{\hlcom{#1}} +\newcommand{\OtherTok}[1]{{#1}} +\newcommand{\FunctionTok}[1]{\hlstd{#1}} +\newcommand{\VariableTok}[1]{\hlstd{#1}} +\newcommand{\ControlFlowTok}[1]{\hlkwd{#1}} +\newcommand{\OperatorTok}[1]{\hlopt{#1}} +\newcommand{\BuiltInTok}[1]{{#1}} +\newcommand{\ExtensionTok}[1]{{#1}} +\newcommand{\PreprocessorTok}[1]{\textit{#1}} +\newcommand{\AttributeTok}[1]{{#1}} +\newcommand{\RegionMarkerTok}[1]{{#1}} +\newcommand{\InformationTok}[1]{\textcolor{messagecolor}{#1}} +\newcommand{\WarningTok}[1]{\textcolor{warningcolor}{#1}} +\newcommand{\AlertTok}[1]{\textcolor{errorcolor}{#1}} +\newcommand{\ErrorTok}[1]{\textcolor{errorcolor}{#1}} +\newcommand{\NormalTok}[1]{\hlstd{#1}} +% +\AtBeginDocument{\bibliographystyle{C:/Users/Gaurav/Documents/R/win-library/4.0/BiocStyle/resources/tex/unsrturl}} + + +\begin{document} +\maketitle + + +{ +\setcounter{tocdepth}{2} +\tableofcontents +\newpage +} +\hypertarget{introduction}{% +\section{Introduction}\label{introduction}} + +Technological advances continue to spur the exponential growth of biological data as illustrated by the rise of the omics---genomics, transcriptomics, epigenomics, proteomics, etc.---each with there own high throughput technologies. In order to leverage the full power of these resources, methods to integrate multiple data sets and data types must be developed. The reciprocal nature of the genomic, transcriptomic, epigenomic, and proteomic biology requires that the data provides a complementary view of cellular function and regulatory organization; however, the technical heterogeneity and massive size of high-throughput data even within a particular omic makes integrated analysis challenging. To address these challenges, we developed projectR, an R package for integrated analysis of high dimensional omic data. projectR uses the relationships defined within a given high dimensional data set, to interrogate related biological phenomena in an entirely new data set. By relying on relative comparisons within data type, projectR is able to circumvent many issues arising from technological variation. For a more extensive example of how the tools in the projectR package can be used for \emph{in silico} experiments, or additional information on the algorithm, see \href{https://www.sciencedirect.com/science/article/pii/S2405471219301462}{Stein-O'Brien, et al}. + +\hypertarget{getting-started-with-projectr}{% +\section{Getting started with projectR}\label{getting-started-with-projectr}} + +\hypertarget{installation-instructions}{% +\subsection{Installation Instructions}\label{installation-instructions}} + +For automatic Bioconductor package installation, start R, and run: + +\begin{verbatim} +BiocManager::install("projectR") +\end{verbatim} + +\hypertarget{methods}{% +\subsection{Methods}\label{methods}} + +Projection can roughly be defined as a mapping or transformation of points from one space to another often lower dimensional space. Mathematically, this can described as a function \(\varphi(x)=y : \Re^{D} \mapsto \Re^{d}\) s.t. \(d \leq D\) for \(x \in \Re^{D}, y \in \Re^{d}\) Barbakh, Wu, and Fyfe (2009) . The projectR package uses projection functions defined in a training dataset to interrogate related biological phenomena in an entirely new data set. These functions can be the product of any one of several methods common to ``omic'' analyses including regression, PCA, NMF, clustering. Individual sections focusing on one specific method are included in the vignette. However, the general design of the projectR function is the same regardless. + +\hypertarget{the-base-projectr-function}{% +\subsection{The base projectR function}\label{the-base-projectr-function}} + +The generic projectR function is executed as follows: + +\begin{verbatim} +library(projectR) +projectR(data, loadings, dataNames=NULL, loadingsNames=NULL, NP = NULL, full = false) +\end{verbatim} + +\hypertarget{input-arguments}{% +\subsubsection{Input Arguments}\label{input-arguments}} + +The inputs that must be set each time are only the data and loadings, with all other inputs having default values. However, incongruities in the feature mapping between the data and loadings, i.e.~a different format for the rownames of each object, will throw errors or result in an empty mapping and should be checked before running. To overcoming mismatched feature names in the objects themselves, the /code\{dataNames\} and /code\{loadingNames\} arguments can be manually supplied by the user. + +The arguments are as follows: + +\begin{description} +\item[data]{a dataset to be projected into the pattern space} +\item[loadings]{a matrix of continous values with unique rownames to be projected} +\item[dataNames]{a vector containing unique name, i.e. gene names, for the rows of the target dataset to be used to match features with the loadings, if not provided by \texttt{rownames(data)}. Order of names in vector must match order of rows in data.} +\item[loadingsNames]{a vector containing unique names, i.e. gene names, for the rows of loadings to be used to match features with the data, if not provided by \texttt{rownames(loadings)}. Order of names in vector must match order of rows in loadings.} +\item[NP]{vector of integers indicating which columns of loadings object to use. The default of NP = NA will use entire matrix.} +\item[full]{logical indicating whether to return the full model solution. By default only the new pattern object is returned.} +\end{description} + +The \texttt{loadings} argument in the generic projectR function is suitable for use with any genernal feature space, or set of feature spaces, whose rows annotation links them to the data to be projected. Ex: the coeffients associated with individual genes as the result of regression analysis or the amplituded values of individual genes as the result of non-negative matrix factorization (NMF). + +\hypertarget{output}{% +\subsubsection{Output}\label{output}} + +The basic output of the base projectR function, i.e.~\texttt{full=FALSE}, returns \texttt{projectionPatterns} representing relative weights for the samples from the new data in this previously defined feature space, or set of feature spaces. The full output of the base projectR function, i.e.~\texttt{full=TRUE}, returns \texttt{projectionFit}, a list containing \texttt{projectionPatterns} and \texttt{Projection}. The \texttt{Projection} object contains additional information from the proceedure used to obtain the \texttt{projectionPatterns}. For the the the base projectR function, \texttt{Projection} is the full lmFit model from the package \emph{\href{https://bioconductor.org/packages/3.12/limma}{limma}}. + +\hypertarget{pca-projection}{% +\section{PCA projection}\label{pca-projection}} + +Projection of principal components is achieved by matrix multiplication of a new data set by previously generated eigenvectors, or gene loadings. If the original data were standardized such that each gene is centered to zero average expression level, the principal components are normalized eigenvectors of the covariance matrix of the genes. Each PC is ordered according to how much of the variation present in the data they contain. Projection of the original samples into each PC will maximize the variance of the samples in the direction of that component and uncorrelated to previous components. Projection of new data places the new samples into the PCs defined by the original data. Because the components define an orthonormal basis set, they provide an isomorphism between a vector space, \(V\), and \(\Re^n\) which preserves inner products. If \(V\) is an inner product space over \(\Re\) with orthonormal basis \(B = v_1,...,v_n\) and \(v \epsilon V s.t [v]_B = (r_1,...,r_n)\), then finding the coordinate of \(v_i\) in \(v\) is precisely the inner product of \(v\) with \(v_i\), i.e.~\(r_i = \langle v,v_i \rangle\). This formulation is implemented for only those genes belonging to both the new data and the PC space. The \texttt{projectR} function has S4 method for class \texttt{prcomp}. + +\hypertarget{obtaining-pcs-to-project.}{% +\subsection{Obtaining PCs to project.}\label{obtaining-pcs-to-project.}} + +\begin{Shaded} +\begin{Highlighting}[] +\CommentTok{\# data to define PCs} +\KeywordTok{library}\NormalTok{(projectR)} +\KeywordTok{data}\NormalTok{(p.RNAseq6l3c3t)} + +\CommentTok{\# do PCA on RNAseq6l3c3t expression data} +\NormalTok{pc.RNAseq6l3c3t<{-}}\KeywordTok{prcomp}\NormalTok{(}\KeywordTok{t}\NormalTok{(p.RNAseq6l3c3t))} +\NormalTok{pcVAR <{-}}\StringTok{ }\KeywordTok{round}\NormalTok{(((pc.RNAseq6l3c3t}\OperatorTok{$}\NormalTok{sdev)}\OperatorTok{\^{}}\DecValTok{2}\OperatorTok{/}\KeywordTok{sum}\NormalTok{(pc.RNAseq6l3c3t}\OperatorTok{$}\NormalTok{sdev}\OperatorTok{\^{}}\DecValTok{2}\NormalTok{))}\OperatorTok{*}\DecValTok{100}\NormalTok{,}\DecValTok{2}\NormalTok{)} +\NormalTok{dPCA <{-}}\StringTok{ }\KeywordTok{data.frame}\NormalTok{(}\KeywordTok{cbind}\NormalTok{(pc.RNAseq6l3c3t}\OperatorTok{$}\NormalTok{x,pd.RNAseq6l3c3t))} + +\CommentTok{\#plot pca} +\KeywordTok{library}\NormalTok{(ggplot2)} +\NormalTok{setCOL <{-}}\StringTok{ }\KeywordTok{scale\_colour\_manual}\NormalTok{(}\DataTypeTok{values =} \KeywordTok{c}\NormalTok{(}\StringTok{"blue"}\NormalTok{,}\StringTok{"black"}\NormalTok{,}\StringTok{"red"}\NormalTok{), }\DataTypeTok{name=}\StringTok{"Condition:"}\NormalTok{)} +\NormalTok{setFILL <{-}}\StringTok{ }\KeywordTok{scale\_fill\_manual}\NormalTok{(}\DataTypeTok{values =} \KeywordTok{c}\NormalTok{(}\StringTok{"blue"}\NormalTok{,}\StringTok{"black"}\NormalTok{,}\StringTok{"red"}\NormalTok{),}\DataTypeTok{guide =} \OtherTok{FALSE}\NormalTok{)} +\NormalTok{setPCH <{-}}\StringTok{ }\KeywordTok{scale\_shape\_manual}\NormalTok{(}\DataTypeTok{values=}\KeywordTok{c}\NormalTok{(}\DecValTok{23}\NormalTok{,}\DecValTok{22}\NormalTok{,}\DecValTok{25}\NormalTok{,}\DecValTok{25}\NormalTok{,}\DecValTok{21}\NormalTok{,}\DecValTok{24}\NormalTok{),}\DataTypeTok{name=}\StringTok{"Cell Line:"}\NormalTok{)} + +\NormalTok{pPCA <{-}}\StringTok{ }\KeywordTok{ggplot}\NormalTok{(dPCA, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x=}\NormalTok{PC1, }\DataTypeTok{y=}\NormalTok{PC2, }\DataTypeTok{colour=}\NormalTok{ID.cond, }\DataTypeTok{shape=}\NormalTok{ID.line,} + \DataTypeTok{fill=}\NormalTok{ID.cond)) }\OperatorTok{+} +\StringTok{ }\KeywordTok{geom\_point}\NormalTok{(}\KeywordTok{aes}\NormalTok{(}\DataTypeTok{size=}\NormalTok{days),}\DataTypeTok{alpha=}\NormalTok{.}\DecValTok{6}\NormalTok{)}\OperatorTok{+} +\StringTok{ }\NormalTok{setCOL }\OperatorTok{+}\StringTok{ }\NormalTok{setPCH }\OperatorTok{+}\StringTok{ }\NormalTok{setFILL }\OperatorTok{+} +\StringTok{ }\KeywordTok{scale\_size\_area}\NormalTok{(}\DataTypeTok{breaks =} \KeywordTok{c}\NormalTok{(}\DecValTok{2}\NormalTok{,}\DecValTok{4}\NormalTok{,}\DecValTok{6}\NormalTok{), }\DataTypeTok{name=}\StringTok{"Day"}\NormalTok{) }\OperatorTok{+} +\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{legend.position=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{), }\DataTypeTok{legend.justification=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{),} + \DataTypeTok{legend.direction =} \StringTok{"horizontal"}\NormalTok{,} + \DataTypeTok{panel.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill =} \StringTok{"white"}\NormalTok{,}\DataTypeTok{colour=}\OtherTok{NA}\NormalTok{),} + \DataTypeTok{legend.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill =} \StringTok{"transparent"}\NormalTok{,}\DataTypeTok{colour=}\OtherTok{NA}\NormalTok{),} + \DataTypeTok{plot.title =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{vjust =} \DecValTok{0}\NormalTok{,}\DataTypeTok{hjust=}\DecValTok{0}\NormalTok{,}\DataTypeTok{face=}\StringTok{"bold"}\NormalTok{)) }\OperatorTok{+} +\StringTok{ }\KeywordTok{labs}\NormalTok{(}\DataTypeTok{title =} \StringTok{"PCA of hPSC PolyA RNAseq"}\NormalTok{,} + \DataTypeTok{x=}\KeywordTok{paste}\NormalTok{(}\StringTok{"PC1 ("}\NormalTok{,pcVAR[}\DecValTok{1}\NormalTok{],}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{),} + \DataTypeTok{y=}\KeywordTok{paste}\NormalTok{(}\StringTok{"PC2 ("}\NormalTok{,pcVAR[}\DecValTok{2}\NormalTok{],}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{))} +\end{Highlighting} +\end{Shaded} + +\hypertarget{projecting-prcomp-objects}{% +\subsection{Projecting prcomp objects}\label{projecting-prcomp-objects}} + +\begin{Shaded} +\begin{Highlighting}[] +\CommentTok{\# data to project into PCs from RNAseq6l3c3t expression data} +\KeywordTok{data}\NormalTok{(p.ESepiGen4c1l)} + +\KeywordTok{library}\NormalTok{(projectR)} +\NormalTok{PCA2ESepi <{-}}\StringTok{ }\KeywordTok{projectR}\NormalTok{(}\DataTypeTok{data =}\NormalTok{ p.ESepiGen4c1l}\OperatorTok{$}\NormalTok{mRNA.Seq,}\DataTypeTok{loadings=}\NormalTok{pc.RNAseq6l3c3t,} +\DataTypeTok{full=}\OtherTok{TRUE}\NormalTok{, }\DataTypeTok{dataNames=}\NormalTok{map.ESepiGen4c1l[[}\StringTok{"GeneSymbols"}\NormalTok{]])} +\CommentTok{\#\# [1] "93 row names matched between data and loadings"} +\CommentTok{\#\# [1] "Updated dimension of data: 93 9"} + +\NormalTok{pd.ESepiGen4c1l<{-}}\KeywordTok{data.frame}\NormalTok{(}\DataTypeTok{Condition=}\KeywordTok{sapply}\NormalTok{(}\KeywordTok{colnames}\NormalTok{(p.ESepiGen4c1l}\OperatorTok{$}\NormalTok{mRNA.Seq),} + \ControlFlowTok{function}\NormalTok{(x) }\KeywordTok{unlist}\NormalTok{(}\KeywordTok{strsplit}\NormalTok{(x,}\StringTok{\textquotesingle{}\_\textquotesingle{}}\NormalTok{))[}\DecValTok{1}\NormalTok{]),}\DataTypeTok{stringsAsFactors=}\OtherTok{FALSE}\NormalTok{)} +\NormalTok{pd.ESepiGen4c1l}\OperatorTok{$}\NormalTok{color<{-}}\KeywordTok{c}\NormalTok{(}\KeywordTok{rep}\NormalTok{(}\StringTok{"red"}\NormalTok{,}\DecValTok{2}\NormalTok{),}\KeywordTok{rep}\NormalTok{(}\StringTok{"green"}\NormalTok{,}\DecValTok{3}\NormalTok{),}\KeywordTok{rep}\NormalTok{(}\StringTok{"blue"}\NormalTok{,}\DecValTok{2}\NormalTok{),}\KeywordTok{rep}\NormalTok{(}\StringTok{"black"}\NormalTok{,}\DecValTok{2}\NormalTok{))} +\KeywordTok{names}\NormalTok{(pd.ESepiGen4c1l}\OperatorTok{$}\NormalTok{color)<{-}pd.ESepiGen4c1l}\OperatorTok{$}\NormalTok{Cond} + +\NormalTok{dPCA2ESepi<{-}}\StringTok{ }\KeywordTok{data.frame}\NormalTok{(}\KeywordTok{cbind}\NormalTok{(}\KeywordTok{t}\NormalTok{(PCA2ESepi[[}\DecValTok{1}\NormalTok{]]),pd.ESepiGen4c1l))} + +\CommentTok{\#plot pca} +\KeywordTok{library}\NormalTok{(ggplot2)} +\NormalTok{setEpiCOL <{-}}\StringTok{ }\KeywordTok{scale\_colour\_manual}\NormalTok{(}\DataTypeTok{values =} \KeywordTok{c}\NormalTok{(}\StringTok{"red"}\NormalTok{,}\StringTok{"green"}\NormalTok{,}\StringTok{"blue"}\NormalTok{,}\StringTok{"black"}\NormalTok{),} + \DataTypeTok{guide =} \KeywordTok{guide\_legend}\NormalTok{(}\DataTypeTok{title=}\StringTok{"Lineage"}\NormalTok{))} + +\NormalTok{pPC2ESepiGen4c1l <{-}}\StringTok{ }\KeywordTok{ggplot}\NormalTok{(dPCA2ESepi, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x=}\NormalTok{PC1, }\DataTypeTok{y=}\NormalTok{PC2, }\DataTypeTok{colour=}\NormalTok{Condition)) }\OperatorTok{+} +\StringTok{ }\KeywordTok{geom\_point}\NormalTok{(}\DataTypeTok{size=}\DecValTok{5}\NormalTok{) }\OperatorTok{+}\StringTok{ }\NormalTok{setEpiCOL }\OperatorTok{+} +\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{legend.position=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{), }\DataTypeTok{legend.justification=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{),} + \DataTypeTok{panel.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill =} \StringTok{"white"}\NormalTok{),} + \DataTypeTok{legend.direction =} \StringTok{"horizontal"}\NormalTok{,} + \DataTypeTok{plot.title =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{vjust =} \DecValTok{0}\NormalTok{,}\DataTypeTok{hjust=}\DecValTok{0}\NormalTok{,}\DataTypeTok{face=}\StringTok{"bold"}\NormalTok{)) }\OperatorTok{+} +\StringTok{ }\KeywordTok{labs}\NormalTok{(}\DataTypeTok{title =} \StringTok{"Encode RNAseq in target PC1 \& PC2"}\NormalTok{,} + \DataTypeTok{x=}\KeywordTok{paste}\NormalTok{(}\StringTok{"Projected PC1 ("}\NormalTok{,}\KeywordTok{round}\NormalTok{(PCA2ESepi[[}\DecValTok{2}\NormalTok{]][}\DecValTok{1}\NormalTok{],}\DecValTok{2}\NormalTok{),}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{),} + \DataTypeTok{y=}\KeywordTok{paste}\NormalTok{(}\StringTok{"Projected PC2 ("}\NormalTok{,}\KeywordTok{round}\NormalTok{(PCA2ESepi[[}\DecValTok{2}\NormalTok{]][}\DecValTok{2}\NormalTok{],}\DecValTok{2}\NormalTok{),}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{))} +\end{Highlighting} +\end{Shaded} + +\begin{verbatim} +## Warning: package 'gridExtra' was built under R version 4.0.5 +## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please +## use `guide = "none"` instead. +\end{verbatim} + +\begin{adjustwidth}{\fltoffset}{0mm} +\includegraphics[width=1\linewidth,]{E:/Projects/Fertiglab/projectR/vignettes/projectR_files/figure-latex/unnamed-chunk-2-1} \end{adjustwidth} + +\hypertarget{nmf-projection}{% +\section{NMF projection}\label{nmf-projection}} + +NMF decomposes a data matrix of \(D\) with \(N\) genes as rows and \(M\) samples as columns, into two matrices, as \(D ~ AP\). The pattern matrix P has rows associated with BPs in samples and the amplitude matrix A has columns indicating the relative association of a given gene, where the total number of BPs (k) is an input parameter. CoGAPS and GWCoGAPS seek a pattern matrix (\({\bf{P}}\)) and the corresponding distribution matrix of weights (\({\bf{A}}\)) whose product forms a mock data matrix (\({\bf{M}}\)) that represents the gene-wise data \({\bf{D}}\) within noise limits (\(\boldsymbol{\varepsilon}\)). That is, +\begin{equation} +{\bf{D}} = {\bf{M}} + \boldsymbol{\varepsilon} = {\bf{A}}{\bf{P}} + \boldsymbol{\varepsilon}. +\label{eq:matrixDecomp} +\end{equation} +The number of rows in \({\bf{P}}\) (columns in \({\bf{A}}\)) defines the number of biological patterns (k) that CoGAPS/GWCoGAPS will infer from the number of nonorthogonal basis vectors required to span the data space. As in the Bayesian Decomposition algorithm Wang, Kossenkov, and Ochs (2006), the matrices \({\bf{A}}\) and \({\bf{P}}\) in CoGAPS are assumed to have the atomic prior described in Sibisi and Skilling (1997). In the CoGAPS/GWCoGAPS implementation, \(\alpha_{A}\) and \(\alpha_{P}\) are corresponding parameters for the expected number of atoms which map to each matrix element in \({\bf{A}}\) and \({\bf{P}}\), respectively. The corresponding matrices \({\bf{A}}\) and \({\bf{P}}\) are found by MCMC sampling. + +Projection of CoGAPS/GWCoGAPS patterns is implemented by solving the factorization in \ref{eq:matrixDecomp} for the new data matrix where \({\bf{A}}\) is the fixed nonorthogonal basis vectors comprising the average of the posterior mean for the CoGAPS/GWCoGAPS simulations performed on the original data. The patterns \({\bf{P}}\) in the new data associated with this amplitude matrix is estimated using the least-squares fit to the new data implemented with the lmFit function in the \emph{\href{https://bioconductor.org/packages/3.12/limma}{limma}} package. The \texttt{projectR} function has S4 method for class \texttt{Linear Embedding Matrix, LME}. + +\begin{verbatim} +library(projectR) +projectR(data, loadings,dataNames = NULL, loadingsNames = NULL, + NP = NA, full = FALSE) +\end{verbatim} + +\hypertarget{input-arguments-1}{% +\subsubsection{Input Arguments}\label{input-arguments-1}} + +The inputs that must be set each time are only the data and patterns, with all other inputs having default values. However, inconguities between gene names--rownames of the loadings object and either rownames of the data object will throw errors and, subsequently, should be checked before running. + +The arguments are as follows: + +\begin{description} +\item[data]{a target dataset to be projected into the pattern space} +\item[loadings]{a CogapsResult object} +\item[dataNames]{rownames (eg. gene names) of the target dataset, if different from existing rownames of data} +\item[loadingsNames] loadingsNames rownames (eg. gene names) of the loadings to be matched with dataNames +\item[NP]{vector of integers indicating which columns of loadings object to use. The default of NP = NA will use entire matrix.} +\item[full]{logical indicating whether to return the full model solution. By default only the new pattern object is returned.} +\end{description} + +\hypertarget{output-1}{% +\subsubsection{Output}\label{output-1}} + +The basic output of the base projectR function, i.e.~\texttt{full=FALSE}, returns \texttt{projectionPatterns} representing relative weights for the samples from the new data in this previously defined feature space, or set of feature spaces. The full output of the base projectR function, i.e.~\texttt{full=TRUE}, returns \texttt{projectionFit}, a list containing \texttt{projectionPatterns} and \texttt{Projection}. The \texttt{Projection} object contains additional information from the procedure used to obtain the \texttt{projectionPatterns}. For the the the base projectR function, \texttt{Projection} is the full lmFit model from the package \emph{\href{https://bioconductor.org/packages/3.12/limma}{limma}}. + +\hypertarget{obtaining-cogaps-patterns-to-project.}{% +\subsection{Obtaining CoGAPS patterns to project.}\label{obtaining-cogaps-patterns-to-project.}} + +\begin{Shaded} +\begin{Highlighting}[] +\CommentTok{\# get data} +\KeywordTok{library}\NormalTok{(projectR)} +\NormalTok{AP <{-}}\StringTok{ }\KeywordTok{get}\NormalTok{(}\KeywordTok{data}\NormalTok{(}\StringTok{"AP.RNAseq6l3c3t"}\NormalTok{)) }\CommentTok{\#CoGAPS run data} +\NormalTok{AP <{-}}\StringTok{ }\NormalTok{AP}\OperatorTok{$}\NormalTok{Amean} +\CommentTok{\# heatmap of gene weights for CoGAPs patterns} +\KeywordTok{library}\NormalTok{(gplots)} +\CommentTok{\#\# Warning: package \textquotesingle{}gplots\textquotesingle{} was built under R version 4.0.5} +\CommentTok{\#\# } +\CommentTok{\#\# Attaching package: \textquotesingle{}gplots\textquotesingle{}} +\CommentTok{\#\# The following object is masked from \textquotesingle{}package:projectR\textquotesingle{}:} +\CommentTok{\#\# } +\CommentTok{\#\# lowess} +\CommentTok{\#\# The following object is masked from \textquotesingle{}package:stats\textquotesingle{}:} +\CommentTok{\#\# } +\CommentTok{\#\# lowess} +\KeywordTok{par}\NormalTok{(}\DataTypeTok{mar=}\KeywordTok{c}\NormalTok{(}\DecValTok{1}\NormalTok{,}\DecValTok{1}\NormalTok{,}\DecValTok{1}\NormalTok{,}\DecValTok{1}\NormalTok{))} +\NormalTok{pNMF<{-}}\KeywordTok{heatmap.2}\NormalTok{(}\KeywordTok{as.matrix}\NormalTok{(AP),}\DataTypeTok{col=}\NormalTok{bluered, }\DataTypeTok{trace=}\StringTok{\textquotesingle{}none\textquotesingle{}}\NormalTok{,} + \DataTypeTok{distfun=}\ControlFlowTok{function}\NormalTok{(c) }\KeywordTok{as.dist}\NormalTok{(}\DecValTok{1}\OperatorTok{{-}}\KeywordTok{cor}\NormalTok{(}\KeywordTok{t}\NormalTok{(c))) ,} + \DataTypeTok{cexCol=}\DecValTok{1}\NormalTok{,}\DataTypeTok{cexRow=}\NormalTok{.}\DecValTok{5}\NormalTok{,}\DataTypeTok{scale =} \StringTok{"row"}\NormalTok{,} + \DataTypeTok{hclustfun=}\ControlFlowTok{function}\NormalTok{(x) }\KeywordTok{hclust}\NormalTok{(x, }\DataTypeTok{method=}\StringTok{"average"}\NormalTok{)} +\NormalTok{ )} +\end{Highlighting} +\end{Shaded} + +\begin{adjustwidth}{\fltoffset}{0mm} +\includegraphics[width=1\linewidth,]{E:/Projects/Fertiglab/projectR/vignettes/projectR_files/figure-latex/unnamed-chunk-3-1} \end{adjustwidth} + +\hypertarget{projecting-cogaps-objects}{% +\subsection{Projecting CoGAPS objects}\label{projecting-cogaps-objects}} + +\begin{Shaded} +\begin{Highlighting}[] +\CommentTok{\# data to project into PCs from RNAseq6l3c3t expression data} +\KeywordTok{library}\NormalTok{(projectR)} +\KeywordTok{data}\NormalTok{(}\StringTok{\textquotesingle{}p.ESepiGen4c1l4\textquotesingle{}}\NormalTok{)} +\CommentTok{\#\# Warning in data("p.ESepiGen4c1l4"): data set \textquotesingle{}p.ESepiGen4c1l4\textquotesingle{} not found} +\KeywordTok{data}\NormalTok{(}\StringTok{\textquotesingle{}p.RNAseq6l3c3t\textquotesingle{}}\NormalTok{)} + +\NormalTok{NMF2ESepi <{-}}\StringTok{ }\KeywordTok{projectR}\NormalTok{(p.ESepiGen4c1l}\OperatorTok{$}\NormalTok{mRNA.Seq,}\DataTypeTok{loadings=}\NormalTok{AP,}\DataTypeTok{full=}\OtherTok{TRUE}\NormalTok{,} + \DataTypeTok{dataNames=}\NormalTok{map.ESepiGen4c1l[[}\StringTok{"GeneSymbols"}\NormalTok{]])} +\CommentTok{\#\# [1] "93 row names matched between data and loadings"} +\CommentTok{\#\# [1] "Updated dimension of data: 93 9"} + +\NormalTok{dNMF2ESepi<{-}}\StringTok{ }\KeywordTok{data.frame}\NormalTok{(}\KeywordTok{cbind}\NormalTok{(}\KeywordTok{t}\NormalTok{(NMF2ESepi),pd.ESepiGen4c1l))} + +\CommentTok{\#plot pca} +\KeywordTok{library}\NormalTok{(ggplot2)} +\NormalTok{setEpiCOL <{-}}\StringTok{ }\KeywordTok{scale\_colour\_manual}\NormalTok{(}\DataTypeTok{values =} \KeywordTok{c}\NormalTok{(}\StringTok{"red"}\NormalTok{,}\StringTok{"green"}\NormalTok{,}\StringTok{"blue"}\NormalTok{,}\StringTok{"black"}\NormalTok{),} +\DataTypeTok{guide =} \KeywordTok{guide\_legend}\NormalTok{(}\DataTypeTok{title=}\StringTok{"Lineage"}\NormalTok{))} + +\NormalTok{pNMF2ESepiGen4c1l <{-}}\StringTok{ }\KeywordTok{ggplot}\NormalTok{(dNMF2ESepi, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x=}\NormalTok{X1, }\DataTypeTok{y=}\NormalTok{X2, }\DataTypeTok{colour=}\NormalTok{Condition)) }\OperatorTok{+} +\StringTok{ }\KeywordTok{geom\_point}\NormalTok{(}\DataTypeTok{size=}\DecValTok{5}\NormalTok{) }\OperatorTok{+}\StringTok{ }\NormalTok{setEpiCOL }\OperatorTok{+} +\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{legend.position=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{), }\DataTypeTok{legend.justification=}\KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{,}\DecValTok{0}\NormalTok{),} + \DataTypeTok{panel.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill =} \StringTok{"white"}\NormalTok{),} + \DataTypeTok{legend.direction =} \StringTok{"horizontal"}\NormalTok{,} + \DataTypeTok{plot.title =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{vjust =} \DecValTok{0}\NormalTok{,}\DataTypeTok{hjust=}\DecValTok{0}\NormalTok{,}\DataTypeTok{face=}\StringTok{"bold"}\NormalTok{))} + \KeywordTok{labs}\NormalTok{(}\DataTypeTok{title =} \StringTok{"Encode RNAseq in target PC1 \& PC2"}\NormalTok{,} + \DataTypeTok{x=}\KeywordTok{paste}\NormalTok{(}\StringTok{"Projected PC1 ("}\NormalTok{,}\KeywordTok{round}\NormalTok{(PCA2ESepi[[}\DecValTok{2}\NormalTok{]][}\DecValTok{1}\NormalTok{],}\DecValTok{2}\NormalTok{),}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{),} + \DataTypeTok{y=}\KeywordTok{paste}\NormalTok{(}\StringTok{"Projected PC2 ("}\NormalTok{,}\KeywordTok{round}\NormalTok{(PCA2ESepi[[}\DecValTok{2}\NormalTok{]][}\DecValTok{2}\NormalTok{],}\DecValTok{2}\NormalTok{),}\StringTok{"\% of varience)"}\NormalTok{,}\DataTypeTok{sep=}\StringTok{""}\NormalTok{))} +\CommentTok{\#\# $x} +\CommentTok{\#\# [1] "Projected PC1 (18.36\% of varience)"} +\CommentTok{\#\# } +\CommentTok{\#\# $y} +\CommentTok{\#\# [1] "Projected PC2 (17.15\% of varience)"} +\CommentTok{\#\# } +\CommentTok{\#\# $title} +\CommentTok{\#\# [1] "Encode RNAseq in target PC1 \& PC2"} +\CommentTok{\#\# } +\CommentTok{\#\# attr(,"class")} +\CommentTok{\#\# [1] "labels"} +\end{Highlighting} +\end{Shaded} + +\hypertarget{clustering-projection}{% +\section{Clustering projection}\label{clustering-projection}} + +As canonical projection is not defined for clustering objects, the projectR package offers two transfer learning inspired methods to achieve the ``projection'' of clustering objects. These methods are defined by the function used to quantify and transfer the relationships which define each cluster in the original data set to the new dataset. Briefly, \texttt{cluster2pattern} uses the corelation of each genes expression to the mean of each cluster to define continuous weights. These weights are output as a \texttt{pclust} object which can serve as input to \texttt{projectR}. Alternatively, the \texttt{intersectoR} function can be used to test for significant overlap between two clustering objects. Both \texttt{cluster2pattern} and \texttt{intersectoR} methods are coded for a generic list structure with additional S4 class methods for kmeans and hclust objects. Further details and examples are provided in the followin respecitive sections. + +\hypertarget{cluster2pattern}{% +\subsection{cluster2pattern}\label{cluster2pattern}} + +\texttt{cluster2pattern} uses the corelation of each genes expression to the mean of each cluster to define continuous weights. + +\begin{verbatim} +library(projectR) +data(p.RNAseq6l3c3t) + + +nP<-5 +kClust<-kmeans(p.RNAseq6l3c3t,centers=nP) +kpattern<-cluster2pattern(clusters = kClust, NP = nP, data = p.RNAseq6l3c3t) +kpattern + +cluster2pattern(clusters = NA, NP = NA, data = NA) +\end{verbatim} + +\hypertarget{input-arguments-2}{% +\subsubsection{Input Arguments}\label{input-arguments-2}} + +The inputs that must be set each time are the clusters and data. + +The arguments are as follows: + +\begin{description} +\item[clusters]{a clustering object} +\item[NP]{either the number of clusters desired or the subset of clusters to use} +\item[data]{data used to make clusters object} +\end{description} + +\hypertarget{output-2}{% +\subsubsection{Output}\label{output-2}} + +The output of the \texttt{cluster2pattern} function is a \texttt{pclust} class object; specifically, a matrix of genes (rows) by clusters (columns). A gene's value outside of its assigned cluster is zero. For the cluster containing a given gene, the gene's value is the correlation of the gene's expression to the mean of that cluster. + +\hypertarget{intersector}{% +\subsection{intersectoR}\label{intersector}} + +\texttt{intersectoR} function can be used to test for significant overlap between two clustering objects. The base function finds and tests the intersecting values of two sets of lists, presumably the genes associated with patterns in two different datasets. S4 class methods for \texttt{hclust} and \texttt{kmeans} objects are also available. + +\begin{verbatim} +library(projectR) +intersectoR(pSet1 = NA, pSet2 = NA, pval = 0.05, full = FALSE, k = NULL) +\end{verbatim} + +\hypertarget{input-arguments-3}{% +\subsubsection{Input Arguments}\label{input-arguments-3}} + +The inputs that must be set each time are the clusters and data. + +The arguments are as follows: + +\begin{description} +\item[pSet1]{a list for a set of patterns where each entry is a set of genes associated with a single pattern} +\item[pSet2]{a list for a second set of patterns where each entry is a set of genes associated with a single pattern} +\item[pval]{the maximum p-value considered significant} +\item[full]{logical indicating whether to return full data frame of signigicantly overlapping sets. Default is false will return summary matrix.} +\item[k]{numeric giving cut height for hclust objects, if vector arguments will be applied to pSet1 and pSet2 in that order} +\end{description} + +\hypertarget{output-3}{% +\subsubsection{Output}\label{output-3}} + +The output of the \texttt{intersectoR} function is a summary matrix showing the sets with statistically significant overlap under the specified \(p\)-value threshold based on a hypergeometric test. If \texttt{full==TRUE} the full data frame of significantly overlapping sets will also be returned. + +\hypertarget{correlation-based-projection}{% +\section{Correlation based projection}\label{correlation-based-projection}} + +Correlation based projection requires a matrix of gene-wise correlation values to serve as the Pattern input to the \texttt{projectR} function. This matrix can be user-generated or the result of the \texttt{correlateR} function included in the projectR package. User-generated matrixes with each row corresponding to an individual gene can be input to the generic \texttt{projectR} function. The \texttt{correlateR} function allows users to create a weight matrix for projection with values quantifying the within dataset correlation of each genes expression to the expression pattern of a particular gene or set of genes as follows. + +\hypertarget{correlater}{% +\subsection{correlateR}\label{correlater}} + +\begin{verbatim} +library(projectR) +correlateR(genes = NA, dat = NA, threshtype = "R", threshold = 0.7, absR = FALSE, ...) +\end{verbatim} + +\hypertarget{input-arguments-4}{% +\subsubsection{Input Arguments}\label{input-arguments-4}} + +The inputs that must be set each time are only the genes and data, with all other inputs having default values. + +The arguments are as follows: + +\begin{description} +\item[genes]{gene or character vector of genes for reference expression pattern dat} +\item[data]{matrix or data frame with genes to be used for to calculate correlation} +\item[threshtype]{Default "R" indicates thresholding by R value or equivalent. Alternatively, "N" indicates a numerical cut off.} +\item[threshold]{numeric indicating value at which to make threshold} +\item[absR]{logical indicating where to include both positive and negatively correlated genes} +\item[...]{addtion imputes to the cor function} +\end{description} + +\hypertarget{output-4}{% +\subsubsection{Output}\label{output-4}} + +The output of the \texttt{correlateR} function is a \texttt{correlateR} class object. Specifically, a matrix of correlation values for those genes whose expression pattern pattern in the dataset is correlated (and anti-correlated if absR=TRUE) above the value given in as the threshold arguement. As this information may be useful in its own right, it is recommended that users inspect the \texttt{correlateR} object before using it as input to the \texttt{projectR} function. + +\hypertarget{obtaining-and-visualizing-objects.}{% +\subsection{\texorpdfstring{Obtaining and visualizing \texttt{correlateR} objects.}{Obtaining and visualizing objects.}}\label{obtaining-and-visualizing-objects.}} + +\begin{Shaded} +\begin{Highlighting}[] +\CommentTok{\# data to} +\KeywordTok{library}\NormalTok{(projectR)} +\KeywordTok{data}\NormalTok{(}\StringTok{"p.RNAseq6l3c3t"}\NormalTok{)} + +\CommentTok{\# get genes correlated to T} +\NormalTok{cor2T<{-}}\KeywordTok{correlateR}\NormalTok{(}\DataTypeTok{genes=}\StringTok{"T"}\NormalTok{, }\DataTypeTok{dat=}\NormalTok{p.RNAseq6l3c3t, }\DataTypeTok{threshtype=}\StringTok{"N"}\NormalTok{, }\DataTypeTok{threshold=}\DecValTok{10}\NormalTok{, }\DataTypeTok{absR=}\OtherTok{TRUE}\NormalTok{)} +\NormalTok{cor2T <{-}}\StringTok{ }\NormalTok{cor2T}\OperatorTok{@}\NormalTok{corM} +\CommentTok{\#\#\# heatmap of genes more correlated to T} +\NormalTok{indx<{-}}\KeywordTok{unlist}\NormalTok{(}\KeywordTok{sapply}\NormalTok{(cor2T,rownames))} +\NormalTok{indx <{-}}\StringTok{ }\KeywordTok{as.vector}\NormalTok{(indx)} +\KeywordTok{colnames}\NormalTok{(p.RNAseq6l3c3t)<{-}pd.RNAseq6l3c3t}\OperatorTok{$}\NormalTok{sampleX} +\KeywordTok{library}\NormalTok{(reshape2)} +\CommentTok{\#\# Warning: package \textquotesingle{}reshape2\textquotesingle{} was built under R version 4.0.5} +\NormalTok{pm.RNAseq6l3c3t<{-}}\KeywordTok{melt}\NormalTok{(}\KeywordTok{cbind}\NormalTok{(p.RNAseq6l3c3t[indx,],indx))} +\CommentTok{\#\# Using indx as id variables} + +\KeywordTok{library}\NormalTok{(gplots)} +\KeywordTok{library}\NormalTok{(ggplot2)} +\KeywordTok{library}\NormalTok{(viridis)} +\CommentTok{\#\# Warning: package \textquotesingle{}viridis\textquotesingle{} was built under R version 4.0.5} +\CommentTok{\#\# Loading required package: viridisLite} +\CommentTok{\#\# Warning: package \textquotesingle{}viridisLite\textquotesingle{} was built under R version 4.0.5} +\NormalTok{pCorT<{-}}\KeywordTok{ggplot}\NormalTok{(pm.RNAseq6l3c3t, }\KeywordTok{aes}\NormalTok{(variable, indx, }\DataTypeTok{fill =}\NormalTok{ value)) }\OperatorTok{+} +\StringTok{ }\KeywordTok{geom\_tile}\NormalTok{(}\DataTypeTok{colour=}\StringTok{"gray20"}\NormalTok{, }\DataTypeTok{size=}\FloatTok{1.5}\NormalTok{, }\DataTypeTok{stat=}\StringTok{"identity"}\NormalTok{) }\OperatorTok{+} +\StringTok{ }\KeywordTok{scale\_fill\_viridis}\NormalTok{(}\DataTypeTok{option=}\StringTok{"B"}\NormalTok{) }\OperatorTok{+} +\StringTok{ }\KeywordTok{xlab}\NormalTok{(}\StringTok{""}\NormalTok{) }\OperatorTok{+}\StringTok{ }\KeywordTok{ylab}\NormalTok{(}\StringTok{""}\NormalTok{) }\OperatorTok{+} +\StringTok{ }\KeywordTok{scale\_y\_discrete}\NormalTok{(}\DataTypeTok{limits=}\NormalTok{indx) }\OperatorTok{+} +\StringTok{ }\KeywordTok{ggtitle}\NormalTok{(}\StringTok{"Ten genes most highly pos \& neg correlated with T"}\NormalTok{) }\OperatorTok{+} +\StringTok{ }\KeywordTok{theme}\NormalTok{(} + \DataTypeTok{panel.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill=}\StringTok{"gray20"}\NormalTok{),} + \DataTypeTok{panel.border =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill=}\OtherTok{NA}\NormalTok{,}\DataTypeTok{color=}\StringTok{"gray20"}\NormalTok{, }\DataTypeTok{size=}\FloatTok{0.5}\NormalTok{, }\DataTypeTok{linetype=}\StringTok{"solid"}\NormalTok{),} + \DataTypeTok{panel.grid.major =} \KeywordTok{element\_blank}\NormalTok{(),} + \DataTypeTok{panel.grid.minor =} \KeywordTok{element\_blank}\NormalTok{(),} + \DataTypeTok{axis.line =} \KeywordTok{element\_blank}\NormalTok{(),} + \DataTypeTok{axis.ticks =} \KeywordTok{element\_blank}\NormalTok{(),} + \DataTypeTok{axis.text =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{size=}\KeywordTok{rel}\NormalTok{(}\DecValTok{1}\NormalTok{),}\DataTypeTok{hjust=}\DecValTok{1}\NormalTok{),} + \DataTypeTok{axis.text.x =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{angle =} \DecValTok{90}\NormalTok{,}\DataTypeTok{vjust=}\NormalTok{.}\DecValTok{5}\NormalTok{),} + \DataTypeTok{legend.text =} \KeywordTok{element\_text}\NormalTok{(}\DataTypeTok{color=}\StringTok{"white"}\NormalTok{, }\DataTypeTok{size=}\KeywordTok{rel}\NormalTok{(}\DecValTok{1}\NormalTok{)),} + \DataTypeTok{legend.background =} \KeywordTok{element\_rect}\NormalTok{(}\DataTypeTok{fill=}\StringTok{"gray20"}\NormalTok{),} + \DataTypeTok{legend.position =} \StringTok{"bottom"}\NormalTok{,} + \DataTypeTok{legend.title=}\KeywordTok{element\_blank}\NormalTok{()} +\NormalTok{)} +\end{Highlighting} +\end{Shaded} + +\begin{adjustwidth}{\fltoffset}{0mm} +\includegraphics[width=1\linewidth,]{E:/Projects/Fertiglab/projectR/vignettes/projectR_files/figure-latex/unnamed-chunk-5-1} \end{adjustwidth} + +\hypertarget{projecting-correlater-objects.}{% +\subsection{Projecting correlateR objects.}\label{projecting-correlater-objects.}} + +\begin{Shaded} +\begin{Highlighting}[] +\CommentTok{\# data to project into from RNAseq6l3c3t expression data} +\KeywordTok{data}\NormalTok{(p.ESepiGen4c1l)} + +\KeywordTok{library}\NormalTok{(projectR)} +\NormalTok{cor2ESepi <{-}}\StringTok{ }\KeywordTok{projectR}\NormalTok{(p.ESepiGen4c1l}\OperatorTok{$}\NormalTok{mRNA.Seq,}\DataTypeTok{loadings=}\NormalTok{cor2T[[}\DecValTok{1}\NormalTok{]],}\DataTypeTok{full=}\OtherTok{FALSE}\NormalTok{,} + \DataTypeTok{dataNames=}\NormalTok{map.ESepiGen4c1l}\OperatorTok{$}\NormalTok{GeneSymbols)} +\CommentTok{\#\# [1] "9 row names matched between data and loadings"} +\CommentTok{\#\# [1] "Updated dimension of data: 9 9"} +\end{Highlighting} +\end{Shaded} + +\hypertarget{differential-features-identification.}{% +\section{Differential features identification.}\label{differential-features-identification.}} + +\hypertarget{projectiondriver}{% +\subsection{projectionDriveR}\label{projectiondriver}} + +Given loadings that define the weight of features (genes) in a given latent space (e.g.~PCA, NMF), and the use of these patterns in samples, it is of interest to look at differential usage of these features between conditions. These conditions may be defined by user-defined annotations of cell type or by differential usage of a (projected) pattern. By examining differences in gene expression, weighted by the loadings that define their importance in a specific latent space, a unique understanding of differential expression in that context can be gained. This approach was originally proposed and developed in (Baraban et al, 2021), which demonstrates its utility in cross-celltype and cross-species interpretation of pattern usages. + +\begin{verbatim} +library(projectR) +projectionDriveR(cellgroup1, cellgroup2, loadings, loadingsNames = NULL, + pvalue, pattern_name, display = T, normalize_pattern = T) +\end{verbatim} + +\hypertarget{input-arguments-5}{% +\subsubsection{Input Arguments}\label{input-arguments-5}} + +The required inputs are two feature by sample (e.g.~gene by cell) matrices to be compared, the loadings that define the feature weights, and the name of the pattern (column of feature loadings). If applicable, the expression matrices should already be corrected for variables such as sequencing depth. + +The arguments for projectionDriveR are: + +\begin{description} +\item[cellgroup1]{Matrix 1 with features as rows, samples as columns.} +\item[cellgroup2]{Matrix 2 with features as rows, samples as columns.} +\item[loadings]{Matrix or dataframe with features as rows, columns as patterns. Values define feature weights in that space} +\item[loadingsNames]{Vector of names corresponding to rows of loadings. By default the rownames of loadings will be used} +\item[pattern\_name]{the column name of the loadings by which the features will be weighted} +\item[pvalue]{Determines the significance of the confidence interval to be calculated between the difference of means} +\item[display]{Boolean. Whether or not to plot the estimates of significant features. Default = T} +\item[normalize\_pattern]{Boolean. Whether or not to normalize the average feature weight. Default = T} +\end{description} + +\hypertarget{output-5}{% +\subsubsection{Output}\label{output-5}} + +The output of \texttt{projectionDriveR} is a list of length five \texttt{mean\_ci} holds the confidence intervals for the difference in means for all features, \texttt{weighted\_ci} holds the confidence intervals for the weighted difference in means for all features, and normalized\_weights are the weights themselves. In addition, \texttt{significant\_genes} is a vector of gene names that are significantly different at the threshold provided. \texttt{plotted\_ci} returns the ggplot figure of the confidence intervals, see \texttt{plotConfidenceIntervals} for documentation. + +\hypertarget{identifying-differential-features-associated-with-learned-patterns}{% +\subsubsection{Identifying differential features associated with learned patterns}\label{identifying-differential-features-associated-with-learned-patterns}} + +\begin{Shaded} +\begin{Highlighting}[] +\KeywordTok{options}\NormalTok{(}\DataTypeTok{width =} \DecValTok{60}\NormalTok{)} +\KeywordTok{library}\NormalTok{(projectR)} +\KeywordTok{library}\NormalTok{(dplyr, }\DataTypeTok{warn.conflicts =}\NormalTok{ F)} +\CommentTok{\#\# Warning: package \textquotesingle{}dplyr\textquotesingle{} was built under R version 4.0.5} + +\CommentTok{\#gene weights x pattern} +\KeywordTok{data}\NormalTok{(}\StringTok{"retinal\_patterns"}\NormalTok{)} + +\CommentTok{\#size{-}normed, log expression} +\KeywordTok{data}\NormalTok{(}\StringTok{"microglial\_counts"}\NormalTok{)} + +\CommentTok{\#size{-}normed, log expression} +\KeywordTok{data}\NormalTok{(}\StringTok{"glial\_counts"}\NormalTok{)} + +\CommentTok{\#the features by which to weight the difference in expression } +\NormalTok{pattern\_to\_weight <{-}}\StringTok{ "Pattern.24"} +\NormalTok{drivers <{-}}\StringTok{ }\KeywordTok{projectionDriveR}\NormalTok{(microglial\_counts, }\CommentTok{\#expression matrix} +\NormalTok{ glial\_counts, }\CommentTok{\#expression matrix} + \DataTypeTok{loadings =}\NormalTok{ retinal\_patterns, }\CommentTok{\#feature x pattern dataframe} + \DataTypeTok{loadingsNames =} \OtherTok{NULL}\NormalTok{,} + \DataTypeTok{pattern\_name =}\NormalTok{ pattern\_to\_weight, }\CommentTok{\#column name} + \DataTypeTok{pvalue =} \FloatTok{1e{-}5}\NormalTok{, }\CommentTok{\#pvalue before bonferroni correction} + \DataTypeTok{display =}\NormalTok{ T,} + \DataTypeTok{normalize\_pattern =}\NormalTok{ T) }\CommentTok{\#normalize feature weights} +\CommentTok{\#\# [1] "2996 row names matched between datasets"} +\CommentTok{\#\# [1] "2996"} +\CommentTok{\#\# [1] "Updated dimension of data: 2996"} +\end{Highlighting} +\end{Shaded} + +\begin{adjustwidth}{\fltoffset}{0mm} +\includegraphics[width=1\linewidth,]{E:/Projects/Fertiglab/projectR/vignettes/projectR_files/figure-latex/projectionDriver-1} \end{adjustwidth} + +\begin{Shaded} +\begin{Highlighting}[] + +\NormalTok{conf\_intervals <{-}}\StringTok{ }\NormalTok{drivers}\OperatorTok{$}\NormalTok{mean\_ci[drivers}\OperatorTok{$}\NormalTok{significant\_genes,]} + +\KeywordTok{str}\NormalTok{(conf\_intervals)} +\CommentTok{\#\# \textquotesingle{}data.frame\textquotesingle{}: 253 obs. of 2 variables:} +\CommentTok{\#\# $ low : num 1.86 0.158 {-}0.562 {-}0.756 0.155 ...} +\CommentTok{\#\# $ high: num 2.03943 0.26729 {-}0.00197 {-}0.18521 0.23239 ...} +\end{Highlighting} +\end{Shaded} + +\hypertarget{plotconfidenceintervals}{% +\subsection{plotConfidenceIntervals}\label{plotconfidenceintervals}} + +\hypertarget{input}{% +\subsubsection{Input}\label{input}} + +The arguments for plotConfidenceIntervals are: + +\begin{description} +\item[confidence\_intervals]{A dataframe of features x estimates} +\item[interval\_name]{names of columns that contain the low and high estimates, respectively. +(default: c("low","high"))} +\item[pattern\_name]{string to use as the title for the plots} +\item[sort]{Boolean. Whether or not to sort genes by their estimates (default = T)} +\item[genes]{a vector with names of genes to include in plot. If sort=F, estimates will be plotted in this order (default = NULL will include all genes.)} +\item[weights]{weights of features to include as annotation (default = NULL will not include heatmap)} +\item[weights\_clip]{quantile of data to clip color scale for improved visualization (default: 0.99)} +\item[weights\_vis\_norm]{Which processed version of weights to visualize as a heatmap. One of c("none", "quantile"). default = "none"} +\end{description} + +\hypertarget{output-6}{% +\subsubsection{Output}\label{output-6}} + +A list of the length three that includes confidence interval plots and relevant info. \texttt{ci\_estimates\_plot} is the point-range plot for the provided estimates. If called from within \texttt{projectionDriveR}, the unweighted estimates are used. \texttt{feature\_order} is the vector of gene names in the order shown in the figure. \texttt{weights\_heatmap} is a heatmap annotation of the gene loadings, in the same order as above. + +\hypertarget{customize-plotting-of-confidence-intervals}{% +\subsubsection{Customize plotting of confidence intervals}\label{customize-plotting-of-confidence-intervals}} + +\begin{Shaded} +\begin{Highlighting}[] +\KeywordTok{library}\NormalTok{(cowplot)} +\CommentTok{\#\# Warning: package \textquotesingle{}cowplot\textquotesingle{} was built under R version 4.0.5} +\CommentTok{\#order in ascending order of estimates} +\NormalTok{conf\_intervals <{-}}\StringTok{ }\NormalTok{conf\_intervals }\OperatorTok{\%>\%}\StringTok{ }\KeywordTok{mutate}\NormalTok{(}\DataTypeTok{mid =}\NormalTok{ (high}\OperatorTok{+}\NormalTok{low)}\OperatorTok{/}\DecValTok{2}\NormalTok{) }\OperatorTok{\%>\%}\StringTok{ }\KeywordTok{arrange}\NormalTok{(mid)} +\NormalTok{gene\_order <{-}}\StringTok{ }\KeywordTok{rownames}\NormalTok{(conf\_intervals)} + +\CommentTok{\#add text labels for top and bottom n genes} +\NormalTok{conf\_intervals}\OperatorTok{$}\NormalTok{label\_name <{-}}\StringTok{ }\OtherTok{NA\_character\_} +\NormalTok{n <{-}}\StringTok{ }\DecValTok{2} +\NormalTok{idx <{-}}\StringTok{ }\KeywordTok{c}\NormalTok{(}\DecValTok{1}\OperatorTok{:}\NormalTok{n, (}\KeywordTok{dim}\NormalTok{(conf\_intervals)[}\DecValTok{1}\NormalTok{]}\OperatorTok{{-}}\NormalTok{(n}\DecValTok{{-}1}\NormalTok{))}\OperatorTok{:}\KeywordTok{dim}\NormalTok{(conf\_intervals)[}\DecValTok{1}\NormalTok{])} +\NormalTok{gene\_ids <{-}}\StringTok{ }\NormalTok{gene\_order[idx]} +\NormalTok{conf\_intervals}\OperatorTok{$}\NormalTok{label\_name[idx] <{-}}\StringTok{ }\NormalTok{gene\_ids} + +\CommentTok{\#the labels above can now be used as ggplot aesthetics} +\NormalTok{plots\_list <{-}}\StringTok{ }\KeywordTok{plotConfidenceIntervals}\NormalTok{(conf\_intervals, }\CommentTok{\#mean difference in expression confidence intervals} + \DataTypeTok{sort =}\NormalTok{ F, }\CommentTok{\#should genes be sorted by estimates} + \DataTypeTok{weights =}\NormalTok{ drivers}\OperatorTok{$}\NormalTok{normalized\_weights[}\KeywordTok{rownames}\NormalTok{(conf\_intervals)],} + \DataTypeTok{pattern\_name =}\NormalTok{ pattern\_to\_weight,} + \DataTypeTok{weights\_clip =} \FloatTok{0.99}\NormalTok{,} + \DataTypeTok{weights\_vis\_norm =} \StringTok{"none"}\NormalTok{)} + +\NormalTok{pl1 <{-}}\StringTok{ }\NormalTok{plots\_list[[}\StringTok{"ci\_estimates\_plot"}\NormalTok{]] }\OperatorTok{+} +\StringTok{ }\NormalTok{ggrepel}\OperatorTok{::}\KeywordTok{geom\_label\_repel}\NormalTok{(}\KeywordTok{aes}\NormalTok{(}\DataTypeTok{label =}\NormalTok{ label\_name), }\DataTypeTok{max.overlaps =} \DecValTok{20}\NormalTok{, }\DataTypeTok{force =} \DecValTok{50}\NormalTok{)} + +\NormalTok{pl2 <{-}}\StringTok{ }\NormalTok{plots\_list[[}\StringTok{"weights\_heatmap"}\NormalTok{]]} + +\CommentTok{\#now plot the weighted differences} +\NormalTok{weighted\_conf\_intervals <{-}}\StringTok{ }\NormalTok{drivers}\OperatorTok{$}\NormalTok{weighted\_mean\_ci[gene\_order,]} +\NormalTok{plots\_list\_weighted <{-}}\StringTok{ }\KeywordTok{plotConfidenceIntervals}\NormalTok{(weighted\_conf\_intervals,} + \DataTypeTok{sort =}\NormalTok{ F,} + \DataTypeTok{pattern\_name =}\NormalTok{ pattern\_to\_weight)} + +\NormalTok{pl3 <{-}}\StringTok{ }\NormalTok{plots\_list\_weighted[[}\StringTok{"ci\_estimates\_plot"}\NormalTok{]] }\OperatorTok{+} +\StringTok{ }\KeywordTok{xlab}\NormalTok{(}\StringTok{"Difference in weighted group means"}\NormalTok{) }\OperatorTok{+} +\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{axis.title.y =} \KeywordTok{element\_blank}\NormalTok{(), }\DataTypeTok{axis.ticks.y =} \KeywordTok{element\_blank}\NormalTok{(), }\DataTypeTok{axis.text.y =} \KeywordTok{element\_blank}\NormalTok{())} + +\NormalTok{cowplot}\OperatorTok{::}\KeywordTok{plot\_grid}\NormalTok{(pl1, pl2, pl3, }\DataTypeTok{align =} \StringTok{"h"}\NormalTok{, }\DataTypeTok{rel\_widths =} \KeywordTok{c}\NormalTok{(}\DecValTok{1}\NormalTok{,.}\DecValTok{4}\NormalTok{, }\DecValTok{1}\NormalTok{), }\DataTypeTok{ncol =} \DecValTok{3}\NormalTok{)} +\CommentTok{\#\# Warning: Removed 249 rows containing missing values} +\CommentTok{\#\# (geom\_label\_repel).} +\end{Highlighting} +\end{Shaded} + +\begin{adjustwidth}{\fltoffset}{0mm} +\includegraphics[width=1\linewidth,]{E:/Projects/Fertiglab/projectR/vignettes/projectR_files/figure-latex/unnamed-chunk-7-1} \end{adjustwidth} + +\hypertarget{refs}{} +\begin{cslreferences} +\leavevmode\hypertarget{ref-Barbakh:2009bw}{}% +Barbakh, Wesam Ashour, Ying Wu, and Colin Fyfe. 2009. ``Review of Linear Projection Methods.'' In \emph{Non-Standard Parameter Adaptation for Exploratory Data Analysis}, 29--48. Berlin, Heidelberg: Springer Berlin Heidelberg. + +\leavevmode\hypertarget{ref-Sibisi1997}{}% +Sibisi, Sibusiso, and John Skilling. 1997. ``Prior Distributions on Measure Space.'' \emph{Journal of the Royal Statistical Society: Series B (Statistical Methodology)} 59 (1): 217--35. \url{https://doi.org/10.1111/1467-9868.00065}. + +\leavevmode\hypertarget{ref-Ochs2006}{}% +Wang, Guoli, Andrew V. Kossenkov, and Michael F. Ochs. 2006. ``LS-Nmf: A Modified Non-Negative Matrix Factorization Algorithm Utilizing Uncertainty Estimates.'' \emph{BMC Bioinformatics} 7 (1): 175. \url{https://doi.org/10.1186/1471-2105-7-175}. +\end{cslreferences} + + +\end{document} From b8009990ab257b03980b85a18992a6886337d105 Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Fri, 16 Feb 2024 12:56:42 -0500 Subject: [PATCH 23/33] Style updates updated roxygen2 documentation --- man/bonferroniCorrectedDifferences.Rd | 12 ++++++------ man/pdVolcano.Rd | 11 ++++++----- man/plotConfidenceIntervals.Rd | 23 +++++++++++++---------- man/projectionDriveR.Rd | 19 ++++++++++--------- 4 files changed, 35 insertions(+), 30 deletions(-) diff --git a/man/bonferroniCorrectedDifferences.Rd b/man/bonferroniCorrectedDifferences.Rd index e0cfccc..2c8cebb 100644 --- a/man/bonferroniCorrectedDifferences.Rd +++ b/man/bonferroniCorrectedDifferences.Rd @@ -7,9 +7,9 @@ bonferroniCorrectedDifferences( group1, group2, + pvalue, diff_weights = NULL, - mode = "CI", - pvalue + mode = "CI" ) } \arguments{ @@ -17,12 +17,12 @@ bonferroniCorrectedDifferences( \item{group2}{count matrix 2} -\item{diff_weights}{loadings to weight the differential expression between the groups} +\item{pvalue}{significance value to threshold} -\item{mode}{user specified statistical approach, confidence intervals (CI) or pvalues (PV) - default to CI} +\item{diff_weights}{loadings to weight the differential expression} -\item{pvalue}{significance value to threshold} +\item{mode}{statistical approach, confidence intervals(CI) or pvalues(PV)} } \description{ -Calculate the weighted and unweighted difference in means for each measurement between two groups. +Calculate weighted/unweighted mean difference for each gene between 2 groups } diff --git a/man/pdVolcano.Rd b/man/pdVolcano.Rd index 3127841..fd80d50 100644 --- a/man/pdVolcano.Rd +++ b/man/pdVolcano.Rd @@ -10,16 +10,16 @@ pdVolcano( pvalue = NULL, subset = NULL, filter.inf = FALSE, - label.num = 5, - display = T + label.num = 5L, + display = TRUE ) } \arguments{ -\item{result}{result output from projectionDriveR function with PV mode selected} +\item{result}{result output from projectionDriveR function in PV mode} \item{FC}{fold change threshold, default at 0.2} -\item{pvalue}{significance threshold, default set to pvalue stored in projectionDriveR output} +\item{pvalue}{significance threshold, default set stored pvalue} \item{subset}{vector of gene names to subset the plot by} @@ -33,5 +33,6 @@ pdVolcano( A list with weighted and unweighted differential expression metrics } \description{ -Generate volcano plot and gate genes based on fold change and pvalue, includes vectors that can be used with fast gene set enrichment (fgsea) +Generate volcano plot and gate genes based on fold change and pvalue, +includes vectors that can be used with fast gene set enrichment (fgsea) } diff --git a/man/plotConfidenceIntervals.Rd b/man/plotConfidenceIntervals.Rd index f67993a..bdbc9f0 100644 --- a/man/plotConfidenceIntervals.Rd +++ b/man/plotConfidenceIntervals.Rd @@ -8,36 +8,39 @@ plotConfidenceIntervals( confidence_intervals, interval_name = c("low", "high"), pattern_name = NULL, - sort = T, + sort = TRUE, genes = NULL, weights = NULL, weights_clip = 0.99, weights_vis_norm = "none", - weighted = F + weighted = FALSE ) } \arguments{ \item{confidence_intervals}{A dataframe of features x estimates.} -\item{interval_name}{names of columns that contain the low and high estimates, respectively. Default: c("low","high")} +\item{interval_name}{Estimate column names. Default: c("low","high")} \item{pattern_name}{string to use as the title for plots.} -\item{sort}{Boolean. Whether or not to sort genes by their estimates (default = T)} +\item{sort}{Boolean. Sort genes by their estimates (default = TRUE)} -\item{genes}{a vector with names of genes to include in plot. If sort=F, estimates will be plotted in this order.} +\item{genes}{a vector with names of genes to include in plot. +If sort=F, estimates will be plotted in this order.} \item{weights}{optional. weights of features to include as annotation.} -\item{weights_clip}{optional. quantile of data to clip color scale for improved visualization. Default: 0.99} +\item{weights_clip}{optional. quantile of data to clip color scale for +improved visualization. Default: 0.99} -\item{weights_vis_norm}{Which processed version of weights to visualize as a heatmap.} +\item{weights_vis_norm}{Which version of weights to visualize as a heatmap. +Options are "none" (uses provided weights) or "quantiles". Default: none} -\item{weighted}{specifies whether the confidence intervals in use are weighted by the pattern and labels plots accordingly -Options are "none" (which uses provided weights) or "quantiles". Default: none} +\item{weighted}{specifies whether the confidence intervals in use are +weighted by the pattern and labels plots accordingly} } \value{ -A list with pointrange estimates and, if requested, a heatmap of pattern weights. +A list with pointrange estimates and a heatmap of pattern weights. } \description{ Generate point and line confidence intervals from provided estimates. diff --git a/man/projectionDriveR.Rd b/man/projectionDriveR.Rd index 3b17249..8d3ad41 100644 --- a/man/projectionDriveR.Rd +++ b/man/projectionDriveR.Rd @@ -8,8 +8,8 @@ projectionDriveR( cellgroup1, cellgroup2, loadings, - loadingsNames = NULL, pattern_name, + loadingsNames = NULL, pvalue = 1e-05, display = TRUE, normalize_pattern = TRUE, @@ -23,21 +23,22 @@ projectionDriveR( \item{loadings}{A matrix of continuous values defining the features} -\item{loadingsNames}{a vector with names of loading rows. Defaults to rownames.} +\item{pattern_name}{column of loadings for which drivers will be calculated} -\item{pattern_name}{column of loadings for which drivers will be calculated.} +\item{loadingsNames}{a vector with names of loading rows defaults to rownames} -\item{pvalue}{confidence level for the bonferroni confidence intervals. Default 1e-5} +\item{pvalue}{confidence level. Default 1e-5} -\item{display}{boolean. Whether or not to plot and display confidence intervals} +\item{display}{boolean. Whether or not to display confidence intervals} -\item{normalize_pattern}{Boolean. Whether or not to normalize pattern weights.} +\item{normalize_pattern}{Boolean. Whether or not to normalize pattern weights} -\item{mode}{user specified statistical approach, confidence intervals (CI) or pvalues (PV) - default to CI} +\item{mode}{statistical approach, confidence intervals or pvalues. default CI} } \value{ -A list with weighted mean differences, mean differences, and differential genes that meet the provided signficance threshold. +A list with unweighted/weighted mean differences and differential +genes that meet the provided signficance threshold. } \description{ -Calculate the weighted difference in expression between two groups (group1 - group2) +Calculate weighted expression difference between two groups (group1 - group2) } From 08394c7d860df7a65ea1efed84d5eed9f43b8ab3 Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Fri, 16 Feb 2024 12:59:11 -0500 Subject: [PATCH 24/33] Style updates Incorporated some lintr suggestions Resolved 'no visible binding for global variable' Added automatic plotting Condensed volcano plotting to one parametrized function --- R/plotting.R | 328 +++++++++++++++++++++++++++++---------------------- 1 file changed, 184 insertions(+), 144 deletions(-) diff --git a/R/plotting.R b/R/plotting.R index 53c1118..8258343 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -1,127 +1,148 @@ -####################################################################################################################################### -#' +################################################################################ #' plotConfidenceIntervals -#' -#' Generate point and line confidence intervals from provided estimates. -#' +#' +#' Generate point and line confidence intervals from provided estimates. #' @import ggplot2 #' @import viridis #' @importFrom scales squish #' @importFrom dplyr %>% mutate dense_rank #' @importFrom cowplot plot_grid -#' @param confidence_intervals A dataframe of features x estimates. -#' @param interval_name names of columns that contain the low and high estimates, respectively. Default: c("low","high") +#' @param confidence_intervals A dataframe of features x estimates. +#' @param interval_name Estimate column names. Default: c("low","high") #' @param pattern_name string to use as the title for plots. -#' @param sort Boolean. Whether or not to sort genes by their estimates (default = T) -#' @param genes a vector with names of genes to include in plot. If sort=F, estimates will be plotted in this order. -#' @param weights optional. weights of features to include as annotation. -#' @param weights_clip optional. quantile of data to clip color scale for improved visualization. Default: 0.99 -#' @param weights_vis_norm Which processed version of weights to visualize as a heatmap. -#' @param weighted specifies whether the confidence intervals in use are weighted by the pattern and labels plots accordingly -#' Options are "none" (which uses provided weights) or "quantiles". Default: none -#' @return A list with pointrange estimates and, if requested, a heatmap of pattern weights. +#' @param sort Boolean. Sort genes by their estimates (default = TRUE) +#' @param genes a vector with names of genes to include in plot. +#' If sort=F, estimates will be plotted in this order. +#' @param weights optional. weights of features to include as annotation. +#' @param weights_clip optional. quantile of data to clip color scale for +#' improved visualization. Default: 0.99 +#' @param weights_vis_norm Which version of weights to visualize as a heatmap. +#' Options are "none" (uses provided weights) or "quantiles". Default: none +#' @param weighted specifies whether the confidence intervals in use are +#' weighted by the pattern and labels plots accordingly +#' @return A list with pointrange estimates and a heatmap of pattern weights. #' @export plotConfidenceIntervals <- function( confidence_intervals, - interval_name = c("low","high"), + interval_name = c("low", "high"), pattern_name = NULL, - sort = T, + sort = TRUE, genes = NULL, weights = NULL, weights_clip = 0.99, weights_vis_norm = "none", - weighted = F){ - - if(weights_clip < 0 | weights_clip > 1){ + weighted = FALSE) { + + #bind variables locally to the function + mid <- idx <- low <- high <- positive <- NULL + if (weights_clip < 0L || weights_clip > 1L) { stop("weights_clip must be numeric between 0 and 1") } - - if(!(weights_vis_norm %in% c("none","quantiles"))){ + + if (!(weights_vis_norm %in% c("none", "quantiles"))) { stop("weights_vis_norm must be either 'none' or 'quantiles'") } - - if(weighted == F){ + + if (weighted == FALSE) { lab = "Unweighted" - } else{ + } else { lab = "Weighted" } - #gene names were stored as rownames, make sure high and low estimates are stored + #gene names were stored as rownames, store high and low estimates confidence_intervals$gene_names <- rownames(confidence_intervals) - confidence_intervals$low <- confidence_intervals[,interval_name[1]] - confidence_intervals$high <- confidence_intervals[,interval_name[2]] - - n <- dim(confidence_intervals)[1] + confidence_intervals$low <- confidence_intervals[, interval_name[1L]] + confidence_intervals$high <- confidence_intervals[, interval_name[2L]] + + n <- dim(confidence_intervals)[1L] confidence_intervals <- confidence_intervals %>% mutate( - mid = (high+low)/2, #estimate, used for point position - positive = mid > 0) #upregulated, used for color scheme - - if(!is.null(genes)){ + mid = (high + low) / 2L, #estimate, used for point position + positive = mid > 0L) #upregulated, used for color scheme + + if (!is.null(genes)) { #select genes provided and get them in that order - if(!(is.character(genes))){ stop("Genes must be provided as a character vector") } + if (!(is.character(genes))) { + stop("Genes must be provided as a character vector") + } n <- length(genes) - message(paste0("Selecting ", n, " features")) - confidence_intervals <- confidence_intervals[genes,] - + message("Selecting ", n, " features") + confidence_intervals <- confidence_intervals[genes, ] + } - - if(sort){ + + if (sort) { #order in increasing order on estimates, and create index variable message("sorting genes in increasing order of estimates...") - confidence_intervals <- confidence_intervals %>% mutate(idx = dense_rank(mid)) %>% + confidence_intervals <- confidence_intervals %>% mutate( + idx = dense_rank(mid)) %>% arrange(mid) - - } else{ + + } else { #if not sorted, create index variable for current order - confidence_intervals <- confidence_intervals %>% mutate(idx = 1:n) + confidence_intervals <- confidence_intervals %>% mutate(idx = 1L:n) } - - #genereate point range plot - ci_plot <- ggplot(data = confidence_intervals, aes(y = idx, x = mid)) + geom_pointrange(aes(xmin = low, xmax = high, color = positive)) + - geom_point(aes(x = mid, y = idx), fill ="black",color = "black") + + + #generate point range plot + ci_plot <- ggplot(data = confidence_intervals, + aes(y = idx, x = mid)) + + geom_pointrange(aes(xmin = low, + xmax = high, + color = positive)) + + geom_point(aes(x = mid, + y = idx), + fill = "black", + color = "black") + theme_minimal() + xlab("Difference in group means") + ylab("Genes") + - geom_vline(xintercept = 0, color = "black", linetype = "dashed") + + geom_vline(xintercept = 0L, color = "black", linetype = "dashed") + theme(legend.position = "none") + ggtitle(lab) - + #if provided, create heatmap for pattern weights - if(!is.null(weights)){ + if (!is.null(weights)) { - #check that weights are formatted as a named vector - if(!(is.numeric(weights))){ stop("Weights must be provided as a numeric vector") } - if(is.null(names(weights))){ stop("Weights must have names that match estimates")} - - #either use pattern_name, or if not provided, just label heatmap with "weights" + #label with pattern name if provided hm_name <- ifelse(is.null(pattern_name), "weights", pattern_name) #maintain established order from the pointrange plot ordered_weights <- weights[rownames(confidence_intervals)] - - if(weights_vis_norm == "quantiles"){ - #transform to percentiles from 0 to 1 - ordered_weights <- trunc(rank(ordered_weights))/length(ordered_weights) - hm_name <- paste0(hm_name, " (quantiles)") #append quantile to plot name - } - confidence_intervals$weights <- ordered_weights #generate heatmap - wt_heatmap <- ggplot(data = confidence_intervals) + - geom_tile(aes(x = 1, y = 1:n, fill = weights)) + - scale_fill_viridis(limits=c(0, quantile(ordered_weights,weights_clip )), - oob=scales::squish, + wt_heatmap <- ggplot2::ggplot(data = confidence_intervals) + + geom_tile(aes(x = 1L, y = 1L:n, fill = weights)) + + scale_fill_viridis(limits = c(0L, quantile(ordered_weights, weights_clip)), + oob = scales::squish, name = hm_name) + theme_void() - - } else{ wt_heatmap = NULL} #if weights aren't provided, return NULL - + + #check that weights are formatted as a named vector + if (!(is.numeric(weights))) { + stop("Weights must be provided as a numeric vector") + } + if (is.null(names(weights))) { + stop("Weights must have names that match estimates") + } + + if (weights_vis_norm == "quantiles") { + #transform to percentiles from 0 to 1 + ordered_weights <- trunc(rank(ordered_weights)) / length(ordered_weights) + hm_name <- paste0(hm_name, " (quantiles)") #append quantile to plot name + } + + } else { + #if weights aren't provided, return NULL + return(list("ci_estimates_plot" = ci_plot, + "feature_order" = rownames(confidence_intervals), + "weights_heatmap" = NULL)) + } + return(list("ci_estimates_plot" = ci_plot, "feature_order" = rownames(confidence_intervals), "weights_heatmap" = wt_heatmap)) } -####################################################################################################################################### +################################################################################ #' plotVolcano #' #' Volcano plotting function @@ -131,50 +152,54 @@ plotConfidenceIntervals <- function( #' @param pvalue p value threshold #' @param title plot title #' @export - -plotVolcano<-function( - stats, #pdVolcano stats dataframe - metadata, #metadata from pdVolcano +plotVolcano <- function( + stats, + metadata, FC, pvalue, title -){ - +) { + #bind variables locally + mean_diff <- welch_padj <- Color <- NULL #set custom colors - myColors <- c("gray","red","dodgerblue") + myColors <- c("gray", "red", "dodgerblue") names(myColors) <- levels(stats$Color) custom_colors <- scale_color_manual(values = myColors, drop = FALSE) #plot - volcano <- ggplot(data = stats, aes(x = mean_diff, y = -log10(welch_padj), color = Color, label = stats$label)) + + volcano <- ggplot(data = stats, + aes(x = mean_diff, y = -log10(welch_padj), + color = Color, + label = stats$label)) + geom_vline(xintercept = c(FC, -FC), lty = "dashed") + geom_hline(yintercept = -log10(pvalue), lty = "dashed") + - geom_point(na.rm = TRUE) + - custom_colors + - coord_cartesian(ylim = c(0, 350), xlim = c(min(stats$mean_diff), max(stats$mean_diff))) + - ggrepel::geom_text_repel(size = 3, point.padding = 1, color = "black", - min.segment.length = .1, box.padding = 0.15, + geom_point(na.rm = TRUE) + + custom_colors + + coord_cartesian(ylim = c(0L, 350L), + xlim = c(min(stats$mean_diff), max(stats$mean_diff))) + + ggrepel::geom_text_repel(size = 3L, point.padding = 1L, color = "black", + min.segment.length = 0.1, box.padding = 0.15, max.overlaps = Inf, na.rm = TRUE) + labs(x = "FC", y = "Significance (-log10pval)", color = NULL) + - ggtitle(paste(title)) + + ggtitle(paste(title)) + theme_bw() + - theme(plot.title = element_text(size = 16), + theme(plot.title = element_text(size = 16L), legend.position = "bottom", - axis.title=element_text(size=14), - legend.text = element_text(size=12)) + axis.title = element_text(size = 14L), + legend.text = element_text(size = 12L)) return(volcano) } - -####################################################################################################################################### +################################################################################ #' pdVolcano #' -#' Generate volcano plot and gate genes based on fold change and pvalue, includes vectors that can be used with fast gene set enrichment (fgsea) -#' @param result result output from projectionDriveR function with PV mode selected +#' Generate volcano plot and gate genes based on fold change and pvalue, +#' includes vectors that can be used with fast gene set enrichment (fgsea) +#' @param result result output from projectionDriveR function in PV mode #' @param FC fold change threshold, default at 0.2 -#' @param pvalue significance threshold, default set to pvalue stored in projectionDriveR output +#' @param pvalue significance threshold, default set stored pvalue #' @param subset vector of gene names to subset the plot by #' @param filter.inf remove genes that have pvalues below machine double minimum value #' @param label.num Number of genes to label on either side of the volcano plot, default 5 @@ -188,57 +213,56 @@ plotVolcano<-function( #' @return A list with weighted and unweighted differential expression metrics #' @export #plot FC, weighted and unweighted. Designed for use with the output of projectionDriveRs -pdVolcano <- function(result, +pdVolcano <- function(result, FC = 0.2, pvalue = NULL, - subset = NULL, + subset = NULL, filter.inf = FALSE, - label.num = 5, - display = T) { - + label.num = 5L, + display = TRUE) { #if a genelist is provided, use them to subset the output of projectiondrivers - if(!is.null(subset)){ + if (!is.null(subset)) { #subset the mean_stats object by provided gene list - result$mean_stats <- result$mean_stats[(which(rownames(result$mean_stats) %in% subset)),] + result$mean_stats <- result$mean_stats[(which(rownames(result$mean_stats) %in% subset)), ] #subset the weighted_mean_stats object by provided gene list - result$weighted_mean_stats <- result$weighted_mean_stats[(which(rownames(result$weighted_mean_stats) %in% subset)),] - + result$weighted_mean_stats <- result$weighted_mean_stats[(which(rownames(result$weighted_mean_stats) %in% subset)), ] + } - - if(filter.inf == TRUE){ + + if (filter.inf == TRUE) { #remove p values below the machine limit representation for plotting purposes - cat("Filtering", length(which(result$mean_stats$welch_padj <= .Machine$double.xmin)),"unweighted genes and", + cat("Filtering", length(which(result$mean_stats$welch_padj <= .Machine$double.xmin)), "unweighted genes and", length(which(result$weighted_mean_stats$welch_padj <= .Machine$double.xmin)), "weighted genes", "\n") result$mean_stats <- subset(result$mean_stats, welch_padj > .Machine$double.xmin) result$weighted_mean_stats <- subset(result$weighted_mean_stats, welch_padj > .Machine$double.xmin) } - - if(is.numeric(FC) == FALSE){ + + if (is.numeric(FC) == FALSE) { stop('FC must be a number') } - - if(is.null(pvalue) == FALSE) { - message('Updating sig_genes...') + + if (is.null(pvalue) == FALSE) { + message("Updating sig_genes...") #update previously stored pvalue pvalue <- pvalue result$meta_data$pvalue <- pvalue #update sig_genes with new pvalue #recreate vector of significant genes from weighted and unweighted tests - weighted_PV_sig <- rownames(result$weighted_mean_stats[which(result$weighted_mean_stats$welch_padj <= pvalue),]) - PV_sig <- rownames(result$mean_stats[which(result$mean_stats$welch_padj <= pvalue),]) + weighted_PV_sig <- rownames(result$weighted_mean_stats[which(result$weighted_mean_stats$welch_padj <= pvalue), ]) + PV_sig <- rownames(result$mean_stats[which(result$mean_stats$welch_padj <= pvalue), ]) #create vector of significant genes shared between weighted and unweighted tests shared_genes_PV <- base::intersect( PV_sig, weighted_PV_sig) - result$sig_genes <- list(PV_sig = PV_sig, + result$sig_genes <- list(PV_sig = PV_sig, weighted_PV_sig = weighted_PV_sig, PV_significant_shared_genes = shared_genes_PV) } else { pvalue <- result$meta_data$pvalue } - + #extract object meta data metadata <- result$meta_data - + #volcano plotting unweighted #extract unweighted confidence intervals / statistics mean_stats <- result$mean_stats @@ -247,48 +271,60 @@ pdVolcano <- function(result, mean_stats$Color[mean_stats$welch_padj < pvalue & mean_stats$mean_diff > FC] <- paste("Enriched in", metadata$test_matrix) mean_stats$Color[mean_stats$welch_padj < pvalue & mean_stats$mean_diff < -FC] <- paste("Enriched in", metadata$reference_matrix) mean_stats$Color <- factor(mean_stats$Color, - levels = c(paste("NS or FC <", FC), paste("Enriched in", metadata$reference_matrix), paste("Enriched in", metadata$test_matrix))) - + levels = c(paste("NS or FC <", FC), + paste("Enriched in", metadata$reference_matrix), + paste("Enriched in", metadata$test_matrix))) + #label the most significant genes for enrichment mean_stats$invert_P <- (-log10(mean_stats$welch_padj)) * (mean_stats$mean_diff) - - top_indices <- order(mean_stats$invert_P, decreasing = TRUE)[1:label.num] - bottom_indices <- order(mean_stats$invert_P)[1:label.num] - + + top_indices <- order(mean_stats$invert_P, decreasing = TRUE)[1L:label.num] + bottom_indices <- order(mean_stats$invert_P)[1L:label.num] + # Add labels to the dataframe mean_stats$label <- NA mean_stats$label[top_indices] <- paste(rownames(mean_stats)[top_indices]) mean_stats$label[bottom_indices] <- paste(rownames(mean_stats)[bottom_indices]) #unweighted volcano plot - unweightedvolcano <- plotVolcano(stats = mean_stats, metadata = metadata, FC = FC, pvalue = pvalue, title = "Differential Enrichment") + unweightedvolcano <- plotVolcano(stats = mean_stats, + metadata = metadata, + FC = FC, + pvalue = pvalue, + title = "Differential Enrichment") #weighted volcano plot weighted_mean_stats <- result$weighted_mean_stats weighted_mean_stats$Color <- paste("NS or FC <", FC) weighted_mean_stats$Color[weighted_mean_stats$welch_padj < pvalue & weighted_mean_stats$mean_diff > FC] <- paste("Enriched in", metadata$test_matrix) weighted_mean_stats$Color[weighted_mean_stats$welch_padj < pvalue & weighted_mean_stats$mean_diff < -FC] <- paste("Enriched in", metadata$reference_matrix) weighted_mean_stats$Color <- factor(weighted_mean_stats$Color, - levels = c(paste("NS or FC <", FC), paste("Enriched in", metadata$reference_matrix), paste("Enriched in", metadata$test_matrix))) - + levels = c(paste("NS or FC <", FC), + paste("Enriched in", metadata$reference_matrix), + paste("Enriched in", metadata$test_matrix))) + weighted_mean_stats$invert_P <- (-log10(weighted_mean_stats$welch_padj)) * (weighted_mean_stats$mean_diff) - - - top_indices_w <- order(weighted_mean_stats$invert_P, decreasing = TRUE)[1:label.num] - bottom_indices_w <- order(weighted_mean_stats$invert_P)[1:label.num] - + + top_indices_w <- order(weighted_mean_stats$invert_P, decreasing = TRUE)[1L:label.num] + bottom_indices_w <- order(weighted_mean_stats$invert_P)[1L:label.num] + # Add labels to the dataframe weighted_mean_stats$label <- NA weighted_mean_stats$label[top_indices_w] <- paste(rownames(weighted_mean_stats)[top_indices_w]) weighted_mean_stats$label[bottom_indices_w] <- paste(rownames(weighted_mean_stats)[bottom_indices_w]) - + #weighted volcano plot - weightedvolcano <- plotVolcano(stats = weighted_mean_stats, FC = FC, pvalue = pvalue, title = "Weighted Differential Enrichment") + weightedvolcano <- plotVolcano(stats = weighted_mean_stats, + FC = FC, + pvalue = pvalue, + title = "Weighted Differential Enrichment") #return a list of genes that can be used as input to fgsea - difexdf <- subset(mean_stats, Color == paste("Enriched in", metadata$reference_matrix) | Color == paste("Enriched in", metadata$test_matrix)) + difexdf <- subset(mean_stats, + Color == paste("Enriched in", metadata$reference_matrix) | Color == paste("Enriched in", metadata$test_matrix)) vec <- difexdf$estimate names(vec) <- rownames(difexdf) - - weighted_difexdf <- subset(weighted_mean_stats, Color == paste("Enriched in", metadata$reference_matrix) | Color == paste("Enriched in", metadata$test_matrix)) + + weighted_difexdf <- subset(weighted_mean_stats, + Color == paste("Enriched in", metadata$reference_matrix) | Color == paste("Enriched in", metadata$test_matrix)) weighted_vec <- weighted_difexdf$estimate names(weighted_vec) <- rownames(weighted_difexdf) names(vec) <- rownames(difexdf) @@ -301,19 +337,23 @@ pdVolcano <- function(result, fgseavecs = list(unweightedvec = vec, weightedvec = weighted_vec), meta_data = metadata, - plt = list(differential_expression = unweightedvolcano, + plt = list(differential_expression = unweightedvolcano, weighted_differential_expression = weightedvolcano)) - if(display == TRUE){ + if (display == TRUE) { #print volcano plots - pltgrid <- cowplot::plot_grid(vol_result$plt$differential_expression + theme(legend.position = "none"), - vol_result$plt$weighted_differential_expression + theme(legend.position = "none"), - ncol = 2, align = "h") - legend <- cowplot::get_legend(vol_result$plt$differential_expression + guides(color = guide_legend(nrow = 1)) +theme(legend.position = "bottom")) - plt <- cowplot::plot_grid(pltgrid, legend, ncol = 1, rel_heights = c(1, .1)) + pltgrid <- cowplot::plot_grid(vol_result$plt$differential_expression + + theme(legend.position = "none"), + vol_result$plt$weighted_differential_expression + + theme(legend.position = "none"), + ncol = 2L, align = "h") + legend <- cowplot::get_legend(vol_result$plt$differential_expression + + guides(color = guide_legend(nrow = 1L)) + + theme(legend.position = "bottom")) + plt <- cowplot::plot_grid(pltgrid, + legend, + ncol = 1L, + rel_heights = c(1.0, 0.1)) print(plt) } return(vol_result) } - - - From bc1d94834f267f53182b01cb01d4e156b9c386ff Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Fri, 16 Feb 2024 13:02:25 -0500 Subject: [PATCH 25/33] Style updates Incorporated some lintr suggestions Added display options for volcano plots --- R/projectionDriveRfun.R | 250 +++++++++++++++++++++------------------- 1 file changed, 129 insertions(+), 121 deletions(-) diff --git a/R/projectionDriveRfun.R b/R/projectionDriveRfun.R index 717c0dd..91dd4e2 100644 --- a/R/projectionDriveRfun.R +++ b/R/projectionDriveRfun.R @@ -10,13 +10,13 @@ #' @importFrom stats var #' @importFrom ggrepel geom_label_repel #' @import dplyr -bonferronicorrecteddifferences <- function( +bonferroniCorrectedDifferences <- function( group1, group2, pvalue, diff_weights = NULL, mode = "CI") { - if (!(dim(group1)[[1L]] == dim(group2)[[1L]])) { + if (!(dim(group1)[1L] == dim(group2)[1L])) { #if passed from projectionDrivers, cellgroups will have the same rows stop("Rows of two cell group matrices are not identical") } @@ -26,8 +26,8 @@ bonferronicorrecteddifferences <- function( } ##Take means over all genes and calculate differences - group1_mean <- rowMeans(group1) - group2_mean <- rowMeans(group2) + group1_mean <- apply(group1, 1L, mean) + group2_mean <- apply(group2, 1L, mean) mean_diff <- group1_mean - group2_mean @@ -46,8 +46,8 @@ bonferronicorrecteddifferences <- function( #calculate confidence intervals dimensionality <- length(mean_diff) #number of measurements (genes) - n1_samples <- dim(group1)[[2L]] #number of samples (cells) - n2_samples <- dim(group2)[[2L]] + n1_samples <- dim(group1)[2L] #number of samples (cells) + n2_samples <- dim(group2)[2L] bon_correct <- pvalue / (2L * dimensionality) #bonferroni correction qval <- 1L - bon_correct @@ -56,16 +56,11 @@ bonferronicorrecteddifferences <- function( group1_var <- apply(group1, 1L, var) #variance of genes across group 1 group2_var <- apply(group2, 1L, var) #variance of genes across group 2 - - if (mode == "CI") { #pooled standard deviation - pool <- ((n1_samples - 1L) * group1_var + (n2_samples - 1L) * group2_var) / - (n1_samples + n2_samples - 2L) - plusminus <- data.frame(low = mean_diff - tval * - sqrt(pool * (1L / n1_samples + 1L / n2_samples)), - high = mean_diff + tval * - sqrt(pool * (1L / n1_samples + 1L / n2_samples)), + pool <- ((n1_samples - 1L) * group1_var + (n2_samples - 1L) * group2_var) / (n1_samples + n2_samples - 2L) + plusminus <- data.frame(low = mean_diff - tval * sqrt(pool * (1L / n1_samples + 1L / n2_samples)), + high = mean_diff + tval * sqrt(pool * (1L / n1_samples + 1L / n2_samples)), gene = names(mean_diff)) rownames(plusminus) <- names(mean_diff) @@ -86,12 +81,10 @@ bonferronicorrecteddifferences <- function( method = "bonferroni", n = dimensionality) #replace p values equal to zero with the smallest machine value possible - if (min(welch_padj, na.rm=TRUE) <= .Machine[[double.xmin]]) { - zp <- length(which(welch_padj <= .Machine[[double.xmin]])) - warning(zp, " P value(s) equal 0. Converting values less than ", - .Machine[[double.xmin]], " to minimum possible value...", - call. = FALSE) - welch_padj[welch_padj <= .Machine[[double.xmin]]] <- .Machine[[double.xmin]] + if (min(welch_padj, na.rm=TRUE) <= .Machine$double.xmin) { + zp <- length(which(welch_padj <= .Machine$double.xmin)) + message(zp, " P value(s) equal 0. Converting values less than ", .Machine$double.xmin, " to minimum possible value...", call. = FALSE) + welch_padj[welch_padj <= .Machine$double.xmin] <- .Machine$double.xmin } plusminus <- data.frame( ref_mean = group2_mean, @@ -124,159 +117,170 @@ bonferronicorrecteddifferences <- function( #' @param display boolean. Whether or not to display confidence intervals #' @param normalize_pattern Boolean. Whether or not to normalize pattern weights #' @param mode statistical approach, confidence intervals or pvalues. default CI -#' @return A list with unweighted/weighted mean differences and differential +#' @return A list with unweighted/weighted mean differences and differential #' genes that meet the provided signficance threshold. #' @export #' #' -projectionDriveR<-function( +projectionDriveR <- function( cellgroup1, #gene x cell count matrix for cell group 1 cellgroup2, #gene x cell count matrix for cell group 2 loadings, # a matrix of continous values to be projected with unique rownames - loadingsNames = NULL, # a vector with names of loadings rows pattern_name, + loadingsNames = NULL, # a vector with names of loadings rows pvalue = 1e-5, display = TRUE, normalize_pattern = TRUE, mode = "CI" -){ - - #Count matrices can be class matrix, data.frame, sparse.matrix, ... anything that is coercible by as.matrix() - +) { + message("Mode: ", mode) + #Count matrices can be anything that is coercible by as.matrix() #check that alpha significance level is appropriate - if(pvalue <= 0 | pvalue >= 1){ + if (pvalue <= 0L || pvalue >= 1L) { stop("pvalue must be numeric between 0 and 1") } - + #Make sure provided pattern string is a character vector of length one - if(length(pattern_name) != 1 | !is.character(pattern_name)){ + if (length(pattern_name) != 1L || !is.character(pattern_name)) { stop("provided pattern_name must be a character vector of length one") } - + #set loadings rownames if provided - if(!is.null(loadingsNames)){ + if (!is.null(loadingsNames)) { rownames(loadings) <- loadingsNames } - + #pattern weights must be formatted as a matrix for normalization - if(pattern_name %in% colnames(loadings)){ - pattern <- loadings[,pattern_name, drop = F] #data.frame + if (pattern_name %in% colnames(loadings)) { + pattern <- loadings[, pattern_name, drop = FALSE] #data.frame pattern <- Matrix::as.matrix(pattern) - } else { - stop(paste0("Provided pattern_name ",pattern_name, " is not a column in provided loadings")) + } else { + stop("Provided pattern_name ", pattern_name, " is not a column in provided loadings") } - message("Mode: ",mode) + #extract names of data objects group1name <- deparse(substitute(cellgroup1)) group2name <- deparse(substitute(cellgroup2)) - - #Filter the two count matrices and the pattern weights to include the intersection of their features + + #Filter the two count matrices and the pattern weights to include + #the intersection of their features #shared rows in two data matrices - filtered_data <-geneMatchR(data1=cellgroup1, data2=cellgroup2, data1Names=NULL, data2Names=NULL, merge=FALSE) - message(as.character(dim(filtered_data[[2]])[1]),' row names matched between datasets') - - cellgroup1 <- filtered_data[[2]] #geneMatchR flips the indexes - cellgroup2 <- filtered_data[[1]] - - + filtered_data <- geneMatchR(data1 = cellgroup1, + data2 = cellgroup2, + data1Names = NULL, + data2Names = NULL, + merge = FALSE) + message(as.character(dim(filtered_data[[2L]])[1L]), + " row names matched between datasets") + + cellgroup1 <- filtered_data[[2L]] #geneMatchR flips the indexes + cellgroup2 <- filtered_data[[1L]] + #shared rows in data matrices and loadings - filtered_weights <- geneMatchR(data1 = cellgroup1, data2 = pattern, data1Names = NULL, data2Names = NULL, merge = F) - dimensionality_final <- dim(filtered_weights[[2]])[1] - - message('Updated dimension of data: ',as.character(paste(dimensionality_final, collapse = ' '))) - - if(dimensionality_final == 0){ + filtered_weights <- geneMatchR(data1 = cellgroup1, + data2 = pattern, + data1Names = NULL, + data2Names = NULL, + merge = FALSE) + + dimensionality_final <- dim(filtered_weights[[2L]])[1L] + + message("Updated dimension of data: ", + as.character(paste(dimensionality_final, collapse = " "))) + + if (dimensionality_final == 0L) { stop("No features matched by rownames of count matrix and rownames of loadings") } - - pattern_filtered <- filtered_weights[[1]] - - cellgroup1_filtered <- filtered_weights[[2]] + + pattern_filtered <- filtered_weights[[1L]] + + cellgroup1_filtered <- filtered_weights[[2L]] #do second filtering on other cell group so all genes are consistent - cellgroup2_filtered <- cellgroup2[rownames(cellgroup1_filtered),] - - + cellgroup2_filtered <- cellgroup2[rownames(cellgroup1_filtered), ] + + #normalize pattern weights - if(normalize_pattern){ - weight_norm <- norm(pattern_filtered) #square of sums of squares (sum for all positive values) - num_nonzero <- sum(pattern_filtered > 0) #number of nonzero weights + if (normalize_pattern) { + weight_norm <- norm(pattern_filtered) #square of sums of squares + num_nonzero <- sum(pattern_filtered > 0L) #number of nonzero weights pattern_filtered <- pattern_filtered * num_nonzero / weight_norm } - #cast feature weights to a named vector - pattern_normalized_vec <- pattern_filtered[,1] + pattern_normalized_vec <- pattern_filtered[, 1L] names(pattern_normalized_vec) <- rownames(pattern_filtered) - + #weighted confidence intervals of differences in cluster means - weighted_drivers_bonferroni <- bonferronicorrecteddifferences(group1 = cellgroup1_filtered, - group2 = cellgroup2_filtered, - diff_weights = pattern_normalized_vec, - pvalue = pvalue, - mode = mode) + weighted_bonferroni <- bonferroniCorrectedDifferences( + group1 = cellgroup1_filtered, + group2 = cellgroup2_filtered, + diff_weights = pattern_normalized_vec, + pvalue = pvalue, + mode = mode) #unweighted confidence intervals of difference in cluster means - mean_bonferroni <- bonferronicorrecteddifferences(group1 = cellgroup1_filtered, - group2 = cellgroup2_filtered, - diff_weights = NULL, - pvalue = pvalue, - mode = mode) -#generate confidence interval mode - if(mode == "CI"){ - #Determine which genes have significantly non-zero mean difference and weighted mean difference - #significant - weighted_sig_idx <- apply(weighted_drivers_bonferroni[,1:2], 1, function(interval){ - (interval[1] > 0 & interval[2] > 0) | (interval[1] < 0 & interval[2] < 0) + mean_bonferroni <- bonferroniCorrectedDifferences( + group1 = cellgroup1_filtered, + group2 = cellgroup2_filtered, + diff_weights = NULL, + pvalue = pvalue, + mode = mode) +#generate confidence interval mode + if (mode == "CI") { + #Determine which genes have unweighted/ weighted mean difference + weighted_sig_idx <- apply(weighted_bonferroni[, 1L:2L], 1L, function(interval) { + (interval[1L] > 0L & interval[2L] > 0L) | (interval[1L] < 0L & interval[2L] < 0L) }) - - mean_sig_idx <- apply(mean_bonferroni[,1:2], 1, function(interval){ - (interval[1] > 0 & interval[2] > 0) | (interval[1] < 0 & interval[2] < 0) + + mean_sig_idx <- apply(mean_bonferroni[, 1L:2L], 1L, function(interval) { + (interval[1L] > 0L & interval[2L] > 0L) | (interval[1] < 0L & interval[2L] < 0L) }) - - weighted_sig_genes <- weighted_drivers_bonferroni[weighted_sig_idx,] + + weighted_sig_genes <- weighted_bonferroni[weighted_sig_idx,] unweighted_sig_genes <- mean_bonferroni[mean_sig_idx,] #genes that are collectively either up or down shared_genes <- base::intersect( - rownames(weighted_drivers_bonferroni)[weighted_sig_idx], + rownames(weighted_bonferroni)[weighted_sig_idx], rownames(mean_bonferroni)[mean_sig_idx]) - cat("the length of shared genes are:", length(shared_genes), '\n') - conf_intervals <- mean_bonferroni[shared_genes,] + message("the length of shared genes are: ", length(shared_genes)) + conf_intervals <- mean_bonferroni[shared_genes, ] sig_weights <- pattern_normalized_vec[shared_genes] - - weighted_conf_intervals <- weighted_drivers_bonferroni[shared_genes,] + + weighted_conf_intervals <- weighted_bonferroni[shared_genes, ] #create confidence interval plot (unweighted) pl <- plotConfidenceIntervals(conf_intervals, weights = sig_weights, pattern_name = pattern_name, - weighted = F) + weighted = FALSE) #weighted pl_w <- plotConfidenceIntervals(weighted_conf_intervals, weights = sig_weights, pattern_name = pattern_name, - weighted = T) - + weighted = TRUE) + plots <- list(unweighted = pl,weighted = pl_w) - if(display){ + if (display) { #print confidence interval pointrange plot pl1_u <- (cowplot::plot_grid(pl[["ci_estimates_plot"]], pl[["weights_heatmap"]], - ncol = 2, + ncol = 2L, align = "h", - rel_widths = c(1,.3))) + rel_widths = c(1.0,0.3))) pl2_w <- (cowplot::plot_grid(pl_w[["ci_estimates_plot"]], pl_w[["weights_heatmap"]], - ncol = 2, + ncol = 2L, align = "h", - rel_widths = c(1,.3))) - plt <- cowplot::plot_grid(pl1_u, pl2_w, ncol = 2, align = "h") + rel_widths = c(1.0,0.3))) + plt <- cowplot::plot_grid(pl1_u, pl2_w, ncol = 2L, align = "h") print(plt) } - - if(length(shared_genes) == 0){ + + if (length(shared_genes) == 0) { #no genes were significant. Return info we have and skip plotting. - warning("No features (and weighted features) were significantly differentially used between the two groups", call. = FALSE) + warning("No features were significantly differentially used", + call. = FALSE) result <- list(mean_ci = mean_bonferroni, - weighted_mean_ci = weighted_drivers_bonferroni, + weighted_mean_ci = weighted_bonferroni, normalized_weights = pattern_normalized_vec, significant_shared_genes = shared_genes, plotted_ci = NULL, @@ -285,10 +289,10 @@ projectionDriveR<-function( ) return(result) } - + result <- list( mean_ci = mean_bonferroni, - weighted_mean_ci = weighted_drivers_bonferroni, + weighted_mean_ci = weighted_bonferroni, normalized_weights = pattern_normalized_vec, sig_genes = list(unweighted_sig_genes = rownames(unweighted_sig_genes), weighted_sig_genes = rownames(weighted_sig_genes), @@ -297,18 +301,19 @@ projectionDriveR<-function( meta_data = list(reference_matrix = paste0(group2name), test_matrix = paste0(group1name)) ) - } else if (mode == "PV"){ + } else if (mode == "PV") { #create vector of significant genes from weighted and unweighted tests - weighted_PV_sig <- rownames(weighted_drivers_bonferroni[which(weighted_drivers_bonferroni$welch_padj <= pvalue),]) + weighted_PV_sig <- rownames(weighted_bonferroni[which(weighted_bonferroni$welch_padj <= pvalue),]) PV_sig <- rownames(mean_bonferroni[which(mean_bonferroni$welch_padj <= pvalue),]) #create vector of significant genes shared between weighted and unweighted tests shared_genes_PV <- base::intersect( PV_sig, weighted_PV_sig) - if(length(shared_genes_PV) == 0){ + if (length(shared_genes_PV) == 0L){ #no genes were significant. Return info we have and skip plotting. - warning("No features (and weighted features) were significantly differentially used between the two groups", call. = FALSE) + warning("No features were significantly differentially used ", + call. = FALSE) result <- list(mean_stats = mean_bonferroni, - weighted_mean_stats = weighted_drivers_bonferroni, + weighted_mean_stats = weighted_bonferroni, normalized_weights = pattern_normalized_vec, meta_data = list(reference_matrix = paste0(group2name), test_matrix = paste0(group1name), @@ -317,9 +322,9 @@ projectionDriveR<-function( return(result) } result <- list(mean_stats = mean_bonferroni, - weighted_mean_stats = weighted_drivers_bonferroni, + weighted_mean_stats = weighted_bonferroni, normalized_weights = pattern_normalized_vec, - sig_genes = list(PV_sig = PV_sig, + sig_genes = list(PV_sig = PV_sig, weighted_PV_sig = weighted_PV_sig, PV_significant_shared_genes = shared_genes_PV), meta_data = list(reference_matrix = paste0(group2name), @@ -328,13 +333,17 @@ projectionDriveR<-function( ) #apply pdVolcano function to result result <- pdVolcano(result, display = FALSE) - if(display){ + if (display) { #print volcano plots - pltgrid <- cowplot::plot_grid(result$plt$differential_expression + theme(legend.position = "none"), - result$plt$weighted_differential_expression + theme(legend.position = "none"), - ncol = 2, align = "h") - legend <- cowplot::get_legend(result$plt$differential_expression + guides(color = guide_legend(nrow = 1)) +theme(legend.position = "bottom")) - plt <- cowplot::plot_grid(pltgrid, legend, ncol = 1, rel_heights = c(1, .1)) + pltgrid <- cowplot::plot_grid(result$plt$differential_expression + + theme(legend.position = "none"), + result$plt$weighted_differential_expression + + theme(legend.position = "none"), + ncol = 2L, align = "h") + legend <- cowplot::get_legend(result$plt$differential_expression + + guides(color = guide_legend(nrow = 1L)) + + theme(legend.position = "bottom")) + plt <- cowplot::plot_grid(pltgrid, legend, ncol = 1L, rel_heights = c(1.0, 0.1)) print(plt) } } else { @@ -342,4 +351,3 @@ projectionDriveR<-function( } return(result) } - From 2f96c605ccdd00aaf6c6b8f5db0566b6ece888c2 Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Fri, 16 Feb 2024 13:02:58 -0500 Subject: [PATCH 26/33] Update examples Added examples for PV mode projectionDrivers Added fgsea examples for pdVolcano function --- vignettes/projectR.Rmd | 1 - 1 file changed, 1 deletion(-) diff --git a/vignettes/projectR.Rmd b/vignettes/projectR.Rmd index dcc4b98..5aadaed 100644 --- a/vignettes/projectR.Rmd +++ b/vignettes/projectR.Rmd @@ -491,7 +491,6 @@ pl2 <- plots_list[["weights_heatmap"]] weighted_conf_intervals <- drivers_ci$weighted_mean_ci[gene_order,] plots_list_weighted <- plotConfidenceIntervals(weighted_conf_intervals, sort = F, - pattern_name = pattern_to_weight, weighted = T) pl3 <- plots_list_weighted[["ci_estimates_plot"]] + From 55167e0c91efd9e4191e271d7e05e09a53ecd7ac Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Fri, 16 Feb 2024 13:57:42 -0500 Subject: [PATCH 27/33] Update plotting.R --- R/plotting.R | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/R/plotting.R b/R/plotting.R index 8258343..763dea9 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -213,13 +213,17 @@ plotVolcano <- function( #' @return A list with weighted and unweighted differential expression metrics #' @export #plot FC, weighted and unweighted. Designed for use with the output of projectionDriveRs -pdVolcano <- function(result, - FC = 0.2, - pvalue = NULL, - subset = NULL, - filter.inf = FALSE, - label.num = 5L, - display = TRUE) { +pdVolcano <- function( + result, + FC = 0.2, + pvalue = NULL, + subset = NULL, + filter.inf = FALSE, + label.num = 5L, + display = TRUE) { + + #bind local variables + welch_padj <- Color <- NULL #if a genelist is provided, use them to subset the output of projectiondrivers if (!is.null(subset)) { #subset the mean_stats object by provided gene list From 8e731b223559e9cfaa857899417a5c272a4cf738 Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Tue, 20 Feb 2024 16:09:39 -0500 Subject: [PATCH 28/33] Update projectR.html Knit rmd using projectiondriver branch. Should be rerun after merge --- vignettes/projectR.html | 198 +++++++++++++++++++++++++++++----------- 1 file changed, 147 insertions(+), 51 deletions(-) diff --git a/vignettes/projectR.html b/vignettes/projectR.html index f2b78b9..e85231e 100644 --- a/vignettes/projectR.html +++ b/vignettes/projectR.html @@ -15,7 +15,7 @@ - + projectR Vignette @@ -249,6 +249,7 @@ div.csl-bib-body { } div.csl-entry { clear: both; +margin-bottom: 0em; } .hanging div.csl-entry { margin-left:2em; @@ -729,7 +730,7 @@

projectR Vignette

Gaurav Sharma, Charles Shin, Jared N. Slosberg, Loyal A. Goff and Genevieve L. Stein-O'Brien

-

17 January 2024

+

20 February 2024

@@ -795,12 +796,12 @@

Contents

  • 7.2.1 Input
  • 7.2.2 Output
  • 7.2.3 Customize plotting of confidence intervals
  • - -
  • 7.3 multivariateAnalysisR -
  • References
  • @@ -820,7 +821,7 @@

    2.1 Installation Instructions

    2.2 Methods

    -

    Projection can roughly be defined as a mapping or transformation of points from one space to another often lower dimensional space. Mathematically, this can described as a function \(\varphi(x)=y : \Re^{D} \mapsto \Re^{d}\) s.t. \(d \leq D\) for \(x \in \Re^{D}, y \in \Re^{d}\) Barbakh, Wu, and Fyfe (2009) . The projectR package uses projection functions defined in a training dataset to interrogate related biological phenomena in an entirely new data set. These functions can be the product of any one of several methods common to “omic” analyses including regression, PCA, NMF, clustering. Individual sections focusing on one specific method are included in the vignette. However, the general design of the projectR function is the same regardless.

    +

    Projection can roughly be defined as a mapping or transformation of points from one space to another often lower dimensional space. Mathematically, this can described as a function \(\varphi(x)=y : \Re^{D} \mapsto \Re^{d}\) s.t. \(d \leq D\) for \(x \in \Re^{D}, y \in \Re^{d}\) Barbakh, Wu, and Fyfe (2009) . The projectR package uses projection functions defined in a training dataset to interrogate related biological phenomena in an entirely new data set. These functions can be the product of any one of several methods common to “omic” analyses including regression, PCA, NMF, clustering. Individual sections focusing on one specific method are included in the vignette. However, the general design of the projectR function is the same regardless.

    2.3 The base projectR function

    @@ -851,7 +852,7 @@

    3 PCA projection

    3.1 Obtaining PCs to project.

    # data to define PCs
    -library(projectR)
    +#library(projectR)
     data(p.RNAseq6l3c3t)
     
     # do PCA on RNAseq6l3c3t expression data
    @@ -884,7 +885,7 @@ 

    3.2 Projecting prcomp objects

    # data to project into PCs from RNAseq6l3c3t expression data data(p.ESepiGen4c1l) -library(projectR) +#library(projectR) PCA2ESepi <- projectR(data = p.ESepiGen4c1l$mRNA.Seq,loadings=pc.RNAseq6l3c3t, full=TRUE, dataNames=map.ESepiGen4c1l[["GeneSymbols"]])
    ## [1] "93 row names matched between data and loadings"
    @@ -921,7 +922,7 @@ 

    4 NMF projection

    \end{equation}\] The number of rows in \({\bf{P}}\) (columns in \({\bf{A}}\)) defines the number of biological patterns (k) that CoGAPS/GWCoGAPS will infer from the number of nonorthogonal basis vectors required to span the data space. As in the Bayesian Decomposition algorithm Wang, Kossenkov, and Ochs (2006), the matrices \({\bf{A}}\) and \({\bf{P}}\) in CoGAPS are assumed to have the atomic prior described in Sibisi and Skilling (1997). In the CoGAPS/GWCoGAPS implementation, \(\alpha_{A}\) and \(\alpha_{P}\) are corresponding parameters for the expected number of atoms which map to each matrix element in \({\bf{A}}\) and \({\bf{P}}\), respectively. The corresponding matrices \({\bf{A}}\) and \({\bf{P}}\) are found by MCMC sampling.

    Projection of CoGAPS/GWCoGAPS patterns is implemented by solving the factorization in (1) for the new data matrix where \({\bf{A}}\) is the fixed nonorthogonal basis vectors comprising the average of the posterior mean for the CoGAPS/GWCoGAPS simulations performed on the original data. The patterns \({\bf{P}}\) in the new data associated with this amplitude matrix is estimated using the least-squares fit to the new data implemented with the lmFit function in the limma package. The projectR function has S4 method for class Linear Embedding Matrix, LME.

    -
    library(projectR)
    +
    #library(projectR)
     projectR(data, loadings,dataNames = NULL, loadingsNames = NULL,
          NP = NA, full = FALSE)
    @@ -942,7 +943,7 @@

    4.0.2 Output

    4.1 Obtaining CoGAPS patterns to project.

    # get data
    -library(projectR)
    +#library(projectR)
     AP <- get(data("AP.RNAseq6l3c3t")) #CoGAPS run data
     AP <- AP$Amean
     # heatmap of gene weights for CoGAPs patterns
    @@ -958,12 +959,12 @@ 

    4.1 Obtaining CoGAPS patterns to cexCol=1,cexRow=.5,scale = "row", hclustfun=function(x) hclust(x, method="average") )

    -

    +

    4.2 Projecting CoGAPS objects

    # data to project into PCs from RNAseq6l3c3t expression data
    -library(projectR)
    +#library(projectR)
     data('p.ESepiGen4c1l4')
    ## Warning in data("p.ESepiGen4c1l4"): data set 'p.ESepiGen4c1l4' not found
    data('p.RNAseq6l3c3t')
    @@ -1077,7 +1078,7 @@ 

    6.1.2 Output

    6.2 Obtaining and visualizing correlateR objects.

    # data to
    -library(projectR)
    +#library(projectR)
     data("p.RNAseq6l3c3t")
     
     # get genes correlated to T
    @@ -1124,14 +1125,14 @@ 

    6.2 Obtaining and visualizing

    -

    +

    6.3 Projecting correlateR objects.

    # data to project into from RNAseq6l3c3t expression data
     data(p.ESepiGen4c1l)
     
    -library(projectR)
    +#library(projectR)
     cor2ESepi <- projectR(p.ESepiGen4c1l$mRNA.Seq,loadings=cor2T[[1]],full=FALSE,
         dataNames=map.ESepiGen4c1l$GeneSymbols)
    ## [1] "9 row names matched between data and loadings"
    @@ -1156,52 +1157,74 @@ 

    7.1.1 Input Arguments

    loadings Matrix or dataframe with features as rows, columns as patterns. Values define feature weights in that space
    loadingsNames Vector of names corresponding to rows of loadings. By default the rownames of loadings will be used
    pattern_name the column name of the loadings by which the features will be weighted
    -pvalue Determines the significance of the confidence interval to be calculated between the difference of means
    +pvalue Determines the significance of the confidence interval to be calculated between the difference of means. Default 1e-5 display Boolean. Whether or not to plot the estimates of significant features. Default = T
    normalize_pattern Boolean. Whether or not to normalize the average feature weight. Default = T
    mode ‘CI’ or ‘PV’. Specifies whether to run projectionDriveR in confidence interval mode or to generate p values. Default = “CI”

    7.1.2 Output

    -

    The output of projectionDriveR is a list of length five mean_ci holds the confidence intervals for the difference in means for all features, weighted_mean_ci holds the confidence intervals for the weighted difference in means for all features, and normalized_weights are the weights themselves. In addition, sig_genes is a list of three vectors of gene names that are significantly different at the threshold provided generated from the mean confidence intervals (unweighted_sig_genes), the weighted mean confidence intervals (weighted_sig_genes) and genes shared between the two (significant_shared_genes) . plotted_ci returns the ggplot figure of the confidence intervals, see plotConfidenceIntervals for documentation.

    +

    The output of projectionDriveR in confidence interval mode (‘CI’) is a list of length six mean_ci holds the confidence intervals for the difference in means for all features, weighted_mean_ci holds the confidence intervals for the weighted difference in means for all features, and normalized_weights are the weights themselves. In addition, sig_genes is a list of three vectors of gene names that are significantly different at the threshold provided generated from the mean confidence intervals (unweighted_sig_genes), the weighted mean confidence intervals (weighted_sig_genes) and genes shared between the two (significant_shared_genes) . plotted_ci returns the ggplot figure of the confidence intervals, see plotConfidenceIntervals for documentation. meta_data contains matrix names and pvalue thresholds. The output of projectionDriveR in p value mode (‘PV’) is a list of length nine. meta_data, sig_genes and normalized_weights are similar between modes. mean_stats and weighted_mean_stats contains summary information for welch t-tests. difexpgenes and weighted_difexpgenes are filtered dataframes containing differentially expressed genes at a FC and pvalue cut off of 0.2 and 1e-5 respectively. fgseavecs contain unweighted and weighted named vectors of welch-t test estimates that can be used with fgsea. plt returns the volcano ggplot figure. See pdVolcano for documentation. FC and pvalue can be manually altered by calling pdVolcano on projectionDriveR result.

    7.1.3 Identifying differential features associated with learned patterns

    options(width = 60)
    -library(projectR)
    +#library(projectR)
     library(dplyr, warn.conflicts = F)
     
    -#gene weights x pattern
    -data("retinal_patterns")
    -
     #size-normed, log expression
     data("microglial_counts")
     
     #size-normed, log expression
     data("glial_counts")
     
    +#5 pattern cogaps object generated on microglial_counts
    +data("cr_microglial")
    +microglial_fl <- cr_microglial@featureLoadings
    +
     #the features by which to weight the difference in expression 
    -pattern_to_weight <- "Pattern.24"
    -drivers <- projectionDriveR(microglial_counts, #expression matrix
    +pattern_to_weight <- "Pattern_1"
    +drivers_ci <- projectionDriveR(microglial_counts, #expression matrix
                                            glial_counts, #expression matrix
    -                                       loadings = retinal_patterns, #feature x pattern dataframe
    +                                       loadings = microglial_fl, #feature x pattern dataframe
                                            loadingsNames = NULL,
                                            pattern_name = pattern_to_weight, #column name
                                            pvalue = 1e-5, #pvalue before bonferroni correction
    -                                       display = F,
    +                                       display = T,
                                            normalize_pattern = T, #normalize feature weights
                                            mode = "CI") #confidence interval mode
    -
    ## [1] "Mode: CI"
    -## [1] "2996 row names matched between datasets"
    -## [1] "Updated dimension of data: 2996"
    -## the length of shared genes are: 253
    -
    conf_intervals <- drivers$mean_ci[drivers$sig_genes$significant_shared_genes,]
    +

    +
    conf_intervals <- drivers_ci$mean_ci[drivers_ci$sig_genes$significant_shared_genes,]
    +
     
     str(conf_intervals)
    -
    ## 'data.frame':    253 obs. of  3 variables:
    -##  $ low : num  1.86 0.158 -0.562 -0.756 0.155 ...
    -##  $ high: num  2.03943 0.26729 -0.00197 -0.18521 0.23239 ...
    -##  $ gene: chr  "ENSMUSG00000026126" "ENSMUSG00000025993" "ENSMUSG00000025959" "ENSMUSG00000045658" ...
    +
    ## 'data.frame':    330 obs. of  3 variables:
    +##  $ low : num  -1.009 0.102 1.86 -2.089 -0.791 ...
    +##  $ high: num  -0.35 0.356 2.039 -1.359 -0.28 ...
    +##  $ gene: chr  "ENSMUSG00000067879" "ENSMUSG00000026158" "ENSMUSG00000026126" "ENSMUSG00000060424" ...
    +
    drivers_pv <- projectionDriveR(microglial_counts, #expression matrix
    +                                       glial_counts, #expression matrix
    +                                       loadings = microglial_fl, #feature x pattern dataframe
    +                                       loadingsNames = NULL,
    +                                       pattern_name = pattern_to_weight, #column name
    +                                       pvalue = 1e-5, #pvalue before bonferroni correction
    +                                       display = T,
    +                                       normalize_pattern = T, #normalize feature weights
    +                                       mode = "PV") #confidence interval mode
    +

    +
    difexp <- drivers_pv$difexpgenes
    +str(difexp)
    +
    ## 'data.frame':    440 obs. of  10 variables:
    +##  $ ref_mean    : num  0.3193 0.7124 0.108 0.0145 1.9462 ...
    +##  $ test_mean   : num  0.0127 0.0331 0.3367 1.964 0.2223 ...
    +##  $ mean_diff   : num  -0.307 -0.679 0.229 1.949 -1.724 ...
    +##  $ estimate    : num  -27.46 -35.77 7.81 41.72 -51.84 ...
    +##  $ welch_pvalue: num  1.45e-150 2.97e-234 4.14e-14 1.06e-150 3.99e-253 ...
    +##  $ welch_padj  : num  4.36e-147 8.91e-231 1.24e-10 3.17e-147 1.20e-249 ...
    +##  $ gene        : chr  "ENSMUSG00000002459" "ENSMUSG00000067879" "ENSMUSG00000026158" "ENSMUSG00000026126" ...
    +##  $ Color       : Factor w/ 3 levels "NS or FC < 0.2",..: 2 2 3 3 2 2 3 2 2 2 ...
    +##  $ invert_P    : num  -44.87 -156.26 2.26 285.6 -429.14 ...
    +##  $ label       : chr  NA NA NA NA ...
    @@ -1240,7 +1263,7 @@

    7.2.3 Customize plotting of confi #the labels above can now be used as ggplot aesthetics plots_list <- plotConfidenceIntervals(conf_intervals, #mean difference in expression confidence intervals sort = F, #should genes be sorted by estimates - weights = drivers$normalized_weights[rownames(conf_intervals)], + weights = drivers_ci$normalized_weights[rownames(conf_intervals)], pattern_name = pattern_to_weight, weights_clip = 0.99, weights_vis_norm = "none") @@ -1251,10 +1274,9 @@

    7.2.3 Customize plotting of confi pl2 <- plots_list[["weights_heatmap"]] #now plot the weighted differences -weighted_conf_intervals <- drivers$weighted_mean_ci[gene_order,] +weighted_conf_intervals <- drivers_ci$weighted_mean_ci[gene_order,] plots_list_weighted <- plotConfidenceIntervals(weighted_conf_intervals, sort = F, - pattern_name = pattern_to_weight, weighted = T) pl3 <- plots_list_weighted[["ci_estimates_plot"]] + @@ -1262,13 +1284,86 @@

    7.2.3 Customize plotting of confi theme(axis.title.y = element_blank(), axis.ticks.y = element_blank(), axis.text.y = element_blank()) cowplot::plot_grid(pl1, pl2, pl3, align = "h", rel_widths = c(1,.4, 1), ncol = 3)

    -
    ## Warning: Removed 249 rows containing missing values
    +
    ## Warning: Removed 326 rows containing missing values
     ## (`geom_label_repel()`).
    -

    +

    +## pdVolcano

    +
    +

    7.2.4 Input

    +

    The arguments for pdVolcano are:

    +

    result Output from projectionDriveR function with PV mode selected +FC fold change threshold, default at 0.2 +pvalue significance threshold, default set to pvalue stored in projectionDriveR output +subset optional vector of gene names to subset the result by +filter.inf Boolean. If TRUE will remove genes that have pvalues below machine double minimum value
    +label.num number of genes to label on either end of volcano plot, default to 5 (10 total) +display Boolean. Default TRUE, will print volcano plots using cowplot grid_arrange

    -
    -

    7.3 multivariateAnalysisR

    +
    +

    7.2.5 Output

    +

    Generates the same output as projectionDriveR. Allows manual updates to pvalue and FC thresholds and can accept gene lists of interest to subset results.

    +
    +
    +

    7.2.6 Customize plotting of confidence intervals

    +
    suppressWarnings(library(cowplot))
    +library(fgsea)
    +library(msigdbr)
    +#manually change FC and pvalue threshold from defaults 0.2, 1e-5
    +drivers_pv_mod <- pdVolcano(drivers_pv, FC = 0.5, pvalue = 1e-7)
    +
    ## Updating sig_genes...
    +

    +
    difexp_mod <- drivers_pv_mod$difexpgenes
    +str(difexp_mod)
    +
    ## 'data.frame':    213 obs. of  10 variables:
    +##  $ ref_mean    : num  0.7124 0.0145 1.9462 0.6037 0.742 ...
    +##  $ test_mean   : num  0.0331 1.964 0.2223 0.0681 0.1416 ...
    +##  $ mean_diff   : num  -0.679 1.949 -1.724 -0.536 -0.6 ...
    +##  $ estimate    : num  -35.8 41.7 -51.8 -28.8 -23.5 ...
    +##  $ welch_pvalue: num  2.97e-234 1.06e-150 3.99e-253 5.10e-139 5.41e-92 ...
    +##  $ welch_padj  : num  8.91e-231 3.17e-147 1.20e-249 1.53e-135 1.62e-88 ...
    +##  $ gene        : chr  "ENSMUSG00000067879" "ENSMUSG00000026126" "ENSMUSG00000060424" "ENSMUSG00000045515" ...
    +##  $ Color       : Factor w/ 3 levels "NS or FC < 0.5",..: 2 3 2 2 2 3 2 2 2 2 ...
    +##  $ invert_P    : num  -156.3 285.6 -429.1 -72.2 -52.7 ...
    +##  $ label       : chr  NA NA NA NA ...
    +
    #fgsea application 
    +
    +#extract unweighted fgsea vector
    +fgseavec <- drivers_pv$fgseavecs$unweightedvec
    +#split into enrichment groups, negative estimates are enriched in the reference matrix (glial), positive are enriched in the test matrix (microglial), take abs value 
    +glial_fgsea_vec <- abs(fgseavec[which(fgseavec < 0)])
    +microglial_fgsea_vec <- abs(fgseavec[which(fgseavec > 0)])
    +
    +#get FGSEA pathways - selecting subcategory C8 for cell types
    +msigdbr_list =  msigdbr::msigdbr(species = "Mus musculus", category = "C8")
    +pathways = split(x = msigdbr_list$ensembl_gene, f = msigdbr_list$gs_name)
    +
    +#run fgsea scoreType positive, all values will be positive
    +glial_fgsea <- fgsea::fgsea(pathways, glial_fgsea_vec, scoreType = "pos")
    +#tidy 
    +glial_fgseaResTidy <- glial_fgsea %>% subset(padj <= 0.05 & size >= 10 & size <= 500) %>%
    +    as_tibble() %>%
    +    dplyr::arrange(desc(size))
    +#plot 
    +glial_EnrichmentPlot <- ggplot2::ggplot(glial_fgseaResTidy, ggplot2::aes(reorder(pathway, NES), NES)) + ggplot2::geom_point(aes(size=size, color = padj)) + 
    +coord_flip() + 
    +labs(x="Pathway", y="Normalized Enrichment Score", title="Pathway NES from GSEA") + 
    +theme_minimal()
    +glial_EnrichmentPlot
    +

    +
    microglial_fgsea <- fgsea::fgsea(pathways, microglial_fgsea_vec, scoreType = "pos")
    +#tidy 
    +microglial_fgseaResTidy <- microglial_fgsea %>% subset(padj <= 0.05 & size >= 10 & size <= 500) %>%
    +    as_tibble() %>%
    +    dplyr::arrange(desc(size))
    +
    +microglial_EnrichmentPlot <- ggplot2::ggplot(microglial_fgseaResTidy, ggplot2::aes(reorder(pathway, NES), NES)) + ggplot2::geom_point(aes(size=size, color = padj)) + 
    +coord_flip() + 
    +labs(x="Pathway", y="Normalized Enrichment Score", title="Pathway NES from GSEA") + 
    +theme_minimal()
    +microglial_EnrichmentPlot
    +

    +## multivariateAnalysisR

    This function performs multivariate analysis on different clusters within a dataset, which in this case is restricted to Seurat Object. Clusters are identified as those satisfying the conditions specified in their corresponding dictionaries. multivariateAnalysisR performs two tests: Analysis of Variance (ANOVA) and Confidence Interval (CI) evaluations. ANOVA is performed to understand the general differentiation between clusters through the lens of a specified pattern. CI is to visualize pair-wise differential expressions between two clusters for each pattern. Researchers can visually understand both the macroscopic and microscopic differential uses of each pattern across different clusters through this function.

    library(projectR)
     multivariateAnalysisR <- function(significanceLevel = 0.05, patternKeys, seuratobj,
    @@ -1276,8 +1371,9 @@ 

    7.3 multivariateAnalysisR

    exportFolder = "", ANOVAwidth = 1000, ANOVAheight = 1000, CIwidth = 1000, CIheight = 1000, CIspacing = 1)
    -
    -

    7.3.1 Input Arguments

    +
    +
    +

    7.2.7 Input Arguments

    The required inputs are patternKeys (list of strings indicating the patterns to be evaluated), seuratobj (the Seurat Object data containing both clusters and patterns), and dictionaries (list of dictionary where each dictionary indicates the conditions each corresponding cluster has to satisfy).

    The arguments for multivariateAnalysisR are:

    significanceLevel Double value for testing significance in ANOVA test.
    @@ -1293,19 +1389,19 @@

    7.3.1 Input Arguments

    CIheight Height of CI png. CIspacing Spacing between each CI in CI graph.

    -
    -

    7.3.2 Output

    +
    +

    7.2.8 Output

    multivariateAnalysisR returns a sorted list of the generated ANOVA and CI values. It also exports two pairs of exported PNG/CSV files: one for ANOVA analysis, another for CI. From the ANOVA analysis, researchers can see the general ranking of differential uses of patterns across the specified clusters. From the CI analysis, researchers can identify the specific differential use cases between every pair of clusters.

    -
    -

    7.3.3 Comparing differential uses of patterns across different clusters

    +
    +

    7.2.9 Comparing differential uses of patterns across different clusters

    Demonstrative example will be added soon.

    References

    -
    +
    Barbakh, Wesam Ashour, Ying Wu, and Colin Fyfe. 2009. Review of Linear Projection Methods.” In Non-Standard Parameter Adaptation for Exploratory Data Analysis, 29–48. Berlin, Heidelberg: Springer Berlin Heidelberg.
    From 5ed503425496ea858f3d461c716a63a875f7996f Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Wed, 21 Feb 2024 09:15:19 -0500 Subject: [PATCH 29/33] Update plotting.R Removed unnecessary == FALSE/TRUE statements --- R/plotting.R | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/R/plotting.R b/R/plotting.R index 763dea9..31bbfe4 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -43,10 +43,10 @@ plotConfidenceIntervals <- function( stop("weights_vis_norm must be either 'none' or 'quantiles'") } - if (weighted == FALSE) { - lab = "Unweighted" + if (weighted) { + lab <- "Weighted" } else { - lab = "Weighted" + lab <- "Unweighted" } #gene names were stored as rownames, store high and low estimates confidence_intervals$gene_names <- rownames(confidence_intervals) @@ -101,10 +101,10 @@ plotConfidenceIntervals <- function( #if provided, create heatmap for pattern weights if (!is.null(weights)) { - + #label with pattern name if provided hm_name <- ifelse(is.null(pattern_name), "weights", pattern_name) - + #maintain established order from the pointrange plot ordered_weights <- weights[rownames(confidence_intervals)] confidence_intervals$weights <- ordered_weights @@ -144,7 +144,7 @@ plotConfidenceIntervals <- function( } ################################################################################ #' plotVolcano -#' +#' #' Volcano plotting function #' @param stats data frame with differential expression statistics #' @param metadata #metadata from pdVolcano @@ -233,7 +233,7 @@ pdVolcano <- function( } - if (filter.inf == TRUE) { + if (filter.inf) { #remove p values below the machine limit representation for plotting purposes cat("Filtering", length(which(result$mean_stats$welch_padj <= .Machine$double.xmin)), "unweighted genes and", length(which(result$weighted_mean_stats$welch_padj <= .Machine$double.xmin)), "weighted genes", "\n") @@ -241,11 +241,16 @@ pdVolcano <- function( result$weighted_mean_stats <- subset(result$weighted_mean_stats, welch_padj > .Machine$double.xmin) } - if (is.numeric(FC) == FALSE) { + if (!is.numeric(FC)) { stop('FC must be a number') } - if (is.null(pvalue) == FALSE) { + if (!is.null(pvalue)) { + + if (!is.numeric(pvalue)) { + stop('p value must be a number') + } + message("Updating sig_genes...") #update previously stored pvalue pvalue <- pvalue @@ -343,7 +348,7 @@ pdVolcano <- function( meta_data = metadata, plt = list(differential_expression = unweightedvolcano, weighted_differential_expression = weightedvolcano)) - if (display == TRUE) { + if (display) { #print volcano plots pltgrid <- cowplot::plot_grid(vol_result$plt$differential_expression + theme(legend.position = "none"), From 9ac52250d0befb7456a7ce1eda56aa0958244617 Mon Sep 17 00:00:00 2001 From: rpalaganas Date: Wed, 21 Feb 2024 09:44:17 -0500 Subject: [PATCH 30/33] update vignette --- vignettes/projectR.Rmd | 59 +++++++++++++++-------------- vignettes/projectR.html | 82 +++++++++++++++++++++-------------------- 2 files changed, 75 insertions(+), 66 deletions(-) diff --git a/vignettes/projectR.Rmd b/vignettes/projectR.Rmd index 5aadaed..1efa253 100644 --- a/vignettes/projectR.Rmd +++ b/vignettes/projectR.Rmd @@ -23,6 +23,7 @@ vignette: > knitr::opts_chunk$set(echo = TRUE) options(scipen = 1, digits = 2) set.seed(1234) +library(projectR) ``` # Introduction @@ -45,7 +46,7 @@ Projection can roughly be defined as a mapping or transformation of points from The generic projectR function is executed as follows: ``` -library(projectR) + projectR(data, loadings, dataNames=NULL, loadingsNames=NULL, NP = NULL, full = false) ``` @@ -71,7 +72,7 @@ Projection of principal components is achieved by matrix multiplication of a new ## Obtaining PCs to project. ```{r prcomp, warning=FALSE} # data to define PCs -library(projectR) + data(p.RNAseq6l3c3t) # do PCA on RNAseq6l3c3t expression data @@ -80,7 +81,7 @@ pcVAR <- round(((pc.RNAseq6l3c3t$sdev)^2/sum(pc.RNAseq6l3c3t$sdev^2))*100,2) dPCA <- data.frame(cbind(pc.RNAseq6l3c3t$x,pd.RNAseq6l3c3t)) #plot pca -library(ggplot2) + setCOL <- scale_colour_manual(values = c("blue","black","red"), name="Condition:") setFILL <- scale_fill_manual(values = c("blue","black","red"),guide = FALSE) setPCH <- scale_shape_manual(values=c(23,22,25,25,21,24),name="Cell Line:") @@ -105,7 +106,7 @@ pPCA <- ggplot(dPCA, aes(x=PC1, y=PC2, colour=ID.cond, shape=ID.line, # data to project into PCs from RNAseq6l3c3t expression data data(p.ESepiGen4c1l) -library(projectR) + PCA2ESepi <- projectR(data = p.ESepiGen4c1l$mRNA.Seq,loadings=pc.RNAseq6l3c3t, full=TRUE, dataNames=map.ESepiGen4c1l[["GeneSymbols"]]) @@ -149,7 +150,7 @@ The number of rows in ${\bf{P}}$ (columns in ${\bf{A}}$) defines the number of b Projection of CoGAPS/GWCoGAPS patterns is implemented by solving the factorization in (1) for the new data matrix where ${\bf{A}}$ is the fixed nonorthogonal basis vectors comprising the average of the posterior mean for the CoGAPS/GWCoGAPS simulations performed on the original data. The patterns ${\bf{P}}$ in the new data associated with this amplitude matrix is estimated using the least-squares fit to the new data implemented with the `lmFit` function in the `r BiocStyle::Biocpkg("limma")` package. The `projectR` function has S4 method for class `Linear Embedding Matrix, LME`. ``` -library(projectR) + projectR(data, loadings,dataNames = NULL, loadingsNames = NULL, NP = NA, full = FALSE) ``` @@ -174,7 +175,7 @@ The basic output of the base projectR function, i.e. `full=FALSE`, returns `proj ```{r} # get data -library(projectR) + AP <- get(data("AP.RNAseq6l3c3t")) #CoGAPS run data AP <- AP$Amean # heatmap of gene weights for CoGAPs patterns @@ -190,7 +191,7 @@ pNMF<-heatmap.2(as.matrix(AP),col=bluered, trace='none', ## Projecting CoGAPS objects ```{r} # data to project into PCs from RNAseq6l3c3t expression data -library(projectR) + data('p.ESepiGen4c1l4') data('p.RNAseq6l3c3t') @@ -224,7 +225,7 @@ As canonical projection is not defined for clustering objects, the projectR pack `cluster2pattern` uses the corelation of each genes expression to the mean of each cluster to define continuous weights. ``` -library(projectR) + data(p.RNAseq6l3c3t) @@ -255,7 +256,6 @@ The output of the `cluster2pattern` function is a `pclust` class object; specifi `intersectoR` function can be used to test for significant overlap between two clustering objects. The base function finds and tests the intersecting values of two sets of lists, presumably the genes associated with patterns in two different datasets. S4 class methods for `hclust` and `kmeans` objects are also available. ``` -library(projectR) intersectoR(pSet1 = NA, pSet2 = NA, pval = 0.05, full = FALSE, k = NULL) ``` @@ -281,7 +281,7 @@ Correlation based projection requires a matrix of gene-wise correlation values t ## correlateR ``` -library(projectR) + correlateR(genes = NA, dat = NA, threshtype = "R", threshold = 0.7, absR = FALSE, ...) ``` @@ -305,7 +305,7 @@ The output of the `correlateR` function is a `correlateR` class object. Specific ```{r correlateR-exp} # data to -library(projectR) + data("p.RNAseq6l3c3t") # get genes correlated to T @@ -352,7 +352,7 @@ pCorT # data to project into from RNAseq6l3c3t expression data data(p.ESepiGen4c1l) -library(projectR) + cor2ESepi <- projectR(p.ESepiGen4c1l$mRNA.Seq,loadings=cor2T[[1]],full=FALSE, dataNames=map.ESepiGen4c1l$GeneSymbols) @@ -366,7 +366,7 @@ cor2ESepi <- projectR(p.ESepiGen4c1l$mRNA.Seq,loadings=cor2T[[1]],full=FALSE, Given loadings that define the weight of features (genes) in a given latent space (e.g. PCA, NMF), and the use of these patterns in samples, it is of interest to look at differential usage of these features between conditions. These conditions may be defined by user-defined annotations of cell type or by differential usage of a (projected) pattern. By examining differences in gene expression, weighted by the loadings that define their importance in a specific latent space, a unique understanding of differential expression in that context can be gained. This approach was originally proposed and developed in (Baraban et al, 2021), which demonstrates its utility in cross-celltype and cross-species interpretation of pattern usages. ``` -library(projectR) + projectionDriveR(cellgroup1, cellgroup2, loadings, loadingsNames = NULL, pvalue, pattern_name, display = T, normalize_pattern = T, mode = "CI") @@ -394,7 +394,6 @@ The output of `projectionDriveR` in confidence interval mode ('CI') is a list of ```{r projectionDriver, message = F, out.width="100%"} options(width = 60) -library(projectR) library(dplyr, warn.conflicts = F) #size-normed, log expression @@ -518,9 +517,9 @@ The arguments for pdVolcano are: ### Output Generates the same output as projectionDriveR. Allows manual updates to pvalue and FC thresholds and can accept gene lists of interest to subset results. -### Customize plotting of confidence intervals +### Customize volcano plot and run FGSEA -```{r fig.width=10, fig.height=11} +```{r fig.width=9, fig.height=10} suppressWarnings(library(cowplot)) library(fgsea) library(msigdbr) @@ -545,26 +544,32 @@ pathways = split(x = msigdbr_list$ensembl_gene, f = msigdbr_list$gs_name) #run fgsea scoreType positive, all values will be positive glial_fgsea <- fgsea::fgsea(pathways, glial_fgsea_vec, scoreType = "pos") #tidy -glial_fgseaResTidy <- glial_fgsea %>% subset(padj <= 0.05 & size >= 10 & size <= 500) %>% +glial_fgseaResTidy <- glial_fgsea %>% + subset(padj <= 0.05 & size >= 10 & size <= 500) %>% as_tibble() %>% dplyr::arrange(desc(size)) #plot -glial_EnrichmentPlot <- ggplot2::ggplot(glial_fgseaResTidy, ggplot2::aes(reorder(pathway, NES), NES)) + ggplot2::geom_point(aes(size=size, color = padj)) + -coord_flip() + -labs(x="Pathway", y="Normalized Enrichment Score", title="Pathway NES from GSEA") + -theme_minimal() +glial_EnrichmentPlot <- ggplot2::ggplot(glial_fgseaResTidy, + ggplot2::aes(reorder(pathway, NES), NES)) + + ggplot2::geom_point(aes(size=size, color = padj)) + + coord_flip() + + labs(x="Pathway", y="Normalized Enrichment Score", title="Pathway NES from GSEA") + + theme_minimal() glial_EnrichmentPlot microglial_fgsea <- fgsea::fgsea(pathways, microglial_fgsea_vec, scoreType = "pos") #tidy -microglial_fgseaResTidy <- microglial_fgsea %>% subset(padj <= 0.05 & size >= 10 & size <= 500) %>% +microglial_fgseaResTidy <- microglial_fgsea %>% + subset(padj <= 0.05 & size >= 10 & size <= 500) %>% as_tibble() %>% dplyr::arrange(desc(size)) -microglial_EnrichmentPlot <- ggplot2::ggplot(microglial_fgseaResTidy, ggplot2::aes(reorder(pathway, NES), NES)) + ggplot2::geom_point(aes(size=size, color = padj)) + -coord_flip() + -labs(x="Pathway", y="Normalized Enrichment Score", title="Pathway NES from GSEA") + -theme_minimal() +microglial_EnrichmentPlot <- ggplot2::ggplot(microglial_fgseaResTidy, + ggplot2::aes(reorder(pathway, NES), NES)) + + ggplot2::geom_point(aes(size=size, color = padj)) + + coord_flip() + + labs(x="Pathway", y="Normalized Enrichment Score", title="Pathway NES from GSEA") + + theme_minimal() microglial_EnrichmentPlot @@ -575,7 +580,7 @@ microglial_EnrichmentPlot This function performs multivariate analysis on different clusters within a dataset, which in this case is restricted to Seurat Object. Clusters are identified as those satisfying the conditions specified in their corresponding dictionaries. `multivariateAnalysisR` performs two tests: Analysis of Variance (ANOVA) and Confidence Interval (CI) evaluations. ANOVA is performed to understand the general differentiation between clusters through the lens of a specified pattern. CI is to visualize pair-wise differential expressions between two clusters for each pattern. Researchers can visually understand both the macroscopic and microscopic differential uses of each pattern across different clusters through this function. ``` -library(projectR) + multivariateAnalysisR <- function(significanceLevel = 0.05, patternKeys, seuratobj, dictionaries, customNames = NULL, exclusive = TRUE, exportFolder = "", ANOVAwidth = 1000, diff --git a/vignettes/projectR.html b/vignettes/projectR.html index e85231e..98ac29b 100644 --- a/vignettes/projectR.html +++ b/vignettes/projectR.html @@ -15,7 +15,7 @@ - + projectR Vignette @@ -730,7 +730,7 @@

    projectR Vignette

    Gaurav Sharma, Charles Shin, Jared N. Slosberg, Loyal A. Goff and Genevieve L. Stein-O'Brien

    -

    20 February 2024

    +

    21 February 2024

    @@ -798,7 +798,7 @@

    Contents

  • 7.2.3 Customize plotting of confidence intervals
  • 7.2.4 Input
  • 7.2.5 Output
  • -
  • 7.2.6 Customize plotting of confidence intervals
  • +
  • 7.2.6 Customize volcano plot and run FGSEA
  • 7.2.7 Input Arguments
  • 7.2.8 Output
  • 7.2.9 Comparing differential uses of patterns across different clusters
  • @@ -826,7 +826,7 @@

    2.2 Methods

    2.3 The base projectR function

    The generic projectR function is executed as follows:

    -
    library(projectR)
    +
    
     projectR(data, loadings, dataNames=NULL, loadingsNames=NULL, NP = NULL, full = false)

    2.3.1 Input Arguments

    @@ -852,7 +852,7 @@

    3 PCA projection

    3.1 Obtaining PCs to project.

    # data to define PCs
    -#library(projectR)
    +
     data(p.RNAseq6l3c3t)
     
     # do PCA on RNAseq6l3c3t expression data
    @@ -861,7 +861,7 @@ 

    3.1 Obtaining PCs to project.

    3.2 Projecting prcomp objects

    # data to project into PCs from RNAseq6l3c3t expression data data(p.ESepiGen4c1l) -#library(projectR) + PCA2ESepi <- projectR(data = p.ESepiGen4c1l$mRNA.Seq,loadings=pc.RNAseq6l3c3t, full=TRUE, dataNames=map.ESepiGen4c1l[["GeneSymbols"]])
    ## [1] "93 row names matched between data and loadings"
    @@ -922,7 +922,7 @@ 

    4 NMF projection

    \end{equation}\] The number of rows in \({\bf{P}}\) (columns in \({\bf{A}}\)) defines the number of biological patterns (k) that CoGAPS/GWCoGAPS will infer from the number of nonorthogonal basis vectors required to span the data space. As in the Bayesian Decomposition algorithm Wang, Kossenkov, and Ochs (2006), the matrices \({\bf{A}}\) and \({\bf{P}}\) in CoGAPS are assumed to have the atomic prior described in Sibisi and Skilling (1997). In the CoGAPS/GWCoGAPS implementation, \(\alpha_{A}\) and \(\alpha_{P}\) are corresponding parameters for the expected number of atoms which map to each matrix element in \({\bf{A}}\) and \({\bf{P}}\), respectively. The corresponding matrices \({\bf{A}}\) and \({\bf{P}}\) are found by MCMC sampling.

    Projection of CoGAPS/GWCoGAPS patterns is implemented by solving the factorization in (1) for the new data matrix where \({\bf{A}}\) is the fixed nonorthogonal basis vectors comprising the average of the posterior mean for the CoGAPS/GWCoGAPS simulations performed on the original data. The patterns \({\bf{P}}\) in the new data associated with this amplitude matrix is estimated using the least-squares fit to the new data implemented with the lmFit function in the limma package. The projectR function has S4 method for class Linear Embedding Matrix, LME.

    -
    #library(projectR)
    +
    
     projectR(data, loadings,dataNames = NULL, loadingsNames = NULL,
          NP = NA, full = FALSE)
    @@ -943,7 +943,7 @@

    4.0.2 Output

    4.1 Obtaining CoGAPS patterns to project.

    # get data
    -#library(projectR)
    +
     AP <- get(data("AP.RNAseq6l3c3t")) #CoGAPS run data
     AP <- AP$Amean
     # heatmap of gene weights for CoGAPs patterns
    @@ -959,12 +959,12 @@ 

    4.1 Obtaining CoGAPS patterns to cexCol=1,cexRow=.5,scale = "row", hclustfun=function(x) hclust(x, method="average") )

    -

    +

    4.2 Projecting CoGAPS objects

    # data to project into PCs from RNAseq6l3c3t expression data
    -#library(projectR)
    +
     data('p.ESepiGen4c1l4')
    ## Warning in data("p.ESepiGen4c1l4"): data set 'p.ESepiGen4c1l4' not found
    data('p.RNAseq6l3c3t')
    @@ -1008,7 +1008,7 @@ 

    5 Clustering projection

    5.1 cluster2pattern

    cluster2pattern uses the corelation of each genes expression to the mean of each cluster to define continuous weights.

    -
    library(projectR)
    +
    
     data(p.RNAseq6l3c3t)
     
     
    @@ -1034,8 +1034,7 @@ 

    5.1.2 Output

    5.2 intersectoR

    intersectoR function can be used to test for significant overlap between two clustering objects. The base function finds and tests the intersecting values of two sets of lists, presumably the genes associated with patterns in two different datasets. S4 class methods for hclust and kmeans objects are also available.

    -
    library(projectR)
    -intersectoR(pSet1 = NA, pSet2 = NA, pval = 0.05, full = FALSE, k = NULL)
    +
    intersectoR(pSet1 = NA, pSet2 = NA, pval = 0.05, full = FALSE, k = NULL)

    5.2.1 Input Arguments

    The inputs that must be set each time are the clusters and data.

    @@ -1057,7 +1056,7 @@

    6 Correlation based projectionCorrelation based projection requires a matrix of gene-wise correlation values to serve as the Pattern input to the projectR function. This matrix can be user-generated or the result of the correlateR function included in the projectR package. User-generated matrixes with each row corresponding to an individual gene can be input to the generic projectR function. The correlateR function allows users to create a weight matrix for projection with values quantifying the within dataset correlation of each genes expression to the expression pattern of a particular gene or set of genes as follows.

    6.1 correlateR

    -
    library(projectR)
    +
    
     correlateR(genes = NA, dat = NA, threshtype = "R", threshold = 0.7, absR = FALSE, ...)

    6.1.1 Input Arguments

    @@ -1078,7 +1077,7 @@

    6.1.2 Output

    6.2 Obtaining and visualizing correlateR objects.

    # data to
    -#library(projectR)
    +
     data("p.RNAseq6l3c3t")
     
     # get genes correlated to T
    @@ -1125,14 +1124,14 @@ 

    6.2 Obtaining and visualizing

    -

    +

    6.3 Projecting correlateR objects.

    # data to project into from RNAseq6l3c3t expression data
     data(p.ESepiGen4c1l)
     
    -#library(projectR)
    +
     cor2ESepi <- projectR(p.ESepiGen4c1l$mRNA.Seq,loadings=cor2T[[1]],full=FALSE,
         dataNames=map.ESepiGen4c1l$GeneSymbols)
    ## [1] "9 row names matched between data and loadings"
    @@ -1144,7 +1143,7 @@ 

    7 Differential features identific

    7.1 projectionDriveR

    Given loadings that define the weight of features (genes) in a given latent space (e.g. PCA, NMF), and the use of these patterns in samples, it is of interest to look at differential usage of these features between conditions. These conditions may be defined by user-defined annotations of cell type or by differential usage of a (projected) pattern. By examining differences in gene expression, weighted by the loadings that define their importance in a specific latent space, a unique understanding of differential expression in that context can be gained. This approach was originally proposed and developed in (Baraban et al, 2021), which demonstrates its utility in cross-celltype and cross-species interpretation of pattern usages.

    -
    library(projectR)
    +
    
     projectionDriveR(cellgroup1, cellgroup2, loadings, loadingsNames = NULL,
                      pvalue, pattern_name, display = T, normalize_pattern = T, mode = "CI")
     
    @@ -1169,7 +1168,6 @@

    7.1.2 Output

    7.1.3 Identifying differential features associated with learned patterns

    options(width = 60)
    -#library(projectR)
     library(dplyr, warn.conflicts = F)
     
     #size-normed, log expression
    @@ -1193,7 +1191,7 @@ 

    7.1.3 Identifying differential fe display = T, normalize_pattern = T, #normalize feature weights mode = "CI") #confidence interval mode

    -

    +

    conf_intervals <- drivers_ci$mean_ci[drivers_ci$sig_genes$significant_shared_genes,]
     
     
    @@ -1211,7 +1209,7 @@ 

    7.1.3 Identifying differential fe display = T, normalize_pattern = T, #normalize feature weights mode = "PV") #confidence interval mode

    -

    +

    difexp <- drivers_pv$difexpgenes
     str(difexp)
    ## 'data.frame':    440 obs. of  10 variables:
    @@ -1286,7 +1284,7 @@ 

    7.2.3 Customize plotting of confi cowplot::plot_grid(pl1, pl2, pl3, align = "h", rel_widths = c(1,.4, 1), ncol = 3)

    ## Warning: Removed 326 rows containing missing values
     ## (`geom_label_repel()`).
    -

    +

    ## pdVolcano

    @@ -1304,15 +1302,15 @@

    7.2.4 Input

    7.2.5 Output

    Generates the same output as projectionDriveR. Allows manual updates to pvalue and FC thresholds and can accept gene lists of interest to subset results.

    -
    -

    7.2.6 Customize plotting of confidence intervals

    +
    +

    7.2.6 Customize volcano plot and run FGSEA

    suppressWarnings(library(cowplot))
     library(fgsea)
     library(msigdbr)
     #manually change FC and pvalue threshold from defaults 0.2, 1e-5
     drivers_pv_mod <- pdVolcano(drivers_pv, FC = 0.5, pvalue = 1e-7)
    ## Updating sig_genes...
    -

    +

    difexp_mod <- drivers_pv_mod$difexpgenes
     str(difexp_mod)
    ## 'data.frame':    213 obs. of  10 variables:
    @@ -1341,31 +1339,37 @@ 

    7.2.6 Customize plotting of confi #run fgsea scoreType positive, all values will be positive glial_fgsea <- fgsea::fgsea(pathways, glial_fgsea_vec, scoreType = "pos") #tidy -glial_fgseaResTidy <- glial_fgsea %>% subset(padj <= 0.05 & size >= 10 & size <= 500) %>% +glial_fgseaResTidy <- glial_fgsea %>% + subset(padj <= 0.05 & size >= 10 & size <= 500) %>% as_tibble() %>% dplyr::arrange(desc(size)) #plot -glial_EnrichmentPlot <- ggplot2::ggplot(glial_fgseaResTidy, ggplot2::aes(reorder(pathway, NES), NES)) + ggplot2::geom_point(aes(size=size, color = padj)) + -coord_flip() + -labs(x="Pathway", y="Normalized Enrichment Score", title="Pathway NES from GSEA") + -theme_minimal() +glial_EnrichmentPlot <- ggplot2::ggplot(glial_fgseaResTidy, + ggplot2::aes(reorder(pathway, NES), NES)) + + ggplot2::geom_point(aes(size=size, color = padj)) + + coord_flip() + + labs(x="Pathway", y="Normalized Enrichment Score", title="Pathway NES from GSEA") + + theme_minimal() glial_EnrichmentPlot

    -

    +

    microglial_fgsea <- fgsea::fgsea(pathways, microglial_fgsea_vec, scoreType = "pos")
     #tidy 
    -microglial_fgseaResTidy <- microglial_fgsea %>% subset(padj <= 0.05 & size >= 10 & size <= 500) %>%
    +microglial_fgseaResTidy <- microglial_fgsea %>% 
    +  subset(padj <= 0.05 & size >= 10 & size <= 500) %>%
         as_tibble() %>%
         dplyr::arrange(desc(size))
     
    -microglial_EnrichmentPlot <- ggplot2::ggplot(microglial_fgseaResTidy, ggplot2::aes(reorder(pathway, NES), NES)) + ggplot2::geom_point(aes(size=size, color = padj)) + 
    -coord_flip() + 
    -labs(x="Pathway", y="Normalized Enrichment Score", title="Pathway NES from GSEA") + 
    -theme_minimal()
    +microglial_EnrichmentPlot <- ggplot2::ggplot(microglial_fgseaResTidy, 
    +                                             ggplot2::aes(reorder(pathway, NES), NES)) +
    +  ggplot2::geom_point(aes(size=size, color = padj)) + 
    +  coord_flip() + 
    +  labs(x="Pathway", y="Normalized Enrichment Score", title="Pathway NES from GSEA") + 
    +  theme_minimal()
     microglial_EnrichmentPlot
    -

    +

    ## multivariateAnalysisR

    This function performs multivariate analysis on different clusters within a dataset, which in this case is restricted to Seurat Object. Clusters are identified as those satisfying the conditions specified in their corresponding dictionaries. multivariateAnalysisR performs two tests: Analysis of Variance (ANOVA) and Confidence Interval (CI) evaluations. ANOVA is performed to understand the general differentiation between clusters through the lens of a specified pattern. CI is to visualize pair-wise differential expressions between two clusters for each pattern. Researchers can visually understand both the macroscopic and microscopic differential uses of each pattern across different clusters through this function.

    -
    library(projectR)
    +
    
     multivariateAnalysisR <- function(significanceLevel = 0.05, patternKeys, seuratobj,
                                       dictionaries, customNames = NULL, exclusive = TRUE,
                                       exportFolder = "", ANOVAwidth = 1000,
    
    From 713c55b49369c40c96bd934458841cf32b902539 Mon Sep 17 00:00:00 2001
    From: rpalaganas 
    Date: Wed, 21 Feb 2024 09:46:43 -0500
    Subject: [PATCH 31/33] Update projectR.Rmd
    
    ---
     vignettes/projectR.Rmd | 4 ++--
     1 file changed, 2 insertions(+), 2 deletions(-)
    
    diff --git a/vignettes/projectR.Rmd b/vignettes/projectR.Rmd
    index 1efa253..bfd2515 100644
    --- a/vignettes/projectR.Rmd
    +++ b/vignettes/projectR.Rmd
    @@ -501,7 +501,7 @@ cowplot::plot_grid(pl1, pl2, pl3, align = "h", rel_widths = c(1,.4, 1), ncol = 3
     
     
     ```
    -## pdVolcano
    +##pdVolcano
     
     ### Input
     The arguments for pdVolcano are:
    @@ -575,7 +575,7 @@ microglial_EnrichmentPlot
     
     
     ```
    -## multivariateAnalysisR
    +##multivariateAnalysisR
     
     This function performs multivariate analysis on different clusters within a dataset, which in this case is restricted to Seurat Object. Clusters are identified as those satisfying the conditions specified in their corresponding dictionaries. `multivariateAnalysisR` performs two tests: Analysis of Variance (ANOVA) and Confidence Interval (CI) evaluations. ANOVA is performed to understand the general differentiation between clusters through the lens of a specified pattern. CI is to visualize pair-wise differential expressions between two clusters for each pattern. Researchers can visually understand both the macroscopic and microscopic differential uses of each pattern across different clusters through this function.
     
    
    From 7f78f928dacc33cf73dc55ed8e47a8c7b035783f Mon Sep 17 00:00:00 2001
    From: rpalaganas 
    Date: Wed, 21 Feb 2024 10:40:33 -0500
    Subject: [PATCH 32/33] Update projectR.Rmd
    
    ---
     vignettes/projectR.Rmd | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/vignettes/projectR.Rmd b/vignettes/projectR.Rmd
    index bfd2515..275451f 100644
    --- a/vignettes/projectR.Rmd
    +++ b/vignettes/projectR.Rmd
    @@ -72,7 +72,7 @@ Projection of principal components is achieved by matrix multiplication of a new
     ## Obtaining PCs to project.
     ```{r prcomp, warning=FALSE}
     # data to define PCs
    -
    +library(ggplot2)
     data(p.RNAseq6l3c3t)
     
     # do PCA on RNAseq6l3c3t expression data
    
    From 1d441270364c2adecf10ae3e055c0706cfb93abe Mon Sep 17 00:00:00 2001
    From: rpalaganas 
    Date: Wed, 21 Feb 2024 11:53:26 -0500
    Subject: [PATCH 33/33] Update projectR.Rmd
    
    ---
     vignettes/projectR.Rmd | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/vignettes/projectR.Rmd b/vignettes/projectR.Rmd
    index 275451f..1401e7f 100644
    --- a/vignettes/projectR.Rmd
    +++ b/vignettes/projectR.Rmd
    @@ -105,7 +105,7 @@ pPCA <- ggplot(dPCA, aes(x=PC1, y=PC2, colour=ID.cond, shape=ID.line,
     ```{r projectR.prcomp, warning=FALSE}
     # data to project into PCs from RNAseq6l3c3t expression data
     data(p.ESepiGen4c1l)
    -
    +library(ggplot2)
     
     PCA2ESepi <- projectR(data = p.ESepiGen4c1l$mRNA.Seq,loadings=pc.RNAseq6l3c3t,
     full=TRUE, dataNames=map.ESepiGen4c1l[["GeneSymbols"]])