diff --git a/NAMESPACE b/NAMESPACE index 11637e3..971d46e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,10 +29,12 @@ export(lift) export(manhattan) export(miami) export(mr_collider_bias) +export(plot_coloc_probabilities) export(plot_correction_stability) export(plot_drug_proxy_instrument) export(plot_slope) export(plot_slopehunter_iters) +export(qq_plot) export(set_1000G_reference) export(set_dbsnp_directory) export(set_plink2) @@ -76,9 +78,12 @@ importFrom(progressr,progressor) importFrom(progressr,with_progress) importFrom(rtracklayer,import.chain) importFrom(rtracklayer,liftOver) +importFrom(stats,as.formula) importFrom(stats,cov) +importFrom(stats,median) importFrom(stats,pchisq) importFrom(stats,pnorm) +importFrom(stats,qchisq) importFrom(stats,rnorm) importFrom(stats,runif) importFrom(stats,sd) diff --git a/R/class_column_map.R b/R/class_column_map.R index 733741a..6d1e6b4 100644 --- a/R/class_column_map.R +++ b/R/class_column_map.R @@ -12,7 +12,7 @@ ColumnMapping <- function() { ieu_ukb = list(SNP="SNP",BETA="BETA",SE="SE",EA="ALLELE1", OA="ALLELE0", EAF="A1FREQ", P="P_BOLT_LMM_INF"), ieugwasr = list(RSID="rsid",CHR="chr",BP="position","EA"="ea",OA="nea",EAF="eaf",BETA="beta",SE="se", P="p", N="n"), ns_map = list(SNP="MARKER",CHR="CHR",BP="POS",EA="A1", OA="A2", BETA="BETA", SE="SE", EAF="EAF", P="P"), - gwama = list(SNP="rs_number",EA="reference_allele", OA="other_allele", BETA="beta", SE="se", EAF="eaf", P="p-value", BETA_95U="beta_95U",BETA_95L="beta_95L", Z="z", LOG10_P="_-log10_p-value", Q_STAT="q_statistic",I2="i2", N_STUDIES="n_studies", N="n_samples", EFFECTS="effects"), + gwama = list(RSID="rs_number",CHR="chromosome",BP="position",EA="reference_allele", OA="other_allele", BETA="beta", SE="se", EAF="eaf", P="p-value", BETA_95U="beta_95U",BETA_95L="beta_95L", Z="z", LOG10_P="_-log10_p-value", Q_STAT="q_statistic",I2="i2", N_STUDIES="n_studies", N="n_samples", EFFECTS="effects"), giant = list(RSID="SNP",SNP="missing",CHR="CHR", BP="POS","EA"="Tested_Allele", OA="Other_Allele", EAF="Freq_Tested_Allele", BETA="BETA", SE="SE", P="P", N="N", INFO="INFO") ) diff --git a/R/clump.R b/R/clump.R index 6659f46..d16baea 100644 --- a/R/clump.R +++ b/R/clump.R @@ -69,33 +69,45 @@ clump <- function(gwas, # run clumping system(cmd) - # the clumped plink output - clumped <- data.table::fread(paste0(plink_output,".clumps"), nThread=parallel::detectCores()) + # check for clump output + clump_path <- paste0(plink_output,".clumps") - # add index - clumped[, clump := seq_len(.N)] + if(file.exists(clump_path)) { - # pivot longer - clumped_long <- clumped[, list(clump_member = unlist(data.table::tstrsplit(SP2, ","))), by=c("ID","clump")] + # the clumped plink output + clumped <- data.table::fread(clump_path, nThread=parallel::detectCores()) - # clean e.g. rs1763611(G) --> rs1763611 - clumped_long[, clump_member := sub("(rs[0-9]+).*", "\\1", clump_member)] + # add index + clumped[, clump := seq_len(.N)] - # set the key - data.table::setkey(clumped_long, ID) - data.table::setkey(gwas, RSID) + # pivot longer + clumped_long <- clumped[, list(clump_member = unlist(data.table::tstrsplit(SP2, ","))), by=c("ID","clump")] - # flag the index SNPs with TRUE and the clump number - gwas[clumped_long, c("index","clump") := list(TRUE, clump)] + # clean e.g. rs1763611(G) --> rs1763611 + clumped_long[, clump_member := sub("(rs[0-9]+).*", "\\1", clump_member)] - # set key to the clump member rsID to join again - data.table::setkey(clumped_long, clump_member) + # set the key + data.table::setkey(clumped_long, ID) + data.table::setkey(gwas, RSID) - # flag the clump members as not the index SNP and with the clump number - gwas[clumped_long, c("index", "clump") := list(FALSE, i.clump)] + # flag the index SNPs with TRUE and the clump number + gwas[clumped_long, c("index","clump") := list(TRUE, clump)] - # code clump as a factor - gwas[, clump := factor(clump, levels=sort(unique(gwas$clump)))] + # set key to the clump member rsID to join again + data.table::setkey(clumped_long, clump_member) + + # flag the clump members as not the index SNP and with the clump number + gwas[clumped_long, c("index", "clump") := list(FALSE, i.clump)] + + # code clump as a factor + gwas[, clump := factor(clump, levels=sort(unique(gwas$clump)))] + + } else { + + warning("No clumps were found witht he provided parameters, or plink failed") + gwas[, c("index", "clump") := list(FALSE, NA_integer_)] + + } # the (potential) plink clumping log files that might be produced # TODO: check if there are others diff --git a/R/collider_bias.R b/R/collider_bias.R index 6b67a1e..8d01df7 100644 --- a/R/collider_bias.R +++ b/R/collider_bias.R @@ -9,7 +9,9 @@ #' @param bse a numeric, the standard error of the correction factor #' @param pi a numeric, the final Pi value, proportion of SNPs in the Gi SNP cluster #' @param fit a data.table, the raw points data for plotting table columns `c("SNP_incidence","BETA_incidence","BETA_progression","CLUSTER")` +#' @param intercept a numeric, the y slope intercept #' @param entropy a numeric, the entropy - output from the Slope-hunter method +#' #' @return an S3 object of class ColliderBiasResult #' @export #' @@ -586,6 +588,7 @@ mr_collider_bias <- function(gwas_i, #' } #' ...will lead to 27 separate analyses. \cr #' @inheritParams slopehunter +#' @inheritParams mr_collider_bias #' @param methods a character vector of collider bias method function names to run #' @return a list of S3 ColliderBiasResult objects #' @export @@ -639,7 +642,7 @@ analyse_collider_bias <- function(gwas_i, plot_slope <- function(results) { # silence RCMD checks - BETA_incidence = BETA_progression = CLUSTER = hjustvar = vjustvar = xpos = ypos = label = slope = intercept = ip = NULL + BETA_incidence = BETA_progression = CLUSTER = hjustvar = vjustvar = xpos = ypos = label = slope = intercept = ip = hunted_snps_label = NULL # make list of one if a single ColliderBiasResult object if(inherits(results, "ColliderBiasResult")) { diff --git a/R/colocalisation.R b/R/colocalisation.R new file mode 100644 index 0000000..93899e6 --- /dev/null +++ b/R/colocalisation.R @@ -0,0 +1,140 @@ +#' @title Coloc probability plot +#' @description +#' A plotting wrapper for the `coloc` package. Produces a ggplot for either +#' the prior or posterior probability sensitivity analyses. See the +#' [coloc](https://chr1swallace.github.io/coloc/articles/a04_sensitivity.html) +#' package vignettes for details. +#' @param coloc coloc object, output from `coloc::coloc.abf()` +#' @param rule a string, a valid rule indicating success e.g. "H4 > 0.5" +#' @param type a string, either `prior` or `posterior` +#' @return a ggplot +#' @export +#' @references [coloc](https://chr1swallace.github.io/coloc/articles/a04_sensitivity.html) +plot_coloc_probabilities <- function(coloc, rule="H4 > 0.5", type="prior") { + + # RCMD check warnings + h <- x <- NULL + + # checks + type <- match.arg(type, choices=c('prior','posterior')) + stopifnot("list" %in% class(coloc)) + stopifnot("priors" %in% names(coloc)) + stopifnot("summary" %in% names(coloc)) + rule.init <- rule + rule <- gsub("(H.)","PP.\\1.abf",rule,perl=TRUE) + + # extract the results + results <- coloc$results + pp <- coloc$summary + p12 <- coloc$priors["p12"] + p1 <- coloc$priors["p1"] + p2 <- coloc$priors["p2"] + + # do the tests + check <- function(pp) { with(as.list(pp),eval(parse(text=rule))) } + testp12 <- 10^seq(log10(p1*p2),log10(min(p1,p1)),length.out=100) + testH <- prior.snp2hyp(pp["nsnps"],p12=testp12,p1=p1,p2=p2) + testpp <- data.table::as.data.table(prior.adjust(summ=pp,newp12=testp12,p1=p1,p2=p2,p12=p12)) + names(testpp) <- gsub("(H.)","PP.\\1.abf",colnames(testpp),perl=TRUE) + pass <- check(testpp) + w <- which(pass) + + # base plot to add to + p <- ggplot2::ggplot() + + # which plot to create + if(type=="prior") { + + # create the data.frame for prior probabilities + prior_dat <- data.table::data.table(testH) + prior_dat[, x := testp12] + prior_dat <- data.table::melt(prior_dat, + id.vars = c("x"), + measure.vars = paste0("H",0:4), + variable.name = "h", + value.name = "p") + prior_dat[, h := factor(h, levels=paste0("H",0:4))] + + p_max <- max(prior_dat$p[prior_dat$h != "H0"], na.rm=T) + + if(any(pass)) { + p <- p + ggplot2::geom_rect(ggplot2::aes(xmin=testp12[min(w)], xmax=testp12[max(w)], ymin=0, ymax=p_max), fill="green", alpha=0.1, color="green") + } + + p <- p + + ggplot2::geom_point(data = prior_dat, mapping = ggplot2::aes(x=x, y=p, fill=h), color="#333333", shape=21, size=3, stroke=0.25) + + ggplot2::lims(y=c(0,p_max)) + + ggplot2::labs(title = "Prior probabilities", subtitle = paste("shaded region:",rule.init)) + + } else if(type=="posterior") { + + # create the data.frame for posterior probabilities + posterior_dat <- data.table::data.table(as.matrix(testpp)) + posterior_dat[, x := testp12] + posterior_dat <- data.table::melt(posterior_dat, + id.vars = c("x"), + measure.vars = paste0("PP.H",0:4,".abf"), + variable.name = "h", + value.name = "p") + posterior_dat[, h := factor(h, levels=paste0("PP.H",0:4,".abf"))] + + p_max <- max(posterior_dat$p, na.rm=T) + + if(any(pass)) { + p <- p + ggplot2::geom_rect(ggplot2::aes(xmin=testp12[min(w)], xmax=testp12[max(w)], ymin=0, ymax=p_max), fill="green", alpha=0.1, color="green") + } + + p <- p + + ggplot2::geom_point(data = posterior_dat, mapping = ggplot2::aes(x=x, y=p, fill=h), color="#333333", shape=21, size=3, stroke=0.25) + + ggplot2::lims(y=c(0,p_max)) + + ggplot2::labs(title = "Posterior probabilities", subtitle = paste("shaded region:",rule.init)) + + } else { + + stop("type parameter error") + + } + + # add the common elements + p <- p + + ggplot2::scale_fill_manual(values = c("#ffffffff",viridis::viridis(5,alpha=1)[-1])) + + ggplot2::geom_vline(xintercept = p12, linetype="dashed", color="gray") + + ggplot2::scale_x_continuous(trans='log10') + + ggplot2::labs(x="p12", y="Prob") + + ggplot2::annotate("text", x=p12, y=0.5, label="results", angle=90, color="gray40") + + ggplot2::theme(legend.position = "top", + legend.title = ggplot2::element_blank()) + + # return + return(p) +} + + +# copied from coloc package to make the above work +prior.adjust <- function(summ,newp12,p1=1e-4,p2=1e-4,p12=1e-6) { + if(is.list(summ) && "summary" %in% names(summ)) + summ <- summ$summary + if(!identical(names(summ), c("nsnps", "PP.H0.abf", "PP.H1.abf", "PP.H2.abf", "PP.H3.abf", "PP.H4.abf"))) + stop("not a coloc summary vector") + ## back calculate likelihoods + f <- function(p12) + prior.snp2hyp(summ["nsnps"],p12=p12,p1=p1,p2=p2) + pr1 <- f(newp12) + pr0 <- matrix(f(p12),nrow=nrow(pr1),ncol=ncol(pr1),byrow=TRUE) + newpp <- matrix(summ[-1],nrow=nrow(pr1),ncol=ncol(pr1),byrow=TRUE) * pr1/pr0 # prop to, not equal to + newpp/rowSums(newpp) +} + + +# copied from coloc package to make the above work +prior.snp2hyp <- function(nsnp,p12=1e-6,p1=1e-4,p2=1e-4) { + if(any(p12 p1) || any(p12 > p2)) + return(NULL) + tmp <- cbind(nsnp * p1, + nsnp * p2, + nsnp * (nsnp-1) * p1 * p2, + nsnp * p12) + tmp <- cbind(1-rowSums(tmp),tmp) + colnames(tmp) <- paste0("H",0:4) + tmp +} diff --git a/R/manhattan.R b/R/manhattan.R index 07cc099..c96ed6c 100644 --- a/R/manhattan.R +++ b/R/manhattan.R @@ -1,5 +1,5 @@ # silence R CMD checks for data.table columns -BETA = BP = CHR = P = SE = SNP = chr_len = tot = x = i.tot = highlight = NULL +BETA = BP = CHR = P = SE = SNP = chr_len = tot = x = i.tot = highlight = secondary_highlight = NULL #' @title Manhattan plot #' @description @@ -65,7 +65,7 @@ manhattan <- function(gwas, # annotate_snps = c("15:135277414[b37]G,T") # create factor from CHR - gwas[, "CHR" := as.factor(CHR)] + gwas[, "CHR" := factor(CHR, levels=c(as.character(1:25), setdiff(as.character(unique(CHR)), as.character(1:25))))] # down-sample the insignificant dots for plotting if(downsample > 0) { @@ -118,26 +118,19 @@ manhattan <- function(gwas, ) # add highlighted variants within requested kb BP window - if(!is.null(highlight_snps)) { + if(!is.null(highlight_snps) & length(highlight_snps)>0) { gwas[SNP %in% highlight_snps, highlight := TRUE] if(highlight_win > 0) { highlight_data <- gwas[SNP %in% highlight_snps, ] for(i in 1:nrow(highlight_data)) { bp <- highlight_data[[i, "BP"]] chr<- highlight_data[[i, "CHR"]] - gwas[BP > (bp - highlight_win*1000) & BP < (bp + highlight_win*1000) & CHR == chr, highlight := TRUE] + gwas[BP > (bp - highlight_win*1000) & BP < (bp + highlight_win*1000) & CHR == chr, secondary_highlight := TRUE] } } plot <- plot + - ggplot2::geom_point(data = gwas[highlight==TRUE, ], color=highlight_colour, shape=highlight_shape, alpha=highlight_alpha, size=1.5) - } - - # add SNP label annotations - if(!is.null(annotate_snps)) { - gwas[SNP %in% annotate_snps, annotate := TRUE] - label_x_nudge <- max(gwas[["x"]], na.rm=TRUE) / 22 - plot <- plot + - ggrepel::geom_label_repel(data=gwas[annotate==TRUE, ], ggplot2::aes(label=SNP), colour="black", label.size = 0.1, nudge_x=label_x_nudge ) + ggplot2::geom_point(data = gwas[secondary_highlight==TRUE, ], color=highlight_colour, fill=highlight_colour, alpha=highlight_alpha) + + ggplot2::geom_point(data = gwas[highlight==TRUE, ], color=highlight_colour, fill=highlight_colour, shape=highlight_shape, alpha=highlight_alpha, size=3) } # add horizontal significance lines @@ -148,6 +141,14 @@ manhattan <- function(gwas, plot <- plot + ggplot2::geom_hline(yintercept = -log10(sig_line_2), linetype = "dashed", color = "darkgrey") } + # add SNP label annotations + if(!is.null(annotate_snps) & length(annotate_snps)>0) { + gwas[SNP %in% annotate_snps, annotate := TRUE] + label_x_nudge <- max(gwas[["x"]], na.rm=TRUE) / 22 + plot <- plot + + ggrepel::geom_label_repel(data=gwas[annotate==TRUE, ], ggplot2::aes(label=SNP), colour="black", label.size = 0.1, nudge_x=label_x_nudge ) + } + # add titles and labels plot <- plot + ggplot2::labs(title=title, subtitle=subtitle, x="Chromosome", y=log10P) @@ -262,12 +263,15 @@ miami <- function(gwases, if(!is.null(sig_line_2[[2]])) { max_p <- max(c(max_p, -log10(sig_line_2[[2]])),na.rm=T) } - y_limits <- c(ceiling(max_p), 0) + y_limits[[2]] <- c(ceiling(max_p), 0) + } else { + # user usually provides in the normal way c(0, upper) + y_limits[[2]] <- rev(y_limits[[2]]) } # flip the bottom plot plot_lower <- plot_lower + - ggplot2::scale_y_reverse(limits=y_limits, expand=c(0, 0)) + + ggplot2::scale_y_reverse(limits=y_limits[[2]], expand=c(0, 0)) + ggplot2::scale_x_continuous(expand=c(0.01, 0.01), position = "top") + ggplot2::theme(axis.title.x=ggplot2::element_blank(), axis.line.x=ggplot2::element_blank(), diff --git a/R/qq_plot.R b/R/qq_plot.R index c7b2542..893c970 100644 --- a/R/qq_plot.R +++ b/R/qq_plot.R @@ -1,52 +1,126 @@ -# cat("Plotting QQ:", "[", this_study, this_outcome, "]\n") - -# # gather the data for the QQ plot -# qq_dat <- dt |> -# dplyr::filter(!is.na(P), -# !is.nan(P), -# !is.null(P), -# is.finite(P), -# P < 1.0, -# P > 0.0) |> -# dplyr::mutate(chisq = qchisq(1-P, 1)) |> -# dplyr::group_by(QC_STATUS) |> -# dplyr::mutate(observed = -log10(sort(P, decreasing=FALSE)), -# expected = -log10(stats::ppoints(dplyr::n())), -# clower = -log10(qbeta(p = (1 - 0.95) / 2, shape1 = 1:dplyr::n(), shape2 = dplyr::n():1)), -# cupper = -log10(qbeta(p = (1 + 0.95) / 2, shape1 = 1:dplyr::n(), shape2 = dplyr::n():1))) |> -# dplyr::ungroup() -# -# -# # generate QQ-plot -# -# # calculate the lambda_gc and generate labels df -# lambda_gc_pre = round( median(qq_dat$chisq[qq_dat$QC_STATUS == "PRE-QC"]) / qchisq(0.5, 1), digits=2) -# lambda_gc_post = round( median(qq_dat$chisq[qq_dat$QC_STATUS == "POST-QC"]) / qchisq(0.5, 1), digits=2) -# ann_text <- data.frame(expected = c(5,5), -# observed = c(2,2), -# label = paste0("\u03BB = ", c(lambda_gc_pre, lambda_gc_post))) -# -# # axis labels -# log10Pe <- expression(paste("Expected -log"[10], plain(P))) -# log10Po <- expression(paste("Observed -log"[10], plain(P))) -# -# # plot -# plot <- qq_dat |> -# ggplot(aes(x = expected, y = observed)) + -# geom_point(size = 0.5, color=wes_palette("GrandBudapest2")[4]) + -# geom_ribbon(mapping = aes(x = expected, ymin = clower, ymax = cupper), -# alpha = 0.1) + -# geom_abline(slope=1, intercept=0, color="darkred", linetype = "dotted") + -# geom_text(data = ann_text, aes(x=expected, y=observed, label=label), size=6) + -# labs(title = paste0("QQ plot - [", this_study, "][", this_outcome, "]"), -# x = log10Pe, -# y = log10Po, -# color = NULL) + -# theme_light(base_size = 22) + -# theme(aspect.ratio = 1) + -# facet_grid(cols = vars(QC_STATUS)) -# -# # save plot -# grDevices::png(filename=paste0(base_fig_path, "qq_plot.png"), bg="white", height=600, width=1200, units="px") -# print(plot) -# grDevices::dev.off() +#' @title QQ plot +#' @param gwas a data.frame like object or valid file path +#' @param pval_col the P value column +#' @param title a string, the title for the plot +#' @param subtitle a string, the subtitle for the plot +#' @param facet_grid_row_col a string, the column name in `gwas` by which to facet the plot (rows) +#' @param facet_grid_col_col a string, the column name in `gwas` by which to facet the plot (cols) +#' @param colours a 2 element list of colour codes (1-the uncorrected points, 2-the GC corrected points) +#' @param plot_corrected a logical, whether to apply and plot the lambda correction +#' @param facet_nrow an integer, passed to facet_wrap, the number of rows to facet by (if only facet_grid_row_col is provided) +#' @param facet_ncol an integer, passed to facet_wrap, the number of cols to facet by (if only facet_grid_col_col is provided) +#' +#' @return a ggplot +#' @export +#' @importFrom stats qchisq median as.formula +#' +qq_plot <- function(gwas, + pval_col = "P", + colours = list(raw="#2166AC", corrected="#B2182B"), + title = NULL, + subtitle = NULL, + plot_corrected = FALSE, + facet_grid_row_col = NULL, + facet_grid_col_col = NULL, + facet_nrow = NULL, + facet_ncol = NULL) { + + # silence R CMD check warnings + P <- adj_P <- adj_chisq <- adj_observed <- chisq <- clower <- corrected <- cupper <- expected <- label <- lambda <- observed <- NULL + + # get the gwas + gwas <- import_table(gwas) + + # checks + stopifnot("Colours must be length 1, or length 2 if `plot_corrected` is TRUE" = length(colours)==1 | length(colours)==2 & plot_corrected) + stopifnot("There must be a `P` column" = pval_col %in% names(gwas)) + data.table::setnames(gwas, pval_col, "P") + if(any(gwas[, is.na(P) | is.nan(P) | is.null(P) | is.infinite(P) | P<0.0 | P>1.0])) { + invalid_idx <- which(gwas[, is.na(P) | is.nan(P) | is.null(P) | is.infinite(P) | P<0.0 | P>1.0]) + print(gwas[invalid_idx, ]) + warning("Invalid P values, please remove") + } + if(!is.null(facet_grid_row_col)) { + stopifnot("The row faceting column, if given, must be in the data source" = facet_grid_row_col %in% names(gwas)) + } + if(!is.null(facet_grid_col_col)) { + stopifnot("The col faceting column, if given, must be in the data source" = facet_grid_col_col %in% names(gwas)) + } + + # gather the data for the QQ plot + gwas[, chisq := stats::qchisq(1-P, 1)] + + facets <- c(facet_grid_row_col, facet_grid_col_col) + + # calculate lambda + gwas[, lambda := stats::median(chisq) / qchisq(0.5, 1), by=facets] + + # new / adjusted chisq statistic and P + gwas[, adj_chisq := chisq/lambda] + gwas[, adj_P := stats::pchisq(adj_chisq, 1, lower.tail=FALSE)] + + # calculate observed and lambda + plot_data <- gwas[, list(lambda = median(chisq) / qchisq(0.5, 1), + observed = -log10(sort(P, decreasing=FALSE)), + adj_observed = -log10(sort(adj_P, decreasing=FALSE)), + expected = -log10(stats::ppoints(.N)), + clower = -log10(stats::qbeta(p = (1 - 0.95) / 2, shape1 = 1:.N, shape2 = .N:1)), + cupper = -log10(stats::qbeta(p = (1 + 0.95) / 2, shape1 = 1:.N, shape2 = .N:1))), + by=facets] + + # generate QQ-plot + + # axis labels + log10Pe <- expression(paste("Expected -log"[10], plain(P))) + log10Po <- expression(paste("Observed -log"[10], plain(P))) + + # lambda labels + lambda_labels <- plot_data[, list(lambda = lambda[1]), by=facets] + lambda_labels[, expected := 3.0] + lambda_labels[, observed := 0.25] + lambda_labels[, label := paste0("\u03BB = ", format(round(lambda, 3), nsmall=3))] + + # downsample for faster plotting + data.table::setorder(plot_data, expected) + plot_data <- plot_data[sample(x = 1:.N, + replace = TRUE, # replace samples, for speed + size = ifelse(5000>.N,.N,5000)+ifelse(.N-5000<0,0,floor((.N-5000)*0.05)), # size to return is 5000 + 5% of original data + prob = ifelse(rep(5000>.N,.N),rep(1,.N),c(seq(0,1,length.out=.N-floor(.N*0.05)), rep(1,floor(.N*0.05))))), ] # probability of choosing row is lowest for 0-95th percentile, and certain for the top 5th centile + + # plot + plot <- plot_data |> + ggplot2::ggplot(ggplot2::aes(x = expected, y = observed)) + + ggplot2::geom_point(size = 0.5, color=colours[[1]]) + + ggplot2::geom_ribbon(mapping = ggplot2::aes(x = expected, ymin = clower, ymax = cupper), alpha = 0.1, color="transparent") + + ggplot2::geom_abline(slope=1, intercept=0, color="darkgrey", linetype = "dotted") + + ggplot2::geom_text(data = lambda_labels, ggplot2::aes(x=expected, y=observed, label=label), color="black", show.legend = FALSE) + + ggplot2::labs(title = title, + subtitle = subtitle, + x = log10Pe, + y = log10Po, + color = NULL) + + ggplot2::theme_light() + + ggplot2::theme(aspect.ratio = 1) + + if(plot_corrected) { + plot_data[, corrected := "Corrected"] + plot <- plot + + ggplot2::geom_point(data = plot_data, + mapping = ggplot2::aes(x = expected, y = adj_observed, color=corrected), size = 0.5, inherit.aes=FALSE) + + ggplot2::scale_color_manual(values=c(colours[[2]])) + + ggplot2::theme(legend.position="top") + + ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size=3))) + } + + if(!is.null(facet_grid_row_col) & !is.null(facet_grid_col_col)) { + plot <- plot + ggplot2::facet_grid(facet_grid_row_col ~ facet_grid_col_col) + } else if(!is.null(facet_grid_row_col)) { + plot <- plot + ggplot2::facet_wrap(stats::as.formula(paste("~", facet_grid_row_col)), nrow=facet_nrow, ncol=facet_ncol) + } else if(!is.null(facet_grid_col_col)) { + plot <- plot + ggplot2::facet_wrap(stats::as.formula(paste("~", facet_grid_col_col)), nrow=facet_nrow, ncol=facet_ncol) + } + + # return + return(plot) + +} diff --git a/_pkgdown.yml b/_pkgdown.yml index d92c5ba..bd994a3 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -56,8 +56,10 @@ reference: contents: - manhattan - miami + - qq_plot - eaf_plot - generate_random_gwas_data + - plot_coloc_probabilities - title: "Setup" desc: > diff --git a/docs/articles/chrpos_to_rsid.html b/docs/articles/chrpos_to_rsid.html index 673fa63..7ace84c 100644 --- a/docs/articles/chrpos_to_rsid.html +++ b/docs/articles/chrpos_to_rsid.html @@ -104,7 +104,7 @@

CHR:POS to RSID mapping

 library(genepi.utils)
 
-gwas <- data.table::fread(system.file("extdata", "example_gwas_sumstats.tsv", package="genepi.utils"))
+gwas <- data.table::fread(system.file("extdata", "example_gwas_sumstats.tsv", package="genepi.utils"))
 
 head(gwas)
 #>    chromosome base_pair_location effect_allele other_allele
@@ -275,7 +275,7 @@ 

Allele specification / matching
-gwas_id_coding <- data.table::fread(system.file("extdata", "example3_gwas_sumstats.tsv", package="genepi.utils"))
+gwas_id_coding <- data.table::fread(system.file("extdata", "example3_gwas_sumstats.tsv", package="genepi.utils"))
 
 gwas_with_rsids <- genepi.utils::chrpos_to_rsid(dt      = gwas_id_coding,
                                                 chr_col = "chromosome",
@@ -330,7 +330,7 @@ 

Evaluation speedlibrary(ggplot2) # some GWAS data -dt <- data.table::fread( gwas_sumstats_4.3million_rows ) +dt <- data.table::fread( gwas_sumstats_4.3million_rows ) # benchmarking mbm <- microbenchmark("chrpos_to_rsid: no alleles, no alt rsids" = { diff --git a/docs/articles/clumping.html b/docs/articles/clumping.html index ca6b492..906562b 100644 --- a/docs/articles/clumping.html +++ b/docs/articles/clumping.html @@ -146,7 +146,7 @@

Setup library(genepi.utils) # the gwas data -gwas <- data.table::fread(system.file("extdata", "example2_gwas_sumstats.tsv", package="genepi.utils")) +gwas <- data.table::fread(system.file("extdata", "example2_gwas_sumstats.tsv", package="genepi.utils")) # ensure annotated with rsID gwas <- standardise_gwas(gwas, input_format="ns_map", build="GRCh37", populate_rsid="b37_dbsnp156") diff --git a/docs/articles/collider_bias.html b/docs/articles/collider_bias.html index 5bc23d9..5dd4f93 100644 --- a/docs/articles/collider_bias.html +++ b/docs/articles/collider_bias.html @@ -109,8 +109,8 @@

Collider bias

library(genepi.utils) # read in the data -gwas_clumped_incidence <- data.table::fread(clumped_incidence_path)[index==TRUE, ] # take only the index SNPs -gwas_progression <- data.table::fread(progression_path)

+gwas_clumped_incidence <- data.table::fread(clumped_incidence_path)[index==TRUE, ] # take only the index SNPs +gwas_progression <- data.table::fread(progression_path)

Slope-hunter

@@ -349,7 +349,7 @@

Applying the correction factor# calculate difference in raw and adjusted betas; and effect on genome wide significance corrected_gwas[, beta_diff := BETA_progression - adjusted_beta] sig_change_levels <- c("Dropped hit", "New hit", "No change") -corrected_gwas[, sig_change:= data.table::fcase(P_progression < 5e-8 & adjusted_p >= 5e-8, factor("Dropped hit", levels=sig_change_levels), +corrected_gwas[, sig_change:= data.table::fcase(P_progression < 5e-8 & adjusted_p >= 5e-8, factor("Dropped hit", levels=sig_change_levels), P_progression >=5e-8 & adjusted_p < 5e-8, factor("New hit", levels=sig_change_levels), default = factor("No change", levels=sig_change_levels))] diff --git a/docs/articles/drug_target_proxy.html b/docs/articles/drug_target_proxy.html index 61952b2..ea00630 100644 --- a/docs/articles/drug_target_proxy.html +++ b/docs/articles/drug_target_proxy.html @@ -228,11 +228,11 @@

Statins - an eQTL examplehmgcar_eqtl <- QTL("/Users/xx20081/Documents/local_data/gtex_v8/gtex_v8_chr5.tsv.gz", p_val=0.05, join_key="RSID_b37") -data.table::setnames(hmgcar_eqtl$data, c("SNP_b38", "BP_b37"), c("SNP", "BP")) +data.table::setnames(hmgcar_eqtl$data, c("SNP_b38", "BP_b37"), c("SNP", "BP")) hmgcar_eqtl$data[, CHR := as.character(CHR)] # set the concordance for beta effects in LDL and HMGCoAR -concordance <- data.table::data.table("data_name_1" = c(""), data_name_2=c("hmgcoar_gtex"), concordant=c(TRUE)) +concordance <- data.table::data.table("data_name_1" = c(""), data_name_2=c("hmgcoar_gtex"), concordant=c(TRUE)) # extract the variants for the instrument statin_instrument_dat <- drug_target_proxy(gwas_gene = gwas_ldl_hmgcoar, @@ -320,11 +320,11 @@

GLP1R agonists # create an eQTL object for the GTexV8 data glp1r_eqtl <- QTL("/Users/xx20081/Documents/local_data/gtex_v8/gtex_v8_chr6.tsv.gz", p_val=1, join_key="RSID_b37") -data.table::setnames(glp1r_eqtl$data, c("SNP_b38", "BP_b37"), c("SNP", "BP")) +data.table::setnames(glp1r_eqtl$data, c("SNP_b38", "BP_b37"), c("SNP", "BP")) glp1r_eqtl$data[, CHR := as.character(CHR)] # set the concordance for beta effects in HbA1c and GLP1R - FALSE as high HbA1c should relate to less GLP1R expression -concordance <- data.table::data.table("data_name_1" = c("hba1c_qtl"), data_name_2=c("glp1r_eqtl"), concordant=c(FALSE)) +concordance <- data.table::data.table("data_name_1" = c("hba1c_qtl"), data_name_2=c("glp1r_eqtl"), concordant=c(FALSE)) # extract the variants for the instrument incretin_instrument_dat <- drug_target_proxy(gwas_gene = gwas_t2dm_glp1r, diff --git a/docs/articles/harmonise.html b/docs/articles/harmonise.html index a3afb3e..8550363 100644 --- a/docs/articles/harmonise.html +++ b/docs/articles/harmonise.html @@ -111,7 +111,7 @@

Run library(genepi.utils) # the gwas data -gwas1 <- data.table::fread(system.file("extdata", "example2_gwas_sumstats.tsv", package="genepi.utils")) +gwas1 <- data.table::fread(system.file("extdata", "example2_gwas_sumstats.tsv", package="genepi.utils")) gwas2 <- gwas1 # ensure standard column names @@ -236,7 +236,7 @@

Evaluation speedgwas_incidence_path <- "/Users/xx20081/Documents/local_data/hermes_incidence/standardised/hf_incidence_pheno1_eur.tsv.gz" # some GWAS data -gwas1 <- data.table::fread(gwas_incidence_path)[1:100000, ] +gwas1 <- data.table::fread(gwas_incidence_path)[1:100000, ] gwas2 <- gwas1 # Slopehunter read in diff --git a/docs/articles/ld_matrix.html b/docs/articles/ld_matrix.html index 858ff41..33d23e2 100644 --- a/docs/articles/ld_matrix.html +++ b/docs/articles/ld_matrix.html @@ -109,7 +109,7 @@

Setup library(genepi.utils) # the gwas data -gwas <- data.table::fread(system.file("extdata", "example2_gwas_sumstats.tsv", package="genepi.utils")) +gwas <- data.table::fread(system.file("extdata", "example2_gwas_sumstats.tsv", package="genepi.utils")) # ensure annotated with rsID gwas <- standardise_gwas(gwas, input_format="ns_map", build="GRCh37", populate_rsid="b37_dbsnp156") @@ -159,7 +159,7 @@

LD matrix#> rs10786405 -0.596843 1.000000 -0.342968 #> rs11816998 -0.272363 -0.342968 1.000000 #> attr(,"log") -#> [1] "PLINK v2.00a6 M1 (23 Nov 2023)\nOptions in effect:\n --extract /var/folders/62/n2yrhw752hn73nlbzp0r0_rr0000gq/T//RtmpMOBawz/file124e549dc95bd\n --out /Users/xx20081/Downloads/ld_mat\n --pfile /Users/xx20081/Documents/local_data/genome_reference/ref_1000GP_phase3_plink/all_phase3_nodup\n --r-phased square\n\nHostname: K3P0KGKVKT\nWorking directory: /Users/xx20081/git/genepi.utils/vignettes\nStart time: Tue Dec 5 20:08:58 2023\n\nRandom number seed: 1701806938\n98304 MiB RAM detected; reserving 49152 MiB for main workspace.\nUsing up to 12 threads (change this with --threads).\n2504 samples (1270 females, 1234 males; 2497 founders) loaded from\n/Users/xx20081/Documents/local_data/genome_reference/ref_1000GP_phase3_plink/all_phase3_nodup.psam.\n48493710 variants loaded from\n/Users/xx20081/Documents/local_data/genome_reference/ref_1000GP_phase3_plink/all_phase3_nodup.pvar.\n2 categorical phenotypes loaded.\n--extract: 9 variants remaining.\nCalculating allele frequencies... done.\n9 variants remaining after main filters.\n--r-phased: Variant IDs written to\n/Users/xx20081/Downloads/ld_mat.phased.vcor1.vars .\n--r-phased: Matrix written to /Users/xx20081/Downloads/ld_mat.phased.vcor1 .\n\nEnd time: Tue Dec 5 20:09:03 2023" +#> [1] "PLINK v2.00a6 M1 (23 Nov 2023)\nOptions in effect:\n --extract /var/folders/62/n2yrhw752hn73nlbzp0r0_rr0000gq/T//RtmpMT7NKD/filef95ea5e1593\n --out /Users/xx20081/Downloads/ld_mat\n --pfile /Users/xx20081/Documents/local_data/genome_reference/ref_1000GP_phase3_plink/all_phase3_nodup\n --r-phased square\n\nHostname: K3P0KGKVKT\nWorking directory: /Users/xx20081/git/genepi.utils/vignettes\nStart time: Wed Dec 13 20:45:51 2023\n\nRandom number seed: 1702500351\n98304 MiB RAM detected; reserving 49152 MiB for main workspace.\nUsing up to 12 threads (change this with --threads).\n2504 samples (1270 females, 1234 males; 2497 founders) loaded from\n/Users/xx20081/Documents/local_data/genome_reference/ref_1000GP_phase3_plink/all_phase3_nodup.psam.\n48493710 variants loaded from\n/Users/xx20081/Documents/local_data/genome_reference/ref_1000GP_phase3_plink/all_phase3_nodup.pvar.\n2 categorical phenotypes loaded.\n--extract: 9 variants remaining.\nCalculating allele frequencies... done.\n9 variants remaining after main filters.\n--r-phased: Variant IDs written to\n/Users/xx20081/Downloads/ld_mat.phased.vcor1.vars .\n--r-phased: Matrix written to /Users/xx20081/Downloads/ld_mat.phased.vcor1 .\n\nEnd time: Wed Dec 13 20:45:56 2023" #> attr(,"allele_info") #> RSID CHR BP REF ALT #> 1 rs7899632 10 100000625 A G @@ -177,7 +177,7 @@

Harmonise data against LD matr

 # harmonised data 
-harm <- harmonise(gwas, data.table::copy(gwas), gwas1_trait="exposure", gwas2_trait="outcome", merge=c("RSID"="RSID"))
+harm <- harmonise(gwas, data.table::copy(gwas), gwas1_trait="exposure", gwas2_trait="outcome", merge=c("RSID"="RSID"))
 
 # mess up the alleles vs reference
 harm <- harm[RSID_exposure=="rs7899632", c("EA_exposure","EA_outcome","OA_exposure","OA_outcome") := list("A","A","G","G")]
diff --git a/docs/articles/standardise_gwas.html b/docs/articles/standardise_gwas.html
index 298df78..2ee9b51 100644
--- a/docs/articles/standardise_gwas.html
+++ b/docs/articles/standardise_gwas.html
@@ -105,7 +105,7 @@ 

Standardise GWAS

 library(genepi.utils)
 
-gwas <- data.table::fread(system.file("extdata", "example2_gwas_sumstats.tsv", package="genepi.utils"))
+gwas <- data.table::fread(system.file("extdata", "example2_gwas_sumstats.tsv", package="genepi.utils"))
 
 gwas
 #>           MARKER   CHR       POS   BETA    SE     P      EAF     A1     A2
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
index 421953a..7b1c1ca 100644
--- a/docs/pkgdown.yml
+++ b/docs/pkgdown.yml
@@ -10,5 +10,5 @@ articles:
   ld_matrix: ld_matrix.html
   plotting: plotting.html
   standardise_gwas: standardise_gwas.html
-last_built: 2023-12-05T20:07Z
+last_built: 2023-12-13T20:44Z
 
diff --git a/docs/reference/ColliderBiasResult.html b/docs/reference/ColliderBiasResult.html
index 2443f8a..b605e8a 100644
--- a/docs/reference/ColliderBiasResult.html
+++ b/docs/reference/ColliderBiasResult.html
@@ -85,7 +85,7 @@ 

Collider bias result object

intercept = NA_real_, bse = NA_real_, entropy = NA_real_, - fit = data.table::data.table() + fit = data.table::data.table() )
@@ -115,6 +115,10 @@

Arguments

a numeric, the estimated correction factor

+
intercept
+

a numeric, the y slope intercept

+ +
bse

a numeric, the standard error of the correction factor

diff --git a/docs/reference/analyse_collider_bias.html b/docs/reference/analyse_collider_bias.html index 0801a07..f769e8e 100644 --- a/docs/reference/analyse_collider_bias.html +++ b/docs/reference/analyse_collider_bias.html @@ -121,6 +121,10 @@

Arguments

a character vector of collider bias method function names to run

+
tsmr_method
+

a string; a valid TwoSampleMR::mr() method_list parameter

+ +
ip

a numeric, range 0-1, the initial p-value by which to filter the incidence GWAS

diff --git a/docs/reference/drug_target_proxy.html b/docs/reference/drug_target_proxy.html index 99927fe..bcc87b3 100644 --- a/docs/reference/drug_target_proxy.html +++ b/docs/reference/drug_target_proxy.html @@ -90,7 +90,7 @@

Create a drug target proxy instrument

kb = 250, join_key = "RSID", QTL_list = list(), - concordance = data.table::data.table(data_name_1 = character(), data_name_2 = + concordance = data.table::data.table(data_name_1 = character(), data_name_2 = character(), concordant = logical()) )

diff --git a/docs/reference/index.html b/docs/reference/index.html index abf75fe..c441e41 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -204,6 +204,10 @@

Plotting miami()

Miami plot

+ +

qq_plot()

+ +

QQ plot

eaf_plot()

@@ -212,6 +216,10 @@

Plotting generate_random_gwas_data()

Generate random GWAS data

+ +

plot_coloc_probabilities()

+ +

Coloc probability plot

Setup

Functions for package configuration

diff --git a/docs/reference/plot_coloc_probabilities.html b/docs/reference/plot_coloc_probabilities.html new file mode 100644 index 0000000..fbcce8e --- /dev/null +++ b/docs/reference/plot_coloc_probabilities.html @@ -0,0 +1,135 @@ + +Coloc probability plot — plot_coloc_probabilities • genepi.utils + + +
+
+ + + +
+
+ + +
+

A plotting wrapper for the coloc package. Produces a ggplot for either +the prior or posterior probability sensitivity analyses. See the +coloc +package vignettes for details.

+
+ +
+
plot_coloc_probabilities(coloc, rule = "H4 > 0.5", type = "prior")
+
+ +
+

Arguments

+
coloc
+

coloc object, output from coloc::coloc.abf()

+ + +
rule
+

a string, a valid rule indicating success e.g. "H4 > 0.5"

+ + +
type
+

a string, either prior or posterior

+ +
+
+

Value

+ + +

a ggplot

+
+
+

References

+

coloc

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/reference/qq_plot.html b/docs/reference/qq_plot.html new file mode 100644 index 0000000..7929521 --- /dev/null +++ b/docs/reference/qq_plot.html @@ -0,0 +1,164 @@ + +QQ plot — qq_plot • genepi.utils + + +
+
+ + + +
+
+ + +
+

QQ plot

+
+ +
+
qq_plot(
+  gwas,
+  pval_col = "P",
+  colours = list(raw = "#2166AC", corrected = "#B2182B"),
+  title = NULL,
+  subtitle = NULL,
+  plot_corrected = FALSE,
+  facet_grid_row_col = NULL,
+  facet_grid_col_col = NULL,
+  facet_nrow = NULL,
+  facet_ncol = NULL
+)
+
+ +
+

Arguments

+
gwas
+

a data.frame like object or valid file path

+ + +
pval_col
+

the P value column

+ + +
colours
+

a 2 element list of colour codes (1-the uncorrected points, 2-the GC corrected points)

+ + +
title
+

a string, the title for the plot

+ + +
subtitle
+

a string, the subtitle for the plot

+ + +
plot_corrected
+

a logical, whether to apply and plot the lambda correction

+ + +
facet_grid_row_col
+

a string, the column name in gwas by which to facet the plot (rows)

+ + +
facet_grid_col_col
+

a string, the column name in gwas by which to facet the plot (cols)

+ + +
facet_nrow
+

an integer, passed to facet_wrap, the number of rows to facet by (if only facet_grid_row_col is provided)

+ + +
facet_ncol
+

an integer, passed to facet_wrap, the number of cols to facet by (if only facet_grid_col_col is provided)

+ +
+
+

Value

+ + +

a ggplot

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 0423afd..b8adada 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -138,6 +138,9 @@ /reference/mr_collider_bias.html + + /reference/plot_coloc_probabilities.html + /reference/plot_correction_stability.html @@ -156,6 +159,9 @@ /reference/plot_slopehunter_iters.html + + /reference/qq_plot.html + /reference/set_1000G_reference.html diff --git a/man/ColliderBiasResult.Rd b/man/ColliderBiasResult.Rd index 43918d9..5325fb2 100644 --- a/man/ColliderBiasResult.Rd +++ b/man/ColliderBiasResult.Rd @@ -30,6 +30,8 @@ ColliderBiasResult( \item{b}{a numeric, the estimated correction factor} +\item{intercept}{a numeric, the y slope intercept} + \item{bse}{a numeric, the standard error of the correction factor} \item{entropy}{a numeric, the entropy - output from the Slope-hunter method} diff --git a/man/analyse_collider_bias.Rd b/man/analyse_collider_bias.Rd index 01e83db..fcbf819 100644 --- a/man/analyse_collider_bias.Rd +++ b/man/analyse_collider_bias.Rd @@ -27,6 +27,8 @@ analyse_collider_bias( \item{methods}{a character vector of collider bias method function names to run} +\item{tsmr_method}{a string; a valid \code{TwoSampleMR::mr()} \code{method_list} parameter} + \item{ip}{a numeric, range 0-1, the initial p-value by which to filter the incidence GWAS} \item{pi0}{a numeric, range 0-1, the initial weight / percentage of SNPs that affect incidence only} diff --git a/man/plot_coloc_probabilities.Rd b/man/plot_coloc_probabilities.Rd new file mode 100644 index 0000000..3e17e61 --- /dev/null +++ b/man/plot_coloc_probabilities.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/colocalisation.R +\name{plot_coloc_probabilities} +\alias{plot_coloc_probabilities} +\title{Coloc probability plot} +\usage{ +plot_coloc_probabilities(coloc, rule = "H4 > 0.5", type = "prior") +} +\arguments{ +\item{coloc}{coloc object, output from \code{coloc::coloc.abf()}} + +\item{rule}{a string, a valid rule indicating success e.g. "H4 > 0.5"} + +\item{type}{a string, either \code{prior} or \code{posterior}} +} +\value{ +a ggplot +} +\description{ +A plotting wrapper for the \code{coloc} package. Produces a ggplot for either +the prior or posterior probability sensitivity analyses. See the +\href{https://chr1swallace.github.io/coloc/articles/a04_sensitivity.html}{coloc} +package vignettes for details. +} +\references{ +\href{https://chr1swallace.github.io/coloc/articles/a04_sensitivity.html}{coloc} +} diff --git a/man/qq_plot.Rd b/man/qq_plot.Rd new file mode 100644 index 0000000..e333293 --- /dev/null +++ b/man/qq_plot.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/qq_plot.R +\name{qq_plot} +\alias{qq_plot} +\title{QQ plot} +\usage{ +qq_plot( + gwas, + pval_col = "P", + colours = list(raw = "#2166AC", corrected = "#B2182B"), + title = NULL, + subtitle = NULL, + plot_corrected = FALSE, + facet_grid_row_col = NULL, + facet_grid_col_col = NULL, + facet_nrow = NULL, + facet_ncol = NULL +) +} +\arguments{ +\item{gwas}{a data.frame like object or valid file path} + +\item{pval_col}{the P value column} + +\item{colours}{a 2 element list of colour codes (1-the uncorrected points, 2-the GC corrected points)} + +\item{title}{a string, the title for the plot} + +\item{subtitle}{a string, the subtitle for the plot} + +\item{plot_corrected}{a logical, whether to apply and plot the lambda correction} + +\item{facet_grid_row_col}{a string, the column name in \code{gwas} by which to facet the plot (rows)} + +\item{facet_grid_col_col}{a string, the column name in \code{gwas} by which to facet the plot (cols)} + +\item{facet_nrow}{an integer, passed to facet_wrap, the number of rows to facet by (if only facet_grid_row_col is provided)} + +\item{facet_ncol}{an integer, passed to facet_wrap, the number of cols to facet by (if only facet_grid_col_col is provided)} +} +\value{ +a ggplot +} +\description{ +QQ plot +}