QQ plot and coloc plotting

nicksunderland · Dec 13, 2023 · 91ec5aa · 91ec5aa
1 parent d349883
commit 91ec5aa
Show file tree

Hide file tree

Showing 27 changed files with 746 additions and 108 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -29,10 +29,12 @@ export(lift)
 export(manhattan)
 export(miami)
 export(mr_collider_bias)
+export(plot_coloc_probabilities)
 export(plot_correction_stability)
 export(plot_drug_proxy_instrument)
 export(plot_slope)
 export(plot_slopehunter_iters)
+export(qq_plot)
 export(set_1000G_reference)
 export(set_dbsnp_directory)
 export(set_plink2)
@@ -76,9 +78,12 @@ importFrom(progressr,progressor)
 importFrom(progressr,with_progress)
 importFrom(rtracklayer,import.chain)
 importFrom(rtracklayer,liftOver)
+importFrom(stats,as.formula)
 importFrom(stats,cov)
+importFrom(stats,median)
 importFrom(stats,pchisq)
 importFrom(stats,pnorm)
+importFrom(stats,qchisq)
 importFrom(stats,rnorm)
 importFrom(stats,runif)
 importFrom(stats,sd)

diff --git a/R/class_column_map.R b/R/class_column_map.R
@@ -12,7 +12,7 @@ ColumnMapping <- function() {
  ieu_ukb = list(SNP="SNP",BETA="BETA",SE="SE",EA="ALLELE1", OA="ALLELE0", EAF="A1FREQ", P="P_BOLT_LMM_INF"),
  ieugwasr = list(RSID="rsid",CHR="chr",BP="position","EA"="ea",OA="nea",EAF="eaf",BETA="beta",SE="se", P="p", N="n"),
  ns_map = list(SNP="MARKER",CHR="CHR",BP="POS",EA="A1", OA="A2", BETA="BETA", SE="SE", EAF="EAF", P="P"),
- gwama = list(SNP="rs_number",EA="reference_allele", OA="other_allele", BETA="beta", SE="se", EAF="eaf", P="p-value", BETA_95U="beta_95U",BETA_95L="beta_95L", Z="z", LOG10_P="_-log10_p-value", Q_STAT="q_statistic",I2="i2", N_STUDIES="n_studies", N="n_samples", EFFECTS="effects"),
+ gwama = list(RSID="rs_number",CHR="chromosome",BP="position",EA="reference_allele", OA="other_allele", BETA="beta", SE="se", EAF="eaf", P="p-value", BETA_95U="beta_95U",BETA_95L="beta_95L", Z="z", LOG10_P="_-log10_p-value", Q_STAT="q_statistic",I2="i2", N_STUDIES="n_studies", N="n_samples", EFFECTS="effects"),
  giant = list(RSID="SNP",SNP="missing",CHR="CHR", BP="POS","EA"="Tested_Allele", OA="Other_Allele", EAF="Freq_Tested_Allele", BETA="BETA", SE="SE", P="P", N="N", INFO="INFO")
  )
 

diff --git a/R/clump.R b/R/clump.R
@@ -69,33 +69,45 @@ clump <- function(gwas,
  # run clumping
  system(cmd)
 
- # the clumped plink output
- clumped <- data.table::fread(paste0(plink_output,".clumps"), nThread=parallel::detectCores())
+ # check for clump output
+ clump_path <- paste0(plink_output,".clumps")
 
- # add index
- clumped[, clump := seq_len(.N)]
+ if(file.exists(clump_path)) {
 
- # pivot longer
- clumped_long <- clumped[, list(clump_member = unlist(data.table::tstrsplit(SP2, ","))), by=c("ID","clump")]
+  # the clumped plink output
+   clumped <- data.table::fread(clump_path, nThread=parallel::detectCores())
 
- # clean e.g. rs1763611(G) --> rs1763611
- clumped_long[, clump_member := sub("(rs[0-9]+).*", "\\1", clump_member)]
+  # add index
+  clumped[, clump := seq_len(.N)]
 
- # set the key
- data.table::setkey(clumped_long, ID)
- data.table::setkey(gwas, RSID)
+ # pivot longer
+ clumped_long <- clumped[, list(clump_member = unlist(data.table::tstrsplit(SP2, ","))), by=c("ID","clump")]
 
- # flag the index SNPs with TRUE and the clump number
- gwas[clumped_long, c("index","clump") := list(TRUE, clump)]
+  # clean e.g. rs1763611(G) --> rs1763611
+  clumped_long[, clump_member := sub("(rs[0-9]+).*", "\\1", clump_member)]
 
- # set key to the clump member rsID to join again
- data.table::setkey(clumped_long, clump_member)
+ # set the key
+ data.table::setkey(clumped_long, ID)
+ data.table::setkey(gwas, RSID)
 
- # flag the clump members as not the index SNP and with the clump number
- gwas[clumped_long, c("index", "clump") := list(FALSE, i.clump)]
+  # flag the index SNPs with TRUE and the clump number
+  gwas[clumped_long, c("index","clump") := list(TRUE, clump)]
 
- # code clump as a factor
- gwas[, clump := factor(clump, levels=sort(unique(gwas$clump)))]
+ # set key to the clump member rsID to join again
+ data.table::setkey(clumped_long, clump_member)
+
+ # flag the clump members as not the index SNP and with the clump number
+ gwas[clumped_long, c("index", "clump") := list(FALSE, i.clump)]
+
+ # code clump as a factor
+ gwas[, clump := factor(clump, levels=sort(unique(gwas$clump)))]
+
+ } else {
+
+ warning("No clumps were found witht he provided parameters, or plink failed")
+ gwas[, c("index", "clump") := list(FALSE, NA_integer_)]
+
+ }
 
  # the (potential) plink clumping log files that might be produced
  # TODO: check if there are others

diff --git a/R/collider_bias.R b/R/collider_bias.R
@@ -9,7 +9,9 @@
 #' @param bse a numeric, the standard error of the correction factor
 #' @param pi a numeric, the final Pi value, proportion of SNPs in the Gi SNP cluster
 #' @param fit a data.table, the raw points data for plotting table columns `c("SNP_incidence","BETA_incidence","BETA_progression","CLUSTER")`
+#' @param intercept a numeric, the y slope intercept
 #' @param entropy a numeric, the entropy - output from the Slope-hunter method
+#'
 #' @return an S3 object of class ColliderBiasResult
 #' @export
 #'
@@ -586,6 +588,7 @@ mr_collider_bias <- function(gwas_i,
 #' }
 #' ...will lead to 27 separate analyses. \cr
 #' @inheritParams slopehunter
+#' @inheritParams mr_collider_bias
 #' @param methods a character vector of collider bias method function names to run
 #' @return a list of S3 ColliderBiasResult objects
 #' @export
@@ -639,7 +642,7 @@ analyse_collider_bias <- function(gwas_i,
 plot_slope <- function(results) {
 
  # silence RCMD checks
- BETA_incidence = BETA_progression = CLUSTER = hjustvar = vjustvar = xpos = ypos = label = slope = intercept = ip = NULL
+ BETA_incidence = BETA_progression = CLUSTER = hjustvar = vjustvar = xpos = ypos = label = slope = intercept = ip = hunted_snps_label = NULL
 
  # make list of one if a single ColliderBiasResult object
  if(inherits(results, "ColliderBiasResult")) {

diff --git a/R/colocalisation.R b/R/colocalisation.R
@@ -0,0 +1,140 @@
+#' @title Coloc probability plot
+#' @description
+#' A plotting wrapper for the `coloc` package. Produces a ggplot for either
+#' the prior or posterior probability sensitivity analyses. See the
+#' [coloc](https://chr1swallace.github.io/coloc/articles/a04_sensitivity.html)
+#' package vignettes for details.
+#' @param coloc coloc object, output from `coloc::coloc.abf()`
+#' @param rule a string, a valid rule indicating success e.g. "H4 > 0.5"
+#' @param type a string, either `prior` or `posterior`
+#' @return a ggplot
+#' @export
+#' @references [coloc](https://chr1swallace.github.io/coloc/articles/a04_sensitivity.html)
+plot_coloc_probabilities <- function(coloc, rule="H4 > 0.5", type="prior") {
+
+ # RCMD check warnings
+ h <- x <- NULL
+
+ # checks
+ type <- match.arg(type, choices=c('prior','posterior'))
+ stopifnot("list" %in% class(coloc))
+ stopifnot("priors" %in% names(coloc))
+ stopifnot("summary" %in% names(coloc))
+ rule.init <- rule
+ rule <- gsub("(H.)","PP.\\1.abf",rule,perl=TRUE)
+
+ # extract the results
+ results <- coloc$results
+ pp <- coloc$summary
+ p12 <- coloc$priors["p12"]
+ p1 <- coloc$priors["p1"]
+ p2 <- coloc$priors["p2"]
+
+ # do the tests
+ check <- function(pp) { with(as.list(pp),eval(parse(text=rule))) }
+ testp12 <- 10^seq(log10(p1*p2),log10(min(p1,p1)),length.out=100)
+ testH <- prior.snp2hyp(pp["nsnps"],p12=testp12,p1=p1,p2=p2)
+ testpp <- data.table::as.data.table(prior.adjust(summ=pp,newp12=testp12,p1=p1,p2=p2,p12=p12))
+ names(testpp) <- gsub("(H.)","PP.\\1.abf",colnames(testpp),perl=TRUE)
+ pass <- check(testpp)
+ w <- which(pass)
+
+ # base plot to add to
+ p <- ggplot2::ggplot()
+
+ # which plot to create
+ if(type=="prior") {
+
+ # create the data.frame for prior probabilities
+ prior_dat <- data.table::data.table(testH)
+ prior_dat[, x := testp12]
+ prior_dat <- data.table::melt(prior_dat,
+ id.vars = c("x"),
+ measure.vars = paste0("H",0:4),
+ variable.name = "h",
+ value.name = "p")
+ prior_dat[, h := factor(h, levels=paste0("H",0:4))]
+
+ p_max <- max(prior_dat$p[prior_dat$h != "H0"], na.rm=T)
+
+ if(any(pass)) {
+ p <- p + ggplot2::geom_rect(ggplot2::aes(xmin=testp12[min(w)], xmax=testp12[max(w)], ymin=0, ymax=p_max), fill="green", alpha=0.1, color="green")
+ }
+
+ p <- p +
+ ggplot2::geom_point(data = prior_dat, mapping = ggplot2::aes(x=x, y=p, fill=h), color="#333333", shape=21, size=3, stroke=0.25) +
+ ggplot2::lims(y=c(0,p_max)) +
+ ggplot2::labs(title = "Prior probabilities", subtitle = paste("shaded region:",rule.init))
+
+ } else if(type=="posterior") {
+
+ # create the data.frame for posterior probabilities
+ posterior_dat <- data.table::data.table(as.matrix(testpp))
+ posterior_dat[, x := testp12]
+ posterior_dat <- data.table::melt(posterior_dat,
+ id.vars = c("x"),
+ measure.vars = paste0("PP.H",0:4,".abf"),
+ variable.name = "h",
+ value.name = "p")
+ posterior_dat[, h := factor(h, levels=paste0("PP.H",0:4,".abf"))]
+
+ p_max <- max(posterior_dat$p, na.rm=T)
+
+ if(any(pass)) {
+ p <- p + ggplot2::geom_rect(ggplot2::aes(xmin=testp12[min(w)], xmax=testp12[max(w)], ymin=0, ymax=p_max), fill="green", alpha=0.1, color="green")
+ }
+
+ p <- p +
+ ggplot2::geom_point(data = posterior_dat, mapping = ggplot2::aes(x=x, y=p, fill=h), color="#333333", shape=21, size=3, stroke=0.25) +
+ ggplot2::lims(y=c(0,p_max)) +
+ ggplot2::labs(title = "Posterior probabilities", subtitle = paste("shaded region:",rule.init))
+
+ } else {
+
+ stop("type parameter error")
+
+ }
+
+ # add the common elements
+ p <- p +
+ ggplot2::scale_fill_manual(values = c("#ffffffff",viridis::viridis(5,alpha=1)[-1])) +
+ ggplot2::geom_vline(xintercept = p12, linetype="dashed", color="gray") +
+ ggplot2::scale_x_continuous(trans='log10') +
+ ggplot2::labs(x="p12", y="Prob") +
+ ggplot2::annotate("text", x=p12, y=0.5, label="results", angle=90, color="gray40") +
+ ggplot2::theme(legend.position = "top",
+ legend.title = ggplot2::element_blank())
+
+ # return
+ return(p)
+}
+
+
+# copied from coloc package to make the above work
+prior.adjust <- function(summ,newp12,p1=1e-4,p2=1e-4,p12=1e-6) {
+ if(is.list(summ) && "summary" %in% names(summ))
+ summ <- summ$summary
+ if(!identical(names(summ), c("nsnps", "PP.H0.abf", "PP.H1.abf", "PP.H2.abf", "PP.H3.abf", "PP.H4.abf")))
+ stop("not a coloc summary vector")
+ ## back calculate likelihoods
+ f <- function(p12)
+ prior.snp2hyp(summ["nsnps"],p12=p12,p1=p1,p2=p2)
+ pr1 <- f(newp12)
+ pr0 <- matrix(f(p12),nrow=nrow(pr1),ncol=ncol(pr1),byrow=TRUE)
+ newpp <- matrix(summ[-1],nrow=nrow(pr1),ncol=ncol(pr1),byrow=TRUE) * pr1/pr0 # prop to, not equal to
+ newpp/rowSums(newpp)
+}
+
+
+# copied from coloc package to make the above work
+prior.snp2hyp <- function(nsnp,p12=1e-6,p1=1e-4,p2=1e-4) {
+ if(any(p12<p1*p2) || any(p12 > p1) || any(p12 > p2))
+ return(NULL)
+ tmp <- cbind(nsnp * p1,
+ nsnp * p2,
+ nsnp * (nsnp-1) * p1 * p2,
+ nsnp * p12)
+ tmp <- cbind(1-rowSums(tmp),tmp)
+ colnames(tmp) <- paste0("H",0:4)
+ tmp
+}
diff --git a/R/manhattan.R b/R/manhattan.R
@@ -1,5 +1,5 @@
 # silence R CMD checks for data.table columns
-BETA = BP = CHR = P = SE = SNP = chr_len = tot = x = i.tot = highlight = NULL
+BETA = BP = CHR = P = SE = SNP = chr_len = tot = x = i.tot = highlight = secondary_highlight = NULL
 
 #' @title Manhattan plot
 #' @description
@@ -65,7 +65,7 @@ manhattan <- function(gwas,
  # annotate_snps = c("15:135277414[b37]G,T")
 
  # create factor from CHR
- gwas[, "CHR" := as.factor(CHR)]
+ gwas[, "CHR" := factor(CHR, levels=c(as.character(1:25), setdiff(as.character(unique(CHR)), as.character(1:25))))]
 
  # down-sample the insignificant dots for plotting
  if(downsample > 0) {
@@ -118,26 +118,19 @@ manhattan <- function(gwas,
  )
 
  # add highlighted variants within requested kb BP window
- if(!is.null(highlight_snps)) {
+ if(!is.null(highlight_snps) & length(highlight_snps)>0) {
  gwas[SNP %in% highlight_snps, highlight := TRUE]
  if(highlight_win > 0) {
  highlight_data <- gwas[SNP %in% highlight_snps, ]
  for(i in 1:nrow(highlight_data)) {
  bp <- highlight_data[[i, "BP"]]
  chr<- highlight_data[[i, "CHR"]]
- gwas[BP > (bp - highlight_win*1000) & BP < (bp + highlight_win*1000) & CHR == chr, highlight := TRUE]
+ gwas[BP > (bp - highlight_win*1000) & BP < (bp + highlight_win*1000) & CHR == chr, secondary_highlight := TRUE]
  }
  }
  plot <- plot +
- ggplot2::geom_point(data = gwas[highlight==TRUE, ], color=highlight_colour, shape=highlight_shape, alpha=highlight_alpha, size=1.5)
- }
-
- # add SNP label annotations
- if(!is.null(annotate_snps)) {
- gwas[SNP %in% annotate_snps, annotate := TRUE]
- label_x_nudge <- max(gwas[["x"]], na.rm=TRUE) / 22
- plot <- plot +
- ggrepel::geom_label_repel(data=gwas[annotate==TRUE, ], ggplot2::aes(label=SNP), colour="black", label.size = 0.1, nudge_x=label_x_nudge )
+ ggplot2::geom_point(data = gwas[secondary_highlight==TRUE, ], color=highlight_colour, fill=highlight_colour, alpha=highlight_alpha) +
+ ggplot2::geom_point(data = gwas[highlight==TRUE, ], color=highlight_colour, fill=highlight_colour, shape=highlight_shape, alpha=highlight_alpha, size=3)
  }
 
  # add horizontal significance lines
@@ -148,6 +141,14 @@ manhattan <- function(gwas,
  plot <- plot + ggplot2::geom_hline(yintercept = -log10(sig_line_2), linetype = "dashed", color = "darkgrey")
  }
 
+ # add SNP label annotations
+ if(!is.null(annotate_snps) & length(annotate_snps)>0) {
+ gwas[SNP %in% annotate_snps, annotate := TRUE]
+ label_x_nudge <- max(gwas[["x"]], na.rm=TRUE) / 22
+ plot <- plot +
+ ggrepel::geom_label_repel(data=gwas[annotate==TRUE, ], ggplot2::aes(label=SNP), colour="black", label.size = 0.1, nudge_x=label_x_nudge )
+ }
+
  # add titles and labels
  plot <- plot + ggplot2::labs(title=title, subtitle=subtitle, x="Chromosome", y=log10P)
 
@@ -262,12 +263,15 @@ miami <- function(gwases,
  if(!is.null(sig_line_2[[2]])) {
  max_p <- max(c(max_p, -log10(sig_line_2[[2]])),na.rm=T)
  }
- y_limits <- c(ceiling(max_p), 0)
+ y_limits[[2]] <- c(ceiling(max_p), 0)
+ } else {
+ # user usually provides in the normal way c(0, upper)
+ y_limits[[2]] <- rev(y_limits[[2]])
  }
 
  # flip the bottom plot
  plot_lower <- plot_lower +
- ggplot2::scale_y_reverse(limits=y_limits, expand=c(0, 0)) +
+ ggplot2::scale_y_reverse(limits=y_limits[[2]], expand=c(0, 0)) +
  ggplot2::scale_x_continuous(expand=c(0.01, 0.01), position = "top") +
  ggplot2::theme(axis.title.x=ggplot2::element_blank(),
  axis.line.x=ggplot2::element_blank(),