Skip to content

Commit

Permalink
QQ plot and coloc plotting
Browse files Browse the repository at this point in the history
  • Loading branch information
nicksunderland committed Dec 13, 2023
1 parent d349883 commit 91ec5aa
Show file tree
Hide file tree
Showing 27 changed files with 746 additions and 108 deletions.
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@ export(lift)
export(manhattan)
export(miami)
export(mr_collider_bias)
export(plot_coloc_probabilities)
export(plot_correction_stability)
export(plot_drug_proxy_instrument)
export(plot_slope)
export(plot_slopehunter_iters)
export(qq_plot)
export(set_1000G_reference)
export(set_dbsnp_directory)
export(set_plink2)
Expand Down Expand Up @@ -76,9 +78,12 @@ importFrom(progressr,progressor)
importFrom(progressr,with_progress)
importFrom(rtracklayer,import.chain)
importFrom(rtracklayer,liftOver)
importFrom(stats,as.formula)
importFrom(stats,cov)
importFrom(stats,median)
importFrom(stats,pchisq)
importFrom(stats,pnorm)
importFrom(stats,qchisq)
importFrom(stats,rnorm)
importFrom(stats,runif)
importFrom(stats,sd)
Expand Down
2 changes: 1 addition & 1 deletion R/class_column_map.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ ColumnMapping <- function() {
ieu_ukb = list(SNP="SNP",BETA="BETA",SE="SE",EA="ALLELE1", OA="ALLELE0", EAF="A1FREQ", P="P_BOLT_LMM_INF"),
ieugwasr = list(RSID="rsid",CHR="chr",BP="position","EA"="ea",OA="nea",EAF="eaf",BETA="beta",SE="se", P="p", N="n"),
ns_map = list(SNP="MARKER",CHR="CHR",BP="POS",EA="A1", OA="A2", BETA="BETA", SE="SE", EAF="EAF", P="P"),
gwama = list(SNP="rs_number",EA="reference_allele", OA="other_allele", BETA="beta", SE="se", EAF="eaf", P="p-value", BETA_95U="beta_95U",BETA_95L="beta_95L", Z="z", LOG10_P="_-log10_p-value", Q_STAT="q_statistic",I2="i2", N_STUDIES="n_studies", N="n_samples", EFFECTS="effects"),
gwama = list(RSID="rs_number",CHR="chromosome",BP="position",EA="reference_allele", OA="other_allele", BETA="beta", SE="se", EAF="eaf", P="p-value", BETA_95U="beta_95U",BETA_95L="beta_95L", Z="z", LOG10_P="_-log10_p-value", Q_STAT="q_statistic",I2="i2", N_STUDIES="n_studies", N="n_samples", EFFECTS="effects"),
giant = list(RSID="SNP",SNP="missing",CHR="CHR", BP="POS","EA"="Tested_Allele", OA="Other_Allele", EAF="Freq_Tested_Allele", BETA="BETA", SE="SE", P="P", N="N", INFO="INFO")
)

Expand Down
50 changes: 31 additions & 19 deletions R/clump.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,33 +69,45 @@ clump <- function(gwas,
# run clumping
system(cmd)

# the clumped plink output
clumped <- data.table::fread(paste0(plink_output,".clumps"), nThread=parallel::detectCores())
# check for clump output
clump_path <- paste0(plink_output,".clumps")

# add index
clumped[, clump := seq_len(.N)]
if(file.exists(clump_path)) {

# pivot longer
clumped_long <- clumped[, list(clump_member = unlist(data.table::tstrsplit(SP2, ","))), by=c("ID","clump")]
# the clumped plink output
clumped <- data.table::fread(clump_path, nThread=parallel::detectCores())

# clean e.g. rs1763611(G) --> rs1763611
clumped_long[, clump_member := sub("(rs[0-9]+).*", "\\1", clump_member)]
# add index
clumped[, clump := seq_len(.N)]

# set the key
data.table::setkey(clumped_long, ID)
data.table::setkey(gwas, RSID)
# pivot longer
clumped_long <- clumped[, list(clump_member = unlist(data.table::tstrsplit(SP2, ","))), by=c("ID","clump")]

# flag the index SNPs with TRUE and the clump number
gwas[clumped_long, c("index","clump") := list(TRUE, clump)]
# clean e.g. rs1763611(G) --> rs1763611
clumped_long[, clump_member := sub("(rs[0-9]+).*", "\\1", clump_member)]

# set key to the clump member rsID to join again
data.table::setkey(clumped_long, clump_member)
# set the key
data.table::setkey(clumped_long, ID)
data.table::setkey(gwas, RSID)

# flag the clump members as not the index SNP and with the clump number
gwas[clumped_long, c("index", "clump") := list(FALSE, i.clump)]
# flag the index SNPs with TRUE and the clump number
gwas[clumped_long, c("index","clump") := list(TRUE, clump)]

# code clump as a factor
gwas[, clump := factor(clump, levels=sort(unique(gwas$clump)))]
# set key to the clump member rsID to join again
data.table::setkey(clumped_long, clump_member)

# flag the clump members as not the index SNP and with the clump number
gwas[clumped_long, c("index", "clump") := list(FALSE, i.clump)]

# code clump as a factor
gwas[, clump := factor(clump, levels=sort(unique(gwas$clump)))]

} else {

warning("No clumps were found witht he provided parameters, or plink failed")
gwas[, c("index", "clump") := list(FALSE, NA_integer_)]

}

# the (potential) plink clumping log files that might be produced
# TODO: check if there are others
Expand Down
5 changes: 4 additions & 1 deletion R/collider_bias.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
#' @param bse a numeric, the standard error of the correction factor
#' @param pi a numeric, the final Pi value, proportion of SNPs in the Gi SNP cluster
#' @param fit a data.table, the raw points data for plotting table columns `c("SNP_incidence","BETA_incidence","BETA_progression","CLUSTER")`
#' @param intercept a numeric, the y slope intercept
#' @param entropy a numeric, the entropy - output from the Slope-hunter method
#'
#' @return an S3 object of class ColliderBiasResult
#' @export
#'
Expand Down Expand Up @@ -586,6 +588,7 @@ mr_collider_bias <- function(gwas_i,
#' }
#' ...will lead to 27 separate analyses. \cr
#' @inheritParams slopehunter
#' @inheritParams mr_collider_bias
#' @param methods a character vector of collider bias method function names to run
#' @return a list of S3 ColliderBiasResult objects
#' @export
Expand Down Expand Up @@ -639,7 +642,7 @@ analyse_collider_bias <- function(gwas_i,
plot_slope <- function(results) {

# silence RCMD checks
BETA_incidence = BETA_progression = CLUSTER = hjustvar = vjustvar = xpos = ypos = label = slope = intercept = ip = NULL
BETA_incidence = BETA_progression = CLUSTER = hjustvar = vjustvar = xpos = ypos = label = slope = intercept = ip = hunted_snps_label = NULL

# make list of one if a single ColliderBiasResult object
if(inherits(results, "ColliderBiasResult")) {
Expand Down
140 changes: 140 additions & 0 deletions R/colocalisation.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#' @title Coloc probability plot
#' @description
#' A plotting wrapper for the `coloc` package. Produces a ggplot for either
#' the prior or posterior probability sensitivity analyses. See the
#' [coloc](https://chr1swallace.github.io/coloc/articles/a04_sensitivity.html)
#' package vignettes for details.
#' @param coloc coloc object, output from `coloc::coloc.abf()`
#' @param rule a string, a valid rule indicating success e.g. "H4 > 0.5"
#' @param type a string, either `prior` or `posterior`
#' @return a ggplot
#' @export
#' @references [coloc](https://chr1swallace.github.io/coloc/articles/a04_sensitivity.html)
plot_coloc_probabilities <- function(coloc, rule="H4 > 0.5", type="prior") {

# RCMD check warnings
h <- x <- NULL

# checks
type <- match.arg(type, choices=c('prior','posterior'))
stopifnot("list" %in% class(coloc))
stopifnot("priors" %in% names(coloc))
stopifnot("summary" %in% names(coloc))
rule.init <- rule
rule <- gsub("(H.)","PP.\\1.abf",rule,perl=TRUE)

# extract the results
results <- coloc$results
pp <- coloc$summary
p12 <- coloc$priors["p12"]
p1 <- coloc$priors["p1"]
p2 <- coloc$priors["p2"]

# do the tests
check <- function(pp) { with(as.list(pp),eval(parse(text=rule))) }
testp12 <- 10^seq(log10(p1*p2),log10(min(p1,p1)),length.out=100)
testH <- prior.snp2hyp(pp["nsnps"],p12=testp12,p1=p1,p2=p2)
testpp <- data.table::as.data.table(prior.adjust(summ=pp,newp12=testp12,p1=p1,p2=p2,p12=p12))
names(testpp) <- gsub("(H.)","PP.\\1.abf",colnames(testpp),perl=TRUE)
pass <- check(testpp)
w <- which(pass)

# base plot to add to
p <- ggplot2::ggplot()

# which plot to create
if(type=="prior") {

# create the data.frame for prior probabilities
prior_dat <- data.table::data.table(testH)
prior_dat[, x := testp12]
prior_dat <- data.table::melt(prior_dat,
id.vars = c("x"),
measure.vars = paste0("H",0:4),
variable.name = "h",
value.name = "p")
prior_dat[, h := factor(h, levels=paste0("H",0:4))]

p_max <- max(prior_dat$p[prior_dat$h != "H0"], na.rm=T)

if(any(pass)) {
p <- p + ggplot2::geom_rect(ggplot2::aes(xmin=testp12[min(w)], xmax=testp12[max(w)], ymin=0, ymax=p_max), fill="green", alpha=0.1, color="green")
}

p <- p +
ggplot2::geom_point(data = prior_dat, mapping = ggplot2::aes(x=x, y=p, fill=h), color="#333333", shape=21, size=3, stroke=0.25) +
ggplot2::lims(y=c(0,p_max)) +
ggplot2::labs(title = "Prior probabilities", subtitle = paste("shaded region:",rule.init))

} else if(type=="posterior") {

# create the data.frame for posterior probabilities
posterior_dat <- data.table::data.table(as.matrix(testpp))
posterior_dat[, x := testp12]
posterior_dat <- data.table::melt(posterior_dat,
id.vars = c("x"),
measure.vars = paste0("PP.H",0:4,".abf"),
variable.name = "h",
value.name = "p")
posterior_dat[, h := factor(h, levels=paste0("PP.H",0:4,".abf"))]

p_max <- max(posterior_dat$p, na.rm=T)

if(any(pass)) {
p <- p + ggplot2::geom_rect(ggplot2::aes(xmin=testp12[min(w)], xmax=testp12[max(w)], ymin=0, ymax=p_max), fill="green", alpha=0.1, color="green")
}

p <- p +
ggplot2::geom_point(data = posterior_dat, mapping = ggplot2::aes(x=x, y=p, fill=h), color="#333333", shape=21, size=3, stroke=0.25) +
ggplot2::lims(y=c(0,p_max)) +
ggplot2::labs(title = "Posterior probabilities", subtitle = paste("shaded region:",rule.init))

} else {

stop("type parameter error")

}

# add the common elements
p <- p +
ggplot2::scale_fill_manual(values = c("#ffffffff",viridis::viridis(5,alpha=1)[-1])) +
ggplot2::geom_vline(xintercept = p12, linetype="dashed", color="gray") +
ggplot2::scale_x_continuous(trans='log10') +
ggplot2::labs(x="p12", y="Prob") +
ggplot2::annotate("text", x=p12, y=0.5, label="results", angle=90, color="gray40") +
ggplot2::theme(legend.position = "top",
legend.title = ggplot2::element_blank())

# return
return(p)
}


# copied from coloc package to make the above work
prior.adjust <- function(summ,newp12,p1=1e-4,p2=1e-4,p12=1e-6) {
if(is.list(summ) && "summary" %in% names(summ))
summ <- summ$summary
if(!identical(names(summ), c("nsnps", "PP.H0.abf", "PP.H1.abf", "PP.H2.abf", "PP.H3.abf", "PP.H4.abf")))
stop("not a coloc summary vector")
## back calculate likelihoods
f <- function(p12)
prior.snp2hyp(summ["nsnps"],p12=p12,p1=p1,p2=p2)
pr1 <- f(newp12)
pr0 <- matrix(f(p12),nrow=nrow(pr1),ncol=ncol(pr1),byrow=TRUE)
newpp <- matrix(summ[-1],nrow=nrow(pr1),ncol=ncol(pr1),byrow=TRUE) * pr1/pr0 # prop to, not equal to
newpp/rowSums(newpp)
}


# copied from coloc package to make the above work
prior.snp2hyp <- function(nsnp,p12=1e-6,p1=1e-4,p2=1e-4) {
if(any(p12<p1*p2) || any(p12 > p1) || any(p12 > p2))
return(NULL)
tmp <- cbind(nsnp * p1,
nsnp * p2,
nsnp * (nsnp-1) * p1 * p2,
nsnp * p12)
tmp <- cbind(1-rowSums(tmp),tmp)
colnames(tmp) <- paste0("H",0:4)
tmp
}
34 changes: 19 additions & 15 deletions R/manhattan.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# silence R CMD checks for data.table columns
BETA = BP = CHR = P = SE = SNP = chr_len = tot = x = i.tot = highlight = NULL
BETA = BP = CHR = P = SE = SNP = chr_len = tot = x = i.tot = highlight = secondary_highlight = NULL

#' @title Manhattan plot
#' @description
Expand Down Expand Up @@ -65,7 +65,7 @@ manhattan <- function(gwas,
# annotate_snps = c("15:135277414[b37]G,T")

# create factor from CHR
gwas[, "CHR" := as.factor(CHR)]
gwas[, "CHR" := factor(CHR, levels=c(as.character(1:25), setdiff(as.character(unique(CHR)), as.character(1:25))))]

# down-sample the insignificant dots for plotting
if(downsample > 0) {
Expand Down Expand Up @@ -118,26 +118,19 @@ manhattan <- function(gwas,
)

# add highlighted variants within requested kb BP window
if(!is.null(highlight_snps)) {
if(!is.null(highlight_snps) & length(highlight_snps)>0) {
gwas[SNP %in% highlight_snps, highlight := TRUE]
if(highlight_win > 0) {
highlight_data <- gwas[SNP %in% highlight_snps, ]
for(i in 1:nrow(highlight_data)) {
bp <- highlight_data[[i, "BP"]]
chr<- highlight_data[[i, "CHR"]]
gwas[BP > (bp - highlight_win*1000) & BP < (bp + highlight_win*1000) & CHR == chr, highlight := TRUE]
gwas[BP > (bp - highlight_win*1000) & BP < (bp + highlight_win*1000) & CHR == chr, secondary_highlight := TRUE]
}
}
plot <- plot +
ggplot2::geom_point(data = gwas[highlight==TRUE, ], color=highlight_colour, shape=highlight_shape, alpha=highlight_alpha, size=1.5)
}

# add SNP label annotations
if(!is.null(annotate_snps)) {
gwas[SNP %in% annotate_snps, annotate := TRUE]
label_x_nudge <- max(gwas[["x"]], na.rm=TRUE) / 22
plot <- plot +
ggrepel::geom_label_repel(data=gwas[annotate==TRUE, ], ggplot2::aes(label=SNP), colour="black", label.size = 0.1, nudge_x=label_x_nudge )
ggplot2::geom_point(data = gwas[secondary_highlight==TRUE, ], color=highlight_colour, fill=highlight_colour, alpha=highlight_alpha) +
ggplot2::geom_point(data = gwas[highlight==TRUE, ], color=highlight_colour, fill=highlight_colour, shape=highlight_shape, alpha=highlight_alpha, size=3)
}

# add horizontal significance lines
Expand All @@ -148,6 +141,14 @@ manhattan <- function(gwas,
plot <- plot + ggplot2::geom_hline(yintercept = -log10(sig_line_2), linetype = "dashed", color = "darkgrey")
}

# add SNP label annotations
if(!is.null(annotate_snps) & length(annotate_snps)>0) {
gwas[SNP %in% annotate_snps, annotate := TRUE]
label_x_nudge <- max(gwas[["x"]], na.rm=TRUE) / 22
plot <- plot +
ggrepel::geom_label_repel(data=gwas[annotate==TRUE, ], ggplot2::aes(label=SNP), colour="black", label.size = 0.1, nudge_x=label_x_nudge )
}

# add titles and labels
plot <- plot + ggplot2::labs(title=title, subtitle=subtitle, x="Chromosome", y=log10P)

Expand Down Expand Up @@ -262,12 +263,15 @@ miami <- function(gwases,
if(!is.null(sig_line_2[[2]])) {
max_p <- max(c(max_p, -log10(sig_line_2[[2]])),na.rm=T)
}
y_limits <- c(ceiling(max_p), 0)
y_limits[[2]] <- c(ceiling(max_p), 0)
} else {
# user usually provides in the normal way c(0, upper)
y_limits[[2]] <- rev(y_limits[[2]])
}

# flip the bottom plot
plot_lower <- plot_lower +
ggplot2::scale_y_reverse(limits=y_limits, expand=c(0, 0)) +
ggplot2::scale_y_reverse(limits=y_limits[[2]], expand=c(0, 0)) +
ggplot2::scale_x_continuous(expand=c(0.01, 0.01), position = "top") +
ggplot2::theme(axis.title.x=ggplot2::element_blank(),
axis.line.x=ggplot2::element_blank(),
Expand Down
Loading

0 comments on commit 91ec5aa

Please sign in to comment.