This pacakge is written for annotating regions such as ChIPseq peaks, histone
marks or simply a genomic regions with nearest genes. It is written with focus
on compact genomes such fungi and bacteria where intergenic space is very small
(1-2kb) and ChIPseq peaks in tandem or divergent intergenic spaces or
overlapping genes may result in false positive when existing methods like
ChIPpeakAnno
or ChIPseeker
are used.
library(devtools)
devtools::install_github("lakhanp1/markPeaks")
markPeaks
uses TxDB
package to annotate the peaks or genomic regions.
For demonstration purpose, A. nidulans TxDB
package is used in example below.
## org.db
devtools::install_github(
"lakhanp1/fungal_resources/A_nidulans/org.Anidulans.FGSCA4.eg.db",
upgrade = "never")
## TxDb
devtools::install_github(
"lakhanp1/fungal_resources/A_nidulans/TxDb.Anidulans.FGSCA4.AspGD.GFF",
upgrade = "never")
## BSgenome
devtools::install_github(
"lakhanp1/fungal_resources/A_nidulans/BSgenome.Anidulans.FGSCA4.AspGD",
upgrade = "never")
suppressPackageStartupMessages(library(markPeaks))
suppressPackageStartupMessages(library(TxDb.Anidulans.FGSCA4.AspGD.GFF))
suppressPackageStartupMessages(library(org.Anidulans.FGSCA4.eg.db))
txDb <- TxDb.Anidulans.FGSCA4.AspGD.GFF
file_peaks <- system.file("data/test_sample.withCtrl_peak.narrowPeak", package = "markPeaks")
file_output <- "test_sample.withCtrl_peak.narrowPeak.annotation.tab"
#####################################################################
############################
#### Optional features #####
############################
## optional feature 1: exclude peaks overlapping with blacklist regions from annotation
## A. nidulans ChIPseq blacklist regions file. This file has regions in A. nidulans genome which show
## abnormal ChIPseq signal.
file_blacklist <- here::system.file("data", "A_nidulans_FGSC_A4.blacklist_regions.bed", package = "markPeaks")
## optional feature 2: exclude genes coding for tRNA, snRNA etc from annotation
txInfo <- suppressMessages(
AnnotationDbi::select(
x = txDb, keys = AnnotationDbi::keys(x = txDb, keytype = "TXID"),
columns = c("GENEID", "TXNAME", "TXTYPE"), keytype = "TXID")) %>%
dplyr::mutate(TXID = as.character(TXID)) %>%
dplyr::rename(geneId = GENEID, txName = TXNAME, txType = TXTYPE)
txInfo <- dplyr::filter(txInfo, !txType %in% c("tRNA", "rRNA", "snRNA", "snoRNA")) %>%
dplyr::filter(!grepl(pattern = "uORF", x = geneId))
############################
#### peak annotation #####
############################
## Annoate the peaks and store data in
peakAn <- markPeaks::annotate_peaks(
peakFile = file_peaks,
txdb = txDb,
txIds = txInfo$TXID, ## optional arg
fileFormat = "narrowPeak",
promoterLength = 500,
upstreamLimit = 1000,
bidirectionalDistance = 1000,
includeFractionCut = 0.7,
bindingInGene = FALSE,
insideSkewToEndCut = 0.7,
blacklistRegions = file_blacklist, ## optional arg
removePseudo = TRUE,
output = file_output ## optional arg
)
GPL V2