add docu for featurecounts

ScienceParkStudyGroup · Feb 7, 2024 · e24d6a6 · e24d6a6
1 parent 668e638
commit e24d6a6
Show file tree

Hide file tree

Showing 13 changed files with 1,283 additions and 11 deletions.
diff --git a/.quarto/idx/index.qmd.json b/.quarto/idx/index.qmd.json
diff --git a/.quarto/xref/7fa5e22e b/.quarto/xref/7fa5e22e
@@ -1 +1 @@
-{"entries":[],"headings":["useful-tutorials","getting-started-with-bash","using-r","bioinformatic-workflows","bioinformatic-tools-a-z"]}
+{"headings":["useful-tutorials","getting-started-with-bash","using-r","bioinformatic-workflows","bioinformatic-tools-a-z"],"entries":[]}
diff --git a/.quarto/xref/INDEX b/.quarto/xref/INDEX
@@ -146,5 +146,8 @@
   },
   "source/core_tools/samtools.qmd": {
     "samtools.html": "44dcd5a6"
+  },
+  "source/core_tools/featurecounts.qmd": {
+    "featurecounts.html": "5e0b8c7f"
   }
 }
diff --git a/_quarto.yml b/_quarto.yml
@@ -90,6 +90,7 @@ website:
             contents:
               - source/core_tools/bowtie.qmd
               - source/core_tools/samtools.qmd
+              - source/core_tools/featurecounts.qmd
 
           - section: "Functional annotation"
             contents:

diff --git a/docs/index.html b/docs/index.html
@@ -285,6 +285,12 @@
   <a href="./source/core_tools/samtools.html" class="sidebar-item-text sidebar-link">
  <span class="menu-text">Samtools</span></a>
   </div>
+</li>
+          <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./source/core_tools/featurecounts.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text">FeatureCounts</span></a>
+  </div>
 </li>
       </ul>
   </li>
@@ -504,6 +510,7 @@ <h2 class="anchored" data-anchor-id="bioinformatic-tools-a-z">Bioinformatic tool
 <li><a href="./source/metagenomics/fama_readme.html">FAMA</a>: A fast pipeline for functional and taxonomic analysis of metagenomic sequences</li>
 <li><a href="./source/metatranscriptomics/fastp.html">FastP</a>: A tool for fast all-in-one preprocessing of FastQ files</li>
 <li><a href="./source/metagenomics/fastqc_readme.html">FastQC</a>: A quality control tool for read sequencing data</li>
+<li><a href="./source/core_tools/featurecounts.html">FeatureCounts</a>: A read summarization program that counts mapped reads for genomic features</li>
 <li><a href="./source/metagenomics/interproscan_readme.html">Interproscan</a>: A tool to scan protein and nucleic sequences against InterPro signatures</li>
 <li><a href="./source/ITSx/itsx_readme.html">ITSx</a>: A tool to extract ITS1 and ITS2 subregions from ITS sequences</li>
 <li><a href="./source/classification/kraken2.html">Kraken2</a>: A taxonomic sequence classifier using kmers</li>

diff --git a/docs/search.json b/docs/search.json
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
@@ -74,7 +74,7 @@
   </url>
   <url>
     <loc>https://scienceparkstudygroup.github.io/software_information/index.html</loc>
-    <lastmod>2024-02-07T09:33:30.693Z</lastmod>
+    <lastmod>2024-02-07T09:58:04.659Z</lastmod>
   </url>
   <url>
     <loc>https://scienceparkstudygroup.github.io/software_information/source/ITSx/itsx_readme.html</loc>
@@ -154,6 +154,10 @@
   </url>
   <url>
     <loc>https://scienceparkstudygroup.github.io/software_information/source/core_tools/samtools.html</loc>
-    <lastmod>2024-02-07T09:40:31.306Z</lastmod>
+    <lastmod>2024-02-07T09:42:32.778Z</lastmod>
+  </url>
+  <url>
+    <loc>https://scienceparkstudygroup.github.io/software_information/source/core_tools/featurecounts.html</loc>
+    <lastmod>2024-02-07T09:58:00.468Z</lastmod>
   </url>
 </urlset>
diff --git a/docs/source/core_tools/featurecounts.html b/docs/source/core_tools/featurecounts.html
diff --git a/docs/source/core_tools/samtools.html b/docs/source/core_tools/samtools.html
@@ -616,10 +616,10 @@ <h4 class="anchored" data-anchor-id="samtools-view">Samtools View</h4>
 <li>Count how many alignments have insertion or deletions (<code>wc -l</code>)</li>
 </ol>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co">#count total reads</span></span>
+<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co">#count total alignments</span></span>
 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a><span class="ex">SRR6344904_mapped_sorted.bam</span> <span class="kw">|</span> <span class="fu">wc</span> <span class="at">-l</span>  </span>
 <span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a><span class="co">#count mapped reads with ID events</span></span>
+<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a><span class="co">#count mapped alignments with ID events</span></span>
 <span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a><span class="ex">samtools</span> view <span class="at">-F</span> 4 SRR6344904_mapped_sorted.bam <span class="kw">|</span> <span class="dt">\</span></span>
 <span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a>     <span class="fu">cut</span> <span class="at">-f</span> 6 <span class="kw">|</span> <span class="dt">\</span></span>
 <span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a>     <span class="fu">grep</span> <span class="at">-P</span> <span class="st">'[ID]'</span> <span class="kw">|</span> <span class="dt">\</span></span>

diff --git a/index.qmd b/index.qmd
@@ -43,6 +43,7 @@ Please, be aware that this page is a work in progress and will be slowly updated
 -   [FAMA](source/metagenomics/fama_readme.qmd): A fast pipeline for functional and taxonomic analysis of metagenomic sequences
 -   [FastP](source/metatranscriptomics/fastp.qmd): A tool for fast all-in-one preprocessing of FastQ files
 -   [FastQC](source/metagenomics/fastqc_readme.qmd): A quality control tool for read sequencing data
+-   [FeatureCounts](source/core_tools/featurecounts.qmd): A read summarization program that counts mapped reads for genomic features
 -   [Interproscan](source/metagenomics/interproscan_readme.qmd): A tool to scan protein and nucleic sequences against InterPro signatures
 -   [ITSx](source/ITSx/itsx_readme.qmd): A tool to extract ITS1 and ITS2 subregions from ITS sequences
 -   [Kraken2](source/classification/kraken2.qmd): A taxonomic sequence classifier using kmers

diff --git a/source/core_tools/featurecounts.qmd b/source/core_tools/featurecounts.qmd
@@ -0,0 +1,57 @@
+---
+code-block-bg: true
+code-block-border-left: "#31BAE9"
+execute:
+  eval: false
+engine: knitr
+bibliography: references.bib
+---
+
+<div style="text-align: justify">
+
+## FeatureCounts
+
+### Introduction
+
+FeatureCounts is part of the Subread software package, a tool kit for processing next-gen sequencing data [@Liao2014]. It includes Subread aligner, Subjunc exon-exon junction detector and featureCounts read summarization program.
+
+FeatureCounts is a program that counts how many reads map to features, such as genes, exons, promoters and genomic bins. It is therefore useful when you have, for example, aligned sequences (from a genome, metagenome or transcriptome) to reference sequences and want to generate a count table.
+
+Detailed documentation is available on the [featureCounts website](https://subread.sourceforge.net/featureCounts.html).
+
+### Installation
+
+Installed on crunchomics: No
+
+If you want to install it yourself, you can run:
+
+```{bash}
+mamba create -n subread_2.0.6
+mamba install -n subread_2.0.6 -c bioconda subread=2.0.6
+mamba activate subread_2.0.6
+```
+
+### Usage
+
+FeatureCounts takes as input an annotation file in GTF or GFF format and a sorted BAM file.
+
+It outputs a text file with the counts for each feature (in our example CDS) per sample. Note that you can use a wildcard to generate a counts table for multiple BAM files at the same time.
+
+```{bash}
+featureCounts -T 5 -t CDS -g gene_id -M \
+    -a data/genome/genomic.gtf \
+    -o  results/featurecounts/ncbi_gtf/counts.txt \
+    results/bowtie/*_mapped_sorted.bam
+```
+
+Useful options:
+
+-   `-a <string>` Name of an annotation file. GTF/GFF format by default. See -F option for more format information. Inbuilt annotations (SAF format) are available in 'annotation' directory of the package. Gzipped file is also accepted.
+-   `-o <string>` Name of output file including read counts. A separate file including summary statistics of counting results is also included in the output (`<string>.summary`). Both files are in tab delimited format.
+-   `-t <string>` Specify feature type(s) in a GTF annotation. If multiple types are provided, they should be separated by ',' with no space in between. 'exon' by default. Rows in the annotation with a matched feature will be extracted and used for read mapping.
+-   `-g <string>` Specify attribute type in GTF annotation. 'gene_id' by default. Meta-features used for read counting will be extracted from annotation using the provided value.
+-   `-M` Multi-mapping reads will also be counted. For a multi-mapping read, all its reported alignments will be counted. The 'NH' tag in BAM/SAM input is used to detect multi-mapping reads.
+-   `-L` Count long reads such as Nanopore and PacBio reads. Long read counting can only run in one thread and only reads (not read-pairs) can be counted. There is no limitation on the number of 'M' operations allowed in a CIGAR string in long read counting.
+-   `--maxMOp <int>` Maximum number of 'M' operations allowed in a CIGAR string. 10 by default. Both 'X' and '=' are treated as 'M' and adjacent 'M' operations are merged in the CIGAR string.
+-   `-p` If specified, libraries are assumed to contain paired-end reads. For any library that contains paired-end reads, the 'countReadPairs' parameter controls if read pairs or reads should be counted.
+-   `-s <int or string>` Perform strand-specific read counting. A single integer value (applied to all input files) or a string of comma-separated values (applied to each corresponding input file) should be provided. Possible values include: 0 (unstranded), 1 (stranded) and 2 (reversely stranded). Default value is 0 (i.e. unstranded read counting carried out for all input files).
diff --git a/source/core_tools/references.bib b/source/core_tools/references.bib
@@ -58,3 +58,18 @@ @article{Danecek2021
 	url = {http://dx.doi.org/10.1093/gigascience/giab008},
 	langid = {en}
 }
+
+@article{Liao2014,
+	title = {featureCounts: an efficient general purpose program for assigning sequence reads to genomic features},
+	author = {Liao, Yang and Smyth, Gordon K. and Shi, Wei},
+	year = {2013},
+	month = {11},
+	date = {2013-11-13},
+	journal = {Bioinformatics},
+	pages = {923--930},
+	volume = {30},
+	number = {7},
+	doi = {10.1093/bioinformatics/btt656},
+	url = {http://dx.doi.org/10.1093/bioinformatics/btt656},
+	langid = {en}
+}
diff --git a/source/core_tools/samtools.qmd b/source/core_tools/samtools.qmd
@@ -109,10 +109,10 @@ We can adjust this to ask very specific questions about our data as well. For ex
 4.  Count how many alignments have insertion or deletions (`wc -l`)
 
 ```{bash} 
-#count total reads
+#count total alignments
 SRR6344904_mapped_sorted.bam | wc -l  
 
-#count mapped reads with ID events
+#count mapped alignments with ID events
 samtools view -F 4 SRR6344904_mapped_sorted.bam | \
      cut -f 6 | \
      grep -P '[ID]' | \
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		{"entries":[],"headings":["useful-tutorials","getting-started-with-bash","using-r","bioinformatic-workflows","bioinformatic-tools-a-z"]}
		{"headings":["useful-tutorials","getting-started-with-bash","using-r","bioinformatic-workflows","bioinformatic-tools-a-z"],"entries":[]}