Merge pull request #55 from nf-core/add-ganon-custombuild
Add ganon custombuild
jfy133 authored Nov 28, 2024
2 parents 9d18e74 + a64fe78 commit c1dae31
Showing 22 changed files with 680 additions and 226 deletions.
4 changes: 4 additions & 0 deletions CITATIONS.md
@@ -40,6 +40,10 @@

> Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: estimating species abundance in metagenomics data. PeerJ. Computer Science, 3(e104), e104. https://doi.org/10.7717/peerj-cs.104
- [ganon](https://doi.org/10.1093/bioinformatics/btaa458)

> Piro, V. C., Dadi, T. H., Seiler, E., Reinert, K., & Renard, B. Y. (2020). Ganon: Precise metagenomics classification against large and up-to-date sets of reference sequences. Bioinformatics (Oxford, England), 36(Suppl_1), i12–i20. https://doi.org/10.1093/bioinformatics/btaa458
- [Centrifuge](https://doi.org/10.1101/gr.210641.116)

> Kim, D., Song, L., Breitwieser, F. P., & Salzberg, S. L. (2016). Centrifuge: rapid and sensitive classification of metagenomic sequences. Genome Research, 26(12), 1721–1729. https://doi.org/10.1101/gr.210641.116
1 change: 1 addition & 0 deletions README.md
@@ -34,6 +34,7 @@
2. Builds databases for:
- [Bracken](https://doi.org/10.7717/peerj-cs.104)
- [Centrifuge](https://doi.org/10.1101/gr.210641.116)
- [ganon](https://doi.org/10.1093/bioinformatics/btaa458)
- [DIAMOND](https://doi.org/10.1038/nmeth.3176)
- [Kaiju](https://doi.org/10.1038/ncomms11257)
- [Kraken2](https://doi.org/10.1186/s13059-019-1891-0)
8 changes: 6 additions & 2 deletions conf/modules.config
@@ -18,8 +18,8 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]

-withName: 'MULTIQC' {
-    ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
+withName: MULTIQC {
+    ext.args = { params.multiqc_title ? "--title \"${params.multiqc_title}\"" : '' }
publishDir = [
path: { "${params.outdir}/multiqc" },
mode: params.publish_dir_mode,
@@ -47,6 +47,10 @@ process {
]
}

withName: GANON_BUILD {
ext.args = { "--verbose" }
}

withName: MALT_BUILD {
ext.args = { "--sequenceType ${params.malt_sequencetype}" }
}
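
The default `ext.args` set above for the new ganon module can be overridden from a user config in the usual nf-core way. A minimal sketch, assuming the standard `-c custom.config` mechanism; the extra `--max-fp` flag is a hypothetical example of a `ganon build-custom` option, not something this PR sets:

```groovy
// custom.config — supplied at runtime with `-c custom.config`.
// Replaces the pipeline default of `--verbose` for the GANON_BUILD process.
// `--max-fp` is shown as an assumed extra ganon build-custom flag;
// check `ganon build-custom --help` for the options your version supports.
process {
    withName: GANON_BUILD {
        ext.args = { "--verbose --max-fp 0.001" }
    }
}
```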
1 change: 1 addition & 0 deletions conf/test.config
@@ -31,6 +31,7 @@ params {

build_bracken = true
build_diamond = true
build_ganon = true
build_kaiju = true
build_malt = true
build_centrifuge = true
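With `build_ganon = true` added to the test profile, the ganon build path is exercised by the standard nf-core test invocation. A sketch, assuming Nextflow and a container engine are available; `<OUTDIR>` is a placeholder:

```shell
# Run the pipeline's small test profile, including the new ganon build step.
nextflow run nf-core/createtaxdb -profile test,docker --outdir <OUTDIR>
```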
17 changes: 9 additions & 8 deletions conf/test_full.config
@@ -17,13 +17,14 @@ params {
// Input data for full size test
// TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA)
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv'

-build_bracken = true
-build_diamond = true
-build_kaiju = true
-build_malt = true
-build_centrifuge = true
-build_kraken2 = true
-build_krakenuniq = true
+build_bracken = true
+build_diamond = true
+build_ganon = true
+build_kaiju = true
+build_malt = true
+build_centrifuge = true
+build_kraken2 = true
+build_krakenuniq = true
}
1 change: 1 addition & 0 deletions conf/test_nothing.config
@@ -22,6 +22,7 @@ params {

build_bracken = false
build_diamond = false
build_ganon = false
build_kaiju = false
build_malt = false
build_centrifuge = false
17 changes: 16 additions & 1 deletion docs/output.md
@@ -14,7 +14,8 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d

- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
-- [Bracken](#bracken) - Database files for Brakcen
+- [Bracken](#bracken) - Database files for Bracken
+- [ganon](#ganon) - Database files for ganon
- [Centrifuge](#centrifuge) - Database files for Centrifuge
- [DIAMOND](#diamond) - Database files for DIAMOND
- [Kaiju](#kaiju) - Database files for Kaiju
@@ -92,6 +93,20 @@ The resulting `<db_name>/` directory can be given to Bracken itself with `bracke

A directory and `cf` files can be given to the Centrifuge command with `centrifuge -x /<path>/<to>/<cf_files_basename>` etc.

### Ganon

[ganon](https://github.com/pirovc/ganon/) efficiently classifies genomic sequences against large sets of reference sequences, with integrated database download and update (RefSeq/GenBank), taxonomic profiling (NCBI/GTDB), binning and hierarchical classification, customized reporting, and more.

<details markdown="1">
<summary>Output files</summary>

- `ganon/`
- `<database>.hibf`: main bloom filter index file
- `<database>.tax`: taxonomy tree used for taxonomy assignment
</details>

The directory containing these two files can be given to ganon itself, using the shared file name (without extension) as the database prefix, e.g. `ganon classify -d /<path>/<to>/<database name without extensions>`.
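As a concrete sketch of the handover described above — file names are placeholders, and `--db-prefix`, `--single-reads`, and `--output-prefix` are assumed `ganon classify` options, so check `ganon classify --help` for your version:

```shell
# The pipeline wrote results/ganon/<db_name>.hibf and results/ganon/<db_name>.tax;
# ganon takes the shared prefix, without either extension.
ganon classify \
    --db-prefix results/ganon/<db_name> \
    --single-reads sample.fastq.gz \
    --output-prefix sample_profile
```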

### Diamond

[DIAMOND](https://github.com/bbuchfink/diamond) is an accelerated, BLAST-compatible local sequence aligner particularly used for protein alignment.
89 changes: 40 additions & 49 deletions main.nf
@@ -15,64 +15,20 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

include { CREATETAXDB } from './workflows/createtaxdb'
include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_createtaxdb_pipeline'
include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_createtaxdb_pipeline'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
NAMED WORKFLOWS FOR PIPELINE
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

//
// WORKFLOW: Run main analysis pipeline depending on type of input
//
workflow NFCORE_CREATETAXDB {

take:
samplesheet // channel: samplesheet read in from --input

main:

//
// WORKFLOW: Run pipeline
//
ch_samplesheet = samplesheet
ch_taxonomy_namesdmp = file(params.namesdmp)
ch_taxonomy_nodesdmp = file(params.nodesdmp)
ch_accession2taxid = file(params.accession2taxid)
ch_nucl2taxid = file(params.nucl2taxid)
ch_prot2taxid = file(params.prot2taxid)
ch_malt_mapdb = file(params.malt_mapdb)


CREATETAXDB (
ch_samplesheet,
ch_taxonomy_namesdmp,
ch_taxonomy_nodesdmp,
ch_accession2taxid,
ch_nucl2taxid,
ch_prot2taxid,
ch_malt_mapdb,

)
emit:
multiqc_report = CREATETAXDB.out.multiqc_report // channel: /path/to/multiqc_report.html
}
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RUN MAIN WORKFLOW
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

workflow {

main:
//
// SUBWORKFLOW: Run initialisation tasks
//
-PIPELINE_INITIALISATION (
+PIPELINE_INITIALISATION(
params.version,
params.validate_params,
params.monochrome_logs,
@@ -84,13 +40,13 @@ workflow {
//
// WORKFLOW: Run main workflow
//
-NFCORE_CREATETAXDB (
+NFCORE_CREATETAXDB(
PIPELINE_INITIALISATION.out.samplesheet
)
//
// SUBWORKFLOW: Run completion tasks
//
-PIPELINE_COMPLETION (
+PIPELINE_COMPLETION(
params.email,
params.email_on_fail,
params.plaintext_email,
@@ -103,6 +59,41 @@

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    THE END
+    NAMED WORKFLOWS FOR PIPELINE
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

//
// WORKFLOW: Run main analysis pipeline depending on type of input
//
workflow NFCORE_CREATETAXDB {
take:
samplesheet // channel: samplesheet read in from --input

main:

//
// WORKFLOW: Run pipeline
//
ch_samplesheet = samplesheet
ch_taxonomy_namesdmp = file(params.namesdmp, checkIfExists: true)
ch_taxonomy_nodesdmp = file(params.nodesdmp, checkIfExists: true)
ch_accession2taxid = file(params.accession2taxid, checkIfExists: true)
ch_nucl2taxid = file(params.nucl2taxid, checkIfExists: true)
ch_prot2taxid = file(params.prot2taxid, checkIfExists: true)
ch_malt_mapdb = file(params.malt_mapdb, checkIfExists: true)


CREATETAXDB(
ch_samplesheet,
ch_taxonomy_namesdmp,
ch_taxonomy_nodesdmp,
ch_accession2taxid,
ch_nucl2taxid,
ch_prot2taxid,
ch_malt_mapdb
)

emit:
multiqc_report = CREATETAXDB.out.multiqc_report // channel: /path/to/multiqc_report.html
}
5 changes: 5 additions & 0 deletions modules.json
@@ -30,6 +30,11 @@
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
},
"ganon/buildcustom": {
"branch": "master",
"git_sha": "4265ef4b3b9af8877671715b081f102041c64cfd",
"installed_by": ["modules"]
},
"gunzip": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
5 changes: 5 additions & 0 deletions modules/nf-core/ganon/buildcustom/environment.yml


60 changes: 60 additions & 0 deletions modules/nf-core/ganon/buildcustom/main.nf


77 changes: 77 additions & 0 deletions modules/nf-core/ganon/buildcustom/meta.yml

