From fb23ab0558b8d6a4bb0357a7151bfb3f52310d15 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 29 Oct 2024 13:03:39 +0100 Subject: [PATCH 1/9] Start adding gagnon --- modules.json | 5 ++ .../nf-core/ganon/buildcustom/environment.yml | 5 ++ modules/nf-core/ganon/buildcustom/main.nf | 60 +++++++++++++ modules/nf-core/ganon/buildcustom/meta.yml | 77 +++++++++++++++++ .../ganon/buildcustom/tests/main.nf.test | 69 +++++++++++++++ .../ganon/buildcustom/tests/main.nf.test.snap | 72 ++++++++++++++++ .../ganon/buildcustom/tests/nextflow.config | 5 ++ .../nf-core/ganon/buildcustom/tests/tags.yml | 2 + workflows/createtaxdb.nf | 85 ++++++++++++------- 9 files changed, 350 insertions(+), 30 deletions(-) create mode 100644 modules/nf-core/ganon/buildcustom/environment.yml create mode 100644 modules/nf-core/ganon/buildcustom/main.nf create mode 100644 modules/nf-core/ganon/buildcustom/meta.yml create mode 100644 modules/nf-core/ganon/buildcustom/tests/main.nf.test create mode 100644 modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap create mode 100644 modules/nf-core/ganon/buildcustom/tests/nextflow.config create mode 100644 modules/nf-core/ganon/buildcustom/tests/tags.yml diff --git a/modules.json b/modules.json index cd86b5a..f681212 100644 --- a/modules.json +++ b/modules.json @@ -30,6 +30,11 @@ "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, + "ganon/buildcustom": { + "branch": "master", + "git_sha": "58b4d685b1c93429917fec530f5d656aca3f2ef6", + "installed_by": ["modules"] + }, "gunzip": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", diff --git a/modules/nf-core/ganon/buildcustom/environment.yml b/modules/nf-core/ganon/buildcustom/environment.yml new file mode 100644 index 0000000..0e073d5 --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::ganon=2.1.0 diff --git a/modules/nf-core/ganon/buildcustom/main.nf b/modules/nf-core/ganon/buildcustom/main.nf new file mode 100644 index 0000000..212e49f --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/main.nf @@ -0,0 +1,60 @@ +process GANON_BUILDCUSTOM { + tag "${meta.id}" + label 'process_high' + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/ganon:2.1.0--py310hab1bfa5_1' + : 'biocontainers/ganon:2.1.0--py310hab1bfa5_1'}" + + input: + tuple val(meta), path(input) + val input_type + path taxonomy_files + path genome_size_files + + output: + tuple val(meta), path("*.{hibf,ibf,tax}"), emit: db + tuple val(meta), path("*.info.tsv"), emit: info, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_cmd = input_type == 'fasta' ? "--input ${input}" : input_type == 'tsv' ? "--input-file ${input}" : error("Invalid input type: ${input_type}. Options: fasta, tsv") + def taxonomy_args = taxonomy_files ? "--taxonomy-files ${taxonomy_files}" : "" + def genome_size_args = genome_size_files ? 
"--genome-size-files ${genome_size_files}" : "" + """ + ganon \\ + build-custom \\ + --threads ${task.cpus} \\ + --input ${input} \\ + --db-prefix ${prefix} \\ + ${taxonomy_args} \\ + ${genome_size_args} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ganon: \$(echo \$(ganon --version 2>1) | sed 's/.*ganon //g') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def taxonomy_args = taxonomy_files ? "--taxonomy-files ${taxonomy_files}" : "" + def genome_size_args = genome_size_files ? "--genome-size-files ${genome_size_files}" : "" + """ + touch ${prefix}.hibf + touch ${prefix}.tax + touch ${prefix}.info.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ganon: \$(echo \$(ganon --version 2>1) | sed 's/.*ganon //g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ganon/buildcustom/meta.yml b/modules/nf-core/ganon/buildcustom/meta.yml new file mode 100644 index 0000000..39bc073 --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/meta.yml @@ -0,0 +1,77 @@ +name: "ganon_buildcustom" +description: Build ganon database using custom reference sequences. +keywords: + - ganon + - metagenomics + - profiling + - taxonomy + - k-mer + - database +tools: + - "ganon": + description: "ganon classifies short DNA sequences against large sets of genomic + reference sequences efficiently" + homepage: "https://github.com/pirovc/ganon" + documentation: "https://github.com/pirovc/ganon" + tool_dev_url: "https://github.com/pirovc/ganon" + doi: "10.1093/bioinformatics/btaa458" + licence: ["MIT"] + identifier: biotools:ganon +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: | + List of input FASTA files, or a directory containing input FASTA files. + Note you must supply --input-extension via ext.args if FASTA extensions do not end in the default `fna.gz`. + pattern: "*" + - - input_type: + type: string + description: | + Specify whether the file(s) given to the input channel are in FASTA format (and will be supplied as --input) + or in TSV format (and will be supplied as --input-file). For TSV format, the 'file' column should be just the + file name so that it's local to the working directory of this process. + pattern: "fasta|tsv" + - - taxonomy_files: + type: file + description: Pre-downloaded taxonomy files of input sequences. See ganon docs + for formats + - - genome_size_files: + type: file + description: Pre-downloaded NCBI or GTDB genome size files of input sequences. + See ganon docs for formats + pattern: "{species_genome_size.txt.gz,*_metadata.tar.gz}" +output: + - db: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{hibf,ibf,tax}": + type: file + description: ganon database files + pattern: "*.{ibf,tax}" + - info: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.info.tsv": + type: file + description: Copy of target info generated. Can be used for updating database. 
+ pattern: "*info.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/ganon/buildcustom/tests/main.nf.test b/modules/nf-core/ganon/buildcustom/tests/main.nf.test new file mode 100644 index 0000000..8fa4227 --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/tests/main.nf.test @@ -0,0 +1,69 @@ +nextflow_process { + + name "Test Process GANON_BUILDCUSTOM" + script "../main.nf" + process "GANON_BUILDCUSTOM" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "ganon" + tag "ganon/buildcustom" + + test("sarscov2 - genome fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + ] + input[1] = 'fasta' + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.db.get(0).get(1).findAll { file(it).name != "test.tax" }, + process.out.versions + ).match() + }, + { assert file(process.out.db.get(0).get(1).find { file(it).name == "test.tax" }).text.contains("MT192765.1") }, + ) + } + } + + test("sarscov2 - genome fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + ] + input[1] = 'fasta' + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap b/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap new file mode 100644 index 0000000..2c3243f --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2 - genome fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "test.hibf:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tax:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.info.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,9c73293ae36914c6ce3718ad6728ad9e" + ], + "db": [ + [ + { + "id": "test" + }, + [ + "test.hibf:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tax:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "info": [ + [ + { + "id": "test" + }, + "test.info.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,9c73293ae36914c6ce3718ad6728ad9e" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-07T17:00:22.98042261" + }, + "sarscov2 - genome fasta": { + "content": [ + [ + "test.hibf:md5,d10fe6fc6d198696bc15ca85a1459614" + ], + [ + "versions.yml:md5,9c73293ae36914c6ce3718ad6728ad9e" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-07T19:03:25.060306554" + } +} \ No newline at end of file diff --git a/modules/nf-core/ganon/buildcustom/tests/nextflow.config b/modules/nf-core/ganon/buildcustom/tests/nextflow.config new file mode 100644 index 0000000..a12988e --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GANON_BUILDCUSTOM { + ext.args = "--input-target sequence" + } +} diff --git a/modules/nf-core/ganon/buildcustom/tests/tags.yml 
b/modules/nf-core/ganon/buildcustom/tests/tags.yml new file mode 100644 index 0000000..46c2aa4 --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/tests/tags.yml @@ -0,0 +1,2 @@ +ganon/buildcustom: + - "modules/nf-core/ganon/buildcustom/**" diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index 51c2fe6..9fef848 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -4,11 +4,11 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_createtaxdb_pipeline' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_createtaxdb_pipeline' // Preprocessing include { GUNZIP as GUNZIP_DNA } from '../modules/nf-core/gunzip/main' @@ -20,6 +20,7 @@ include { CAT_CAT as CAT_CAT_AA } from '../modules/nf-core/cat/cat/ // Database building (with specific auxiliary modules) include { CENTRIFUGE_BUILD } from '../modules/nf-core/centrifuge/build/main' include { DIAMOND_MAKEDB } from '../modules/nf-core/diamond/makedb/main' +include { GANON_BUILDCUSTOM } from '../modules/nf-core/ganon/buildcustom/main' include { KAIJU_MKFMI } from '../modules/nf-core/kaiju/mkfmi/main' include { KRAKENUNIQ_BUILD } from '../modules/nf-core/krakenuniq/build/main' include { UNZIP } from '../modules/nf-core/unzip/main' @@ -75,8 +76,9 @@ workflow CREATETAXDB { ch_versions = ch_versions.mix(GUNZIP_DNA.out.versions.first()) // Place in single file - ch_singleref_for_dna = CAT_CAT_DNA(ch_prepped_dna_fastas) + CAT_CAT_DNA(ch_prepped_dna_fastas) ch_versions = ch_versions.mix(CAT_CAT_DNA.out.versions.first()) + ch_singleref_for_dna = CAT_CAT_DNA.out } // TODO: Possibly need to have a modification step to get header correct to actually run with kaiju... 
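A note on the ganon input modes introduced by this module: in `--input-file` mode, ganon expects a tab-separated table whose columns are the file name, the target, and the taxonomy node (taxid), with the file name kept relative so it resolves inside the process working directory. The sketch below is illustrative only; the file name, target, taxid, thread count and database prefix are placeholder values borrowed from the test data used later in this series, and the TSV wiring of the module itself is only completed in a later commit.

```bash
# Sketch of a ganon --input-file table (tab-separated: file, target, node/taxid).
printf 'genome.fasta\tSevere_acute_respiratory_syndrome_coronavirus_2\t2697049\n' > ganon_input.tsv

# Roughly the build-custom invocation the module template assembles;
# per the workflow comment, nodes.dmp must be listed before names.dmp.
ganon build-custom \
    --threads 4 \
    --input-file ganon_input.tsv \
    --db-prefix test \
    --taxonomy-files nodes.dmp names.dmp
```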
@@ -101,7 +103,8 @@ workflow CREATETAXDB { ch_prepped_aa_fastas = PIGZ_COMPRESS_AA.out.archive.mix(ch_aa_for_zipping.zipped).groupTuple() //ch_versions = ch_versions.mix( PIGZ_COMPRESS_AA.versions.first() ) - ch_singleref_for_aa = CAT_CAT_AA(ch_prepped_aa_fastas) + CAT_CAT_AA(ch_prepped_aa_fastas) + ch_singleref_for_aa = CAT_CAT_AA.out_file ch_versions = ch_versions.mix(CAT_CAT_AA.out.versions.first()) } @@ -114,7 +117,7 @@ workflow CREATETAXDB { // Module: Run CENTRIFUGE/BUILD if (params.build_centrifuge) { - CENTRIFUGE_BUILD(CAT_CAT_DNA.out.file_out, ch_nucl2taxid, ch_taxonomy_nodesdmp, ch_taxonomy_namesdmp, []) + CENTRIFUGE_BUILD(ch_singleref_for_dna, ch_nucl2taxid, ch_taxonomy_nodesdmp, ch_taxonomy_namesdmp, []) ch_versions = ch_versions.mix(CENTRIFUGE_BUILD.out.versions.first()) ch_centrifuge_output = CENTRIFUGE_BUILD.out.cf } @@ -125,7 +128,7 @@ workflow CREATETAXDB { // MODULE: Run DIAMOND/MAKEDB if (params.build_diamond) { - DIAMOND_MAKEDB(CAT_CAT_AA.out.file_out, ch_prot2taxid, ch_taxonomy_nodesdmp, ch_taxonomy_namesdmp) + DIAMOND_MAKEDB(ch_singleref_for_aa, ch_prot2taxid, ch_taxonomy_nodesdmp, ch_taxonomy_namesdmp) ch_versions = ch_versions.mix(DIAMOND_MAKEDB.out.versions.first()) ch_diamond_output = DIAMOND_MAKEDB.out.db } @@ -133,10 +136,27 @@ workflow CREATETAXDB { ch_diamond_output = Channel.empty() } + if (params.build_ganon) { + ch_ganon_input_tsv = ch_prepped_dna_fastas + .map { meta, file -> + [meta, file] + [file.name(), meta.id, meta.taxid] + } + .map { it.values().join("\t") } + .collectFile { + name: "ganon_input.tsv" + newLine: true + } + + GANON_BUILDCUSTOM(ch_ganon_input_tsv, 'tsv', tax_file, []) + ch_versions = ch_versions.mix(GANON_BUILDCUSTOM.out.versions.first()) + ch_ganon_output = GANON_BUILDCUSTOM.out.db + } + // MODULE: Run KAIJU/MKFMI if (params.build_kaiju) { - KAIJU_MKFMI(CAT_CAT_AA.out.file_out) + KAIJU_MKFMI(ch_singleref_for_aa) ch_versions = ch_versions.mix(KAIJU_MKFMI.out.versions.first()) ch_kaiju_output = KAIJU_MKFMI.out.fmi } @@ -149,7 +169,7 @@ workflow CREATETAXDB { // Condition is inverted because subworkflow asks if you want to 'clean' (true) or not, but pipeline says to 'keep' if (params.build_kraken2 || params.build_bracken) { def k2_keepintermediates = params.kraken2_keepintermediate || params.build_bracken ? false : true - FASTA_BUILD_ADD_KRAKEN2_BRACKEN(CAT_CAT_DNA.out.file_out, ch_taxonomy_namesdmp, ch_taxonomy_nodesdmp, ch_accession2taxid, k2_keepintermediates, params.build_bracken) + FASTA_BUILD_ADD_KRAKEN2_BRACKEN(ch_singleref_for_dna, ch_taxonomy_namesdmp, ch_taxonomy_nodesdmp, ch_accession2taxid, k2_keepintermediates, params.build_bracken) ch_versions = ch_versions.mix(FASTA_BUILD_ADD_KRAKEN2_BRACKEN.out.versions.first()) ch_kraken2_bracken_output = FASTA_BUILD_ADD_KRAKEN2_BRACKEN.out.db } @@ -214,25 +234,31 @@ workflow CREATETAXDB { // // MODULE: MultiQC // - ch_multiqc_config = Channel.fromPath( - "$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? - Channel.fromPath(params.multiqc_config, checkIfExists: true) : - Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? - Channel.fromPath(params.multiqc_logo, checkIfExists: true) : - Channel.empty() - - summary_params = paramsSummaryMap( - workflow, parameters_schema: "nextflow_schema.json") + ch_multiqc_config = Channel.fromPath( + "${projectDir}/assets/multiqc_config.yml", + checkIfExists: true + ) + ch_multiqc_custom_config = params.multiqc_config + ? 
Channel.fromPath(params.multiqc_config, checkIfExists: true) + : Channel.empty() + ch_multiqc_logo = params.multiqc_logo + ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) + : Channel.empty() + + summary_params = paramsSummaryMap( + workflow, + parameters_schema: "nextflow_schema.json" + ) ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) ch_multiqc_files = ch_multiqc_files.mix( - ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? - file(params.multiqc_methods_description, checkIfExists: true) : - file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value( - methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') + ) + ch_multiqc_custom_methods_description = params.multiqc_methods_description + ? file(params.multiqc_methods_description, checkIfExists: true) + : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description) + ) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) ch_multiqc_files = ch_multiqc_files.mix( @@ -250,14 +276,13 @@ workflow CREATETAXDB { [], [] ) - multiqc_report = MULTIQC.out.report.toList() emit: - multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html centrifuge_database = ch_centrifuge_output diamond_database = ch_diamond_output + ganon_database = ch_ganon_output kaiju_database = ch_kaiju_output kraken2_bracken_database = ch_kraken2_bracken_output krakenuniq_database = ch_krakenuniq_output From 4541dad0e4434a738f3f91910e231922f2efa211 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Sat, 9 Nov 2024 16:24:17 +0100 Subject: [PATCH 2/9] Continue work --- conf/modules.config | 4 +++ modules/nf-core/ganon/buildcustom/main.nf | 2 +- nextflow.config | 8 +++--- nextflow_schema.json | 31 +++++++++++++---------- workflows/createtaxdb.nf | 30 ++++++++++++++-------- 5 files changed, 47 insertions(+), 28 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 2ed4477..902867f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -47,6 +47,10 @@ process { ] } + withName: GANON_BUILD { + ext.args = {"--verbose"} + } + withName: MALT_BUILD { ext.args = { "--sequenceType ${params.malt_sequencetype}" } } diff --git a/modules/nf-core/ganon/buildcustom/main.nf b/modules/nf-core/ganon/buildcustom/main.nf index 212e49f..396f8e1 100644 --- a/modules/nf-core/ganon/buildcustom/main.nf +++ b/modules/nf-core/ganon/buildcustom/main.nf @@ -30,7 +30,7 @@ process GANON_BUILDCUSTOM { ganon \\ build-custom \\ --threads ${task.cpus} \\ - --input ${input} \\ + $input_cmd \\ --db-prefix ${prefix} \\ ${taxonomy_args} \\ ${genome_size_args} \\ diff --git a/nextflow.config b/nextflow.config index 6eac678..147fef9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -59,14 +59,16 @@ params { // tool specific options build_bracken = false + build_centrifuge = false build_diamond = false + build_ganon = false build_kaiju = false - build_malt = false - malt_sequencetype = "DNA" - build_centrifuge = false build_kraken2 = false kraken2_keepintermediate = false build_krakenuniq = false + build_malt = false + malt_sequencetype = "DNA" + } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index 0553172..f570b60 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -123,23 +123,15 @@ "fa_icon": "fas fa-toggle-on", "description": "Turn on building of DIAMOND database. Requires amino-acid FASTA file input." }, - "build_kaiju": { + "build_ganon": { "type": "boolean", - "description": "Turn on building of Kaiju database. Requires amino-acid FASTA file input.", + "description": "Turn on building of ganon database. Requires nucleotide FASTA file input.", "fa_icon": "fas fa-toggle-on" }, - "build_malt": { + "build_kaiju": { "type": "boolean", - "fa_icon": "fas fa-toggle-on", - "description": "Turn on building of MALT database. Requires nucleotide FASTA file input." - }, - "malt_sequencetype": { - "type": "string", - "default": "DNA", - "description": "Specify type of input sequence being given to MALT", - "enum": ["DNA", "Protein"], - "help_text": "Use to specify whether the reference sequences are DNA or Protein sequences. (For RNA sequences, use the DNA setting) - from [MALT manual](https://software-ab.cs.uni-tuebingen.de/download/malt/).\n\n> Modifies tool(s) parameter(s)\n> - malt-build: `--sequenceType` ", - "fa_icon": "fas fa-dna" + "description": "Turn on building of Kaiju database. Requires amino-acid FASTA file input.", + "fa_icon": "fas fa-toggle-on" }, "build_kraken2": { "type": "boolean", @@ -155,6 +147,19 @@ "type": "boolean", "fa_icon": "fas fa-toggle-on", "description": "Turn on building of KrakenUniq database. Requires nucleotide FASTA file input." + }, + "build_malt": { + "type": "boolean", + "fa_icon": "fas fa-toggle-on", + "description": "Turn on building of MALT database. Requires nucleotide FASTA file input." 
+ }, + "malt_sequencetype": { + "type": "string", + "default": "DNA", + "description": "Specify type of input sequence being given to MALT", + "enum": ["DNA", "Protein"], + "help_text": "Use to specify whether the reference sequences are DNA or Protein sequences. (For RNA sequences, use the DNA setting) - from [MALT manual](https://software-ab.cs.uni-tuebingen.de/download/malt/).\n\n> Modifies tool(s) parameter(s)\n> - malt-build: `--sequenceType` ", + "fa_icon": "fas fa-dna" } }, "fa_icon": "fas fa-database" diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index 9fef848..f9f69c3 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -57,11 +57,11 @@ workflow CREATETAXDB { // PREPARE: Prepare input for single file inputs modules - if ([params.build_malt, params.build_centrifuge, params.build_kraken2, params.build_bracken, params.build_krakenuniq].any()) { + if ([params.build_malt, params.build_centrifuge, params.build_kraken2, params.build_bracken, params.build_krakenuniq, params.build_ganon].any()) { // Pull just DNA sequences ch_dna_refs_for_singleref = ch_samplesheet - .map { meta, fasta_dna, fasta_aa -> [[id: params.dbname], fasta_dna] } + .map { meta, fasta_dna, fasta_aa -> [meta, fasta_dna] } .filter { meta, fasta_dna -> fasta_dna } @@ -72,7 +72,7 @@ workflow CREATETAXDB { } GUNZIP_DNA(ch_dna_for_unzipping.zipped) - ch_prepped_dna_fastas = GUNZIP_DNA.out.gunzip.mix(ch_dna_for_unzipping.unzipped).groupTuple() + ch_prepped_dna_fastas = GUNZIP_DNA.out.gunzip.mix(ch_dna_for_unzipping.unzipped).tap { ch_prepped_dna_fastas_ungrouped }.map { meta, fasta -> [[id: params.dbname], fasta] }.groupTuple() ch_versions = ch_versions.mix(GUNZIP_DNA.out.versions.first()) // Place in single file @@ -137,18 +137,26 @@ workflow CREATETAXDB { } if (params.build_ganon) { - ch_ganon_input_tsv = ch_prepped_dna_fastas - .map { meta, file -> - [meta, file] - [file.name(), meta.id, meta.taxid] + ch_ganon_input_tsv = ch_prepped_dna_fastas_ungrouped + .map { meta, fasta -> + // I tried with .name() but it kept giving error of `Unknown method invocation `name` on XPath type... not sure why + def fasta_name = fasta.toString().split('/').last() + [fasta_name, meta.id, meta.taxid] } - .map { it.values().join("\t") } - .collectFile { - name: "ganon_input.tsv" + .map { it.join("\t") } + .collectFile ( + name: "ganon_fasta_input.tsv", newLine: true + ) + .map{ + [[id: params.dbname], it] } - GANON_BUILDCUSTOM(ch_ganon_input_tsv, 'tsv', tax_file, []) + // Nodes must come first + ch_ganon_tax_files = Channel.fromPath(ch_taxonomy_nodesdmp).combine(Channel.fromPath(ch_taxonomy_namesdmp)) + + // TODO Fix module so `input_cmd` is used and add test! + GANON_BUILDCUSTOM(ch_ganon_input_tsv, 'tsv', ch_ganon_tax_files, []) ch_versions = ch_versions.mix(GANON_BUILDCUSTOM.out.versions.first()) ch_ganon_output = GANON_BUILDCUSTOM.out.db } From 3227099e5f77609409de31b4c651a6e36a2d3bad Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Sat, 9 Nov 2024 16:24:50 +0100 Subject: [PATCH 3/9] Revert manual change to ganonbuild module (to upstreeam the fix) --- modules/nf-core/ganon/buildcustom/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/ganon/buildcustom/main.nf b/modules/nf-core/ganon/buildcustom/main.nf index 396f8e1..212e49f 100644 --- a/modules/nf-core/ganon/buildcustom/main.nf +++ b/modules/nf-core/ganon/buildcustom/main.nf @@ -30,7 +30,7 @@ process GANON_BUILDCUSTOM { ganon \\ build-custom \\ --threads ${task.cpus} \\ - $input_cmd \\ + --input ${input} \\ --db-prefix ${prefix} \\ ${taxonomy_args} \\ ${genome_size_args} \\ From 796fb7e526c04ed4f4ec5981079714f99fedfbfc Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 28 Nov 2024 10:13:18 +0100 Subject: [PATCH 4/9] Add working ganon-build (missing docs) --- modules.json | 2 +- modules/nf-core/ganon/buildcustom/main.nf | 6 +- modules/nf-core/ganon/buildcustom/meta.yml | 12 ++-- .../ganon/buildcustom/tests/main.nf.test | 55 +++++++++++++++++-- .../ganon/buildcustom/tests/main.nf.test.snap | 47 +++++++++++----- .../ganon/buildcustom/tests/nextflow.config | 2 +- modules/nf-core/malt/build/main.nf | 18 +++--- nextflow.config | 2 +- workflows/createtaxdb.nf | 17 +++--- 9 files changed, 112 insertions(+), 49 deletions(-) diff --git a/modules.json b/modules.json index f681212..ebd0e4f 100644 --- a/modules.json +++ b/modules.json @@ -32,7 +32,7 @@ }, "ganon/buildcustom": { "branch": "master", - "git_sha": "58b4d685b1c93429917fec530f5d656aca3f2ef6", + "git_sha": "4265ef4b3b9af8877671715b081f102041c64cfd", "installed_by": ["modules"] }, "gunzip": { diff --git a/modules/nf-core/ganon/buildcustom/main.nf b/modules/nf-core/ganon/buildcustom/main.nf index 212e49f..41ffd68 100644 --- a/modules/nf-core/ganon/buildcustom/main.nf +++ b/modules/nf-core/ganon/buildcustom/main.nf @@ -8,7 +8,7 @@ process GANON_BUILDCUSTOM { input: tuple val(meta), path(input) - val input_type + path input_tsv path taxonomy_files path genome_size_files @@ -23,14 +23,14 @@ process GANON_BUILDCUSTOM { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def input_cmd = input_type == 'fasta' ? "--input ${input}" : input_type == 'tsv' ? "--input-file ${input}" : error("Invalid input type: ${input_type}. Options: fasta, tsv") + def input_cmd = input_tsv ? "--input-file ${input_tsv}" : "--input ${input}" def taxonomy_args = taxonomy_files ? "--taxonomy-files ${taxonomy_files}" : "" def genome_size_args = genome_size_files ? "--genome-size-files ${genome_size_files}" : "" """ ganon \\ build-custom \\ --threads ${task.cpus} \\ - --input ${input} \\ + ${input_cmd} \\ --db-prefix ${prefix} \\ ${taxonomy_args} \\ ${genome_size_args} \\ diff --git a/modules/nf-core/ganon/buildcustom/meta.yml b/modules/nf-core/ganon/buildcustom/meta.yml index 39bc073..5c481ec 100644 --- a/modules/nf-core/ganon/buildcustom/meta.yml +++ b/modules/nf-core/ganon/buildcustom/meta.yml @@ -28,14 +28,14 @@ input: description: | List of input FASTA files, or a directory containing input FASTA files. Note you must supply --input-extension via ext.args if FASTA extensions do not end in the default `fna.gz`. - pattern: "*" - - - input_type: + pattern: "*.{fasta,fna,fa,fa,fasta.gz,fna.gz,fa.gz,fa.gz}" + - - input_tsv: type: string description: | - Specify whether the file(s) given to the input channel are in FASTA format (and will be supplied as --input) - or in TSV format (and will be supplied as --input-file). 
For TSV format, the 'file' column should be just the - file name so that it's local to the working directory of this process. - pattern: "fasta|tsv" + (Optional) Specify an TSV file containing the paths, and relevant metadata to the input FASTA files to use the `--input-file` option. + The 'file' column should be just the file name of each FASTA file (so that it's local to the working directory of the process). + See ganon documentation for more more information on the other columns. + pattern: "*tsv" - - taxonomy_files: type: file description: Pre-downloaded taxonomy files of input sequences. See ganon docs diff --git a/modules/nf-core/ganon/buildcustom/tests/main.nf.test b/modules/nf-core/ganon/buildcustom/tests/main.nf.test index 8fa4227..9fe3948 100644 --- a/modules/nf-core/ganon/buildcustom/tests/main.nf.test +++ b/modules/nf-core/ganon/buildcustom/tests/main.nf.test @@ -10,16 +10,19 @@ nextflow_process { tag "ganon" tag "ganon/buildcustom" - test("sarscov2 - genome fasta") { + test("sarscov2 - genome - fasta") { when { + params { + module_args = '--input-target sequence' + } process { """ input[0] = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), ] - input[1] = 'fasta' + input[1] = [] input[2] = [] input[3] = [] """ @@ -31,10 +34,49 @@ nextflow_process { { assert process.success }, { assert snapshot( process.out.db.get(0).get(1).findAll { file(it).name != "test.tax" }, - process.out.versions + process.out.versions, + file(process.out.db.get(0).get(1).find { file(it).name == "test.tax" }).text.contains("MT192765.1") ).match() }, - { assert file(process.out.db.get(0).get(1).find { file(it).name == "test.tax" }).text.contains("MT192765.1") }, + ) + } + } + +test("sarscov2 - genome - tsv") { + + when { + params { + module_args = '--input-target file' + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + ] + input[1] = ch_ganon_input_tsv = Channel.of(["genome.fasta", "Severe_acute_respiratory_syndrome_coronavirus_2", "2697049"]). 
+ map { it.join("\t") } + .collectFile ( + name: "ganon_fasta_input.tsv", + newLine: true + ) + input[2] = [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_nodes.dmp', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_names.dmp', checkIfExists: true) + ] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.db.get(0).get(1), + process.out.versions + ).match() + } ) } } @@ -44,13 +86,16 @@ nextflow_process { options "-stub" when { + params { + module_args = '' + } process { """ input[0] = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), ] - input[1] = 'fasta' + input[1] = [] input[2] = [] input[3] = [] """ diff --git a/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap b/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap index 2c3243f..e27a749 100644 --- a/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap +++ b/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap @@ -1,4 +1,36 @@ { + "sarscov2 - genome - tsv": { + "content": [ + [ + "test.hibf:md5,9edfe4c3873d621a88ebcad438dca42c", + "test.tax:md5,e15400a1e43cce61545834695da46465" + ], + [ + "versions.yml:md5,9c73293ae36914c6ce3718ad6728ad9e" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-13T13:37:14.320278404" + }, + "sarscov2 - genome - fasta": { + "content": [ + [ + "test.hibf:md5,d10fe6fc6d198696bc15ca85a1459614" + ], + [ + "versions.yml:md5,9c73293ae36914c6ce3718ad6728ad9e" + ], + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-13T13:36:52.317157496" + }, "sarscov2 - genome fasta - stub": { "content": [ { @@ -53,20 +85,5 @@ "nextflow": "24.04.4" }, "timestamp": "2024-10-07T17:00:22.98042261" - }, - "sarscov2 - genome fasta": { - "content": [ - [ - "test.hibf:md5,d10fe6fc6d198696bc15ca85a1459614" - ], - [ - "versions.yml:md5,9c73293ae36914c6ce3718ad6728ad9e" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-10-07T19:03:25.060306554" } } \ No newline at end of file diff --git a/modules/nf-core/ganon/buildcustom/tests/nextflow.config b/modules/nf-core/ganon/buildcustom/tests/nextflow.config index a12988e..15c20b6 100644 --- a/modules/nf-core/ganon/buildcustom/tests/nextflow.config +++ b/modules/nf-core/ganon/buildcustom/tests/nextflow.config @@ -1,5 +1,5 @@ process { withName: GANON_BUILDCUSTOM { - ext.args = "--input-target sequence" + ext.args = params.module_args } } diff --git a/modules/nf-core/malt/build/main.nf b/modules/nf-core/malt/build/main.nf index 6f05e9e..710c82e 100644 --- a/modules/nf-core/malt/build/main.nf +++ b/modules/nf-core/malt/build/main.nf @@ -1,11 +1,9 @@ process MALT_BUILD { - label 'process_high' - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/malt:0.61--hdfd78af_0' : - 'biocontainers/malt:0.61--hdfd78af_0' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/malt:0.61--hdfd78af_0' + : 'biocontainers/malt:0.61--hdfd78af_0'}" input: path fastas @@ -13,8 +11,8 @@ process MALT_BUILD { path mapping_db output: - path "malt_index/" , emit: index - path "versions.yml" , emit: versions + path "malt_index/", emit: index + path "versions.yml", emit: versions path "malt-build.log", emit: log when: @@ -28,10 +26,10 @@ process MALT_BUILD { malt-build \\ -v \\ --input ${fastas.join(' ')} \\ - $igff \\ + ${igff} \\ -d 'malt_index/' \\ - -t $task.cpus \\ - $args \\ + -t ${task.cpus} \\ + ${args} \\ -mdb ${mapping_db}/*.db |&tee malt-build.log cat <<-END_VERSIONS > versions.yml diff --git a/nextflow.config b/nextflow.config index 147fef9..eab3400 100644 --- a/nextflow.config +++ b/nextflow.config @@ -218,7 +218,7 @@ set -C # No clobber - prevent output redirection from overwriting files. // Disable process selector warnings by default. Use debug profile to enable warnings. nextflow.enable.configProcessNamesValidation = false -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') +trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index f9f69c3..34712c7 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -61,8 +61,8 @@ workflow CREATETAXDB { // Pull just DNA sequences ch_dna_refs_for_singleref = ch_samplesheet - .map { meta, fasta_dna, fasta_aa -> [meta, fasta_dna] } - .filter { meta, fasta_dna -> + .map { meta, fasta_dna, _fasta_aa -> [meta, fasta_dna] } + .filter { _meta, fasta_dna -> fasta_dna } @@ -72,7 +72,8 @@ workflow CREATETAXDB { } GUNZIP_DNA(ch_dna_for_unzipping.zipped) - ch_prepped_dna_fastas = GUNZIP_DNA.out.gunzip.mix(ch_dna_for_unzipping.unzipped).tap { ch_prepped_dna_fastas_ungrouped }.map { meta, fasta -> [[id: params.dbname], fasta] }.groupTuple() + ch_prepped_dna_fastas_ungrouped = GUNZIP_DNA.out.gunzip.mix(ch_dna_for_unzipping.unzipped) + ch_prepped_dna_fastas = ch_prepped_dna_fastas_ungrouped.map { meta, fasta -> [[id: params.dbname], fasta] }.groupTuple() ch_versions = ch_versions.mix(GUNZIP_DNA.out.versions.first()) // Place in single file @@ -137,6 +138,9 @@ workflow CREATETAXDB { } if (params.build_ganon) { + + ch_ganon_input_fastas = ch_prepped_dna_fastas_ungrouped.collect() + ch_ganon_input_tsv = ch_prepped_dna_fastas_ungrouped .map { meta, fasta -> // I tried with .name() but it kept giving error of `Unknown method invocation `name` on XPath type... not sure why @@ -144,19 +148,18 @@ workflow CREATETAXDB { [fasta_name, meta.id, meta.taxid] } .map { it.join("\t") } - .collectFile ( + .collectFile( name: "ganon_fasta_input.tsv", newLine: true ) - .map{ + .map { [[id: params.dbname], it] } // Nodes must come first ch_ganon_tax_files = Channel.fromPath(ch_taxonomy_nodesdmp).combine(Channel.fromPath(ch_taxonomy_namesdmp)) - // TODO Fix module so `input_cmd` is used and add test! - GANON_BUILDCUSTOM(ch_ganon_input_tsv, 'tsv', ch_ganon_tax_files, []) + GANON_BUILDCUSTOM(ch_prepped_dna_fastas, ch_ganon_input_tsv.map { _meta, tsv -> tsv }, ch_ganon_tax_files, []) ch_versions = ch_versions.mix(GANON_BUILDCUSTOM.out.versions.first()) ch_ganon_output = GANON_BUILDCUSTOM.out.db } From 7408229bc846feb1eeed877b4e47a87a4b941a6d Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 28 Nov 2024 12:13:25 +0100 Subject: [PATCH 5/9] Add better input validation tests --- conf/modules.config | 6 +- nextflow.config | 236 +++++++++--------- .../utils_nfcore_createtaxdb_pipeline/main.nf | 27 +- workflows/createtaxdb.nf | 4 +- 4 files changed, 150 insertions(+), 123 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 902867f..f1b2838 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,8 +18,8 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: 'MULTIQC' { - ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + withName: MULTIQC { + ext.args = { params.multiqc_title ? "--title \"${params.multiqc_title}\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, @@ -48,7 +48,7 @@ process { } withName: GANON_BUILD { - ext.args = {"--verbose"} + ext.args = { "--verbose" } } withName: MALT_BUILD { diff --git a/nextflow.config b/nextflow.config index eab3400..65cdbb4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,14 +11,14 @@ params { // TODO nf-core: Specify your pipeline's command line flags // Input options - input = null + input = null // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' - multiqc_methods_description = null + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null // Boilerplate options outdir = null @@ -35,40 +35,39 @@ params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' // Config options - config_profile_name = null - config_profile_description = null + config_profile_name = null + config_profile_description = null - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_contact = null - config_profile_url = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null // Schema validation default options - validate_params = true + validate_params = true // General parameters - dbname = null - save_concatenated_fastas = false + dbname = null + save_concatenated_fastas = false - accession2taxid = null - prot2taxid = null - nucl2taxid = null - nodesdmp = null - namesdmp = null - malt_mapdb = null + accession2taxid = null + prot2taxid = null + nucl2taxid = null + nodesdmp = null + namesdmp = null + malt_mapdb = null // tool specific options - build_bracken = false - build_centrifuge = false - build_diamond = false - build_ganon = false - build_kaiju = false - build_kraken2 = false - kraken2_keepintermediate = false - build_krakenuniq = false - build_malt = false - malt_sequencetype = "DNA" - + build_bracken = false + build_centrifuge = false + build_diamond = false + build_ganon = false + build_kaiju = false + build_kraken2 = false + kraken2_keepintermediate = false + build_krakenuniq = false + build_malt = false + malt_sequencetype = "DNA" } // Load base.config by default for all pipelines @@ -76,90 +75,90 @@ includeConfig 'conf/base.config' profiles { debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false 
nextflow.enable.configProcessNamesValidation = true } conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - conda.channels = ['conda-forge', 'bioconda'] - apptainer.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - docker.runOptions = '-u $(id -u):$(id -g)' + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false } apptainer { - apptainer.enabled = true - apptainer.autoMounts = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + 
singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false } wave { apptainer.ociAutoPull = true @@ -169,14 +168,19 @@ profiles { wave.strategy = 'conda,container' } gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB + } + test { + includeConfig 'conf/test.config' + } + test_full { + includeConfig 'conf/test_full.config' + } + test_nothing { + includeConfig 'conf/test_nothing.config' } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } - test_nothing { includeConfig 'conf/test_nothing.config' } - } // Load nf-core custom profiles from different Institutions @@ -188,10 +192,10 @@ includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${pa // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled // Set to your registry if you have a mirror of containers -apptainer.registry = 'quay.io' -docker.registry = 'quay.io' -podman.registry = 'quay.io' -singularity.registry = 'quay.io' +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' charliecloud.registry = 'quay.io' // Export these variables to prevent local Python/R libraries from conflicting with those in the container @@ -218,7 +222,7 @@ set -C # No clobber - prevent output redirection from overwriting files. // Disable process selector warnings by default. Use debug profile to enable warnings. nextflow.enable.configProcessNamesValidation = false -trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') +trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') timeline { enabled = true file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" @@ -249,17 +253,17 @@ manifest { // Nextflow plugins plugins { - id 'nf-schema@2.1.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.1.1' } validation { defaultIgnoreParams = ["genomes"] help { - enabled = true - command = "nextflow run $manifest.name -profile --input samplesheet.csv --outdir " - fullParameter = "help_full" + enabled = true + command = "nextflow run ${manifest.name} -profile --input samplesheet.csv --outdir " + fullParameter = "help_full" showHiddenParameter = "show_hidden" - beforeText = """ + beforeText = """ -\033[2m----------------------------------------------------\033[0m- \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m \033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m @@ -269,7 +273,7 @@ validation { \033[0;35m ${manifest.name} ${manifest.version}\033[0m -\033[2m----------------------------------------------------\033[0m- """ - afterText = """${manifest.doi ? "* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} + afterText = """${manifest.doi ? "* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/', '')}" }.join("\n")}${manifest.doi ? 
"\n" : ""} * The nf-core framework https://doi.org/10.1038/s41587-020-0439-x @@ -279,7 +283,7 @@ validation { } summary { beforeText = validation.help.beforeText - afterText = validation.help.afterText + afterText = validation.help.afterText } } diff --git a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf index c86caa0..589b799 100644 --- a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf @@ -130,10 +130,35 @@ workflow PIPELINE_COMPLETION { // def validateInputParameters() { - // Validate DIAMOND parameter combinations + // Validate CENTRIFUGE auxiliary file combinations + if (params.build_centrifuge && [!params.nucl2taxid, !params.nodesdmp, !params.namesdmp].any()) { + error('[nf-core/createtaxdb] Supplied --build_centrifuge, but missing at least one of: --nucl2taxid, --nodesdmp, or --namesdmp (all are mandatory for CENTRIFUGE)') + } + + // Validate DIAMOND auxiliary file combinations if (params.build_diamond && [!params.prot2taxid, !params.nodesdmp, !params.namesdmp].any()) { error('[nf-core/createtaxdb] Supplied --build_diamond, but missing at least one of: --prot2taxid, --nodesdmp, or --namesdmp (all are mandatory for DIAMOND)') } + + // Validate GANON parameter combinations + if (params.build_ganon && [!params.nodesdmp, !params.namesdmp].any()) { + error('[nf-core/createtaxdb] Supplied --build_ganon, but missing at least one of: --nodesdmp, or --namesdmp (all are mandatory for GANON)') + } + + // Validate BRACKEN/KRAKEN parameter combinations + if ((params.build_bracken || params.build_kraken2) && [!params.accession2taxid, !params.nodesdmp, !params.namesdmp].any()) { + error('[nf-core/createtaxdb] Supplied --build_kraken2 or --bracken, but missing at least one of: --accession2taxid, --nodesdmp, or --namesdmp (all are mandatory for BRACKEN/KRAKEN2)') + } + + // Validate KRAKENUNIQ auxiliary file combinations + if (params.build_krakenuniq && [!params.nucl2taxid, !params.nodesdmp, !params.namesdmp].any()) { + error('[nf-core/createtaxdb] Supplied --build_krakenuniq, but missing at least one of: --nucl2taxid, --nodesdmp, or --namesdmp (all are mandatory for KRAKENUNIQ)') + } + + // Validate MALT auxiliary file combinations + if (params.build_krakenuniq && [!params.malt_mapdb].any()) { + error('[nf-core/createtaxdb] Supplied --build_malt, but missing: --malt_mapdb (all are mandatory for MALT)') + } } // diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index 34712c7..704faa8 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -139,8 +139,6 @@ workflow CREATETAXDB { if (params.build_ganon) { - ch_ganon_input_fastas = ch_prepped_dna_fastas_ungrouped.collect() - ch_ganon_input_tsv = ch_prepped_dna_fastas_ungrouped .map { meta, fasta -> // I tried with .name() but it kept giving error of `Unknown method invocation `name` on XPath type... 
not sure why @@ -192,7 +190,7 @@ workflow CREATETAXDB { if (params.build_krakenuniq) { ch_taxdmpfiles_for_krakenuniq = Channel.of(ch_taxonomy_namesdmp).combine(Channel.of(ch_taxonomy_nodesdmp)).map { [it] } - ch_input_for_krakenuniq = ch_prepped_dna_fastas.combine(ch_taxdmpfiles_for_krakenuniq).map { meta, reads, taxdump -> [meta, reads, taxdump, ch_nucl2taxid] } + ch_input_for_krakenuniq = ch_prepped_dna_fastas.combine(ch_taxdmpfiles_for_krakenuniq).map { meta, fastas, taxdump -> [meta, fastas, taxdump, ch_nucl2taxid] } KRAKENUNIQ_BUILD(ch_input_for_krakenuniq) ch_versions = ch_versions.mix(KRAKENUNIQ_BUILD.out.versions.first()) From af9a8f645fdc458125f9873a316b2c4d6d84798b Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 28 Nov 2024 13:26:35 +0100 Subject: [PATCH 6/9] Add reference to ganon across all test profiles and fix typo --- conf/test.config | 1 + conf/test_full.config | 17 ++++---- conf/test_nothing.config | 1 + main.nf | 89 ++++++++++++++++++---------------------- tests/test.nf.test | 2 + workflows/createtaxdb.nf | 7 +++- 6 files changed, 58 insertions(+), 59 deletions(-) diff --git a/conf/test.config b/conf/test.config index 93cf208..d582ba1 100644 --- a/conf/test.config +++ b/conf/test.config @@ -31,6 +31,7 @@ params { build_bracken = true build_diamond = true + build_ganon = true build_kaiju = true build_malt = true build_centrifuge = true diff --git a/conf/test_full.config b/conf/test_full.config index a628fd1..f14adca 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -17,13 +17,14 @@ params { // Input data for full size test // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' - build_bracken = true - build_diamond = true - build_kaiju = true - build_malt = true - build_centrifuge = true - build_kraken2 = true - build_krakenuniq = true + build_bracken = true + build_diamond = true + build_ganon = true + build_kaiju = true + build_malt = true + build_centrifuge = true + build_kraken2 = true + build_krakenuniq = true } diff --git a/conf/test_nothing.config b/conf/test_nothing.config index 1f22ce2..b39e675 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -22,6 +22,7 @@ params { build_bracken = false build_diamond = false + build_ganon = false build_kaiju = false build_malt = false build_centrifuge = false diff --git a/main.nf b/main.nf index 81aefc5..ec245d9 100644 --- a/main.nf +++ b/main.nf @@ -15,51 +15,9 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { CREATETAXDB } from './workflows/createtaxdb' +include { CREATETAXDB } from './workflows/createtaxdb' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_createtaxdb_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_createtaxdb_pipeline' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOWS FOR PIPELINE -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// WORKFLOW: Run main analysis pipeline depending on type of input -// -workflow NFCORE_CREATETAXDB { - - take: - 
samplesheet // channel: samplesheet read in from --input - - main: - - // - // WORKFLOW: Run pipeline - // - ch_samplesheet = samplesheet - ch_taxonomy_namesdmp = file(params.namesdmp) - ch_taxonomy_nodesdmp = file(params.nodesdmp) - ch_accession2taxid = file(params.accession2taxid) - ch_nucl2taxid = file(params.nucl2taxid) - ch_prot2taxid = file(params.prot2taxid) - ch_malt_mapdb = file(params.malt_mapdb) - - - CREATETAXDB ( - ch_samplesheet, - ch_taxonomy_namesdmp, - ch_taxonomy_nodesdmp, - ch_accession2taxid, - ch_nucl2taxid, - ch_prot2taxid, - ch_malt_mapdb, - - ) - emit: - multiqc_report = CREATETAXDB.out.multiqc_report // channel: /path/to/multiqc_report.html -} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -67,12 +25,10 @@ workflow NFCORE_CREATETAXDB { */ workflow { - - main: // // SUBWORKFLOW: Run initialisation tasks // - PIPELINE_INITIALISATION ( + PIPELINE_INITIALISATION( params.version, params.validate_params, params.monochrome_logs, @@ -84,13 +40,13 @@ workflow { // // WORKFLOW: Run main workflow // - NFCORE_CREATETAXDB ( + NFCORE_CREATETAXDB( PIPELINE_INITIALISATION.out.samplesheet ) // // SUBWORKFLOW: Run completion tasks // - PIPELINE_COMPLETION ( + PIPELINE_COMPLETION( params.email, params.email_on_fail, params.plaintext_email, @@ -103,6 +59,41 @@ workflow { /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END + NAMED WORKFLOWS FOR PIPELINE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + +// +// WORKFLOW: Run main analysis pipeline depending on type of input +// +workflow NFCORE_CREATETAXDB { + take: + samplesheet // channel: samplesheet read in from --input + + main: + + // + // WORKFLOW: Run pipeline + // + ch_samplesheet = samplesheet + ch_taxonomy_namesdmp = file(params.namesdmp, checkIfExists: true) + ch_taxonomy_nodesdmp = file(params.nodesdmp, checkIfExists: true) + ch_accession2taxid = file(params.accession2taxid, checkIfExists: true) + ch_nucl2taxid = file(params.nucl2taxid, checkIfExists: true) + ch_prot2taxid = file(params.prot2taxid, checkIfExists: true) + ch_malt_mapdb = file(params.malt_mapdb, checkIfExists: true) + + + CREATETAXDB( + ch_samplesheet, + ch_taxonomy_namesdmp, + ch_taxonomy_nodesdmp, + ch_accession2taxid, + ch_nucl2taxid, + ch_prot2taxid, + ch_malt_mapdb + ) + + emit: + multiqc_report = CREATETAXDB.out.multiqc_report // channel: /path/to/multiqc_report.html +} diff --git a/tests/test.nf.test b/tests/test.nf.test index 76a073c..c8c6632 100644 --- a/tests/test.nf.test +++ b/tests/test.nf.test @@ -23,6 +23,8 @@ nextflow_pipeline { file("$outputDir/bracken/database/database.kraken").name, path("$outputDir/centrifuge/"), path("$outputDir/diamond/database.dmnd"), + path("$outputDir/ganon/database.hibf"), + path("$outputDir/ganon/database.tax"), path("$outputDir/kaiju/database.fmi"), path("$outputDir/kraken2/database/hash.k2d"), file("$outputDir/kraken2/database/opts.k2d").name, diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index 704faa8..3b82d50 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -79,7 +79,7 @@ workflow CREATETAXDB { // Place in single file CAT_CAT_DNA(ch_prepped_dna_fastas) ch_versions = ch_versions.mix(CAT_CAT_DNA.out.versions.first()) - ch_singleref_for_dna = CAT_CAT_DNA.out + ch_singleref_for_dna = CAT_CAT_DNA.out.file_out } // TODO: Possibly need to have a modification step to get header correct to actually run with kaiju... 
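With ganon enabled in the bundled test profiles above, the whole branch can be smoke-tested end to end. A hedged sketch of the usual nf-core invocation follows; the container engine and output directory are local choices, not values fixed by this patch series.

```bash
# Exercise the ganon build (and the other enabled builders) via the test profile;
# assumes Docker is available, swap in singularity or conda as needed.
nextflow run nf-core/createtaxdb \
    -profile test,docker \
    --outdir results
```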
@@ -105,7 +105,7 @@ workflow CREATETAXDB { //ch_versions = ch_versions.mix( PIGZ_COMPRESS_AA.versions.first() ) CAT_CAT_AA(ch_prepped_aa_fastas) - ch_singleref_for_aa = CAT_CAT_AA.out_file + ch_singleref_for_aa = CAT_CAT_AA.out.file_out ch_versions = ch_versions.mix(CAT_CAT_AA.out.versions.first()) } @@ -161,6 +161,9 @@ workflow CREATETAXDB { ch_versions = ch_versions.mix(GANON_BUILDCUSTOM.out.versions.first()) ch_ganon_output = GANON_BUILDCUSTOM.out.db } + else { + ch_ganon_output = Channel.empty() + } // MODULE: Run KAIJU/MKFMI From 565d6100d295057f3b56bce34dbf544069ff3b4e Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 28 Nov 2024 13:30:43 +0100 Subject: [PATCH 7/9] Update test --- tests/test.nf.test.snap | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test.nf.test.snap b/tests/test.nf.test.snap index 0bd549f..dc60a3f 100644 --- a/tests/test.nf.test.snap +++ b/tests/test.nf.test.snap @@ -11,6 +11,8 @@ "database.4.cf:md5,2902ec5df0db6da41a91b40d2f46b30d" ], "database.dmnd:md5,b2ea49ef5490c526e2c56cae19bcb462", + "database.hibf:md5,af913cecda744b02751e2f5320c35c7c", + "database.tax:md5,30f327fbe453aa1a981363fd9f4df21b", "database.fmi:md5,54fd89f5e4eab61af30175e8aa389598", "hash.k2d:md5,941118164b4bcc010593f7a7c7b30029", "opts.k2d", @@ -30,8 +32,8 @@ ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nextflow": "24.10.2" }, - "timestamp": "2024-10-08T16:33:06.699148849" + "timestamp": "2024-11-28T13:27:57.851046024" } } \ No newline at end of file From d096cee0d55392356dcae61879fc7349d8adb21c Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 28 Nov 2024 13:47:01 +0100 Subject: [PATCH 8/9] Add current required documentation --- CITATIONS.md | 4 ++++ README.md | 1 + docs/output.md | 17 ++++++++++++++++- .../utils_nfcore_createtaxdb_pipeline/main.nf | 2 ++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/CITATIONS.md b/CITATIONS.md index 354566c..1e56a20 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -40,6 +40,10 @@ > Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: estimating species abundance in metagenomics data. PeerJ. Computer Science, 3(e104), e104. https://doi.org/10.7717/peerj-cs.104 +- [ganon](https://doi.org/10.1093/bioinformatics/btaa458) + + > Piro, V. C., Dadi, T. H., Seiler, E., Reinert, K., & Renard, B. Y. (2020). Ganon: Precise metagenomics classification against large and up-to-date sets of reference sequences. Bioinformatics (Oxford, England), 36(Suppl_1), i12–i20. https://doi.org/10.1093/bioinformatics/btaa458 + - [Centrifuge](https://doi.org/10.1101/gr.210641.116) > Kim, D., Song, L., Breitwieser, F. P., & Salzberg, S. L. (2016). Centrifuge: rapid and sensitive classification of metagenomic sequences. Genome Research, 26(12), 1721–1729. https://doi.org/10.1101/gr.210641.116 diff --git a/README.md b/README.md index 16bd3fa..a5fffbe 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ 2. 
Builds databases for: - [Bracken](https://doi.org/10.7717/peerj-cs.104) - [Centrifuge](https://doi.org/10.1101/gr.210641.116) + - [ganon](https://doi.org/10.1093/bioinformatics/btaa458) - [DIAMOND](https://doi.org/10.1038/nmeth.3176) - [Kaiju](https://doi.org/10.1038/ncomms11257) - [Kraken2](https://doi.org/10.1186/s13059-019-1891-0) diff --git a/docs/output.md b/docs/output.md index 9f5ddb0..762991f 100644 --- a/docs/output.md +++ b/docs/output.md @@ -14,7 +14,8 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution -- [Bracken](#bracken) - Database files for Brakcen +- [Bracken](#bracken) - Database files for Bracken +- [ganon](#ganon) - Database files for ganon - [Centrifuge](#centrifuge) - Database files for Centrifuge - [DIAMOND](#diamond) - Database files for DIAMOND - [Kaiju](#kaiju) - Database files for Kaiju @@ -92,6 +93,20 @@ The resulting `/` directory can be given to Bracken itself with `bracke A directory and `cf` files can be given to the Centrifuge command with `centrifuge -x ///` etc. +### Ganon + +[ganon](https://github.com/pirovc/ganon/) classifies genomic sequences against large sets of references efficiently, with integrated download and update of databases (refseq/genbank), taxonomic profiling (ncbi/gtdb), binning and hierarchical classification, customized reporting and more. + +
+<details markdown="1">
+<summary>Output files</summary>
+
+- `ganon/`
+  - `<database>.hibf`: main bloom filter index file
+  - `<database>.tax`: taxonomy tree used for taxonomy assignment
+
+</details>
+
+The directory containing these two files can be given to ganon itself using the database name as a prefix, e.g., `ganon classify -d <outdir>/ganon/<database>`.
+
 ### Diamond
 
 [DIAMOND](https://github.com/bbuchfink/diamond) is a accelerated BLAST compatible local sequence aligner particularly used for protein alignment.
diff --git a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf
index 589b799..ce1edde 100644
--- a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf
@@ -186,6 +186,7 @@ def toolCitationText() {
         "Tools used in the workflow included:",
         params.build_bracken ? "Bracken (Lu et al. 2017)," : "",
         params.build_centrifuge ? "Centrifuge (Kim et al. 2016)," : "",
+        params.build_ganon ? "ganon (Piro et al. 2020)," : "",
         params.build_diamond ? "DIAMOND (Buchfink et al. 2015)," : "",
         params.build_kaiju ? "Kaiju (Menzel et al. 2016)," : "",
         params.build_kraken2 ? "Kraken2 (Wood et al. 2019)," : "",
@@ -205,6 +206,7 @@ def toolBibliographyText() {
     def reference_text = [
         params.build_bracken ? '<li>Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: estimating species abundance in metagenomics data. PeerJ. Computer Science, 3(e104), e104. 10.7717/peerj-cs.104</li>' : "",
         params.build_centrifuge ? '<li>Kim, D., Song, L., Breitwieser, F. P., & Salzberg, S. L. (2016). Centrifuge: rapid and sensitive classification of metagenomic sequences. Genome Research, 26(12), 1721–1729. 10.1101/gr.210641.116</li>' : "",
+            params.build_ganon       ? "<li>Piro, V. C., Dadi, T. H., Seiler, E., Reinert, K., & Renard, B. Y. (2020). Ganon: Precise metagenomics classification against large and up-to-date sets of reference sequences. Bioinformatics (Oxford, England), 36(Suppl_1), i12–i20. 10.1093/bioinformatics/btaa458</li>" : "",
         params.build_diamond ? '<li>Buchfink, B., Xie, C., & Huson, D. H. (2015). Fast and sensitive protein alignment using DIAMOND. Nature Methods, 12(1), 59–60. 10.1038/nmeth.3176</li>' : "",
         params.build_kaiju ? '<li>Menzel, P., Ng, K. L., & Krogh, A. (2016). Fast and sensitive taxonomic classification for metagenomics with Kaiju. Nature Communications, 7, 11257. 10.1038/ncomms11257</li>' : "",
         params.build_kraken2 ? '<li>Wood, D. E., Lu, J., & Langmead, B. (2019). Improved metagenomic analysis with Kraken 2. Genome Biology, 20(1), 257. 10.1186/s13059-019-1891-0</li>' : "",
From 36b57423585e6a8e036ac203425f4ec6a1dcc998 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Thu, 28 Nov 2024 14:01:03 +0100
Subject: [PATCH 9/9] Fix mangled regex strings

---
 subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf
index ce1edde..7fb98b7 100644
--- a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf
@@ -245,7 +245,7 @@ def methodsDescriptionText(mqc_methods_yaml) {
     meta["tool_citations"] = ""
     meta["tool_bibliography"] = ""
 
-    meta["tool_citations"] = toolCitationText().replaceAll(', .', ".").replaceAll('. .', ' .').replaceAll(', .', '.')
+    meta["tool_citations"] = toolCitationText().replaceAll(', \\.', ".").replaceAll('. \\.', ' .').replaceAll(', \\.', '.')
     meta["tool_bibliography"] = toolBibliographyText()
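
Background on the PATCH 9/9 change: Groovy's String.replaceAll() treats its first argument as a regular expression, so an unescaped '.' acts as a wildcard and can swallow real characters from the citation sentence rather than only the stray ", ." fragments left behind when some tools are disabled. A minimal sketch of the before/after behaviour, using made-up citation strings rather than actual pipeline output:

    // Groovy: the first argument of replaceAll() is a regex, not a literal string.
    def citation = "Bracken (Lu et al. 2017), Kaiju (Menzel et al. 2016), ."

    // Unescaped pattern: ', .' also matches ', K' and mangles the sentence.
    assert citation.replaceAll(', .', '.') == "Bracken (Lu et al. 2017).aiju (Menzel et al. 2016)."

    // Escaped pattern (as in this patch): only the literal dangling ', .' is collapsed.
    assert citation.replaceAll(', \\.', '.') == "Bracken (Lu et al. 2017), Kaiju (Menzel et al. 2016)."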