From e37c84d69b73dc9dc2075937987bf9a8194aaa6a Mon Sep 17 00:00:00 2001
From: alxndrdiaz <ra.ramos.diaz@gmail.com>
Date: Thu, 2 May 2024 18:01:24 -0600
Subject: [PATCH 1/8] include fasta_build_add_kraken2_bracken

---
 modules.json                                  |  13 +-
 modules/nf-core/bracken/build/environment.yml |   8 +
 modules/nf-core/bracken/build/main.nf         |  48 ++++++
 modules/nf-core/bracken/build/meta.yml        |  47 +++++
 .../nf-core/bracken/build/tests/main.nf.test  |  72 ++++++++
 .../bracken/build/tests/main.nf.test.snap     |  84 +++++++++
 modules/nf-core/bracken/build/tests/tags.yml  |   2 +
 nextflow.config                               |   1 +
 .../nf-core/fasta_build_add_kraken2/main.nf   |  35 ----
 .../tests/main.nf.test                        | 103 -----------
 .../tests/main.nf.test.snap                   |  35 ----
 .../fasta_build_add_kraken2/tests/tags.yml    |   2 -
 .../fasta_build_add_kraken2_bracken/main.nf   |  42 +++++
 .../meta.yml                                  |  26 ++-
 .../tests/main.nf.test                        | 160 ++++++++++++++++++
 .../tests/main.nf.test.snap                   |  58 +++++++
 .../tests/tags.yml                            |   2 +
 workflows/createtaxdb.nf                      |   9 +-
 18 files changed, 556 insertions(+), 191 deletions(-)
 create mode 100644 modules/nf-core/bracken/build/environment.yml
 create mode 100644 modules/nf-core/bracken/build/main.nf
 create mode 100644 modules/nf-core/bracken/build/meta.yml
 create mode 100644 modules/nf-core/bracken/build/tests/main.nf.test
 create mode 100644 modules/nf-core/bracken/build/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/bracken/build/tests/tags.yml
 delete mode 100644 subworkflows/nf-core/fasta_build_add_kraken2/main.nf
 delete mode 100644 subworkflows/nf-core/fasta_build_add_kraken2/tests/main.nf.test
 delete mode 100644 subworkflows/nf-core/fasta_build_add_kraken2/tests/main.nf.test.snap
 delete mode 100644 subworkflows/nf-core/fasta_build_add_kraken2/tests/tags.yml
 create mode 100644 subworkflows/nf-core/fasta_build_add_kraken2_bracken/main.nf
 rename subworkflows/nf-core/{fasta_build_add_kraken2 => fasta_build_add_kraken2_bracken}/meta.yml (61%)
 create mode 100644 subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/main.nf.test
 create mode 100644 subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/main.nf.test.snap
 create mode 100644 subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/tags.yml

diff --git a/modules.json b/modules.json
index cb08493..0b6df73 100644
--- a/modules.json
+++ b/modules.json
@@ -5,6 +5,11 @@
         "https://github.com/nf-core/modules.git": {
             "modules": {
                 "nf-core": {
+                    "bracken/build": {
+                        "branch": "master",
+                        "git_sha": "dcbe6e77bc6cc0843ce93e6c7bd884d65c215984",
+                        "installed_by": ["fasta_build_add_kraken2_bracken"]
+                    },
                     "cat/cat": {
                         "branch": "master",
                         "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
@@ -38,12 +43,12 @@
                     "kraken2/add": {
                         "branch": "master",
                         "git_sha": "ca87ad032a62f025f0c373facacef2df0c5411b2",
-                        "installed_by": ["fasta_build_add_kraken2"]
+                        "installed_by": ["fasta_build_add_kraken2_bracken"]
                     },
                     "kraken2/build": {
                         "branch": "master",
                         "git_sha": "ca87ad032a62f025f0c373facacef2df0c5411b2",
-                        "installed_by": ["fasta_build_add_kraken2"]
+                        "installed_by": ["fasta_build_add_kraken2_bracken"]
                     },
                     "malt/build": {
                         "branch": "master",
@@ -69,9 +74,9 @@
             },
             "subworkflows": {
                 "nf-core": {
-                    "fasta_build_add_kraken2": {
+                    "fasta_build_add_kraken2_bracken": {
                         "branch": "master",
-                        "git_sha": "a4d1e13a2da05307deb65a87d501aa6520162dcd",
+                        "git_sha": "9758e4dedd5788369e61b57e7d6f4751e682b17a",
                         "installed_by": ["subworkflows"]
                     },
                     "utils_nextflow_pipeline": {
diff --git a/modules/nf-core/bracken/build/environment.yml b/modules/nf-core/bracken/build/environment.yml
new file mode 100644
index 0000000..7288a38
--- /dev/null
+++ b/modules/nf-core/bracken/build/environment.yml
@@ -0,0 +1,8 @@
+---
+name: "bracken_build"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - "bioconda::bracken=2.9"
diff --git a/modules/nf-core/bracken/build/main.nf b/modules/nf-core/bracken/build/main.nf
new file mode 100644
index 0000000..a2ee2c8
--- /dev/null
+++ b/modules/nf-core/bracken/build/main.nf
@@ -0,0 +1,48 @@
+process BRACKEN_BUILD {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/bracken:2.9--py38h2494328_0':
+        'biocontainers/bracken:2.9--py38h2494328_0' }"
+
+    input:
+    tuple val(meta), path(kraken2db)
+
+    output:
+    tuple val(meta), path(kraken2db               , includeInputs: true), emit: db
+    tuple val(meta), path("${kraken2db}/database*", includeInputs: true), emit: bracken_files
+    path "versions.yml"                                  , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    bracken-build \\
+        $args \\
+        -t $task.cpus \\
+        -d $kraken2db
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bracken: \$(echo \$(bracken -v) | cut -f2 -d'v')
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${kraken2db}/database100mers.kmer_distrib
+    touch ${kraken2db}/database100mers.kraken
+    touch ${kraken2db}/database.kraken
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bracken: \$(echo \$(bracken -v) | cut -f2 -d'v')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/bracken/build/meta.yml b/modules/nf-core/bracken/build/meta.yml
new file mode 100644
index 0000000..2bce245
--- /dev/null
+++ b/modules/nf-core/bracken/build/meta.yml
@@ -0,0 +1,47 @@
+---
+name: "bracken_build"
+description: Extends a Kraken2 database to be compatible with Bracken
+keywords:
+  - kraken2
+  - bracken
+  - database
+  - build
+tools:
+  - "bracken":
+      description: "Bracken (Bayesian Reestimation of Abundance with KrakEN) is a highly accurate statistical method that computes the abundance of species in DNA sequences from a metagenomics sample."
+      homepage: "https://ccb.jhu.edu/software/bracken/"
+      documentation: "https://ccb.jhu.edu/software/bracken/"
+      tool_dev_url: "https://github.com/jenniferlu717/Bracken/"
+      doi: "10.7717/peerj-cs.104"
+      licence: ["GPL v3"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+  - kraken2db:
+      type: directory
+      description: A Kraken2 database directory
+      pattern: "*/"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - db:
+      type: directory
+      description: A Kraken2 database directory with required bracken files inside
+      pattern: "*/"
+
+authors:
+  - "@jfy133"
+maintainers:
+  - "@jfy133"
diff --git a/modules/nf-core/bracken/build/tests/main.nf.test b/modules/nf-core/bracken/build/tests/main.nf.test
new file mode 100644
index 0000000..f4168a7
--- /dev/null
+++ b/modules/nf-core/bracken/build/tests/main.nf.test
@@ -0,0 +1,72 @@
+nextflow_process {
+
+    name "Test Process BRACKEN_BUILD"
+    script "../main.nf"
+    process "BRACKEN_BUILD"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "bracken"
+    tag "bracken/build"
+    tag "untar"
+
+    setup {
+        run ("UNTAR") {
+            script "../../../untar/main.nf"
+            process {
+                """
+                input[0] = [[id: 'db'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2_intermediate.tar.gz', checkIfExists: true)]
+                """
+            }
+        }
+    }
+
+    test("kraken2 - db") {
+
+        when {
+            process {
+                """
+                input[0] = UNTAR.out.untar
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                        file("${process.out.db[0][1]}/database100mers.kmer_distrib").name,
+                        file("${process.out.db[0][1]}/database100mers.kraken").name,
+                        file("${process.out.db[0][1]}/database.kraken").name,
+                        file("${process.out.bracken_files[0][1]}/database100mers.kmer_distrib").name,
+                        file("${process.out.bracken_files[0][1]}/database100mers.kraken").name,
+                        file("${process.out.bracken_files[0][1]}/database.kraken").name,
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+    test("kraken2 - db - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = UNTAR.out.untar
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/bracken/build/tests/main.nf.test.snap b/modules/nf-core/bracken/build/tests/main.nf.test.snap
new file mode 100644
index 0000000..49f4240
--- /dev/null
+++ b/modules/nf-core/bracken/build/tests/main.nf.test.snap
@@ -0,0 +1,84 @@
+{
+    "kraken2 - db": {
+        "content": [
+            "database100mers.kmer_distrib",
+            "database100mers.kraken",
+            "database.kraken",
+            "database100mers.kmer_distrib",
+            "database100mers.kraken",
+            "database.kraken"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-04-17T18:41:03.693430543"
+    },
+    "kraken2 - db - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "db"
+                        },
+                        [
+                            "database.kraken:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "database100mers.kmer_distrib:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "database100mers.kraken:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "file.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "db"
+                        },
+                        [
+                            "database.kraken:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "database100mers.kmer_distrib:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "database100mers.kraken:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,925c6ae1387eaf6dbd14656125bc6d9b"
+                ],
+                "bracken_files": [
+                    [
+                        {
+                            "id": "db"
+                        },
+                        [
+                            "database.kraken:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "database100mers.kmer_distrib:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "database100mers.kraken:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "db": [
+                    [
+                        {
+                            "id": "db"
+                        },
+                        [
+                            "database.kraken:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "database100mers.kmer_distrib:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "database100mers.kraken:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "file.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,925c6ae1387eaf6dbd14656125bc6d9b"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-04-17T18:41:14.406736156"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/bracken/build/tests/tags.yml b/modules/nf-core/bracken/build/tests/tags.yml
new file mode 100644
index 0000000..92d7c26
--- /dev/null
+++ b/modules/nf-core/bracken/build/tests/tags.yml
@@ -0,0 +1,2 @@
+bracken/build:
+  - "modules/nf-core/bracken/build/**"
diff --git a/nextflow.config b/nextflow.config
index 4725942..56b7e6f 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -71,6 +71,7 @@ params {
     build_centrifuge         = false
     build_kraken2            = false
     kraken2_keepintermediate = false
+    run_bracken              = false
 
 }
 
diff --git a/subworkflows/nf-core/fasta_build_add_kraken2/main.nf b/subworkflows/nf-core/fasta_build_add_kraken2/main.nf
deleted file mode 100644
index 306896b..0000000
--- a/subworkflows/nf-core/fasta_build_add_kraken2/main.nf
+++ /dev/null
@@ -1,35 +0,0 @@
-include { KRAKEN2_ADD   } from '../../../modules/nf-core/kraken2/add/main'
-include { KRAKEN2_BUILD } from '../../../modules/nf-core/kraken2/build/main'
-
-workflow FASTA_BUILD_ADD_KRAKEN2 {
-
-    take:
-    ch_fasta              // channel: [ val(meta), fasta ]
-    ch_taxonomy_names     // channel: [ names.dmp ]
-    ch_taxonomy_nodes     // channel: [ nodes.dmp ]
-    ch_accession2taxid    // channel: [ acc2taxidfile ]
-    val_cleanintermediate // value: [ true | false ]
-
-    main:
-
-    ch_versions = Channel.empty()
-
-    ch_fastas_for_kraken2add = ch_fasta
-                                .map {
-                                    meta, fasta ->
-
-                                    [[id: 'db'], fasta]
-                                }
-                                .groupTuple()
-
-    KRAKEN2_ADD ( ch_fastas_for_kraken2add, ch_taxonomy_names, ch_taxonomy_nodes, ch_accession2taxid )
-    ch_versions = ch_versions.mix(KRAKEN2_ADD.out.versions.first())
-
-    KRAKEN2_BUILD ( KRAKEN2_ADD.out.db, val_cleanintermediate )
-    ch_versions = ch_versions.mix(KRAKEN2_BUILD.out.versions.first())
-
-    emit:
-    db = KRAKEN2_BUILD.out.db // channel: [ val(meta), [ db ] ]
-    versions = ch_versions    // channel: [ versions.yml ]
-}
-
diff --git a/subworkflows/nf-core/fasta_build_add_kraken2/tests/main.nf.test b/subworkflows/nf-core/fasta_build_add_kraken2/tests/main.nf.test
deleted file mode 100644
index a7baca6..0000000
--- a/subworkflows/nf-core/fasta_build_add_kraken2/tests/main.nf.test
+++ /dev/null
@@ -1,103 +0,0 @@
-nextflow_workflow {
-
-    name "Test Subworkflow FASTA_BUILD_ADD_KRAKEN2"
-    script "../main.nf"
-    workflow "FASTA_BUILD_ADD_KRAKEN2"
-
-    tag "subworkflows"
-    tag "subworkflows_nfcore"
-    tag "subworkflows/fasta_build_add_kraken2"
-    tag "gunzip"
-    tag "kraken2"
-    tag "kraken2/add"
-    tag "kraken2/build"
-
-    test("metagenome - fasta") {
-
-        setup {
-            run("GUNZIP") {
-                script "modules/nf-core/gunzip/main.nf"
-                process {
-                    """
-                    input[0] = Channel.of([\
-                                [id:'haemophilus_influenzae'],
-                                file(params.modules_testdata_base_path + '/genomics/prokaryotes/metagenome/fasta/haemophilus_influenzae.fna.gz', checkIfExists: true)
-                            ]
-                    )
-                    """
-                }
-            }
-        }
-
-        when {
-            workflow {
-                """
-                input[0] = Channel.of([[id:'sarscov2'], file(params.modules_testdata_base_path + '/genomics/prokaryotes/metagenome/fasta/sarscov2.fasta', checkIfExists: true)]).mix(GUNZIP.out.gunzip)
-                input[1] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/taxdmp/names.dmp', checkIfExists: true)
-                input[2] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/taxdmp/nodes.dmp', checkIfExists: true)
-                input[3] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/accession2taxid/nucl_gb.accession2taxid', checkIfExists: true)
-                input[4] = true
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert workflow.success},
-                { assert workflow.out.db.get(0).get(1) ==~ ".*/db" },
-                { assert snapshot (
-                        workflow.out.versions,
-                        path("${workflow.out.db[0][1]}/hash.k2d"),
-                        path("${workflow.out.db[0][1]}/taxo.k2d"),
-                        file("${workflow.out.db[0][1]}/opts.k2d").name,
-                        ).match()
-                }
-            )
-        }
-    }
-
-    test("metagenome - fasta - nocleanup") {
-
-        setup {
-            run("GUNZIP") {
-                script "modules/nf-core/gunzip/main.nf"
-                process {
-                    """
-                    input[0] = Channel.of([\
-                                [id:'haemophilus_influenzae'],
-                                file(params.modules_testdata_base_path + '/genomics/prokaryotes/metagenome/fasta/haemophilus_influenzae.fna.gz', checkIfExists: true)
-                            ]
-                    )
-                    """
-                }
-            }
-        }
-
-        when {
-            workflow {
-                """
-                input[0] = Channel.of([[id:'sarscov2'], file(params.modules_testdata_base_path + '/genomics/prokaryotes/metagenome/fasta/sarscov2.fasta', checkIfExists: true)]).mix(GUNZIP.out.gunzip)
-                input[1] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/taxdmp/names.dmp', checkIfExists: true)
-                input[2] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/taxdmp/nodes.dmp', checkIfExists: true)
-                input[3] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/accession2taxid/nucl_gb.accession2taxid', checkIfExists: true)
-                input[4] = false
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert workflow.success},
-                { assert workflow.out.db.get(0).get(1) ==~ ".*/db" },
-                { assert snapshot (
-                        workflow.out.versions,
-                        path("${workflow.out.db[0][1]}/hash.k2d"),
-                        path("${workflow.out.db[0][1]}/taxo.k2d"),
-                        file("${workflow.out.db[0][1]}/opts.k2d").name,
-                        file("${workflow.out.db[0][1]}/unmapped.txt").name
-                        ).match()
-                }
-            )
-        }
-    }
-}
diff --git a/subworkflows/nf-core/fasta_build_add_kraken2/tests/main.nf.test.snap b/subworkflows/nf-core/fasta_build_add_kraken2/tests/main.nf.test.snap
deleted file mode 100644
index 57b4a48..0000000
--- a/subworkflows/nf-core/fasta_build_add_kraken2/tests/main.nf.test.snap
+++ /dev/null
@@ -1,35 +0,0 @@
-{
-    "metagenome - fasta - nocleanup": {
-        "content": [
-            [
-                "versions.yml:md5,62fb719633dd8f110bbc2c1bec53d0a9",
-                "versions.yml:md5,82f39c3ef1ba0742da3105cbe5ed3cf7"
-            ],
-            "hash.k2d:md5,4717689f8ba88d4cae51ecc7c9d9b372",
-            "taxo.k2d:md5,24338e2d78f803f48bcc5653c6e51816",
-            "opts.k2d",
-            "unmapped.txt"
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-04-05T08:17:49.670974771"
-    },
-    "metagenome - fasta": {
-        "content": [
-            [
-                "versions.yml:md5,62fb719633dd8f110bbc2c1bec53d0a9",
-                "versions.yml:md5,82f39c3ef1ba0742da3105cbe5ed3cf7"
-            ],
-            "hash.k2d:md5,4717689f8ba88d4cae51ecc7c9d9b372",
-            "taxo.k2d:md5,24338e2d78f803f48bcc5653c6e51816",
-            "opts.k2d"
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-04-05T08:17:31.501399396"
-    }
-}
\ No newline at end of file
diff --git a/subworkflows/nf-core/fasta_build_add_kraken2/tests/tags.yml b/subworkflows/nf-core/fasta_build_add_kraken2/tests/tags.yml
deleted file mode 100644
index af5f2a0..0000000
--- a/subworkflows/nf-core/fasta_build_add_kraken2/tests/tags.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-subworkflows/fasta_build_add_kraken2:
-  - subworkflows/nf-core/fasta_build_add_kraken2/**
diff --git a/subworkflows/nf-core/fasta_build_add_kraken2_bracken/main.nf b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/main.nf
new file mode 100644
index 0000000..cb9e370
--- /dev/null
+++ b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/main.nf
@@ -0,0 +1,42 @@
+include { KRAKEN2_ADD   } from '../../../modules/nf-core/kraken2/add/main'
+include { KRAKEN2_BUILD } from '../../../modules/nf-core/kraken2/build/main'
+include { BRACKEN_BUILD } from '../../../modules/nf-core/bracken/build/main'
+
+workflow FASTA_BUILD_ADD_KRAKEN2_BRACKEN {
+
+    take:
+    ch_fasta                 // channel: [ val(meta), [ fasta1, fasta2, fasta3] ]
+    ch_taxonomy_names        // channel: [ names.dmp ]
+    ch_taxonomy_nodes        // channel: [ nodes.dmp ]
+    ch_accession2taxid       // channel: [ acc2taxidfile ]
+    val_cleanintermediates   // value:   [ true | false ]
+    val_runbrackenbuild      // value:   [ true | false ]
+
+    main:
+
+    if ( val_cleanintermediates && val_runbrackenbuild ) { error("Cannot perform Kraken2 cleanup and build Bracken database. Bracken requires intermediate files") }
+    val_cleanup = [ val_cleanintermediates && !val_runbrackenbuild ].any() ? true : false
+
+    ch_versions = Channel.empty()
+
+    KRAKEN2_ADD ( ch_fasta, ch_taxonomy_names, ch_taxonomy_nodes, ch_accession2taxid )
+    ch_versions = ch_versions.mix( KRAKEN2_ADD.out.versions.first() )
+
+    KRAKEN2_BUILD ( KRAKEN2_ADD.out.db, val_cleanup )
+    ch_versions = ch_versions.mix( KRAKEN2_BUILD.out.versions.first() )
+
+    if ( val_runbrackenbuild ) {
+        BRACKEN_BUILD ( KRAKEN2_BUILD.out.db )
+        ch_final_db = BRACKEN_BUILD.out.db
+        ch_versions = ch_versions.mix( BRACKEN_BUILD.out.versions.first() )
+    }
+    else {
+        ch_final_db = KRAKEN2_BUILD.out.db
+        ch_versions = ch_versions.mix( KRAKEN2_BUILD.out.versions.first() )
+    }
+
+    emit:
+    db = ch_final_db // channel: [ val(meta), [ db ] ]
+    versions = ch_versions    // channel: [ versions.yml ]
+}
+
diff --git a/subworkflows/nf-core/fasta_build_add_kraken2/meta.yml b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/meta.yml
similarity index 61%
rename from subworkflows/nf-core/fasta_build_add_kraken2/meta.yml
rename to subworkflows/nf-core/fasta_build_add_kraken2_bracken/meta.yml
index 1506709..8125c60 100644
--- a/subworkflows/nf-core/fasta_build_add_kraken2/meta.yml
+++ b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/meta.yml
@@ -1,21 +1,23 @@
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
-name: "fasta_build_add_kraken2"
-description: KRAKEN2 build custom database subworkflow
+name: "fasta_build_add_kraken2_bracken"
+description: KRAKEN2 and BRACKEN build custom database subworkflow
 keywords:
   - metagenomics
   - kraken2
   - database
   - build
   - custom
+  - bracken
 components:
   - kraken2/add
   - kraken2/build
+  - bracken/build
 input:
   - ch_fasta:
       type: file
       description: |
-        Channel containing each fasta as a distinct element with meta
-        Structure: [ val(meta), path(fasta) ]
+        Channel containing a meta with a list of FASTAs to be built
+        Structure: [ val(meta), [ fasta1, fasta2, fasta3 ] ]
       pattern: "*.{fasta,fa,fna}"
   - ch_taxonomy_names:
       type: file
@@ -35,18 +37,26 @@ input:
         Channel containing a NCBI-style taxdump accession2taxid (acc2tax) file
         Structure: [ accession2taxid_file ]
       pattern: "*.accession2taxid"
-  - val_cleanintermediate:
+  - val_cleanintermediates:
       type: boolean
       description: |
-        Boolean flag whether to clean up intermediate files after build or not
+        Boolean flag whether to clean up intermediate files after build or not.
+        Must be false when val_runbrackenbuild is true: the subworkflow raises an error otherwise, as BRACKEN requires intermediate files.
         Structure: [ val_cleanintermediate ]
       pattern: "true|false"
+  - val_runbrackenbuild:
+      type: boolean
+      description: |
+        Boolean flag whether to additionally insert required BRACKEN database files into KRAKEN2 directory.
+        Note any changes for k-mer or read lengths must come via Nextflow config `ext.args`.
+        Structure: [ val_runbrackenbuild ]
+      pattern: "true|false"
 output:
   - db:
       type: directory
       description: |
-        Channel containing KRAKEN2 database directory.
-        Use `$ext.prefix` in a modules.conf file to change default name
+        Channel containing KRAKEN2 (and BRACKEN) database directory files.
+        Use `ext.prefix` in a modules.conf file to change default name
         Structure: [ val(meta), path(db) ]
       pattern: "*/"
   - versions:
diff --git a/subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/main.nf.test b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/main.nf.test
new file mode 100644
index 0000000..94bc184
--- /dev/null
+++ b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/main.nf.test
@@ -0,0 +1,160 @@
+nextflow_workflow {
+
+    name "Test Subworkflow FASTA_BUILD_ADD_KRAKEN2_BRACKEN"
+    script "../main.nf"
+    workflow "FASTA_BUILD_ADD_KRAKEN2_BRACKEN"
+
+    tag "subworkflows"
+    tag "subworkflows_nfcore"
+    tag "subworkflows/fasta_build_add_kraken2_bracken"
+    tag "gunzip"
+    tag "kraken2"
+    tag "kraken2/add"
+    tag "kraken2/build"
+    tag "bracken/build"
+
+    test("metagenome - nocleanup - nobracken - fasta") {
+
+        setup {
+            run("GUNZIP") {
+                script "modules/nf-core/gunzip/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([
+                                [id:'db'],
+                                file(params.modules_testdata_base_path + '/genomics/prokaryotes/metagenome/fasta/haemophilus_influenzae.fna.gz', checkIfExists: true)
+                            ]
+                    )
+                    """
+                }
+            }
+        }
+
+        when {
+            workflow {
+                """
+                input[0] = Channel.of([[id:'db'], file(params.modules_testdata_base_path + '/genomics/prokaryotes/metagenome/fasta/sarscov2.fasta', checkIfExists: true)]).mix(GUNZIP.out.gunzip).groupTuple()
+                input[1] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/taxdmp/names.dmp', checkIfExists: true)
+                input[2] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/taxdmp/nodes.dmp', checkIfExists: true)
+                input[3] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/accession2taxid/nucl_gb.accession2taxid', checkIfExists: true)
+                input[4] = false
+                input[5] = false
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success},
+                { assert snapshot (
+                        workflow.out.versions,
+                        path("${workflow.out.db[0][1]}/hash.k2d"),
+                        path("${workflow.out.db[0][1]}/taxo.k2d"),
+                        file("${workflow.out.db[0][1]}/opts.k2d").name,
+                        ).match()
+                },
+                { assert path("${workflow.out.db[0][1]}/library/").exists() },
+                { assert path("${workflow.out.db[0][1]}/taxonomy/").exists() }
+            )
+        }
+    }
+
+    test("metagenome - withcleanup - nobracken - fasta") {
+
+        setup {
+            run("GUNZIP") {
+                script "modules/nf-core/gunzip/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([\
+                                [id:'db'],
+                                file(params.modules_testdata_base_path + '/genomics/prokaryotes/metagenome/fasta/haemophilus_influenzae.fna.gz', checkIfExists: true)
+                            ]
+                    )
+                    """
+                }
+            }
+        }
+
+        when {
+            workflow {
+                """
+                input[0] = Channel.of([[id:'db'], file(params.modules_testdata_base_path + '/genomics/prokaryotes/metagenome/fasta/sarscov2.fasta', checkIfExists: true)]).mix(GUNZIP.out.gunzip).groupTuple()
+                input[1] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/taxdmp/names.dmp', checkIfExists: true)
+                input[2] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/taxdmp/nodes.dmp', checkIfExists: true)
+                input[3] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/accession2taxid/nucl_gb.accession2taxid', checkIfExists: true)
+                input[4] = true
+                input[5] = false
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success},
+                { assert workflow.out.db.get(0).get(1) ==~ ".*/db" },
+                { assert snapshot (
+                        workflow.out.versions,
+                        path("${workflow.out.db[0][1]}/hash.k2d"),
+                        path("${workflow.out.db[0][1]}/taxo.k2d"),
+                        file("${workflow.out.db[0][1]}/opts.k2d").name,
+                        file("${workflow.out.db[0][1]}/unmapped.txt").name
+                        ).match()
+                },
+                { assert !path("${workflow.out.db[0][1]}/library/").exists() },
+                { assert !path("${workflow.out.db[0][1]}/taxonomy/").exists() }
+            )
+        }
+    }
+
+test("metagenome - nocleanup - withbracken - fasta") {
+
+        setup {
+            run("GUNZIP") {
+                script "modules/nf-core/gunzip/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([\
+                                [id:'db'],
+                                file(params.modules_testdata_base_path + '/genomics/prokaryotes/metagenome/fasta/haemophilus_influenzae.fna.gz', checkIfExists: true)
+                            ]
+                    )
+                    """
+                }
+            }
+        }
+
+        when {
+            workflow {
+                """
+                input[0] = Channel.of([[id:'db'], file(params.modules_testdata_base_path + '/genomics/prokaryotes/metagenome/fasta/sarscov2.fasta', checkIfExists: true)]).mix(GUNZIP.out.gunzip).groupTuple()
+                input[1] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/taxdmp/names.dmp', checkIfExists: true)
+                input[2] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/taxdmp/nodes.dmp', checkIfExists: true)
+                input[3] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/accession2taxid/nucl_gb.accession2taxid', checkIfExists: true)
+                input[4] = false
+                input[5] = true
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success},
+                { assert workflow.out.db.get(0).get(1) ==~ ".*/db" },
+                { assert path("${workflow.out.db[0][1]}/library/").exists() },
+                { assert path("${workflow.out.db[0][1]}/taxonomy/").exists() },
+                { assert snapshot (
+                        workflow.out.versions,
+                        path("${workflow.out.db[0][1]}/hash.k2d"),
+                        path("${workflow.out.db[0][1]}/taxo.k2d"),
+                        file("${workflow.out.db[0][1]}/opts.k2d").name,
+                        file("${workflow.out.db[0][1]}/unmapped.txt").name,
+                        file("${workflow.out.db[0][1]}/database100mers.kmer_distrib").name,
+                        file("${workflow.out.db[0][1]}/database100mers.kraken").name,
+                        file("${workflow.out.db[0][1]}/database.kraken").name
+                        ).match()
+                }
+            )
+        }
+    }
+}
diff --git a/subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/main.nf.test.snap b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/main.nf.test.snap
new file mode 100644
index 0000000..9ad0c78
--- /dev/null
+++ b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/main.nf.test.snap
@@ -0,0 +1,58 @@
+{
+    "metagenome - nocleanup - nobracken - fasta": {
+        "content": [
+            [
+                "versions.yml:md5,b5f92f68a6af1f422ccc1a5c75178793",
+                "versions.yml:md5,f815f0afa0f648fb6532bf6d780ce0ae",
+                "versions.yml:md5,f815f0afa0f648fb6532bf6d780ce0ae"
+            ],
+            "hash.k2d:md5,4717689f8ba88d4cae51ecc7c9d9b372",
+            "taxo.k2d:md5,24338e2d78f803f48bcc5653c6e51816",
+            "opts.k2d"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-04-18T09:04:48.196774778"
+    },
+    "metagenome - withcleanup - nobracken - fasta": {
+        "content": [
+            [
+                "versions.yml:md5,b5f92f68a6af1f422ccc1a5c75178793",
+                "versions.yml:md5,f815f0afa0f648fb6532bf6d780ce0ae",
+                "versions.yml:md5,f815f0afa0f648fb6532bf6d780ce0ae"
+            ],
+            "hash.k2d:md5,4717689f8ba88d4cae51ecc7c9d9b372",
+            "taxo.k2d:md5,24338e2d78f803f48bcc5653c6e51816",
+            "opts.k2d",
+            "unmapped.txt"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-04-18T08:42:24.87325051"
+    },
+    "metagenome - nocleanup - withbracken - fasta": {
+        "content": [
+            [
+                "versions.yml:md5,130d220d293e4f75863b6c0756bb8324",
+                "versions.yml:md5,b5f92f68a6af1f422ccc1a5c75178793",
+                "versions.yml:md5,f815f0afa0f648fb6532bf6d780ce0ae"
+            ],
+            "hash.k2d:md5,4717689f8ba88d4cae51ecc7c9d9b372",
+            "taxo.k2d:md5,24338e2d78f803f48bcc5653c6e51816",
+            "opts.k2d",
+            "unmapped.txt",
+            "database100mers.kmer_distrib",
+            "database100mers.kraken",
+            "database.kraken"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-04-18T10:58:01.065026262"
+    }
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/tags.yml b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/tags.yml
new file mode 100644
index 0000000..40273bf
--- /dev/null
+++ b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/fasta_build_add_kraken2_bracken:
+  - subworkflows/nf-core/fasta_build_add_kraken2_bracken/**
diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf
index ea8c59a..01317a3 100644
--- a/workflows/createtaxdb.nf
+++ b/workflows/createtaxdb.nf
@@ -21,7 +21,7 @@ include { paramsSummaryMultiqc   } from '../subworkflows/nf-core/utils_nfcore_pi
 include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
 include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_createtaxdb_pipeline'
 
-include { FASTA_BUILD_ADD_KRAKEN2 } from '../subworkflows/nf-core/fasta_build_add_kraken2/main'
+include { FASTA_BUILD_ADD_KRAKEN2_BRACKEN } from '../subworkflows/nf-core/fasta_build_add_kraken2_bracken/main'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -142,10 +142,11 @@ workflow CREATETAXDB {
     }
 
     // SUBWORKFLOW: Kraken2
+    // Bracken requires intermediate files, if run_bracken=true then kraken2_keepintermediate=true, otherwise an error will be raised
     if ( params.build_kraken2 ) {
-        FASTA_BUILD_ADD_KRAKEN2 ( CAT_CAT_DNA.out.file_out, ch_taxonomy_namesdmp, ch_taxonomy_nodesdmp, ch_accession2taxid, !params.kraken2_keepintermediate )
-        ch_versions = ch_versions.mix(FASTA_BUILD_ADD_KRAKEN2.out.versions.first())
-        ch_kraken2_output = FASTA_BUILD_ADD_KRAKEN2.out.db
+        FASTA_BUILD_ADD_KRAKEN2_BRACKEN ( CAT_CAT_DNA.out.file_out, ch_taxonomy_namesdmp, ch_taxonomy_nodesdmp, ch_accession2taxid, !params.kraken2_keepintermediate, params.run_bracken )
+        ch_versions = ch_versions.mix(FASTA_BUILD_ADD_KRAKEN2_BRACKEN.out.versions.first())
+        ch_kraken2_output = FASTA_BUILD_ADD_KRAKEN2_BRACKEN.out.db
     } else {
         ch_kraken2_output = Channel.empty()
     }

From 130d8d2b872572132de3cc52231198dd97cebb15 Mon Sep 17 00:00:00 2001
From: alxndrdiaz <ra.ramos.diaz@gmail.com>
Date: Thu, 2 May 2024 18:11:14 -0600
Subject: [PATCH 2/8] add run_bracken

---
 nextflow_schema.json | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index b1344ea..52e702e 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -145,6 +145,11 @@
                     "type": "boolean",
                     "fa_icon": "fas fa-save",
                     "description": "Retain intermediate Kraken2 build files for inspection."
+                },
+                "run_bracken": {
+                    "type": "boolean",
+                    "fa_icon": "fas fa-save",
+                    "description": "Generate Bracken files required for abundance estimation."
                 }
             },
             "fa_icon": "fas fa-database"

From 312b878208cc2c0665d90b04de0cb56f643cee06 Mon Sep 17 00:00:00 2001
From: alxndrdiaz <ra.ramos.diaz@gmail.com>
Date: Thu, 2 May 2024 18:26:08 -0600
Subject: [PATCH 3/8] update utils_nfcore_pipeline

---
 modules.json                                       | 2 +-
 subworkflows/nf-core/utils_nfcore_pipeline/main.nf | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/modules.json b/modules.json
index 0b6df73..a318afd 100644
--- a/modules.json
+++ b/modules.json
@@ -86,7 +86,7 @@
                     },
                     "utils_nfcore_pipeline": {
                         "branch": "master",
-                        "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
+                        "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3",
                         "installed_by": ["subworkflows"]
                     },
                     "utils_nfvalidation_plugin": {
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
index a8b55d6..14558c3 100644
--- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
@@ -65,9 +65,15 @@ def checkProfileProvided(nextflow_cli_args) {
 // Citation string for pipeline
 //
 def workflowCitation() {
+    def temp_doi_ref = ""
+    String[] manifest_doi = workflow.manifest.doi.tokenize(",")
+    // Using a loop to handle multiple DOIs
+    // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers
+    // Removing ` ` since the manifest.doi is a string and not a proper list
+    for (String doi_ref: manifest_doi) temp_doi_ref += "  https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n"
     return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" +
         "* The pipeline\n" +
-        "  ${workflow.manifest.doi}\n\n" +
+        temp_doi_ref + "\n" +
         "* The nf-core framework\n" +
         "  https://doi.org/10.1038/s41587-020-0439-x\n\n" +
         "* Software dependencies\n" +

From 23a8c1bfd58f3394991a6101e0aa8db036a2518b Mon Sep 17 00:00:00 2001
From: alxndrdiaz <ra.ramos.diaz@gmail.com>
Date: Thu, 2 May 2024 18:37:08 -0600
Subject: [PATCH 4/8] remove trailing whitespace

---
 nextflow.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow.config b/nextflow.config
index 56b7e6f..5880434 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -71,7 +71,7 @@ params {
     build_centrifuge         = false
     build_kraken2            = false
     kraken2_keepintermediate = false
-    run_bracken              = false       
+    run_bracken              = false      
 
 }
 

From 9673e38450196bd912ec26a9dffef021216ca53c Mon Sep 17 00:00:00 2001
From: alxndrdiaz <ra.ramos.diaz@gmail.com>
Date: Thu, 2 May 2024 18:40:47 -0600
Subject: [PATCH 5/8] remove whitespace

---
 nextflow.config | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 5880434..96d52c4 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -71,8 +71,7 @@ params {
     build_centrifuge         = false
     build_kraken2            = false
     kraken2_keepintermediate = false
-    run_bracken              = false      
-
+    run_bracken              = false
 }
 
 // Load base.config by default for all pipelines

From 3e2377cc9a33194279a52c0f8b74dbe4efe28c21 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates" <jfy133@gmail.com>
Date: Thu, 23 May 2024 07:31:35 +0000
Subject: [PATCH 6/8] Get build running correctly with keeping intermediate
 logic,

---
 conf/test.config         |  1 +
 conf/test_nothing.config |  1 +
 nextflow.config          |  2 +-
 nextflow_schema.json     |  2 +-
 workflows/createtaxdb.nf | 12 +++++++-----
 5 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/conf/test.config b/conf/test.config
index c1be203..a817d27 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -29,6 +29,7 @@ params {
     build_malt       = true
     build_centrifuge = true
     build_kraken2    = true
+    build_bracken    = true
 
     accession2taxid  = params.pipelines_testdata_base_path + 'createtaxdb/data/taxonomy/nucl_gb.accession2taxid'
     nucl2taxid       = params.pipelines_testdata_base_path + 'createtaxdb/data/taxonomy/nucl2tax.map'
diff --git a/conf/test_nothing.config b/conf/test_nothing.config
index 72c07e3..4fd0e0f 100644
--- a/conf/test_nothing.config
+++ b/conf/test_nothing.config
@@ -30,5 +30,6 @@ params {
     build_malt       = false
     build_centrifuge = false
     build_kraken2    = false
+    build_bracken    = false
 
 }
diff --git a/nextflow.config b/nextflow.config
index b036851..648516b 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -72,7 +72,7 @@ params {
     build_centrifuge         = false
     build_kraken2            = false
     kraken2_keepintermediate = false
-    run_bracken              = false
+    build_bracken            = false
 }
 
 // Load base.config by default for all pipelines
diff --git a/nextflow_schema.json b/nextflow_schema.json
index e318efd..99b1445 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -146,7 +146,7 @@
                     "fa_icon": "fas fa-save",
                     "description": "Retain intermediate Kraken2 build files for inspection."
                 },
-                "run_bracken": {
+                "build_bracken": {
                     "type": "boolean",
                     "fa_icon": "fas fa-save",
                     "description": "Generate Bracken files required for abundance estimation."
diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf
index bd0b4cc..b461f4d 100644
--- a/workflows/createtaxdb.nf
+++ b/workflows/createtaxdb.nf
@@ -53,7 +53,7 @@ workflow CREATETAXDB {
 
     // PREPARE: Prepare input for single file inputs modules
 
-    if ( [params.build_malt, params.build_centrifuge, params.build_kraken2].any() ) {  // Pull just DNA sequences
+    if ( [params.build_malt, params.build_centrifuge, params.build_kraken2, params.build_bracken].any() ) {  // Pull just DNA sequences
 
         ch_dna_refs_for_singleref = ch_samplesheet
                                         .map{meta, fasta_dna, fasta_aa  -> [[id: params.dbname], fasta_dna]}
@@ -141,10 +141,12 @@ workflow CREATETAXDB {
         ch_kaiju_output = Channel.empty()
     }
 
-    // SUBWORKFLOW: Kraken2
-    // Bracken requires intermediate files, if run_bracken=true then kraken2_keepintermediate=true, otherwise an error will be raised
-    if ( params.build_kraken2 ) {
-        FASTA_BUILD_ADD_KRAKEN2_BRACKEN ( CAT_CAT_DNA.out.file_out, ch_taxonomy_namesdmp, ch_taxonomy_nodesdmp, ch_accession2taxid, !params.kraken2_keepintermediate, params.run_bracken )
+    // SUBWORKFLOW: Kraken2 and Bracken
+    // Bracken requires the intermediate Kraken2 files, so if build_bracken=true they are always kept, even when kraken2_keepintermediate=false
+    // Condition is inverted because subworkflow asks if you want to 'clean' (true) or not, but pipeline says to 'keep'
+    if ( params.build_kraken2 || params.build_bracken ) {
+        def k2_keepintermediates = params.kraken2_keepintermediate || params.build_bracken ? false : true
+        FASTA_BUILD_ADD_KRAKEN2_BRACKEN ( CAT_CAT_DNA.out.file_out, ch_taxonomy_namesdmp, ch_taxonomy_nodesdmp, ch_accession2taxid, k2_keepintermediates, params.build_bracken )
         ch_versions = ch_versions.mix(FASTA_BUILD_ADD_KRAKEN2_BRACKEN.out.versions.first())
         ch_kraken2_output = FASTA_BUILD_ADD_KRAKEN2_BRACKEN.out.db
     } else {

From 526ef0dd6705e8faa91241db59d3391f415d4907 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates" <jfy133@gmail.com>
Date: Thu, 23 May 2024 08:00:58 +0000
Subject: [PATCH 7/8] Get testing working

---
 conf/test.config         |  4 +++-
 conf/test_full.config    |  8 ++++++++
 nextflow.config          |  2 +-
 nextflow_schema.json     |  6 +++---
 tests/test.nf.test       |  8 +++++---
 tests/test.nf.test.snap  |  6 ++++--
 workflows/createtaxdb.nf | 18 +++++++++---------
 7 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/conf/test.config b/conf/test.config
index a817d27..4046af5 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -22,7 +22,9 @@ params {
     // Input data
     // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
     // TODO nf-core: Give any required params for the test so that command line flags are not needed
-    input         = params.pipelines_testdata_base_path + 'createtaxdb/samplesheets/test.csv'
+    input            = params.pipelines_testdata_base_path + 'createtaxdb/samplesheets/test.csv'
+
+    dbname           = "database"
 
     build_diamond    = true
     build_kaiju      = true
diff --git a/conf/test_full.config b/conf/test_full.config
index 591cce3..b43690d 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -18,4 +18,12 @@ params {
     // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA)
     // TODO nf-core: Give any required params for the test so that command line flags are not needed
     input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv'
+
+    build_diamond    = true
+    build_kaiju      = true
+    build_malt       = true
+    build_centrifuge = true
+    build_kraken2    = true
+    build_bracken    = true
+
 }
diff --git a/nextflow.config b/nextflow.config
index 648516b..15cec1c 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -54,7 +54,7 @@ params {
     validate_params                  = true
 
     // General parameters
-    dbname                   = "database"
+    dbname                   = null
     save_concatenated_fastas = false
 
     accession2taxid = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 99b1445..853449f 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -10,7 +10,7 @@
             "type": "object",
             "fa_icon": "fas fa-terminal",
             "description": "Define where the pipeline should find input data and save output data.",
-            "required": ["input", "outdir"],
+            "required": ["input", "outdir", "dbname"],
             "properties": {
                 "input": {
                     "type": "string",
@@ -43,7 +43,6 @@
                 },
                 "dbname": {
                     "type": "string",
-                    "default": "database",
                     "description": "Specify name that resulting databases will be prefixed with.",
                     "fa_icon": "fas fa-id-badge"
                 },
@@ -149,7 +148,8 @@
                 "build_bracken": {
                     "type": "boolean",
                     "fa_icon": "fas fa-save",
-                    "description": "Generate Bracken files required for abundance estimation."
+                    "description": "Turn on extending of the Kraken2 database to include Bracken files. Requires nucleotide FASTA file input.",
+                    "help_text": "Bracken databases are simply Kraken2 databases with two additional files.\n\nNote however that this requires a Kraken2 database _with_ intermediate files still in it, and thus can result in large database directories."
                 }
             },
             "fa_icon": "fas fa-database"
diff --git a/tests/test.nf.test b/tests/test.nf.test
index 46dd23f..81a76d2 100644
--- a/tests/test.nf.test
+++ b/tests/test.nf.test
@@ -21,9 +21,11 @@ nextflow_pipeline {
                         path("$outputDir/centrifuge/"),
                         path("$outputDir/diamond/database.dmnd"),
                         path("$outputDir/kaiju/database.fmi"),
-                        path("$outputDir/kraken2/db/hash.k2d"),
-                        file("$outputDir/kraken2/db/opts.k2d").name,
-                        path("$outputDir/kraken2/db/taxo.k2d"),
+                        path("$outputDir/kraken2/database/hash.k2d"),
+                        file("$outputDir/kraken2/database/opts.k2d").name,
+                        path("$outputDir/kraken2/database/taxo.k2d"),
+                        file("$outputDir/bracken/database/database100mers.kmer_distrib").name,
+                        file("$outputDir/bracken/database/database100mers.kraken").name,
                         path("$outputDir/malt/malt-build.log").readLines().last().contains('Peak memory'),
                         path("$outputDir/malt/malt_index/index0.idx"),
                         path("$outputDir/malt/malt_index/ref.db"),
diff --git a/tests/test.nf.test.snap b/tests/test.nf.test.snap
index 73e0d6d..bf8fe23 100644
--- a/tests/test.nf.test.snap
+++ b/tests/test.nf.test.snap
@@ -12,6 +12,8 @@
             "hash.k2d:md5,01122a04dcef29ceb3baa68a9f6e6ef5",
             "opts.k2d",
             "taxo.k2d:md5,cd8170a8c5a1b763a9ac1ffa2107cc88",
+            "database100mers.kmer_distrib",
+            "database100mers.kraken",
             true,
             "index0.idx:md5,876139dc930e68992cd2625e08bba48a",
             "ref.db:md5,377073f58a9f9b85acca59fcf21744a9",
@@ -23,8 +25,8 @@
         ],
         "meta": {
             "nf-test": "0.8.4",
-            "nextflow": "24.02.0"
+            "nextflow": "24.04.1"
         },
-        "timestamp": "2024-04-11T10:59:28.687364796"
+        "timestamp": "2024-05-23T08:00:31.799820635"
     }
 }
\ No newline at end of file
diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf
index b461f4d..0192d99 100644
--- a/workflows/createtaxdb.nf
+++ b/workflows/createtaxdb.nf
@@ -148,9 +148,9 @@ workflow CREATETAXDB {
         def k2_keepintermediates = params.kraken2_keepintermediate || params.build_bracken ? false : true
         FASTA_BUILD_ADD_KRAKEN2_BRACKEN ( CAT_CAT_DNA.out.file_out, ch_taxonomy_namesdmp, ch_taxonomy_nodesdmp, ch_accession2taxid, k2_keepintermediates, params.build_bracken )
         ch_versions = ch_versions.mix(FASTA_BUILD_ADD_KRAKEN2_BRACKEN.out.versions.first())
-        ch_kraken2_output = FASTA_BUILD_ADD_KRAKEN2_BRACKEN.out.db
+        ch_kraken2_bracken_output = FASTA_BUILD_ADD_KRAKEN2_BRACKEN.out.db
     } else {
-        ch_kraken2_output = Channel.empty()
+        ch_kraken2_bracken_output = Channel.empty()
     }
 
     // Module: Run MALT/BUILD
@@ -229,13 +229,13 @@ workflow CREATETAXDB {
     multiqc_report = MULTIQC.out.report.toList()
 
     emit:
-    versions            = ch_collated_versions
-    multiqc_report      = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html
-    centrifuge_database = ch_centrifuge_output
-    diamond_database    = ch_diamond_output
-    kaiju_database      = ch_kaiju_output
-    kraken2_database    = ch_kraken2_output
-    malt_database       = ch_malt_output
+    versions                    = ch_collated_versions
+    multiqc_report              = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html
+    centrifuge_database         = ch_centrifuge_output
+    diamond_database            = ch_diamond_output
+    kaiju_database              = ch_kaiju_output
+    kraken2_bracken_database    = ch_kraken2_bracken_output
+    malt_database               = ch_malt_output
 }
 
 /*

From 0d7ed76a7fe4e44f0729adfcbc341650bf34457c Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates" <jfy133@gmail.com>
Date: Thu, 23 May 2024 08:16:52 +0000
Subject: [PATCH 8/8] Add missing file in tests, and update docs everywhere

---
 CITATIONS.md                                  |  4 +++
 README.md                                     |  3 +-
 docs/output.md                                | 29 +++++++++++++++++++
 .../utils_nfcore_createtaxdb_pipeline/main.nf |  2 ++
 tests/test.nf.test                            |  1 +
 tests/test.nf.test.snap                       |  3 +-
 6 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/CITATIONS.md b/CITATIONS.md
index 8451b47..e9b5164 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -40,6 +40,10 @@
 
   > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675.
 
+- [Bracken](https://doi.org/10.7717/peerj-cs.104)
+
+  > Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: estimating species abundance in metagenomics data. PeerJ. Computer Science, 3(e104), e104. https://doi.org/10.7717/peerj-cs.104
+
 - [Centrifuge](https://doi.org/10.1101/gr.210641.116)
 
   > Kim, D., Song, L., Breitwieser, F. P., & Salzberg, S. L. (2016). Centrifuge: rapid and sensitive classification of metagenomic sequences. Genome Research, 26(12), 1721–1729. https://doi.org/10.1101/gr.210641.116
diff --git a/README.md b/README.md
index 7b1787c..c072d55 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,7 @@
 
 1. Prepares input FASTA files for building
 2. Builds databases for:
+   - [Bracken](https://doi.org/10.7717/peerj-cs.104)
    - [Centrifuge](https://doi.org/10.1101/gr.210641.116)
    - [DIAMOND](https://doi.org/10.1038/nmeth.3176)
    - [Kaiju](https://doi.org/10.1038/ncomms11257)
@@ -84,7 +85,7 @@ For more details about the output files and reports, please refer to the
 
 ## Credits
 
-nf-core/createtaxdb was originally written by James A. Fellows Yates and the nf-core community.
+nf-core/createtaxdb was originally written by James A. Fellows Yates, Joon Klaps, Alexander Ramos Díaz and the nf-core community.
 
 We thank the following people for their extensive assistance in the development of this pipeline:
 
diff --git a/docs/output.md b/docs/output.md
index 5a54002..1a30100 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -14,6 +14,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
 
 - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
 - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
+- [Bracken](#bracken) - Database files for Bracken
 - [Centrifuge](#centrifuge) - Database files for Centrifuge
 - [DIAMOND](#diamond) - Database files for DIAMOND
 - [Kaiju](#kaiju) - Database files for Kaiju
@@ -51,6 +52,31 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
 
 [Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage.
 
+### Bracken
+
+[Bracken](https://github.com/jenniferlu717/Bracken/) (Bayesian Reestimation of Abundance with KrakEN) is a highly accurate statistical method that computes the abundance of species in DNA sequences from a metagenomics sample.
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `bracken/`
+  - `<db_name>/`
+    - `database100mers.kmer_distrib`: Bracken kmer distribution file
+    - `database100mers.kraken`: Bracken index file
+    - `database.kraken`: Bracken database file
+    - `hash.k2d`: Kraken2 hash database file
+    - `opts.k2d`: Kraken2 opts database file
+    - `taxo.k2d`: Kraken2 taxo database file
+    - `library/`: Intermediate Kraken2 directory containing FASTAs and related files of added genomes
+    - `taxonomy/`: Intermediate Kraken2 directory containing taxonomy files of added genomes
+    - `seqid2taxid.map`: Intermediate Kraken2 file mapping the sequence IDs of added genomes to their taxonomy IDs
+
+</details>
+
+Note that all intermediate files are required for a Bracken database, even if Kraken2 itself only requires the `*.k2d` files.
+
+The resulting `<db_name>/` directory can be given to Bracken itself with `bracken -d <your_database_name>` etc.
+
 ### Centrifuge
 
 [Centrifuge](https://github.com/bbuchfink/diamond) is a very rapid and memory-efficient system for the classification of DNA sequences from microbial samples.
@@ -105,6 +131,9 @@ The `fmi` file can be given to Kaiju itself with `kaiju -f <your_database>.fmi`
     - `hash.k2d`: Kraken2 hash database file
     - `opts.k2d`: Kraken2 opts database file
     - `taxo.k2d`: Kraken2 taxo database file
+    - `library/`: Intermediate directory containing FASTAs and related files of added genomes (only present if `--build_bracken` or `--kraken2_keepintermediate` supplied)
+    - `taxonomy/`: Intermediate directory containing taxonomy files of added genomes (only present if `--build_bracken` or `--kraken2_keepintermediate` supplied)
+    - `seqid2taxid.map`: Intermediate file mapping the sequence IDs of added genomes to their taxonomy IDs (only present if `--build_bracken` or `--kraken2_keepintermediate` supplied)
 
 </details>
 
diff --git a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf
index 1a9db9f..adf9a3d 100644
--- a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf
@@ -194,6 +194,7 @@ def toolCitationText() {
     // Uncomment function in methodsDescriptionText to render in MultiQC report
     def citation_text = [
             "Tools used in the workflow included:",
+            params.build_bracken    ? "Bracken (Lu et al. 2017)," : "",
             params.build_centrifuge ? "Centrifuge (Kim et al. 2016)," : "",
             params.build_diamond    ? "DIAMOND (Buchfink et al. 2015)," : "",
             params.build_kaiju      ? "Kaiju (Menzel et al. 2016)," : "",
@@ -211,6 +212,7 @@ def toolBibliographyText() {
     // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "<li>Author (2023) Pub name, Journal, DOI</li>" : "",
     // Uncomment function in methodsDescriptionText to render in MultiQC report
     def reference_text = [
+            params.build_bracken    ? "<li>Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: estimating species abundance in metagenomics data. PeerJ. Computer Science, 3(e104), e104. <a href=\"https://doi.org/10.7717/peerj-cs.104\">10.7717/peerj-cs.104</a></li>" : "",
             params.build_centrifuge ? "<li>Kim, D., Song, L., Breitwieser, F. P., & Salzberg, S. L. (2016). Centrifuge: rapid and sensitive classification of metagenomic sequences. Genome Research, 26(12), 1721–1729.  <a href=\"https://doi.org/10.1101/gr.210641.116\">10.1101/gr.210641.116</a></li>" : "",
             params.build_diamond    ? "<li>Buchfink, B., Xie, C., & Huson, D. H. (2015). Fast and sensitive protein alignment using DIAMOND. Nature Methods, 12(1), 59–60. <a href=\"https://doi.org/10.1038/nmeth.3176\">10.1038/nmeth.3176</a></li>" : "",
             params.build_kaiju      ? "<li>Menzel, P., Ng, K. L., & Krogh, A. (2016). Fast and sensitive taxonomic classification for metagenomics with Kaiju. Nature Communications, 7, 11257. <a href=\"https://doi.org/10.1038/ncomms11257\">10.1038/ncomms11257</a></li>" : "",
diff --git a/tests/test.nf.test b/tests/test.nf.test
index 81a76d2..54bea3e 100644
--- a/tests/test.nf.test
+++ b/tests/test.nf.test
@@ -26,6 +26,7 @@ nextflow_pipeline {
                         path("$outputDir/kraken2/database/taxo.k2d"),
                         file("$outputDir/bracken/database/database100mers.kmer_distrib").name,
                         file("$outputDir/bracken/database/database100mers.kraken").name,
+                        file("$outputDir/bracken/database/database.kraken").name,
                         path("$outputDir/malt/malt-build.log").readLines().last().contains('Peak memory'),
                         path("$outputDir/malt/malt_index/index0.idx"),
                         path("$outputDir/malt/malt_index/ref.db"),
diff --git a/tests/test.nf.test.snap b/tests/test.nf.test.snap
index bf8fe23..bf9427f 100644
--- a/tests/test.nf.test.snap
+++ b/tests/test.nf.test.snap
@@ -14,6 +14,7 @@
             "taxo.k2d:md5,cd8170a8c5a1b763a9ac1ffa2107cc88",
             "database100mers.kmer_distrib",
             "database100mers.kraken",
+            "database.kraken",
             true,
             "index0.idx:md5,876139dc930e68992cd2625e08bba48a",
             "ref.db:md5,377073f58a9f9b85acca59fcf21744a9",
@@ -27,6 +28,6 @@
             "nf-test": "0.8.4",
             "nextflow": "24.04.1"
         },
-        "timestamp": "2024-05-23T08:00:31.799820635"
+        "timestamp": "2024-05-23T08:15:27.641419595"
     }
 }
\ No newline at end of file