From 55fbe6b0de0a87fc4df6bf3128655bb57c572356 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 10 May 2024 13:11:02 +0200 Subject: [PATCH 1/7] Start adding krakenuniq --- CITATIONS.md | 4 ++ conf/modules.config | 4 ++ conf/test.config | 8 +++ conf/test_nothing.config | 1 + modules.json | 6 +++ .../nf-core/krakenuniq/build/environment.yml | 7 +++ .../krakenuniq/build/krakenuniq-build.diff | 14 ++++++ modules/nf-core/krakenuniq/build/main.nf | 37 ++++++++++++++ modules/nf-core/krakenuniq/build/meta.yml | 48 ++++++++++++++++++ nextflow.config | 1 + nextflow_schema.json | 5 ++ .../utils_nfcore_createtaxdb_pipeline/main.nf | 5 +- tests/test.nf.test | 1 + workflows/createtaxdb.nf | 49 +++++++++++++------ 14 files changed, 174 insertions(+), 16 deletions(-) create mode 100644 modules/nf-core/krakenuniq/build/environment.yml create mode 100644 modules/nf-core/krakenuniq/build/krakenuniq-build.diff create mode 100644 modules/nf-core/krakenuniq/build/main.nf create mode 100644 modules/nf-core/krakenuniq/build/meta.yml diff --git a/CITATIONS.md b/CITATIONS.md index 8451b47..5e4b03d 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -56,6 +56,10 @@ > Wood, D. E., Lu, J., & Langmead, B. (2019). Improved metagenomic analysis with Kraken 2. Genome Biology, 20(1), 257. https://doi.org/10.1186/s13059-019-1891-0 +- [KrakenUniq](https://doi.org/10.1186/s13059-018-1568-0) + + > Breitwieser, F. P., Baker, D. N., & Salzberg, S. L. (2018). KrakenUniq: confident and fast metagenomics classification using unique k-mer counts. Genome Biology, 19(1), 198. https://doi.org/10.1186/s13059-018-1568-0 + - [MALT](https://doi.org/10.1038/s41559-017-0446-6) > Vågene, Å. J., Herbig, A., Campana, M. G., Robles García, N. M., Warinner, C., Sabin, S., Spyrou, M. A., Andrades Valtueña, A., Huson, D., Tuross, N., Bos, K. I., & Krause, J. (2018). Salmonella enterica genomes from victims of a major sixteenth-century epidemic in Mexico. Nature Ecology & Evolution, 2(3), 520–528. 
https://doi.org/10.1038/s41559-017-0446-6 diff --git a/conf/modules.config b/conf/modules.config index a2ff3f1..22b8402 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -59,4 +59,8 @@ process { ext.args = { "--sequenceType ${params.malt_sequencetype}" } } + withName: 'KRAKENUNIQ_BUILD' { + ext.args = { "--jellyfish-bin \$(which jellyfish)" } + } + } diff --git a/conf/test.config b/conf/test.config index 371fe8b..2b712bb 100644 --- a/conf/test.config +++ b/conf/test.config @@ -28,6 +28,7 @@ params { build_malt = true build_centrifuge = true build_kraken2 = true + build_krakenuniq = true accession2taxid = 'https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/taxonomy/nucl_gb.accession2taxid' nucl2taxid = 'https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/taxonomy/nucl2tax.map' @@ -36,3 +37,10 @@ params { namesdmp = 'https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/taxonomy/names.dmp' malt_mapdb = 's3://ngi-igenomes/test-data/createtaxdb/taxonomy/megan-nucl-Feb2022.db.zip' } + +process { + withName:'KRAKENUNIQ_BUILD'{ + memory = { check_max( 24.GB * task.attempt, 'memory' ) } + + } +} diff --git a/conf/test_nothing.config b/conf/test_nothing.config index 72c07e3..13a1998 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -30,5 +30,6 @@ params { build_malt = false build_centrifuge = false build_kraken2 = false + build_krakenuniq = false } diff --git a/modules.json b/modules.json index cb08493..4a80ba7 100644 --- a/modules.json +++ b/modules.json @@ -45,6 +45,12 @@ "git_sha": "ca87ad032a62f025f0c373facacef2df0c5411b2", "installed_by": ["fasta_build_add_kraken2"] }, + "krakenuniq/build": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"], + "patch": "modules/nf-core/krakenuniq/build/krakenuniq-build.diff" + }, "malt/build": { "branch": "master", "git_sha": "7d3bac628092d1aead36960c4b6ae41302a9f797", diff --git 
a/modules/nf-core/krakenuniq/build/environment.yml b/modules/nf-core/krakenuniq/build/environment.yml new file mode 100644 index 0000000..d99b5ef --- /dev/null +++ b/modules/nf-core/krakenuniq/build/environment.yml @@ -0,0 +1,7 @@ +name: krakenuniq_build +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::krakenuniq=1.0.4 diff --git a/modules/nf-core/krakenuniq/build/krakenuniq-build.diff b/modules/nf-core/krakenuniq/build/krakenuniq-build.diff new file mode 100644 index 0000000..8bfb69e --- /dev/null +++ b/modules/nf-core/krakenuniq/build/krakenuniq-build.diff @@ -0,0 +1,14 @@ +Changes in module 'nf-core/krakenuniq/build' +--- modules/nf-core/krakenuniq/build/main.nf ++++ modules/nf-core/krakenuniq/build/main.nf +@@ -8,7 +8,7 @@ + 'biocontainers/krakenuniq:1.0.4--pl5321h19e8d03_0' }" + + input: +- tuple val(meta), path(custom_library_dir, stageAs: "library/*"), path(custom_taxonomy_dir, stageAs: "taxonomy"), path(custom_seqid2taxid) ++ tuple val(meta), path(custom_library_dir, stageAs: "library/*"), path(custom_taxonomy_dir, stageAs: "taxonomy/*"), path(custom_seqid2taxid) + + output: + tuple val(meta), path("$prefix/"), emit: db + +************************************************************ diff --git a/modules/nf-core/krakenuniq/build/main.nf b/modules/nf-core/krakenuniq/build/main.nf new file mode 100644 index 0000000..615dd5e --- /dev/null +++ b/modules/nf-core/krakenuniq/build/main.nf @@ -0,0 +1,37 @@ +process KRAKENUNIQ_BUILD { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/krakenuniq:1.0.4--pl5321h19e8d03_0': + 'biocontainers/krakenuniq:1.0.4--pl5321h19e8d03_0' }" + + input: + tuple val(meta), path(custom_library_dir, stageAs: "library/*"), path(custom_taxonomy_dir, stageAs: "taxonomy/*"), path(custom_seqid2taxid) + + output: + tuple val(meta), path("$prefix/"), emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + custom_db = custom_library_dir ? "mkdir $prefix && mv library taxonomy $custom_seqid2taxid $prefix" : "" + """ + $custom_db + + krakenuniq-build \\ + $args \\ + --threads ${task.cpus} \\ + --db ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/krakenuniq/build/meta.yml b/modules/nf-core/krakenuniq/build/meta.yml new file mode 100644 index 0000000..f63b325 --- /dev/null +++ b/modules/nf-core/krakenuniq/build/meta.yml @@ -0,0 +1,48 @@ +name: "krakenuniq_build" +description: Download and build (custom) KrakenUniq databases +keywords: + - metagenomics + - krakenuniq + - database + - build + - ncbi +tools: + - "krakenuniq": + description: "Metagenomics classifier with unique k-mer counting for more specific results" + homepage: https://github.com/fbreitwieser/krakenuniq + documentation: https://github.com/fbreitwieser/krakenuniq + tool_dev_url: https://github.com/fbreitwieser/krakenuniq + doi: 10.1186/s13059-018-1568-0 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - custom_library_dir: + type: directory + description: Optional directory containing custom fasta files for database build + pattern: "*" + - custom_taxonomy_dir: + type: directory + description: Optional directory containing custom taxonomy files for database build + pattern: "*" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - db: + type: directory + description: Directory containing KrakenUniq database + pattern: "*/" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/nextflow.config b/nextflow.config index 4725942..31b55d3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -71,6 +71,7 @@ params { build_centrifuge = false build_kraken2 = false kraken2_keepintermediate = false + build_krakenuniq = false } diff --git a/nextflow_schema.json b/nextflow_schema.json index b1344ea..e98b6b8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -145,6 +145,11 @@ "type": "boolean", "fa_icon": "fas fa-save", "description": "Retain intermediate Kraken2 build files for inspection." + }, + "build_krakenuniq": { + "type": "boolean", + "fa_icon": "fas fa-toggle-on", + "description": "Turn on building of KrakenUniq database. Requires nucleotide FASTA file input." } }, "fa_icon": "fas fa-database" diff --git a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf index 0c4feb8..2e2a2d6 100644 --- a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf @@ -194,6 +194,7 @@ def toolCitationText() { params.build_diamond ? "DIAMOND (Buchfink et al. 2015)," : "", params.build_kaiju ? "Kaiju (Menzel et al. 2016)," : "", params.build_kraken2 ? "Kraken2 (Wood et al. 
2019)," : "", + params.build_krakenuniq ? "KrakenUniq (Breitwieser et al. 2018)," : "", params.build_malt ? "MALT (Vågene et al. 2018)," : "", "and MultiQC (Ewels et al. 2016)", "." @@ -211,7 +212,9 @@ def toolBibliographyText() { params.build_diamond ? "
  • Buchfink, B., Xie, C., & Huson, D. H. (2015). Fast and sensitive protein alignment using DIAMOND. Nature Methods, 12(1), 59–60. 10.1038/nmeth.3176
  • " : "", params.build_kaiju ? "
  • Menzel, P., Ng, K. L., & Krogh, A. (2016). Fast and sensitive taxonomic classification for metagenomics with Kaiju. Nature Communications, 7, 11257. 10.1038/ncomms11257
  • " : "", params.build_kraken2 ? "
  • Wood, D. E., Lu, J., & Langmead, B. (2019). Improved metagenomic analysis with Kraken 2. Genome Biology, 20(1), 257. 10.1186/s13059-019-1891-0
  • " : "", - params.build_malt ? "
  • Vågene, Å. J., Herbig, A., Campana, M. G., Robles García, N. M., Warinner, C., Sabin, S., Spyrou, M. A., Andrades Valtueña, A., Huson, D., Tuross, N., Bos, K. I., & Krause, J. (2018). Salmonella enterica genomes from victims of a major sixteenth-century epidemic in Mexico. Nature Ecology & Evolution, 2(3), 520–528. 10.1038/s41559-017-0446-6
  • " : "", "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " + params.build_krakenuniq ? "
  • Breitwieser, F. P., Baker, D. N., & Salzberg, S. L. (2018). KrakenUniq: confident and fast metagenomics classification using unique k-mer counts. Genome Biology, 19(1), 198. 10.1186/s13059-018-1568-0
  • " : "", + params.build_malt ? "
  • Vågene, Å. J., Herbig, A., Campana, M. G., Robles García, N. M., Warinner, C., Sabin, S., Spyrou, M. A., Andrades Valtueña, A., Huson, D., Tuross, N., Bos, K. I., & Krause, J. (2018). Salmonella enterica genomes from victims of a major sixteenth-century epidemic in Mexico. Nature Ecology & Evolution, 2(3), 520–528. 10.1038/s41559-017-0446-6
  • " : "", + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " ].join(' ').trim() return reference_text diff --git a/tests/test.nf.test b/tests/test.nf.test index 46dd23f..9067788 100644 --- a/tests/test.nf.test +++ b/tests/test.nf.test @@ -24,6 +24,7 @@ nextflow_pipeline { path("$outputDir/kraken2/db/hash.k2d"), file("$outputDir/kraken2/db/opts.k2d").name, path("$outputDir/kraken2/db/taxo.k2d"), + path("$outputDir/krakenuniq/"), path("$outputDir/malt/malt-build.log").readLines().last().contains('Peak memory'), path("$outputDir/malt/malt_index/index0.idx"), path("$outputDir/malt/malt_index/ref.db"), diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index ea8c59a..2ecd17b 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -5,16 +5,21 @@ */ include { MULTIQC } from '../modules/nf-core/multiqc/main' + +// Preprocessing +include { GUNZIP as GUNZIP_DNA } from '../modules/nf-core/gunzip/main' +include { PIGZ_COMPRESS as PIGZ_COMPRESS_DNA } from '../modules/nf-core/pigz/compress/main' +include { PIGZ_COMPRESS as PIGZ_COMPRESS_AA } from '../modules/nf-core/pigz/compress/main' include { CAT_CAT as CAT_CAT_DNA } from '../modules/nf-core/cat/cat/main' include { CAT_CAT as CAT_CAT_AA } from '../modules/nf-core/cat/cat/main' + +// Database building (with specific auxiliary modules) include { CENTRIFUGE_BUILD } from '../modules/nf-core/centrifuge/build/main' -include { KAIJU_MKFMI } from '../modules/nf-core/kaiju/mkfmi/main' include { DIAMOND_MAKEDB } from '../modules/nf-core/diamond/makedb/main' -include { MALT_BUILD } from '../modules/nf-core/malt/build/main' -include { GUNZIP as GUNZIP_DNA } from '../modules/nf-core/gunzip/main' -include { PIGZ_COMPRESS as PIGZ_COMPRESS_DNA } from '../modules/nf-core/pigz/compress/main' -include { PIGZ_COMPRESS as PIGZ_COMPRESS_AA } from '../modules/nf-core/pigz/compress/main' +include { KAIJU_MKFMI } from '../modules/nf-core/kaiju/mkfmi/main' +include { KRAKENUNIQ_BUILD } from '../modules/nf-core/krakenuniq/build/main' include { UNZIP } from 
'../modules/nf-core/unzip/main' +include { MALT_BUILD } from '../modules/nf-core/malt/build/main' include { paramsSummaryMap } from 'plugin/nf-validation' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' @@ -53,7 +58,7 @@ workflow CREATETAXDB { // PREPARE: Prepare input for single file inputs modules - if ( [params.build_malt, params.build_centrifuge, params.build_kraken2].any() ) { // Pull just DNA sequences + if ( [params.build_malt, params.build_centrifuge, params.build_kraken2, params.build_krakenuniq].any() ) { // Pull just DNA sequences ch_dna_refs_for_singleref = ch_samplesheet .map{meta, fasta_dna, fasta_aa -> [[id: params.dbname], fasta_dna]} @@ -69,12 +74,12 @@ workflow CREATETAXDB { } GUNZIP_DNA ( ch_dna_for_unzipping.zipped ) - ch_prepped_dna_fastas = GUNZIP_DNA.out.gunzip.mix(ch_dna_for_unzipping.unzipped).groupTuple() - ch_versions = ch_versions.mix(GUNZIP_DNA.out.versions.first()) + ch_prepped_dna_fastas = GUNZIP_DNA.out.gunzip.mix( ch_dna_for_unzipping.unzipped).groupTuple() + ch_versions = ch_versions.mix( GUNZIP_DNA.out.versions.first() ) // Place in single file ch_singleref_for_dna = CAT_CAT_DNA ( ch_prepped_dna_fastas ) - ch_versions = ch_versions.mix(CAT_CAT_DNA.out.versions.first()) + ch_versions = ch_versions.mix( CAT_CAT_DNA.out.versions.first() ) } // TODO: Possibly need to have a modification step to get header correct to actually run with kaiju... 
@@ -98,7 +103,7 @@ workflow CREATETAXDB { } PIGZ_COMPRESS_AA ( ch_aa_for_zipping.unzipped ) - ch_prepped_aa_fastas = PIGZ_COMPRESS_AA.out.archive.mix(ch_aa_for_zipping.zipped).groupTuple() + ch_prepped_aa_fastas = PIGZ_COMPRESS_AA.out.archive.mix( ch_aa_for_zipping.zipped).groupTuple() //ch_versions = ch_versions.mix( PIGZ_COMPRESS_AA.versions.first() ) ch_singleref_for_aa = CAT_CAT_AA ( ch_prepped_aa_fastas ) @@ -115,7 +120,7 @@ workflow CREATETAXDB { if ( params.build_centrifuge ) { CENTRIFUGE_BUILD ( CAT_CAT_DNA.out.file_out, ch_nucl2taxid, ch_taxonomy_nodesdmp, ch_taxonomy_namesdmp, [] ) - ch_versions = ch_versions.mix(CENTRIFUGE_BUILD.out.versions.first()) + ch_versions = ch_versions.mix( CENTRIFUGE_BUILD.out.versions.first() ) ch_centrifuge_output = CENTRIFUGE_BUILD.out.cf } else { ch_centrifuge_output = Channel.empty() @@ -125,7 +130,7 @@ workflow CREATETAXDB { if ( params.build_diamond ) { DIAMOND_MAKEDB ( CAT_CAT_AA.out.file_out, ch_prot2taxid, ch_taxonomy_nodesdmp, ch_taxonomy_namesdmp ) - ch_versions = ch_versions.mix(DIAMOND_MAKEDB.out.versions.first()) + ch_versions = ch_versions.mix( DIAMOND_MAKEDB.out.versions.first() ) ch_diamond_output = DIAMOND_MAKEDB.out.db } else { ch_diamond_output = Channel.empty() @@ -135,7 +140,7 @@ workflow CREATETAXDB { if ( params.build_kaiju ) { KAIJU_MKFMI ( CAT_CAT_AA.out.file_out ) - ch_versions = ch_versions.mix(KAIJU_MKFMI.out.versions.first()) + ch_versions = ch_versions.mix( KAIJU_MKFMI.out.versions.first() ) ch_kaiju_output = KAIJU_MKFMI.out.fmi } else { ch_kaiju_output = Channel.empty() @@ -144,12 +149,25 @@ workflow CREATETAXDB { // SUBWORKFLOW: Kraken2 if ( params.build_kraken2 ) { FASTA_BUILD_ADD_KRAKEN2 ( CAT_CAT_DNA.out.file_out, ch_taxonomy_namesdmp, ch_taxonomy_nodesdmp, ch_accession2taxid, !params.kraken2_keepintermediate ) - ch_versions = ch_versions.mix(FASTA_BUILD_ADD_KRAKEN2.out.versions.first()) + ch_versions = ch_versions.mix( FASTA_BUILD_ADD_KRAKEN2.out.versions ) // don't .first() here as 
subworkflow! ch_kraken2_output = FASTA_BUILD_ADD_KRAKEN2.out.db } else { ch_kraken2_output = Channel.empty() } + // SUBWORKFLOW: Run KRAKENUNIQ/BUILD + if ( params.build_krakenuniq ) { + + ch_taxdmpfiles_for_krakenuniq = Channel.of(ch_taxonomy_namesdmp).combine(Channel.of(ch_taxonomy_nodesdmp)).map{[it]} + ch_input_for_krakenuniq = ch_prepped_dna_fastas.combine(ch_taxdmpfiles_for_krakenuniq).map{ meta, reads, taxdump -> [ meta, reads, taxdump, ch_nucl2taxid ] }.dump(tag: 'input_to_ku') + + KRAKENUNIQ_BUILD ( ch_input_for_krakenuniq ) + ch_versions = ch_versions.mix( KRAKENUNIQ_BUILD.out.versions.first() ) + ch_krakenuniq_output = KRAKENUNIQ_BUILD.out.db + } else { + ch_krakenuniq_output = Channel.empty() + } + // Module: Run MALT/BUILD if ( params.build_malt ) { @@ -168,7 +186,7 @@ workflow CREATETAXDB { } MALT_BUILD (ch_input_for_malt, [], ch_malt_mapdb) - ch_versions = ch_versions.mix(MALT_BUILD.out.versions.first()) + ch_versions = ch_versions.mix( MALT_BUILD.out.versions.first() ) ch_malt_output = MALT_BUILD.out.index } else { ch_malt_output = Channel.empty() @@ -210,6 +228,7 @@ workflow CREATETAXDB { diamond_database = ch_diamond_output kaiju_database = ch_kaiju_output kraken2_database = ch_kraken2_output + krakenuniq_database = ch_krakenuniq_output malt_database = ch_malt_output } From 33f2eba5fa6bcca91948a3757e9d7c2b60d1674a Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 23 May 2024 11:37:41 +0200 Subject: [PATCH 2/7] Add docs, update test profiles, start updating snap (not working) --- README.md | 1 + conf/test.config | 3 ++- conf/test_full.config | 7 +++++++ docs/output.md | 19 +++++++++++++++++++ tests/test.nf.test | 5 ++++- tests/test.nf.test.snap | 8 ++++++-- 6 files changed, 39 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 48d95ee..c265abb 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ - [DIAMOND](https://doi.org/10.1038/nmeth.3176) - [Kaiju](https://doi.org/10.1038/ncomms11257) - [Kraken2](https://doi.org/10.1186/s13059-019-1891-0) + - [KrakenUniq](https://doi.org/10.1186/s13059-018-1568-0) - [MALT](https://doi.org/10.1038/s41559-017-0446-6) ## Usage diff --git a/conf/test.config b/conf/test.config index 5979782..76f98dd 100644 --- a/conf/test.config +++ b/conf/test.config @@ -16,7 +16,7 @@ params { // Limit resources so that this can run on GitHub Actions max_cpus = 2 - max_memory = '6.GB' + max_memory = '24.GB' max_time = '6.h' // Input data @@ -42,6 +42,7 @@ params { process { withName:'KRAKENUNIQ_BUILD'{ memory = { check_max( 24.GB * task.attempt, 'memory' ) } + ext.args ="--work-on-disk" } } diff --git a/conf/test_full.config b/conf/test_full.config index 591cce3..efac808 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -18,4 +18,11 @@ params { // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. 
SRA) // TODO nf-core: Give any required params for the test so that command line flags are not needed input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + + build_diamond = true + build_kaiju = true + build_malt = true + build_centrifuge = true + build_kraken2 = true + build_krakenuniq = true } diff --git a/docs/output.md b/docs/output.md index 5a54002..7f3f496 100644 --- a/docs/output.md +++ b/docs/output.md @@ -18,6 +18,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [DIAMOND](#diamond) - Database files for DIAMOND - [Kaiju](#kaiju) - Database files for Kaiju - [Kraken2](#kraken2) - Database files for Kraken2 +- [KrakenUniq](#krakenuniq) - Database files for KrakenUniq - [MALT](#malt) - Database files for MALT ### MultiQC @@ -110,6 +111,24 @@ The `fmi` file can be given to Kaiju itself with `kaiju -f .fmi` The resulting `/` directory can be given to Kraken2 itself with `kraken2 --db ` etc. +### KrakenUniq + +[KrakenUniq](https://github.com/fbreitwieser/krakenuniq) is a metagenomics classifier with unique k-mer counting for more specific results. + +
    +Output files + +- `krakenuniq/` + - `<dbname>/` + - `database-build.log`: KrakenUniq build process log + - `database.idx`: KrakenUniq index file + - `database.kdb`: KrakenUniq database file + - `taxDB`: KrakenUniq taxonomy information file + +
    + +Note that there may be additional files in this directory; however, the ones listed above are reportedly the required ones. + ### MALT [MALT](https://software-ab.cs.uni-tuebingen.de/download/malt) is a fast replacement for BLASTX, BLASTP and BLASTN, and provides both local and semi-global alignment capabilities. diff --git a/tests/test.nf.test b/tests/test.nf.test index 9067788..f198a59 100644 --- a/tests/test.nf.test +++ b/tests/test.nf.test @@ -24,7 +24,10 @@ nextflow_pipeline { path("$outputDir/kraken2/db/hash.k2d"), file("$outputDir/kraken2/db/opts.k2d").name, path("$outputDir/kraken2/db/taxo.k2d"), - path("$outputDir/krakenuniq/"), + path("$outputDir/krakenuniq/database/database-build.log").readLines().last().contains('database.idx'), + file("$outputDir/krakenuniq/database/database.idx"), + file("$outputDir/krakenuniq/database/database.kdb").name, + file("$outputDir/krakenuniq/database/taxDB").name, path("$outputDir/malt/malt-build.log").readLines().last().contains('Peak memory'), path("$outputDir/malt/malt_index/index0.idx"), path("$outputDir/malt/malt_index/ref.db"), diff --git a/tests/test.nf.test.snap b/tests/test.nf.test.snap index 73e0d6d..c56c4dd 100644 --- a/tests/test.nf.test.snap +++ b/tests/test.nf.test.snap @@ -13,6 +13,10 @@ "opts.k2d", "taxo.k2d:md5,cd8170a8c5a1b763a9ac1ffa2107cc88", true, + "database.idx", + "database.kdb", + "taxDB", + true, "index0.idx:md5,876139dc930e68992cd2625e08bba48a", "ref.db:md5,377073f58a9f9b85acca59fcf21744a9", "ref.idx:md5,676393d0f4826dac3f47aa5290632570", @@ -23,8 +27,8 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.02.0" + "nextflow": "24.04.1" }, - "timestamp": "2024-04-11T10:59:28.687364796" + "timestamp": "2024-05-23T11:18:57.895979974" } } \ No newline at end of file From ad97077c1e3c8dddfa30807b7d55011ecd1feb8e Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 23 May 2024 11:44:57 +0200 Subject: [PATCH 3/7] Lets see if this works --- tests/test.nf.test | 6 +++--- tests/test.nf.test.snap | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test.nf.test b/tests/test.nf.test index f198a59..49326ea 100644 --- a/tests/test.nf.test +++ b/tests/test.nf.test @@ -25,9 +25,9 @@ nextflow_pipeline { file("$outputDir/kraken2/db/opts.k2d").name, path("$outputDir/kraken2/db/taxo.k2d"), path("$outputDir/krakenuniq/database/database-build.log").readLines().last().contains('database.idx'), - file("$outputDir/krakenuniq/database/database.idx"), - file("$outputDir/krakenuniq/database/database.kdb").name, - file("$outputDir/krakenuniq/database/taxDB").name, + file("$outputDir/krakenuniq/database/database.idx").name, + file("$outputDir/krakenuniq/database/database.kdb"), + file("$outputDir/krakenuniq/database/taxDB"), path("$outputDir/malt/malt-build.log").readLines().last().contains('Peak memory'), path("$outputDir/malt/malt_index/index0.idx"), path("$outputDir/malt/malt_index/ref.db"), diff --git a/tests/test.nf.test.snap b/tests/test.nf.test.snap index c56c4dd..ceb61c7 100644 --- a/tests/test.nf.test.snap +++ b/tests/test.nf.test.snap @@ -14,8 +14,8 @@ "taxo.k2d:md5,cd8170a8c5a1b763a9ac1ffa2107cc88", true, "database.idx", - "database.kdb", - "taxDB", + "database.kdb:md5,5b807ed9faead95a8893a52ec187bb7b", + "taxDB:md5,1aed1afa948daffc236deba1c5d635db", true, "index0.idx:md5,876139dc930e68992cd2625e08bba48a", "ref.db:md5,377073f58a9f9b85acca59fcf21744a9", @@ -29,6 +29,6 @@ "nf-test": "0.8.4", "nextflow": "24.04.1" }, - "timestamp": "2024-05-23T11:18:57.895979974" + "timestamp": "2024-05-23T11:44:12.499883078" } } \ No newline at end of file From c721c85e8b6b5f93a47058dd2b6b89ce6c095d27 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 23 May 2024 12:52:51 +0200 Subject: [PATCH 4/7] Get database to work on GitHub Actions by reducing k-mer length --- conf/modules.config | 4 ---- conf/test.config | 7 +++---- tests/test.nf.test.snap | 6 +++--- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 897eae5..4dfe8e6 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -51,8 +51,4 @@ process { ext.args = { "--sequenceType ${params.malt_sequencetype}" } } - withName: 'KRAKENUNIQ_BUILD' { - ext.args = { "--jellyfish-bin \$(which jellyfish)" } - } - } diff --git a/conf/test.config b/conf/test.config index 76f98dd..9c9314e 100644 --- a/conf/test.config +++ b/conf/test.config @@ -16,7 +16,7 @@ params { // Limit resources so that this can run on GitHub Actions max_cpus = 2 - max_memory = '24.GB' + max_memory = '14.GB' max_time = '6.h' // Input data @@ -41,8 +41,7 @@ params { process { withName:'KRAKENUNIQ_BUILD'{ - memory = { check_max( 24.GB * task.attempt, 'memory' ) } - ext.args ="--work-on-disk" - + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + ext.args = "--work-on-disk --max-db-size 14 --kmer-len 15 --minimizer-len 13 --jellyfish-bin \$(which jellyfish)" } } diff --git a/tests/test.nf.test.snap b/tests/test.nf.test.snap index ceb61c7..64c03bd 100644 --- a/tests/test.nf.test.snap +++ b/tests/test.nf.test.snap @@ -14,8 +14,8 @@ "taxo.k2d:md5,cd8170a8c5a1b763a9ac1ffa2107cc88", true, "database.idx", - "database.kdb:md5,5b807ed9faead95a8893a52ec187bb7b", - "taxDB:md5,1aed1afa948daffc236deba1c5d635db", + "database.kdb:md5,a24fce43bedbc6c420f6e36d10c112a3", + "taxDB:md5,dad63877a3e5731d4fb5bff26bd1b8c4", true, "index0.idx:md5,876139dc930e68992cd2625e08bba48a", "ref.db:md5,377073f58a9f9b85acca59fcf21744a9", @@ -29,6 +29,6 @@ "nf-test": "0.8.4", "nextflow": "24.04.1" }, - "timestamp": "2024-05-23T11:44:12.499883078" + "timestamp": "2024-05-23T12:52:21.949823939" } } \ No newline at end of file From 
f7d4443ec23f49ad491ade9c3d68b817a1ba967d Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 30 May 2024 10:21:21 +0200 Subject: [PATCH 5/7] Update KrakenUniq container --- modules.json | 2 +- modules/nf-core/krakenuniq/build/krakenuniq-build.diff | 2 +- modules/nf-core/krakenuniq/build/main.nf | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules.json b/modules.json index 0783422..f321f61 100644 --- a/modules.json +++ b/modules.json @@ -47,7 +47,7 @@ }, "krakenuniq/build": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "e3857325a14ef6e50e33c104c0a3be0ccaabbeb1", "installed_by": ["modules"], "patch": "modules/nf-core/krakenuniq/build/krakenuniq-build.diff" }, diff --git a/modules/nf-core/krakenuniq/build/krakenuniq-build.diff b/modules/nf-core/krakenuniq/build/krakenuniq-build.diff index 8bfb69e..9fd794c 100644 --- a/modules/nf-core/krakenuniq/build/krakenuniq-build.diff +++ b/modules/nf-core/krakenuniq/build/krakenuniq-build.diff @@ -2,7 +2,7 @@ Changes in module 'nf-core/krakenuniq/build' --- modules/nf-core/krakenuniq/build/main.nf +++ modules/nf-core/krakenuniq/build/main.nf @@ -8,7 +8,7 @@ - 'biocontainers/krakenuniq:1.0.4--pl5321h19e8d03_0' }" + 'biocontainers/krakenuniq:1.0.4--pl5321h6dccd9a_2' }" input: - tuple val(meta), path(custom_library_dir, stageAs: "library/*"), path(custom_taxonomy_dir, stageAs: "taxonomy"), path(custom_seqid2taxid) diff --git a/modules/nf-core/krakenuniq/build/main.nf b/modules/nf-core/krakenuniq/build/main.nf index 615dd5e..c55dd4d 100644 --- a/modules/nf-core/krakenuniq/build/main.nf +++ b/modules/nf-core/krakenuniq/build/main.nf @@ -4,8 +4,8 @@ process KRAKENUNIQ_BUILD { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/krakenuniq:1.0.4--pl5321h19e8d03_0': - 'biocontainers/krakenuniq:1.0.4--pl5321h19e8d03_0' }" + 'https://depot.galaxyproject.org/singularity/krakenuniq:1.0.4--pl5321h6dccd9a_2': + 'biocontainers/krakenuniq:1.0.4--pl5321h6dccd9a_2' }" input: tuple val(meta), path(custom_library_dir, stageAs: "library/*"), path(custom_taxonomy_dir, stageAs: "taxonomy/*"), path(custom_seqid2taxid) From 699eaa3c2272e8a8fc732472540fdf27ee88a138 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 30 May 2024 10:57:09 +0200 Subject: [PATCH 6/7] Fix tests --- tests/test.nf.test | 7 +++---- tests/test.nf.test.snap | 12 ++++++------ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/tests/test.nf.test b/tests/test.nf.test index 4c34e61..76a073c 100644 --- a/tests/test.nf.test +++ b/tests/test.nf.test @@ -18,16 +18,15 @@ nextflow_pipeline { assertAll( { assert workflow.success }, { assert snapshot( - file("$outputDir/bracken/database/database100mers.kmer_distrib").name, file("$outputDir/bracken/database/database100mers.kraken").name, file("$outputDir/bracken/database/database.kraken").name, path("$outputDir/centrifuge/"), path("$outputDir/diamond/database.dmnd"), path("$outputDir/kaiju/database.fmi"), - path("$outputDir/kraken2/db/hash.k2d"), - file("$outputDir/kraken2/db/opts.k2d").name, - path("$outputDir/kraken2/db/taxo.k2d"), + path("$outputDir/kraken2/database/hash.k2d"), + file("$outputDir/kraken2/database/opts.k2d").name, + path("$outputDir/kraken2/database/taxo.k2d"), path("$outputDir/krakenuniq/database/database-build.log").readLines().last().contains('database.idx'), file("$outputDir/krakenuniq/database/database.idx").name, file("$outputDir/krakenuniq/database/database.kdb"), diff --git a/tests/test.nf.test.snap b/tests/test.nf.test.snap index a0781dd..8e3bb52 100644 --- a/tests/test.nf.test.snap +++ b/tests/test.nf.test.snap @@ -1,6 +1,9 @@ { "test_profile": { "content": [ + "database100mers.kmer_distrib", + 
"database100mers.kraken", + "database.kraken", [ "database.1.cf:md5,1481615ab90b5573f6d9e57f97890178", "database.2.cf:md5,d50fa66e215e80284314ff6521dcd4a4", @@ -12,13 +15,10 @@ "hash.k2d:md5,01122a04dcef29ceb3baa68a9f6e6ef5", "opts.k2d", "taxo.k2d:md5,cd8170a8c5a1b763a9ac1ffa2107cc88", - "database100mers.kmer_distrib", - "database100mers.kraken", - "database.kraken", true, "database.idx", "database.kdb:md5,a24fce43bedbc6c420f6e36d10c112a3", - "taxDB:md5,dad63877a3e5731d4fb5bff26bd1b8c4", + "taxDB:md5,1aed1afa948daffc236deba1c5d635db", true, "index0.idx:md5,876139dc930e68992cd2625e08bba48a", "ref.db:md5,377073f58a9f9b85acca59fcf21744a9", @@ -30,8 +30,8 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.04.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-05-23T12:52:21.949823939" + "timestamp": "2024-05-30T10:54:40.551963562" } } \ No newline at end of file From e94061319062334fd51e6c1cf6bceea0f5c80cf0 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 30 May 2024 11:37:54 +0200 Subject: [PATCH 7/7] Update workflows/createtaxdb.nf Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com> --- workflows/createtaxdb.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index 2a99e04..3b5e155 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -253,7 +253,7 @@ workflow CREATETAXDB { diamond_database = ch_diamond_output kaiju_database = ch_kaiju_output kraken2_bracken_database = ch_kraken2_bracken_output - krakenuniq_database = ch_krakenuniq_output + krakenuniq_database = ch_krakenuniq_output malt_database = ch_malt_output }