From fb23ab0558b8d6a4bb0357a7151bfb3f52310d15 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 29 Oct 2024 13:03:39 +0100 Subject: [PATCH 1/9] Start adding gagnon --- modules.json | 5 ++ .../nf-core/ganon/buildcustom/environment.yml | 5 ++ modules/nf-core/ganon/buildcustom/main.nf | 60 +++++++++++++ modules/nf-core/ganon/buildcustom/meta.yml | 77 +++++++++++++++++ .../ganon/buildcustom/tests/main.nf.test | 69 +++++++++++++++ .../ganon/buildcustom/tests/main.nf.test.snap | 72 ++++++++++++++++ .../ganon/buildcustom/tests/nextflow.config | 5 ++ .../nf-core/ganon/buildcustom/tests/tags.yml | 2 + workflows/createtaxdb.nf | 85 ++++++++++++------- 9 files changed, 350 insertions(+), 30 deletions(-) create mode 100644 modules/nf-core/ganon/buildcustom/environment.yml create mode 100644 modules/nf-core/ganon/buildcustom/main.nf create mode 100644 modules/nf-core/ganon/buildcustom/meta.yml create mode 100644 modules/nf-core/ganon/buildcustom/tests/main.nf.test create mode 100644 modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap create mode 100644 modules/nf-core/ganon/buildcustom/tests/nextflow.config create mode 100644 modules/nf-core/ganon/buildcustom/tests/tags.yml diff --git a/modules.json b/modules.json index cd86b5a..f681212 100644 --- a/modules.json +++ b/modules.json @@ -30,6 +30,11 @@ "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, + "ganon/buildcustom": { + "branch": "master", + "git_sha": "58b4d685b1c93429917fec530f5d656aca3f2ef6", + "installed_by": ["modules"] + }, "gunzip": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", diff --git a/modules/nf-core/ganon/buildcustom/environment.yml b/modules/nf-core/ganon/buildcustom/environment.yml new file mode 100644 index 0000000..0e073d5 --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::ganon=2.1.0 diff --git a/modules/nf-core/ganon/buildcustom/main.nf b/modules/nf-core/ganon/buildcustom/main.nf new file mode 100644 index 0000000..212e49f --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/main.nf @@ -0,0 +1,60 @@ +process GANON_BUILDCUSTOM { + tag "${meta.id}" + label 'process_high' + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/ganon:2.1.0--py310hab1bfa5_1' + : 'biocontainers/ganon:2.1.0--py310hab1bfa5_1'}" + + input: + tuple val(meta), path(input) + val input_type + path taxonomy_files + path genome_size_files + + output: + tuple val(meta), path("*.{hibf,ibf,tax}"), emit: db + tuple val(meta), path("*.info.tsv"), emit: info, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_cmd = input_type == 'fasta' ? "--input ${input}" : input_type == 'tsv' ? "--input-file ${input}" : error("Invalid input type: ${input_type}. Options: fasta, tsv") + def taxonomy_args = taxonomy_files ? "--taxonomy-files ${taxonomy_files}" : "" + def genome_size_args = genome_size_files ? 
"--genome-size-files ${genome_size_files}" : "" + """ + ganon \\ + build-custom \\ + --threads ${task.cpus} \\ + --input ${input} \\ + --db-prefix ${prefix} \\ + ${taxonomy_args} \\ + ${genome_size_args} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ganon: \$(echo \$(ganon --version 2>1) | sed 's/.*ganon //g') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def taxonomy_args = taxonomy_files ? "--taxonomy-files ${taxonomy_files}" : "" + def genome_size_args = genome_size_files ? "--genome-size-files ${genome_size_files}" : "" + """ + touch ${prefix}.hibf + touch ${prefix}.tax + touch ${prefix}.info.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ganon: \$(echo \$(ganon --version 2>1) | sed 's/.*ganon //g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ganon/buildcustom/meta.yml b/modules/nf-core/ganon/buildcustom/meta.yml new file mode 100644 index 0000000..39bc073 --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/meta.yml @@ -0,0 +1,77 @@ +name: "ganon_buildcustom" +description: Build ganon database using custom reference sequences. +keywords: + - ganon + - metagenomics + - profiling + - taxonomy + - k-mer + - database +tools: + - "ganon": + description: "ganon classifies short DNA sequences against large sets of genomic + reference sequences efficiently" + homepage: "https://github.com/pirovc/ganon" + documentation: "https://github.com/pirovc/ganon" + tool_dev_url: "https://github.com/pirovc/ganon" + doi: "10.1093/bioinformatics/btaa458" + licence: ["MIT"] + identifier: biotools:ganon +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: | + List of input FASTA files, or a directory containing input FASTA files. + Note you must supply --input-extension via ext.args if FASTA extensions do not end in the default `fna.gz`. + pattern: "*" + - - input_type: + type: string + description: | + Specify whether the file(s) given to the input channel are in FASTA format (and will be supplied as --input) + or in TSV format (and will be supplied as --input-file). For TSV format, the 'file' column should be just the + file name so that it's local to the working directory of this process. + pattern: "fasta|tsv" + - - taxonomy_files: + type: file + description: Pre-downloaded taxonomy files of input sequences. See ganon docs + for formats + - - genome_size_files: + type: file + description: Pre-downloaded NCBI or GTDB genome size files of input sequences. + See ganon docs for formats + pattern: "{species_genome_size.txt.gz,*_metadata.tar.gz}" +output: + - db: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{hibf,ibf,tax}": + type: file + description: ganon database files + pattern: "*.{ibf,tax}" + - info: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.info.tsv": + type: file + description: Copy of target info generated. Can be used for updating database. 
+ pattern: "*info.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/ganon/buildcustom/tests/main.nf.test b/modules/nf-core/ganon/buildcustom/tests/main.nf.test new file mode 100644 index 0000000..8fa4227 --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/tests/main.nf.test @@ -0,0 +1,69 @@ +nextflow_process { + + name "Test Process GANON_BUILDCUSTOM" + script "../main.nf" + process "GANON_BUILDCUSTOM" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "ganon" + tag "ganon/buildcustom" + + test("sarscov2 - genome fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + ] + input[1] = 'fasta' + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.db.get(0).get(1).findAll { file(it).name != "test.tax" }, + process.out.versions + ).match() + }, + { assert file(process.out.db.get(0).get(1).find { file(it).name == "test.tax" }).text.contains("MT192765.1") }, + ) + } + } + + test("sarscov2 - genome fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + ] + input[1] = 'fasta' + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap b/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap new file mode 100644 index 0000000..2c3243f --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2 - genome fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "test.hibf:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tax:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.info.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,9c73293ae36914c6ce3718ad6728ad9e" + ], + "db": [ + [ + { + "id": "test" + }, + [ + "test.hibf:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tax:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "info": [ + [ + { + "id": "test" + }, + "test.info.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,9c73293ae36914c6ce3718ad6728ad9e" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-07T17:00:22.98042261" + }, + "sarscov2 - genome fasta": { + "content": [ + [ + "test.hibf:md5,d10fe6fc6d198696bc15ca85a1459614" + ], + [ + "versions.yml:md5,9c73293ae36914c6ce3718ad6728ad9e" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-07T19:03:25.060306554" + } +} \ No newline at end of file diff --git a/modules/nf-core/ganon/buildcustom/tests/nextflow.config b/modules/nf-core/ganon/buildcustom/tests/nextflow.config new file mode 100644 index 0000000..a12988e --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GANON_BUILDCUSTOM { + ext.args = "--input-target sequence" + } +} diff --git a/modules/nf-core/ganon/buildcustom/tests/tags.yml 
b/modules/nf-core/ganon/buildcustom/tests/tags.yml new file mode 100644 index 0000000..46c2aa4 --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/tests/tags.yml @@ -0,0 +1,2 @@ +ganon/buildcustom: + - "modules/nf-core/ganon/buildcustom/**" diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index 51c2fe6..9fef848 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -4,11 +4,11 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_createtaxdb_pipeline' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_createtaxdb_pipeline' // Preprocessing include { GUNZIP as GUNZIP_DNA } from '../modules/nf-core/gunzip/main' @@ -20,6 +20,7 @@ include { CAT_CAT as CAT_CAT_AA } from '../modules/nf-core/cat/cat/ // Database building (with specific auxiliary modules) include { CENTRIFUGE_BUILD } from '../modules/nf-core/centrifuge/build/main' include { DIAMOND_MAKEDB } from '../modules/nf-core/diamond/makedb/main' +include { GANON_BUILDCUSTOM } from '../modules/nf-core/ganon/buildcustom/main' include { KAIJU_MKFMI } from '../modules/nf-core/kaiju/mkfmi/main' include { KRAKENUNIQ_BUILD } from '../modules/nf-core/krakenuniq/build/main' include { UNZIP } from '../modules/nf-core/unzip/main' @@ -75,8 +76,9 @@ workflow CREATETAXDB { ch_versions = ch_versions.mix(GUNZIP_DNA.out.versions.first()) // Place in single file - ch_singleref_for_dna = CAT_CAT_DNA(ch_prepped_dna_fastas) + CAT_CAT_DNA(ch_prepped_dna_fastas) ch_versions = ch_versions.mix(CAT_CAT_DNA.out.versions.first()) + ch_singleref_for_dna = CAT_CAT_DNA.out } // TODO: Possibly need to have a modification step to get header correct to actually run with kaiju... 
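A note on the ganon input modes introduced by this module: in `--input-file` mode, ganon expects a tab-separated table whose columns are the file name, the target, and the taxonomy node (taxid), with the file name kept relative so it resolves inside the process working directory. The sketch below is illustrative only; the file name, target, taxid, thread count and database prefix are placeholder values borrowed from the test data used later in this series, and the TSV wiring of the module itself is only completed in a later commit.

```bash
# Sketch of a ganon --input-file table (tab-separated: file, target, node/taxid).
printf 'genome.fasta\tSevere_acute_respiratory_syndrome_coronavirus_2\t2697049\n' > ganon_input.tsv

# Roughly the build-custom invocation the module template assembles;
# per the workflow comment, nodes.dmp must be listed before names.dmp.
ganon build-custom \
    --threads 4 \
    --input-file ganon_input.tsv \
    --db-prefix test \
    --taxonomy-files nodes.dmp names.dmp
```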
@@ -101,7 +103,8 @@ workflow CREATETAXDB { ch_prepped_aa_fastas = PIGZ_COMPRESS_AA.out.archive.mix(ch_aa_for_zipping.zipped).groupTuple() //ch_versions = ch_versions.mix( PIGZ_COMPRESS_AA.versions.first() ) - ch_singleref_for_aa = CAT_CAT_AA(ch_prepped_aa_fastas) + CAT_CAT_AA(ch_prepped_aa_fastas) + ch_singleref_for_aa = CAT_CAT_AA.out_file ch_versions = ch_versions.mix(CAT_CAT_AA.out.versions.first()) } @@ -114,7 +117,7 @@ workflow CREATETAXDB { // Module: Run CENTRIFUGE/BUILD if (params.build_centrifuge) { - CENTRIFUGE_BUILD(CAT_CAT_DNA.out.file_out, ch_nucl2taxid, ch_taxonomy_nodesdmp, ch_taxonomy_namesdmp, []) + CENTRIFUGE_BUILD(ch_singleref_for_dna, ch_nucl2taxid, ch_taxonomy_nodesdmp, ch_taxonomy_namesdmp, []) ch_versions = ch_versions.mix(CENTRIFUGE_BUILD.out.versions.first()) ch_centrifuge_output = CENTRIFUGE_BUILD.out.cf } @@ -125,7 +128,7 @@ workflow CREATETAXDB { // MODULE: Run DIAMOND/MAKEDB if (params.build_diamond) { - DIAMOND_MAKEDB(CAT_CAT_AA.out.file_out, ch_prot2taxid, ch_taxonomy_nodesdmp, ch_taxonomy_namesdmp) + DIAMOND_MAKEDB(ch_singleref_for_aa, ch_prot2taxid, ch_taxonomy_nodesdmp, ch_taxonomy_namesdmp) ch_versions = ch_versions.mix(DIAMOND_MAKEDB.out.versions.first()) ch_diamond_output = DIAMOND_MAKEDB.out.db } @@ -133,10 +136,27 @@ workflow CREATETAXDB { ch_diamond_output = Channel.empty() } + if (params.build_ganon) { + ch_ganon_input_tsv = ch_prepped_dna_fastas + .map { meta, file -> + [meta, file] + [file.name(), meta.id, meta.taxid] + } + .map { it.values().join("\t") } + .collectFile { + name: "ganon_input.tsv" + newLine: true + } + + GANON_BUILDCUSTOM(ch_ganon_input_tsv, 'tsv', tax_file, []) + ch_versions = ch_versions.mix(GANON_BUILDCUSTOM.out.versions.first()) + ch_ganon_output = GANON_BUILDCUSTOM.out.db + } + // MODULE: Run KAIJU/MKFMI if (params.build_kaiju) { - KAIJU_MKFMI(CAT_CAT_AA.out.file_out) + KAIJU_MKFMI(ch_singleref_for_aa) ch_versions = ch_versions.mix(KAIJU_MKFMI.out.versions.first()) ch_kaiju_output = KAIJU_MKFMI.out.fmi } @@ -149,7 +169,7 @@ workflow CREATETAXDB { // Condition is inverted because subworkflow asks if you want to 'clean' (true) or not, but pipeline says to 'keep' if (params.build_kraken2 || params.build_bracken) { def k2_keepintermediates = params.kraken2_keepintermediate || params.build_bracken ? false : true - FASTA_BUILD_ADD_KRAKEN2_BRACKEN(CAT_CAT_DNA.out.file_out, ch_taxonomy_namesdmp, ch_taxonomy_nodesdmp, ch_accession2taxid, k2_keepintermediates, params.build_bracken) + FASTA_BUILD_ADD_KRAKEN2_BRACKEN(ch_singleref_for_dna, ch_taxonomy_namesdmp, ch_taxonomy_nodesdmp, ch_accession2taxid, k2_keepintermediates, params.build_bracken) ch_versions = ch_versions.mix(FASTA_BUILD_ADD_KRAKEN2_BRACKEN.out.versions.first()) ch_kraken2_bracken_output = FASTA_BUILD_ADD_KRAKEN2_BRACKEN.out.db } @@ -214,25 +234,31 @@ workflow CREATETAXDB { // // MODULE: MultiQC // - ch_multiqc_config = Channel.fromPath( - "$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? - Channel.fromPath(params.multiqc_config, checkIfExists: true) : - Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? - Channel.fromPath(params.multiqc_logo, checkIfExists: true) : - Channel.empty() - - summary_params = paramsSummaryMap( - workflow, parameters_schema: "nextflow_schema.json") + ch_multiqc_config = Channel.fromPath( + "${projectDir}/assets/multiqc_config.yml", + checkIfExists: true + ) + ch_multiqc_custom_config = params.multiqc_config + ? 
Channel.fromPath(params.multiqc_config, checkIfExists: true) + : Channel.empty() + ch_multiqc_logo = params.multiqc_logo + ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) + : Channel.empty() + + summary_params = paramsSummaryMap( + workflow, + parameters_schema: "nextflow_schema.json" + ) ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) ch_multiqc_files = ch_multiqc_files.mix( - ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? - file(params.multiqc_methods_description, checkIfExists: true) : - file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value( - methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') + ) + ch_multiqc_custom_methods_description = params.multiqc_methods_description + ? file(params.multiqc_methods_description, checkIfExists: true) + : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description) + ) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) ch_multiqc_files = ch_multiqc_files.mix( @@ -250,14 +276,13 @@ workflow CREATETAXDB { [], [] ) - multiqc_report = MULTIQC.out.report.toList() emit: - multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html centrifuge_database = ch_centrifuge_output diamond_database = ch_diamond_output + ganon_database = ch_ganon_output kaiju_database = ch_kaiju_output kraken2_bracken_database = ch_kraken2_bracken_output krakenuniq_database = ch_krakenuniq_output From 4541dad0e4434a738f3f91910e231922f2efa211 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Sat, 9 Nov 2024 16:24:17 +0100 Subject: [PATCH 2/9] Continue work --- conf/modules.config | 4 +++ modules/nf-core/ganon/buildcustom/main.nf | 2 +- nextflow.config | 8 +++--- nextflow_schema.json | 31 +++++++++++++---------- workflows/createtaxdb.nf | 30 ++++++++++++++-------- 5 files changed, 47 insertions(+), 28 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 2ed4477..902867f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -47,6 +47,10 @@ process { ] } + withName: GANON_BUILD { + ext.args = {"--verbose"} + } + withName: MALT_BUILD { ext.args = { "--sequenceType ${params.malt_sequencetype}" } } diff --git a/modules/nf-core/ganon/buildcustom/main.nf b/modules/nf-core/ganon/buildcustom/main.nf index 212e49f..396f8e1 100644 --- a/modules/nf-core/ganon/buildcustom/main.nf +++ b/modules/nf-core/ganon/buildcustom/main.nf @@ -30,7 +30,7 @@ process GANON_BUILDCUSTOM { ganon \\ build-custom \\ --threads ${task.cpus} \\ - --input ${input} \\ + $input_cmd \\ --db-prefix ${prefix} \\ ${taxonomy_args} \\ ${genome_size_args} \\ diff --git a/nextflow.config b/nextflow.config index 6eac678..147fef9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -59,14 +59,16 @@ params { // tool specific options build_bracken = false + build_centrifuge = false build_diamond = false + build_ganon = false build_kaiju = false - build_malt = false - malt_sequencetype = "DNA" - build_centrifuge = false build_kraken2 = false kraken2_keepintermediate = false build_krakenuniq = false + build_malt = false + malt_sequencetype = "DNA" + } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index 0553172..f570b60 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -123,23 +123,15 @@ "fa_icon": "fas fa-toggle-on", "description": "Turn on building of DIAMOND database. Requires amino-acid FASTA file input." }, - "build_kaiju": { + "build_ganon": { "type": "boolean", - "description": "Turn on building of Kaiju database. Requires amino-acid FASTA file input.", + "description": "Turn on building of ganon database. Requires nucleotide FASTA file input.", "fa_icon": "fas fa-toggle-on" }, - "build_malt": { + "build_kaiju": { "type": "boolean", - "fa_icon": "fas fa-toggle-on", - "description": "Turn on building of MALT database. Requires nucleotide FASTA file input." - }, - "malt_sequencetype": { - "type": "string", - "default": "DNA", - "description": "Specify type of input sequence being given to MALT", - "enum": ["DNA", "Protein"], - "help_text": "Use to specify whether the reference sequences are DNA or Protein sequences. (For RNA sequences, use the DNA setting) - from [MALT manual](https://software-ab.cs.uni-tuebingen.de/download/malt/).\n\n> Modifies tool(s) parameter(s)\n> - malt-build: `--sequenceType` ", - "fa_icon": "fas fa-dna" + "description": "Turn on building of Kaiju database. Requires amino-acid FASTA file input.", + "fa_icon": "fas fa-toggle-on" }, "build_kraken2": { "type": "boolean", @@ -155,6 +147,19 @@ "type": "boolean", "fa_icon": "fas fa-toggle-on", "description": "Turn on building of KrakenUniq database. Requires nucleotide FASTA file input." + }, + "build_malt": { + "type": "boolean", + "fa_icon": "fas fa-toggle-on", + "description": "Turn on building of MALT database. Requires nucleotide FASTA file input." 
+ }, + "malt_sequencetype": { + "type": "string", + "default": "DNA", + "description": "Specify type of input sequence being given to MALT", + "enum": ["DNA", "Protein"], + "help_text": "Use to specify whether the reference sequences are DNA or Protein sequences. (For RNA sequences, use the DNA setting) - from [MALT manual](https://software-ab.cs.uni-tuebingen.de/download/malt/).\n\n> Modifies tool(s) parameter(s)\n> - malt-build: `--sequenceType` ", + "fa_icon": "fas fa-dna" } }, "fa_icon": "fas fa-database" diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index 9fef848..f9f69c3 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -57,11 +57,11 @@ workflow CREATETAXDB { // PREPARE: Prepare input for single file inputs modules - if ([params.build_malt, params.build_centrifuge, params.build_kraken2, params.build_bracken, params.build_krakenuniq].any()) { + if ([params.build_malt, params.build_centrifuge, params.build_kraken2, params.build_bracken, params.build_krakenuniq, params.build_ganon].any()) { // Pull just DNA sequences ch_dna_refs_for_singleref = ch_samplesheet - .map { meta, fasta_dna, fasta_aa -> [[id: params.dbname], fasta_dna] } + .map { meta, fasta_dna, fasta_aa -> [meta, fasta_dna] } .filter { meta, fasta_dna -> fasta_dna } @@ -72,7 +72,7 @@ workflow CREATETAXDB { } GUNZIP_DNA(ch_dna_for_unzipping.zipped) - ch_prepped_dna_fastas = GUNZIP_DNA.out.gunzip.mix(ch_dna_for_unzipping.unzipped).groupTuple() + ch_prepped_dna_fastas = GUNZIP_DNA.out.gunzip.mix(ch_dna_for_unzipping.unzipped).tap { ch_prepped_dna_fastas_ungrouped }.map { meta, fasta -> [[id: params.dbname], fasta] }.groupTuple() ch_versions = ch_versions.mix(GUNZIP_DNA.out.versions.first()) // Place in single file @@ -137,18 +137,26 @@ workflow CREATETAXDB { } if (params.build_ganon) { - ch_ganon_input_tsv = ch_prepped_dna_fastas - .map { meta, file -> - [meta, file] - [file.name(), meta.id, meta.taxid] + ch_ganon_input_tsv = ch_prepped_dna_fastas_ungrouped + .map { meta, fasta -> + // I tried with .name() but it kept giving error of `Unknown method invocation `name` on XPath type... not sure why + def fasta_name = fasta.toString().split('/').last() + [fasta_name, meta.id, meta.taxid] } - .map { it.values().join("\t") } - .collectFile { - name: "ganon_input.tsv" + .map { it.join("\t") } + .collectFile ( + name: "ganon_fasta_input.tsv", newLine: true + ) + .map{ + [[id: params.dbname], it] } - GANON_BUILDCUSTOM(ch_ganon_input_tsv, 'tsv', tax_file, []) + // Nodes must come first + ch_ganon_tax_files = Channel.fromPath(ch_taxonomy_nodesdmp).combine(Channel.fromPath(ch_taxonomy_namesdmp)) + + // TODO Fix module so `input_cmd` is used and add test! + GANON_BUILDCUSTOM(ch_ganon_input_tsv, 'tsv', ch_ganon_tax_files, []) ch_versions = ch_versions.mix(GANON_BUILDCUSTOM.out.versions.first()) ch_ganon_output = GANON_BUILDCUSTOM.out.db } From 3227099e5f77609409de31b4c651a6e36a2d3bad Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Sat, 9 Nov 2024 16:24:50 +0100 Subject: [PATCH 3/9] Revert manual change to ganonbuild module (to upstreeam the fix) --- modules/nf-core/ganon/buildcustom/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/ganon/buildcustom/main.nf b/modules/nf-core/ganon/buildcustom/main.nf index 396f8e1..212e49f 100644 --- a/modules/nf-core/ganon/buildcustom/main.nf +++ b/modules/nf-core/ganon/buildcustom/main.nf @@ -30,7 +30,7 @@ process GANON_BUILDCUSTOM { ganon \\ build-custom \\ --threads ${task.cpus} \\ - $input_cmd \\ + --input ${input} \\ --db-prefix ${prefix} \\ ${taxonomy_args} \\ ${genome_size_args} \\ From 796fb7e526c04ed4f4ec5981079714f99fedfbfc Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 28 Nov 2024 10:13:18 +0100 Subject: [PATCH 4/9] Add working ganon-build (missing docs) --- modules.json | 2 +- modules/nf-core/ganon/buildcustom/main.nf | 6 +- modules/nf-core/ganon/buildcustom/meta.yml | 12 ++-- .../ganon/buildcustom/tests/main.nf.test | 55 +++++++++++++++++-- .../ganon/buildcustom/tests/main.nf.test.snap | 47 +++++++++++----- .../ganon/buildcustom/tests/nextflow.config | 2 +- modules/nf-core/malt/build/main.nf | 18 +++--- nextflow.config | 2 +- workflows/createtaxdb.nf | 17 +++--- 9 files changed, 112 insertions(+), 49 deletions(-) diff --git a/modules.json b/modules.json index f681212..ebd0e4f 100644 --- a/modules.json +++ b/modules.json @@ -32,7 +32,7 @@ }, "ganon/buildcustom": { "branch": "master", - "git_sha": "58b4d685b1c93429917fec530f5d656aca3f2ef6", + "git_sha": "4265ef4b3b9af8877671715b081f102041c64cfd", "installed_by": ["modules"] }, "gunzip": { diff --git a/modules/nf-core/ganon/buildcustom/main.nf b/modules/nf-core/ganon/buildcustom/main.nf index 212e49f..41ffd68 100644 --- a/modules/nf-core/ganon/buildcustom/main.nf +++ b/modules/nf-core/ganon/buildcustom/main.nf @@ -8,7 +8,7 @@ process GANON_BUILDCUSTOM { input: tuple val(meta), path(input) - val input_type + path input_tsv path taxonomy_files path genome_size_files @@ -23,14 +23,14 @@ process GANON_BUILDCUSTOM { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def input_cmd = input_type == 'fasta' ? "--input ${input}" : input_type == 'tsv' ? "--input-file ${input}" : error("Invalid input type: ${input_type}. Options: fasta, tsv") + def input_cmd = input_tsv ? "--input-file ${input_tsv}" : "--input ${input}" def taxonomy_args = taxonomy_files ? "--taxonomy-files ${taxonomy_files}" : "" def genome_size_args = genome_size_files ? "--genome-size-files ${genome_size_files}" : "" """ ganon \\ build-custom \\ --threads ${task.cpus} \\ - --input ${input} \\ + ${input_cmd} \\ --db-prefix ${prefix} \\ ${taxonomy_args} \\ ${genome_size_args} \\ diff --git a/modules/nf-core/ganon/buildcustom/meta.yml b/modules/nf-core/ganon/buildcustom/meta.yml index 39bc073..5c481ec 100644 --- a/modules/nf-core/ganon/buildcustom/meta.yml +++ b/modules/nf-core/ganon/buildcustom/meta.yml @@ -28,14 +28,14 @@ input: description: | List of input FASTA files, or a directory containing input FASTA files. Note you must supply --input-extension via ext.args if FASTA extensions do not end in the default `fna.gz`. - pattern: "*" - - - input_type: + pattern: "*.{fasta,fna,fa,fa,fasta.gz,fna.gz,fa.gz,fa.gz}" + - - input_tsv: type: string description: | - Specify whether the file(s) given to the input channel are in FASTA format (and will be supplied as --input) - or in TSV format (and will be supplied as --input-file). 
For TSV format, the 'file' column should be just the - file name so that it's local to the working directory of this process. - pattern: "fasta|tsv" + (Optional) Specify an TSV file containing the paths, and relevant metadata to the input FASTA files to use the `--input-file` option. + The 'file' column should be just the file name of each FASTA file (so that it's local to the working directory of the process). + See ganon documentation for more more information on the other columns. + pattern: "*tsv" - - taxonomy_files: type: file description: Pre-downloaded taxonomy files of input sequences. See ganon docs diff --git a/modules/nf-core/ganon/buildcustom/tests/main.nf.test b/modules/nf-core/ganon/buildcustom/tests/main.nf.test index 8fa4227..9fe3948 100644 --- a/modules/nf-core/ganon/buildcustom/tests/main.nf.test +++ b/modules/nf-core/ganon/buildcustom/tests/main.nf.test @@ -10,16 +10,19 @@ nextflow_process { tag "ganon" tag "ganon/buildcustom" - test("sarscov2 - genome fasta") { + test("sarscov2 - genome - fasta") { when { + params { + module_args = '--input-target sequence' + } process { """ input[0] = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), ] - input[1] = 'fasta' + input[1] = [] input[2] = [] input[3] = [] """ @@ -31,10 +34,49 @@ nextflow_process { { assert process.success }, { assert snapshot( process.out.db.get(0).get(1).findAll { file(it).name != "test.tax" }, - process.out.versions + process.out.versions, + file(process.out.db.get(0).get(1).find { file(it).name == "test.tax" }).text.contains("MT192765.1") ).match() }, - { assert file(process.out.db.get(0).get(1).find { file(it).name == "test.tax" }).text.contains("MT192765.1") }, + ) + } + } + +test("sarscov2 - genome - tsv") { + + when { + params { + module_args = '--input-target file' + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + ] + input[1] = ch_ganon_input_tsv = Channel.of(["genome.fasta", "Severe_acute_respiratory_syndrome_coronavirus_2", "2697049"]). 
+ map { it.join("\t") } + .collectFile ( + name: "ganon_fasta_input.tsv", + newLine: true + ) + input[2] = [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_nodes.dmp', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_names.dmp', checkIfExists: true) + ] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.db.get(0).get(1), + process.out.versions + ).match() + } ) } } @@ -44,13 +86,16 @@ nextflow_process { options "-stub" when { + params { + module_args = '' + } process { """ input[0] = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), ] - input[1] = 'fasta' + input[1] = [] input[2] = [] input[3] = [] """ diff --git a/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap b/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap index 2c3243f..e27a749 100644 --- a/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap +++ b/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap @@ -1,4 +1,36 @@ { + "sarscov2 - genome - tsv": { + "content": [ + [ + "test.hibf:md5,9edfe4c3873d621a88ebcad438dca42c", + "test.tax:md5,e15400a1e43cce61545834695da46465" + ], + [ + "versions.yml:md5,9c73293ae36914c6ce3718ad6728ad9e" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-13T13:37:14.320278404" + }, + "sarscov2 - genome - fasta": { + "content": [ + [ + "test.hibf:md5,d10fe6fc6d198696bc15ca85a1459614" + ], + [ + "versions.yml:md5,9c73293ae36914c6ce3718ad6728ad9e" + ], + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-13T13:36:52.317157496" + }, "sarscov2 - genome fasta - stub": { "content": [ { @@ -53,20 +85,5 @@ "nextflow": "24.04.4" }, "timestamp": "2024-10-07T17:00:22.98042261" - }, - "sarscov2 - genome fasta": { - "content": [ - [ - "test.hibf:md5,d10fe6fc6d198696bc15ca85a1459614" - ], - [ - "versions.yml:md5,9c73293ae36914c6ce3718ad6728ad9e" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-10-07T19:03:25.060306554" } } \ No newline at end of file diff --git a/modules/nf-core/ganon/buildcustom/tests/nextflow.config b/modules/nf-core/ganon/buildcustom/tests/nextflow.config index a12988e..15c20b6 100644 --- a/modules/nf-core/ganon/buildcustom/tests/nextflow.config +++ b/modules/nf-core/ganon/buildcustom/tests/nextflow.config @@ -1,5 +1,5 @@ process { withName: GANON_BUILDCUSTOM { - ext.args = "--input-target sequence" + ext.args = params.module_args } } diff --git a/modules/nf-core/malt/build/main.nf b/modules/nf-core/malt/build/main.nf index 6f05e9e..710c82e 100644 --- a/modules/nf-core/malt/build/main.nf +++ b/modules/nf-core/malt/build/main.nf @@ -1,11 +1,9 @@ process MALT_BUILD { - label 'process_high' - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/malt:0.61--hdfd78af_0' : - 'biocontainers/malt:0.61--hdfd78af_0' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/malt:0.61--hdfd78af_0' + : 'biocontainers/malt:0.61--hdfd78af_0'}" input: path fastas @@ -13,8 +11,8 @@ process MALT_BUILD { path mapping_db output: - path "malt_index/" , emit: index - path "versions.yml" , emit: versions + path "malt_index/", emit: index + path "versions.yml", emit: versions path "malt-build.log", emit: log when: @@ -28,10 +26,10 @@ process MALT_BUILD { malt-build \\ -v \\ --input ${fastas.join(' ')} \\ - $igff \\ + ${igff} \\ -d 'malt_index/' \\ - -t $task.cpus \\ - $args \\ + -t ${task.cpus} \\ + ${args} \\ -mdb ${mapping_db}/*.db |&tee malt-build.log cat <<-END_VERSIONS > versions.yml diff --git a/nextflow.config b/nextflow.config index 147fef9..eab3400 100644 --- a/nextflow.config +++ b/nextflow.config @@ -218,7 +218,7 @@ set -C # No clobber - prevent output redirection from overwriting files. // Disable process selector warnings by default. Use debug profile to enable warnings. nextflow.enable.configProcessNamesValidation = false -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') +trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index f9f69c3..34712c7 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -61,8 +61,8 @@ workflow CREATETAXDB { // Pull just DNA sequences ch_dna_refs_for_singleref = ch_samplesheet - .map { meta, fasta_dna, fasta_aa -> [meta, fasta_dna] } - .filter { meta, fasta_dna -> + .map { meta, fasta_dna, _fasta_aa -> [meta, fasta_dna] } + .filter { _meta, fasta_dna -> fasta_dna } @@ -72,7 +72,8 @@ workflow CREATETAXDB { } GUNZIP_DNA(ch_dna_for_unzipping.zipped) - ch_prepped_dna_fastas = GUNZIP_DNA.out.gunzip.mix(ch_dna_for_unzipping.unzipped).tap { ch_prepped_dna_fastas_ungrouped }.map { meta, fasta -> [[id: params.dbname], fasta] }.groupTuple() + ch_prepped_dna_fastas_ungrouped = GUNZIP_DNA.out.gunzip.mix(ch_dna_for_unzipping.unzipped) + ch_prepped_dna_fastas = ch_prepped_dna_fastas_ungrouped.map { meta, fasta -> [[id: params.dbname], fasta] }.groupTuple() ch_versions = ch_versions.mix(GUNZIP_DNA.out.versions.first()) // Place in single file @@ -137,6 +138,9 @@ workflow CREATETAXDB { } if (params.build_ganon) { + + ch_ganon_input_fastas = ch_prepped_dna_fastas_ungrouped.collect() + ch_ganon_input_tsv = ch_prepped_dna_fastas_ungrouped .map { meta, fasta -> // I tried with .name() but it kept giving error of `Unknown method invocation `name` on XPath type... not sure why @@ -144,19 +148,18 @@ workflow CREATETAXDB { [fasta_name, meta.id, meta.taxid] } .map { it.join("\t") } - .collectFile ( + .collectFile( name: "ganon_fasta_input.tsv", newLine: true ) - .map{ + .map { [[id: params.dbname], it] } // Nodes must come first ch_ganon_tax_files = Channel.fromPath(ch_taxonomy_nodesdmp).combine(Channel.fromPath(ch_taxonomy_namesdmp)) - // TODO Fix module so `input_cmd` is used and add test! - GANON_BUILDCUSTOM(ch_ganon_input_tsv, 'tsv', ch_ganon_tax_files, []) + GANON_BUILDCUSTOM(ch_prepped_dna_fastas, ch_ganon_input_tsv.map { _meta, tsv -> tsv }, ch_ganon_tax_files, []) ch_versions = ch_versions.mix(GANON_BUILDCUSTOM.out.versions.first()) ch_ganon_output = GANON_BUILDCUSTOM.out.db } From 7408229bc846feb1eeed877b4e47a87a4b941a6d Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 28 Nov 2024 12:13:25 +0100 Subject: [PATCH 5/9] Add better input validation tests --- conf/modules.config | 6 +- nextflow.config | 236 +++++++++--------- .../utils_nfcore_createtaxdb_pipeline/main.nf | 27 +- workflows/createtaxdb.nf | 4 +- 4 files changed, 150 insertions(+), 123 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 902867f..f1b2838 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,8 +18,8 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: 'MULTIQC' { - ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + withName: MULTIQC { + ext.args = { params.multiqc_title ? "--title \"${params.multiqc_title}\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, @@ -48,7 +48,7 @@ process { } withName: GANON_BUILD { - ext.args = {"--verbose"} + ext.args = { "--verbose" } } withName: MALT_BUILD { diff --git a/nextflow.config b/nextflow.config index eab3400..65cdbb4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,14 +11,14 @@ params { // TODO nf-core: Specify your pipeline's command line flags // Input options - input = null + input = null // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' - multiqc_methods_description = null + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null // Boilerplate options outdir = null @@ -35,40 +35,39 @@ params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' // Config options - config_profile_name = null - config_profile_description = null + config_profile_name = null + config_profile_description = null - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_contact = null - config_profile_url = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null // Schema validation default options - validate_params = true + validate_params = true // General parameters - dbname = null - save_concatenated_fastas = false + dbname = null + save_concatenated_fastas = false - accession2taxid = null - prot2taxid = null - nucl2taxid = null - nodesdmp = null - namesdmp = null - malt_mapdb = null + accession2taxid = null + prot2taxid = null + nucl2taxid = null + nodesdmp = null + namesdmp = null + malt_mapdb = null // tool specific options - build_bracken = false - build_centrifuge = false - build_diamond = false - build_ganon = false - build_kaiju = false - build_kraken2 = false - kraken2_keepintermediate = false - build_krakenuniq = false - build_malt = false - malt_sequencetype = "DNA" - + build_bracken = false + build_centrifuge = false + build_diamond = false + build_ganon = false + build_kaiju = false + build_kraken2 = false + kraken2_keepintermediate = false + build_krakenuniq = false + build_malt = false + malt_sequencetype = "DNA" } // Load base.config by default for all pipelines @@ -76,90 +75,90 @@ includeConfig 'conf/base.config' profiles { debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false 
nextflow.enable.configProcessNamesValidation = true } conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - conda.channels = ['conda-forge', 'bioconda'] - apptainer.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - docker.runOptions = '-u $(id -u):$(id -g)' + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false } apptainer { - apptainer.enabled = true - apptainer.autoMounts = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + 
singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false } wave { apptainer.ociAutoPull = true @@ -169,14 +168,19 @@ profiles { wave.strategy = 'conda,container' } gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB + } + test { + includeConfig 'conf/test.config' + } + test_full { + includeConfig 'conf/test_full.config' + } + test_nothing { + includeConfig 'conf/test_nothing.config' } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } - test_nothing { includeConfig 'conf/test_nothing.config' } - } // Load nf-core custom profiles from different Institutions @@ -188,10 +192,10 @@ includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${pa // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled // Set to your registry if you have a mirror of containers -apptainer.registry = 'quay.io' -docker.registry = 'quay.io' -podman.registry = 'quay.io' -singularity.registry = 'quay.io' +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' charliecloud.registry = 'quay.io' // Export these variables to prevent local Python/R libraries from conflicting with those in the container @@ -218,7 +222,7 @@ set -C # No clobber - prevent output redirection from overwriting files. // Disable process selector warnings by default. Use debug profile to enable warnings. nextflow.enable.configProcessNamesValidation = false -trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') +trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') timeline { enabled = true file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" @@ -249,17 +253,17 @@ manifest { // Nextflow plugins plugins { - id 'nf-schema@2.1.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.1.1' } validation { defaultIgnoreParams = ["genomes"] help { - enabled = true - command = "nextflow run $manifest.name -profile --input samplesheet.csv --outdir " - fullParameter = "help_full" + enabled = true + command = "nextflow run ${manifest.name} -profile --input samplesheet.csv --outdir " + fullParameter = "help_full" showHiddenParameter = "show_hidden" - beforeText = """ + beforeText = """ -\033[2m----------------------------------------------------\033[0m- \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m \033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m @@ -269,7 +273,7 @@ validation { \033[0;35m ${manifest.name} ${manifest.version}\033[0m -\033[2m----------------------------------------------------\033[0m- """ - afterText = """${manifest.doi ? "* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} + afterText = """${manifest.doi ? "* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/', '')}" }.join("\n")}${manifest.doi ? 
"\n" : ""} * The nf-core framework https://doi.org/10.1038/s41587-020-0439-x @@ -279,7 +283,7 @@ validation { } summary { beforeText = validation.help.beforeText - afterText = validation.help.afterText + afterText = validation.help.afterText } } diff --git a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf index c86caa0..589b799 100644 --- a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf @@ -130,10 +130,35 @@ workflow PIPELINE_COMPLETION { // def validateInputParameters() { - // Validate DIAMOND parameter combinations + // Validate CENTRIFUGE auxiliary file combinations + if (params.build_centrifuge && [!params.nucl2taxid, !params.nodesdmp, !params.namesdmp].any()) { + error('[nf-core/createtaxdb] Supplied --build_centrifuge, but missing at least one of: --nucl2taxid, --nodesdmp, or --namesdmp (all are mandatory for CENTRIFUGE)') + } + + // Validate DIAMOND auxiliary file combinations if (params.build_diamond && [!params.prot2taxid, !params.nodesdmp, !params.namesdmp].any()) { error('[nf-core/createtaxdb] Supplied --build_diamond, but missing at least one of: --prot2taxid, --nodesdmp, or --namesdmp (all are mandatory for DIAMOND)') } + + // Validate GANON parameter combinations + if (params.build_ganon && [!params.nodesdmp, !params.namesdmp].any()) { + error('[nf-core/createtaxdb] Supplied --build_ganon, but missing at least one of: --nodesdmp, or --namesdmp (all are mandatory for GANON)') + } + + // Validate BRACKEN/KRAKEN parameter combinations + if ((params.build_bracken || params.build_kraken2) && [!params.accession2taxid, !params.nodesdmp, !params.namesdmp].any()) { + error('[nf-core/createtaxdb] Supplied --build_kraken2 or --bracken, but missing at least one of: --accession2taxid, --nodesdmp, or --namesdmp (all are mandatory for BRACKEN/KRAKEN2)') + } + + // Validate KRAKENUNIQ auxiliary file combinations + if (params.build_krakenuniq && [!params.nucl2taxid, !params.nodesdmp, !params.namesdmp].any()) { + error('[nf-core/createtaxdb] Supplied --build_krakenuniq, but missing at least one of: --nucl2taxid, --nodesdmp, or --namesdmp (all are mandatory for KRAKENUNIQ)') + } + + // Validate MALT auxiliary file combinations + if (params.build_krakenuniq && [!params.malt_mapdb].any()) { + error('[nf-core/createtaxdb] Supplied --build_malt, but missing: --malt_mapdb (all are mandatory for MALT)') + } } // diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index 34712c7..704faa8 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -139,8 +139,6 @@ workflow CREATETAXDB { if (params.build_ganon) { - ch_ganon_input_fastas = ch_prepped_dna_fastas_ungrouped.collect() - ch_ganon_input_tsv = ch_prepped_dna_fastas_ungrouped .map { meta, fasta -> // I tried with .name() but it kept giving error of `Unknown method invocation `name` on XPath type... 
not sure why @@ -192,7 +190,7 @@ workflow CREATETAXDB { if (params.build_krakenuniq) { ch_taxdmpfiles_for_krakenuniq = Channel.of(ch_taxonomy_namesdmp).combine(Channel.of(ch_taxonomy_nodesdmp)).map { [it] } - ch_input_for_krakenuniq = ch_prepped_dna_fastas.combine(ch_taxdmpfiles_for_krakenuniq).map { meta, reads, taxdump -> [meta, reads, taxdump, ch_nucl2taxid] } + ch_input_for_krakenuniq = ch_prepped_dna_fastas.combine(ch_taxdmpfiles_for_krakenuniq).map { meta, fastas, taxdump -> [meta, fastas, taxdump, ch_nucl2taxid] } KRAKENUNIQ_BUILD(ch_input_for_krakenuniq) ch_versions = ch_versions.mix(KRAKENUNIQ_BUILD.out.versions.first()) From af9a8f645fdc458125f9873a316b2c4d6d84798b Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 28 Nov 2024 13:26:35 +0100 Subject: [PATCH 6/9] Add reference to ganon across all test profiles and fix typo --- conf/test.config | 1 + conf/test_full.config | 17 ++++---- conf/test_nothing.config | 1 + main.nf | 89 ++++++++++++++++++---------------------- tests/test.nf.test | 2 + workflows/createtaxdb.nf | 7 +++- 6 files changed, 58 insertions(+), 59 deletions(-) diff --git a/conf/test.config b/conf/test.config index 93cf208..d582ba1 100644 --- a/conf/test.config +++ b/conf/test.config @@ -31,6 +31,7 @@ params { build_bracken = true build_diamond = true + build_ganon = true build_kaiju = true build_malt = true build_centrifuge = true diff --git a/conf/test_full.config b/conf/test_full.config index a628fd1..f14adca 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -17,13 +17,14 @@ params { // Input data for full size test // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' - build_bracken = true - build_diamond = true - build_kaiju = true - build_malt = true - build_centrifuge = true - build_kraken2 = true - build_krakenuniq = true + build_bracken = true + build_diamond = true + build_ganon = true + build_kaiju = true + build_malt = true + build_centrifuge = true + build_kraken2 = true + build_krakenuniq = true } diff --git a/conf/test_nothing.config b/conf/test_nothing.config index 1f22ce2..b39e675 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -22,6 +22,7 @@ params { build_bracken = false build_diamond = false + build_ganon = false build_kaiju = false build_malt = false build_centrifuge = false diff --git a/main.nf b/main.nf index 81aefc5..ec245d9 100644 --- a/main.nf +++ b/main.nf @@ -15,51 +15,9 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { CREATETAXDB } from './workflows/createtaxdb' +include { CREATETAXDB } from './workflows/createtaxdb' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_createtaxdb_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_createtaxdb_pipeline' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOWS FOR PIPELINE -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// WORKFLOW: Run main analysis pipeline depending on type of input -// -workflow NFCORE_CREATETAXDB { - - take: - 
samplesheet // channel: samplesheet read in from --input - - main: - - // - // WORKFLOW: Run pipeline - // - ch_samplesheet = samplesheet - ch_taxonomy_namesdmp = file(params.namesdmp) - ch_taxonomy_nodesdmp = file(params.nodesdmp) - ch_accession2taxid = file(params.accession2taxid) - ch_nucl2taxid = file(params.nucl2taxid) - ch_prot2taxid = file(params.prot2taxid) - ch_malt_mapdb = file(params.malt_mapdb) - - - CREATETAXDB ( - ch_samplesheet, - ch_taxonomy_namesdmp, - ch_taxonomy_nodesdmp, - ch_accession2taxid, - ch_nucl2taxid, - ch_prot2taxid, - ch_malt_mapdb, - - ) - emit: - multiqc_report = CREATETAXDB.out.multiqc_report // channel: /path/to/multiqc_report.html -} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -67,12 +25,10 @@ workflow NFCORE_CREATETAXDB { */ workflow { - - main: // // SUBWORKFLOW: Run initialisation tasks // - PIPELINE_INITIALISATION ( + PIPELINE_INITIALISATION( params.version, params.validate_params, params.monochrome_logs, @@ -84,13 +40,13 @@ workflow { // // WORKFLOW: Run main workflow // - NFCORE_CREATETAXDB ( + NFCORE_CREATETAXDB( PIPELINE_INITIALISATION.out.samplesheet ) // // SUBWORKFLOW: Run completion tasks // - PIPELINE_COMPLETION ( + PIPELINE_COMPLETION( params.email, params.email_on_fail, params.plaintext_email, @@ -103,6 +59,41 @@ workflow { /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END + NAMED WORKFLOWS FOR PIPELINE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + +// +// WORKFLOW: Run main analysis pipeline depending on type of input +// +workflow NFCORE_CREATETAXDB { + take: + samplesheet // channel: samplesheet read in from --input + + main: + + // + // WORKFLOW: Run pipeline + // + ch_samplesheet = samplesheet + ch_taxonomy_namesdmp = file(params.namesdmp, checkIfExists: true) + ch_taxonomy_nodesdmp = file(params.nodesdmp, checkIfExists: true) + ch_accession2taxid = file(params.accession2taxid, checkIfExists: true) + ch_nucl2taxid = file(params.nucl2taxid, checkIfExists: true) + ch_prot2taxid = file(params.prot2taxid, checkIfExists: true) + ch_malt_mapdb = file(params.malt_mapdb, checkIfExists: true) + + + CREATETAXDB( + ch_samplesheet, + ch_taxonomy_namesdmp, + ch_taxonomy_nodesdmp, + ch_accession2taxid, + ch_nucl2taxid, + ch_prot2taxid, + ch_malt_mapdb + ) + + emit: + multiqc_report = CREATETAXDB.out.multiqc_report // channel: /path/to/multiqc_report.html +} diff --git a/tests/test.nf.test b/tests/test.nf.test index 76a073c..c8c6632 100644 --- a/tests/test.nf.test +++ b/tests/test.nf.test @@ -23,6 +23,8 @@ nextflow_pipeline { file("$outputDir/bracken/database/database.kraken").name, path("$outputDir/centrifuge/"), path("$outputDir/diamond/database.dmnd"), + path("$outputDir/ganon/database.hibf"), + path("$outputDir/ganon/database.tax"), path("$outputDir/kaiju/database.fmi"), path("$outputDir/kraken2/database/hash.k2d"), file("$outputDir/kraken2/database/opts.k2d").name, diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index 704faa8..3b82d50 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -79,7 +79,7 @@ workflow CREATETAXDB { // Place in single file CAT_CAT_DNA(ch_prepped_dna_fastas) ch_versions = ch_versions.mix(CAT_CAT_DNA.out.versions.first()) - ch_singleref_for_dna = CAT_CAT_DNA.out + ch_singleref_for_dna = CAT_CAT_DNA.out.file_out } // TODO: Possibly need to have a modification step to get header correct to actually run with kaiju... 
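With ganon enabled in the bundled test profiles above, the whole branch can be smoke-tested end to end. A hedged sketch of the usual nf-core invocation follows; the container engine and output directory are local choices, not values fixed by this patch series.

```bash
# Exercise the ganon build (and the other enabled builders) via the test profile;
# assumes Docker is available, swap in singularity or conda as needed.
nextflow run nf-core/createtaxdb \
    -profile test,docker \
    --outdir results
```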
@@ -105,7 +105,7 @@ workflow CREATETAXDB { //ch_versions = ch_versions.mix( PIGZ_COMPRESS_AA.versions.first() ) CAT_CAT_AA(ch_prepped_aa_fastas) - ch_singleref_for_aa = CAT_CAT_AA.out_file + ch_singleref_for_aa = CAT_CAT_AA.out.file_out ch_versions = ch_versions.mix(CAT_CAT_AA.out.versions.first()) } @@ -161,6 +161,9 @@ workflow CREATETAXDB { ch_versions = ch_versions.mix(GANON_BUILDCUSTOM.out.versions.first()) ch_ganon_output = GANON_BUILDCUSTOM.out.db } + else { + ch_ganon_output = Channel.empty() + } // MODULE: Run KAIJU/MKFMI From 565d6100d295057f3b56bce34dbf544069ff3b4e Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 28 Nov 2024 13:30:43 +0100 Subject: [PATCH 7/9] Update test --- tests/test.nf.test.snap | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test.nf.test.snap b/tests/test.nf.test.snap index 0bd549f..dc60a3f 100644 --- a/tests/test.nf.test.snap +++ b/tests/test.nf.test.snap @@ -11,6 +11,8 @@ "database.4.cf:md5,2902ec5df0db6da41a91b40d2f46b30d" ], "database.dmnd:md5,b2ea49ef5490c526e2c56cae19bcb462", + "database.hibf:md5,af913cecda744b02751e2f5320c35c7c", + "database.tax:md5,30f327fbe453aa1a981363fd9f4df21b", "database.fmi:md5,54fd89f5e4eab61af30175e8aa389598", "hash.k2d:md5,941118164b4bcc010593f7a7c7b30029", "opts.k2d", @@ -30,8 +32,8 @@ ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nextflow": "24.10.2" }, - "timestamp": "2024-10-08T16:33:06.699148849" + "timestamp": "2024-11-28T13:27:57.851046024" } } \ No newline at end of file From d096cee0d55392356dcae61879fc7349d8adb21c Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 28 Nov 2024 13:47:01 +0100 Subject: [PATCH 8/9] Add current required documentation --- CITATIONS.md | 4 ++++ README.md | 1 + docs/output.md | 17 ++++++++++++++++- .../utils_nfcore_createtaxdb_pipeline/main.nf | 2 ++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/CITATIONS.md b/CITATIONS.md index 354566c..1e56a20 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -40,6 +40,10 @@ > Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: estimating species abundance in metagenomics data. PeerJ. Computer Science, 3(e104), e104. https://doi.org/10.7717/peerj-cs.104 +- [ganon](https://doi.org/10.1093/bioinformatics/btaa458) + + > Piro, V. C., Dadi, T. H., Seiler, E., Reinert, K., & Renard, B. Y. (2020). Ganon: Precise metagenomics classification against large and up-to-date sets of reference sequences. Bioinformatics (Oxford, England), 36(Suppl_1), i12–i20. https://doi.org/10.1093/bioinformatics/btaa458 + - [Centrifuge](https://doi.org/10.1101/gr.210641.116) > Kim, D., Song, L., Breitwieser, F. P., & Salzberg, S. L. (2016). Centrifuge: rapid and sensitive classification of metagenomic sequences. Genome Research, 26(12), 1721–1729. https://doi.org/10.1101/gr.210641.116 diff --git a/README.md b/README.md index 16bd3fa..a5fffbe 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ 2. 
Builds databases for: - [Bracken](https://doi.org/10.7717/peerj-cs.104) - [Centrifuge](https://doi.org/10.1101/gr.210641.116) + - [ganon](https://doi.org/10.1093/bioinformatics/btaa458) - [DIAMOND](https://doi.org/10.1038/nmeth.3176) - [Kaiju](https://doi.org/10.1038/ncomms11257) - [Kraken2](https://doi.org/10.1186/s13059-019-1891-0) diff --git a/docs/output.md b/docs/output.md index 9f5ddb0..762991f 100644 --- a/docs/output.md +++ b/docs/output.md @@ -14,7 +14,8 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution -- [Bracken](#bracken) - Database files for Brakcen +- [Bracken](#bracken) - Database files for Bracken +- [ganon](#ganon) - Database files for ganon - [Centrifuge](#centrifuge) - Database files for Centrifuge - [DIAMOND](#diamond) - Database files for DIAMOND - [Kaiju](#kaiju) - Database files for Kaiju @@ -92,6 +93,20 @@ The resulting `/` directory can be given to Bracken itself with `bracke A directory and `cf` files can be given to the Centrifuge command with `centrifuge -x ///` etc. +### Ganon + +[ganon](https://github.com/pirovc/ganon/) classifies genomic sequences against large sets of references efficiently, with integrated download and update of databases (refseq/genbank), taxonomic profiling (ncbi/gtdb), binning and hierarchical classification, customized reporting and more. + +
+<details markdown="1">
+<summary>Output files</summary>
+
+- `ganon/`
+  - `<database>.hibf`: main bloom filter index file
+  - `<database>.tax`: taxonomy tree used for taxonomy assignment
+
+</details>
+
+The directory containing these two files can be given to ganon itself using the database name as a prefix, e.g., `ganon classify -d <outdir>/ganon/<database>`.
+
 ### Diamond
 
 [DIAMOND](https://github.com/bbuchfink/diamond) is a accelerated BLAST compatible local sequence aligner particularly used for protein alignment.
diff --git a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf
index 589b799..ce1edde 100644
--- a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf
@@ -186,6 +186,7 @@ def toolCitationText() {
         "Tools used in the workflow included:",
         params.build_bracken ? "Bracken (Lu et al. 2017)," : "",
         params.build_centrifuge ? "Centrifuge (Kim et al. 2016)," : "",
+        params.build_ganon ? "ganon (Piro et al. 2020)," : "",
         params.build_diamond ? "DIAMOND (Buchfink et al. 2015)," : "",
         params.build_kaiju ? "Kaiju (Menzel et al. 2016)," : "",
         params.build_kraken2 ? "Kraken2 (Wood et al. 2019)," : "",
@@ -205,6 +206,7 @@ def toolBibliographyText() {
     def reference_text = [
         params.build_bracken ? '<li>Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: estimating species abundance in metagenomics data. PeerJ. Computer Science, 3(e104), e104. 10.7717/peerj-cs.104</li>' : "",
         params.build_centrifuge ? '<li>Kim, D., Song, L., Breitwieser, F. P., & Salzberg, S. L. (2016). Centrifuge: rapid and sensitive classification of metagenomic sequences. Genome Research, 26(12), 1721–1729. 10.1101/gr.210641.116</li>' : "",
+            params.build_ganon       ? "<li>Piro, V. C., Dadi, T. H., Seiler, E., Reinert, K., & Renard, B. Y. (2020). Ganon: Precise metagenomics classification against large and up-to-date sets of reference sequences. Bioinformatics (Oxford, England), 36(Suppl_1), i12–i20. 10.1093/bioinformatics/btaa458</li>" : "",
         params.build_diamond ? '<li>Buchfink, B., Xie, C., & Huson, D. H. (2015). Fast and sensitive protein alignment using DIAMOND. Nature Methods, 12(1), 59–60. 10.1038/nmeth.3176</li>' : "",
         params.build_kaiju ? '<li>Menzel, P., Ng, K. L., & Krogh, A. (2016). Fast and sensitive taxonomic classification for metagenomics with Kaiju. Nature Communications, 7, 11257. 10.1038/ncomms11257</li>' : "",
         params.build_kraken2 ? '<li>Wood, D. E., Lu, J., & Langmead, B. (2019). Improved metagenomic analysis with Kraken 2. Genome Biology, 20(1), 257. 10.1186/s13059-019-1891-0</li>' : "",
From 36b57423585e6a8e036ac203425f4ec6a1dcc998 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Thu, 28 Nov 2024 14:01:03 +0100
Subject: [PATCH 9/9] Fix mangled regex strings

---
 subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf
index ce1edde..7fb98b7 100644
--- a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf
@@ -245,7 +245,7 @@ def methodsDescriptionText(mqc_methods_yaml) {
     meta["tool_citations"] = ""
     meta["tool_bibliography"] = ""
 
-    meta["tool_citations"] = toolCitationText().replaceAll(', .', ".").replaceAll('. .', ' .').replaceAll(', .', '.')
+    meta["tool_citations"] = toolCitationText().replaceAll(', \\.', ".").replaceAll('. \\.', ' .').replaceAll(', \\.', '.')
     meta["tool_bibliography"] = toolBibliographyText()
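
Background on the PATCH 9/9 change: Groovy's String.replaceAll() treats its first argument as a regular expression, so an unescaped '.' acts as a wildcard and can swallow real characters from the citation sentence rather than only the stray ", ." fragments left behind when some tools are disabled. A minimal sketch of the before/after behaviour, using made-up citation strings rather than actual pipeline output:

    // Groovy: the first argument of replaceAll() is a regex, not a literal string.
    def citation = "Bracken (Lu et al. 2017), Kaiju (Menzel et al. 2016), ."

    // Unescaped pattern: ', .' also matches ', K' and mangles the sentence.
    assert citation.replaceAll(', .', '.') == "Bracken (Lu et al. 2017).aiju (Menzel et al. 2016)."

    // Escaped pattern (as in this patch): only the literal dangling ', .' is collapsed.
    assert citation.replaceAll(', \\.', '.') == "Bracken (Lu et al. 2017), Kaiju (Menzel et al. 2016)."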