diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0cd6575..25493e8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,36 +1,65 @@ -name: nf-core CI # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +name: nf-core CI on: push: branches: - - dev + - "dev" pull_request: + branches: + - "dev" + - "master" release: types: [published] workflow_dispatch: env: NXF_ANSI_LOG: false + NFTEST_VER: "0.7.3" concurrency: - group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true jobs: + define_nxf_versions: + name: Choose nextflow versions to test against depending on target branch + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.nxf_versions.outputs.matrix }} + steps: + - id: nxf_versions + run: | + if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.base_ref }}" == "dev" && "${{ matrix.NXF_VER }}" != "latest-everything" ]]; then + echo matrix='["latest-everything"]' | tee -a $GITHUB_OUTPUT + else + echo matrix='["latest-everything", "23.10.0"]' | tee -a $GITHUB_OUTPUT + fi + test: - name: Run pipeline with test data - # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/createtaxdb') }}" + name: nf-test + needs: define_nxf_versions runs-on: ubuntu-latest strategy: + fail-fast: false matrix: - NXF_VER: - - "24.04.2" - - "latest-everything" + NXF_VER: ${{ fromJson(needs.define_nxf_versions.outputs.matrix) }} + tags: + - "test" + profile: + - "docker" + steps: - name: Check out pipeline code uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + - name: Check out test data + uses: actions/checkout@v3 + with: + repository: nf-core/test-datasets + ref: createtaxdb + path: 
test-datasets/ + fetch-depth: 1 + - name: Install Nextflow uses: nf-core/setup-nextflow@v2 with: @@ -39,21 +68,23 @@ jobs: - name: Disk space cleanup uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - name: Run pipeline with test data (docker) - # TODO nf-core: You can customise CI pipeline run tests as required - # For example: adding multiple test runs with different parameters - # Remember that you can parallelise this by using strategy.matrix + - name: Install nf-test run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + wget -qO- https://code.askimed.com/install/nf-test | bash -s $NFTEST_VER + sudo mv nf-test /usr/local/bin/ - - name: Run pipeline with test data (singularity) - # TODO nf-core: You can customise CI pipeline run tests as required + - name: Run nf-test run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,singularity --outdir ./results - if: "${{ github.base_ref == 'master' }}" + nf-test test --tag ${{ matrix.tags }} --profile ${{ matrix.tags }},${{ matrix.profile }} --junitxml=test.xml - - name: Run pipeline with test data (conda) - # TODO nf-core: You can customise CI pipeline run tests as required + - name: Output log on failure + if: failure() run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,conda --outdir ./results - if: "${{ github.base_ref == 'master' }}" + sudo apt install bat > /dev/null + batcat --decorations=always --color=always ${{ github.workspace }}/.nf-test/tests/*/output/pipeline_info/software_versions.yml + + - name: Publish Test Report + uses: mikepenz/action-junit-report@v3 + if: always() # always run even if the previous step fails + with: + report_paths: "*.xml" diff --git a/.gitignore b/.gitignore index 5124c9a..f704e54 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ results/ testing/ testing* *.pyc +.nf-test* +test.xml diff --git a/.nf-core.yml b/.nf-core.yml index d00116c..fcc351d 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -13,6 +13,8 
@@ template: name: createtaxdb org: nf-core outdir: . - skip_features: null + skip_features: + - fastqc + - igenomes version: 1.0dev update: null diff --git a/CHANGELOG.md b/CHANGELOG.md index 7706b5c..a5fe08a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Initial release of nf-core/createtaxdb, created with the [nf-core](https://nf-co.re/) template. +Adds database building support for: + +- DIAMOND (added by @jfy133) +- Kaiju (added by @jfy133) +- MALT (added by @jfy133) + ### `Added` ### `Fixed` diff --git a/CITATIONS.md b/CITATIONS.md index a0b08e6..6cd3c3b 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -39,3 +39,31 @@ - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. + +- [Bracken](https://doi.org/10.7717/peerj-cs.104) + + > Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: estimating species abundance in metagenomics data. PeerJ. Computer Science, 3(e104), e104. https://doi.org/10.7717/peerj-cs.104 + +- [Centrifuge](https://doi.org/10.1101/gr.210641.116) + + > Kim, D., Song, L., Breitwieser, F. P., & Salzberg, S. L. (2016). Centrifuge: rapid and sensitive classification of metagenomic sequences. Genome Research, 26(12), 1721–1729. https://doi.org/10.1101/gr.210641.116 + +- [DIAMOND](https://doi.org/10.1038/nmeth.3176) + + > Buchfink, B., Xie, C., & Huson, D. H. (2015). Fast and sensitive protein alignment using DIAMOND. Nature Methods, 12(1), 59–60. https://doi.org/10.1038/nmeth.3176 + +- [Kaiju](https://doi.org/10.1038/ncomms11257) + +> Menzel, P., Ng, K. L., & Krogh, A. (2016). Fast and sensitive taxonomic classification for metagenomics with Kaiju. Nature Communications, 7, 11257. 
https://doi.org/10.1038/ncomms11257 + +- [Kraken2](https://doi.org/10.1186/s13059-019-1891-0) + + > Wood, D. E., Lu, J., & Langmead, B. (2019). Improved metagenomic analysis with Kraken 2. Genome Biology, 20(1), 257. https://doi.org/10.1186/s13059-019-1891-0 + +- [KrakenUniq](https://doi.org/10.1186/s13059-018-1568-0) + + > Breitwieser, F. P., Baker, D. N., & Salzberg, S. L. (2018). KrakenUniq: confident and fast metagenomics classification using unique k-mer counts. Genome Biology, 19(1), 198. https://doi.org/10.1186/s13059-018-1568-0 + +- [MALT](https://doi.org/10.1038/s41559-017-0446-6) + + > Vågene, Å. J., Herbig, A., Campana, M. G., Robles García, N. M., Warinner, C., Sabin, S., Spyrou, M. A., Andrades Valtueña, A., Huson, D., Tuross, N., Bos, K. I., & Krause, J. (2018). Salmonella enterica genomes from victims of a major sixteenth-century epidemic in Mexico. Nature Ecology & Evolution, 2(3), 520–528. https://doi.org/10.1038/s41559-017-0446-6 diff --git a/README.md b/README.md index 13ae549..16bd3fa 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@

- - nf-core/createtaxdb + + nf-core/createtaxdb

@@ -19,7 +19,7 @@ ## Introduction -**nf-core/createtaxdb** is a bioinformatics pipeline that ... +**nf-core/createtaxdb** is a bioinformatics pipeline that constructs custom metagenomic classifier databases from the same input reference genome set for multiple classifiers and profilers in a highly automated and parallelised manner. - -1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +1. Prepares input FASTA files for building +2. Builds databases for: + - [Bracken](https://doi.org/10.7717/peerj-cs.104) + - [Centrifuge](https://doi.org/10.1101/gr.210641.116) + - [DIAMOND](https://doi.org/10.1038/nmeth.3176) + - [Kaiju](https://doi.org/10.1038/ncomms11257) + - [Kraken2](https://doi.org/10.1186/s13059-019-1891-0) + - [KrakenUniq](https://doi.org/10.1186/s13059-018-1568-0) + - [MALT](https://doi.org/10.1038/s41559-017-0446-6) ## Usage @@ -79,12 +85,14 @@ For more details about the output files and reports, please refer to the ## Credits -nf-core/createtaxdb was originally written by James A. Fellows Yates and the nf-core community. +nf-core/createtaxdb was originally written by James A. Fellows Yates, Joon Klaps, Alexander Ramos Díaz and the nf-core community. We thank the following people for their extensive assistance in the development of this pipeline: +- Zandra Fagernäs for logo design + ## Contributions and Support If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). 
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 2829edf..df3e662 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -13,3 +13,7 @@ report_section_order: export_plots: true disable_version_detection: true + +custom_logo: "nf-core-createtaxdb_logo_light_tax.svg" +custom_logo_url: https://nf-co.re/createtaxdb +custom_logo_title: "nf-core/createtaxdb" diff --git a/assets/schema_input.json b/assets/schema_input.json index 5951089..71dee99 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -7,27 +7,62 @@ "items": { "type": "object", "properties": { - "sample": { + "id": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces", + "unique": true, + "errorMessage": "Sequence reference name must be provided and cannot contain spaces", "meta": ["id"] }, - "fastq_1": { - "type": "string", - "format": "file-path", + "taxid": { + "type": "integer", + "unique": true, + "errorMessage": "Please provide a valid taxonomic ID in integer format", + "meta": ["taxid"] + }, + "fasta_dna": { + "anyOf": [ + { + "type": "string", + "format": "file-path", + "pattern": "^\\S+\\.(fasta|fas|fa|fna)(\\.gz)?$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "unique": true, + "errorMessage": "FASTA file for nucleotide sequence cannot contain spaces and must have a valid FASTA extension (fasta, fna, fa, fas, faa), optionally gzipped", "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "format": "file-path" }, - "fastq_2": { - "type": "string", - "format": "file-path", + "fasta_aa": { + "anyOf": [ + { + "type": "string", + "format": "file-path", + "pattern": "^\\S+\\.(fasta|fas|fa|faa)(\\.gz)?$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "unique": true, + "errorMessage": "FASTA file for amino acid reference sequence 
cannot contain spaces and must have a valid FASTA extension (fasta, fna, fa, fas, faa), optionally gzipped", "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "format": "file-path" } }, - "required": ["sample", "fastq_1"] + "required": ["id", "taxid"], + "anyOf": [ + { + "required": ["fasta_dna"] + }, + { + "required": ["fasta_aa"] + } + ] } } diff --git a/assets/test.csv b/assets/test.csv new file mode 100644 index 0000000..52ac082 --- /dev/null +++ b/assets/test.csv @@ -0,0 +1,3 @@ +id,taxid,fasta_dna,fasta_aa +Severe_acute_respiratory_syndrome_coronavirus_2,2697049,https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/fasta/sarscov2.fasta,https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/fasta/sarscov2.faa +Haemophilus_influenzae,727,https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/fasta/haemophilus_influenzae.fna.gz, diff --git a/conf/base.config b/conf/base.config index 2861fcf..133e960 100644 --- a/conf/base.config +++ b/conf/base.config @@ -59,4 +59,9 @@ process { errorStrategy = 'retry' maxRetries = 2 } + + withName:'KAIJU_MKFMI'{ + memory = { check_max( 24.GB * task.attempt, 'memory' ) } + + } } diff --git a/conf/modules.config b/conf/modules.config index d266a38..6e4a872 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,11 +18,8 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: FASTQC { - ext.args = '--quiet' - } - withName: 'MULTIQC' { - ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + withName: MULTIQC { + ext.args = { params.multiqc_title ? 
"--title \"${params.multiqc_title}\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, @@ -30,4 +27,27 @@ process { ] } + withName: CAT_CAT_DNA { + ext.prefix = { "${meta.id}.fna" } + publishDir = [ + path: { "${params.outdir}/cat" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_concatenated_fastas + ] + } + + withName: CAT_CAT_AA { + ext.prefix = { "${meta.id}.faa" } + publishDir = [ + path: { "${params.outdir}/cat" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_concatenated_fastas + ] + } + + withName: MALT_BUILD { + ext.args = { "--sequenceType ${params.malt_sequencetype}" } + } } diff --git a/conf/test.config b/conf/test.config index d2b9630..709e1a6 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,8 +25,29 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + input = params.pipelines_testdata_base_path + 'createtaxdb/samplesheets/test.csv' - // Genome references - genome = 'R64-1-1' + dbname = "database" + + build_bracken = true + build_diamond = true + build_kaiju = true + build_malt = true + build_centrifuge = true + build_kraken2 = true + build_krakenuniq = true + + accession2taxid = params.pipelines_testdata_base_path + 'createtaxdb/data/taxonomy/nucl_gb.accession2taxid' + nucl2taxid = params.pipelines_testdata_base_path + 'createtaxdb/data/taxonomy/nucl2tax.map' + prot2taxid = params.pipelines_testdata_base_path + 'createtaxdb/data/taxonomy/prot.accession2taxid.gz' + nodesdmp = params.pipelines_testdata_base_path + 'createtaxdb/data/taxonomy/nodes.dmp' + namesdmp = 
params.pipelines_testdata_base_path + 'createtaxdb/data/taxonomy/names.dmp' + malt_mapdb = 's3://ngi-igenomes/test-data/createtaxdb/taxonomy/megan-nucl-Feb2022.db.zip' +} + +process { + withName: KRAKENUNIQ_BUILD { + memory = { check_max(12.GB * task.attempt, 'memory') } + ext.args = "--work-on-disk --max-db-size 14 --kmer-len 15 --minimizer-len 13 --jellyfish-bin $(which jellyfish)" + } } diff --git a/conf/test_full.config b/conf/test_full.config index 5938685..a628fd1 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -19,6 +19,11 @@ params { // TODO nf-core: Give any required params for the test so that command line flags are not needed input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' - // Genome references - genome = 'R64-1-1' + build_bracken = true + build_diamond = true + build_kaiju = true + build_malt = true + build_centrifuge = true + build_kraken2 = true + build_krakenuniq = true } diff --git a/conf/test_nothing.config b/conf/test_nothing.config new file mode 100644 index 0000000..b290707 --- /dev/null +++ b/conf/test_nothing.config @@ -0,0 +1,35 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/createtaxdb -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +includeConfig 'test.config' + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/samplesheets/test.csv' + + build_bracken = false + build_diamond = false + build_kaiju = false + build_malt = false + build_centrifuge = false + build_kraken2 = false + build_krakenuniq = false +} diff --git a/docs/images/nf-core-createtaxdb_logo_dark_tax.png b/docs/images/nf-core-createtaxdb_logo_dark_tax.png new file mode 100644 index 0000000..c419e4d Binary files /dev/null and b/docs/images/nf-core-createtaxdb_logo_dark_tax.png differ diff --git a/docs/images/nf-core-createtaxdb_logo_dark_tax.svg b/docs/images/nf-core-createtaxdb_logo_dark_tax.svg new file mode 100644 index 0000000..46f019b --- /dev/null +++ b/docs/images/nf-core-createtaxdb_logo_dark_tax.svg @@ -0,0 +1,1184 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nf- + +core/ +createtaxdb + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/images/nf-core-createtaxdb_logo_light_tax.png b/docs/images/nf-core-createtaxdb_logo_light_tax.png new file mode 100644 index 0000000..13927f7 Binary files /dev/null and b/docs/images/nf-core-createtaxdb_logo_light_tax.png differ diff --git a/docs/images/nf-core-createtaxdb_logo_light_tax.svg b/docs/images/nf-core-createtaxdb_logo_light_tax.svg new file mode 
100644 index 0000000..c226c97 --- /dev/null +++ b/docs/images/nf-core-createtaxdb_logo_light_tax.svg @@ -0,0 +1,1186 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nf- + core/ + createtaxdb + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/output.md b/docs/output.md index 8c113aa..691bb5c 100644 --- a/docs/output.md +++ b/docs/output.md @@ -12,23 +12,16 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -- [FastQC](#fastqc) - Raw read QC - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution - -### FastQC - -
-Output files - -- `fastqc/` - - `*_fastqc.html`: FastQC report containing quality metrics. - - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. - -
- -[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). +- [Bracken](#bracken) - Database files for Bracken +- [Centrifuge](#centrifuge) - Database files for Centrifuge +- [DIAMOND](#diamond) - Database files for DIAMOND +- [Kaiju](#kaiju) - Database files for Kaiju +- [Kraken2](#kraken2) - Database files for Kraken2 +- [KrakenUniq](#krakenuniq) - Database files for KrakenUniq +- [MALT](#malt) - Database files for MALT ### MultiQC @@ -60,3 +53,122 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ [Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. + +### Bracken + +[Bracken](https://github.com/jenniferlu717/Bracken/) (Bayesian Reestimation of Abundance with KrakEN) is a highly accurate statistical method that computes the abundance of species in DNA sequences from a metagenomics sample. + +
+Output files + +- `bracken/` + - `/` + - `database100mers.kmer_distrib`: Bracken kmer distribution file + - `database100mers.kraken`: Bracken index file + - `database.kraken`: Bracken database file + - `hash.k2d`: Kraken2 hash database file + - `opts.k2d`: Kraken2 opts database file + - `taxo.k2d`: Kraken2 taxo database file + - `library/`: Intermediate Kraken2 directory containing FASTAs and related files of added genomes + - `taxonomy/`: Intermediate Kraken2 directory containing taxonomy files of added genomes + - `seqid2taxid.map`: Intermediate Kraken2 file containing taxonomy files of added genomes + +
+ +Note that all intermediate files are required for a Bracken database, even if Kraken2 itself only requires the `*.k2d` files. + +The resulting `/` directory can be given to Bracken itself with `bracken -d ` etc. + +### Centrifuge + +[Centrifuge](https://github.com/DaehwanKimLab/centrifuge) is a very rapid and memory-efficient system for the classification of DNA sequences from microbial samples. + +
+Output files + +- `centrifuge/` + - `.*.cf`: Centrifuge database files + +
+ +A directory and `cf` files can be given to the Centrifuge command with `centrifuge -x ///` etc. + +### Diamond + +[DIAMOND](https://github.com/bbuchfink/diamond) is an accelerated BLAST compatible local sequence aligner particularly used for protein alignment. + +
+Output files + +- `diamond/` + - `.dmnd`: DIAMOND dmnd database file + +
+ +The `dmnd` file can be given to one of the DIAMOND alignment commands with `diamond blast -d .dmnd` etc. + +### Kaiju + +[Kaiju](https://bioinformatics-centre.github.io/kaiju/) is a fast and sensitive taxonomic classifier for metagenomics utilising nucleotide to protein translations. + +
+Output files + +- `kaiju/` + - `.fmi`: Kaiju FMI index file + +
+ +The `fmi` file can be given to Kaiju itself with `kaiju -f .fmi` etc. + +### Kraken2 + +[Kraken2](https://ccb.jhu.edu/software/kraken2/) is a taxonomic classification system using exact k-mer matches to achieve high accuracy and fast classification speeds. + +
+Output files + +- `kraken2/` + - `/` + - `hash.k2d`: Kraken2 hash database file + - `opts.k2d`: Kraken2 opts database file + - `taxo.k2d`: Kraken2 taxo database file + - `library/`: Intermediate directory containing FASTAs and related files of added genomes (only present if `--build_bracken` or `--kraken2_keepintermediate` supplied) + - `taxonomy/`: Intermediate directory containing taxonomy files of added genomes (only present if `--build_bracken` or `--kraken2_keepintermediate` supplied) + - `seqid2taxid.map`: Intermediate file containing taxonomy files of added genomes (only present if `--build_bracken` or `--kraken2_keepintermediate` supplied) + +
+ +The resulting `/` directory can be given to Kraken2 itself with `kraken2 --db ` etc. + +### KrakenUniq + +[KrakenUniq](https://github.com/fbreitwieser/krakenuniq) is a metagenomics classifier with unique k-mer counting for more specific results. + +
+Output files + +- `krakenuniq/` + - `/` + - `database-build.log`: KrakenUniq build process log + - `database.idx`: KrakenUniq index file + - `database.kdb`: KrakenUniq database file + - `taxDB`: KrakenUniq taxonomy information file + +
+ +Note there may be additional files in this directory, however the ones listed above are reportedly the required ones. + +### MALT + +[MALT](https://software-ab.cs.uni-tuebingen.de/download/malt) is a fast replacement for BLASTX, BLASTP and BLASTN, and provides both local and semi-global alignment capabilities. + +
+Output files + +- `malt/` + - `malt_index/`: directory containing MALT index files + +
+ +The `malt_index` directory can be given to MALT itself with `malt-run --index /` etc. diff --git a/main.nf b/main.nf index aa7623d..81aefc5 100644 --- a/main.nf +++ b/main.nf @@ -18,18 +18,6 @@ include { CREATETAXDB } from './workflows/createtaxdb' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_createtaxdb_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_createtaxdb_pipeline' -include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_createtaxdb_pipeline' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GENOME PARAMETER VALUES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// TODO nf-core: Remove this line if you don't need a FASTA file -// This is an example of how to use getGenomeAttribute() to fetch parameters -// from igenomes.config using `--genome` -params.fasta = getGenomeAttribute('fasta') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -50,8 +38,24 @@ workflow NFCORE_CREATETAXDB { // // WORKFLOW: Run pipeline // + ch_samplesheet = samplesheet + ch_taxonomy_namesdmp = file(params.namesdmp) + ch_taxonomy_nodesdmp = file(params.nodesdmp) + ch_accession2taxid = file(params.accession2taxid) + ch_nucl2taxid = file(params.nucl2taxid) + ch_prot2taxid = file(params.prot2taxid) + ch_malt_mapdb = file(params.malt_mapdb) + + CREATETAXDB ( - samplesheet + ch_samplesheet, + ch_taxonomy_namesdmp, + ch_taxonomy_nodesdmp, + ch_accession2taxid, + ch_nucl2taxid, + ch_prot2taxid, + ch_malt_mapdb, + ) emit: multiqc_report = CREATETAXDB.out.multiqc_report // channel: /path/to/multiqc_report.html @@ -76,7 +80,7 @@ workflow { params.outdir, params.input ) - + // // WORKFLOW: Run main workflow // diff --git a/modules.json b/modules.json index 8dfc542..3b00438 100644 --- a/modules.json +++ b/modules.json @@ -5,12 +5,73 @@ 
"https://github.com/nf-core/modules.git": { "modules": { "nf-core": { - "fastqc": { + "bracken/build": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["fasta_build_add_kraken2_bracken"] + }, + "cat/cat": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "centrifuge/build": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "diamond/makedb": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "gunzip": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "kaiju/mkfmi": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "kraken2/add": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["fasta_build_add_kraken2_bracken"] + }, + "kraken2/build": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["fasta_build_add_kraken2_bracken"] + }, + "krakenuniq/build": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"], + "patch": "modules/nf-core/krakenuniq/build/krakenuniq-build.diff" + }, + "malt/build": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "multiqc": { + "branch": "master", + "git_sha": "b8d36829fa84b6e404364abff787e8b07f6d058c", + "installed_by": ["modules"] + }, + "pigz/compress": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "unzip": { "branch": "master", 
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] @@ -19,6 +80,11 @@ }, "subworkflows": { "nf-core": { + "fasta_build_add_kraken2_bracken": { + "branch": "master", + "git_sha": "c60c14b285b89bdd0607e371417dadb80385ad6e", + "installed_by": ["subworkflows"] + }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "d20fb2a9cc3e2835e9d067d1046a63252eb17352", diff --git a/modules/nf-core/bracken/build/environment.yml b/modules/nf-core/bracken/build/environment.yml new file mode 100644 index 0000000..d9ea9a6 --- /dev/null +++ b/modules/nf-core/bracken/build/environment.yml @@ -0,0 +1,6 @@ +--- +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::bracken=2.9" diff --git a/modules/nf-core/bracken/build/main.nf b/modules/nf-core/bracken/build/main.nf new file mode 100644 index 0000000..a2ee2c8 --- /dev/null +++ b/modules/nf-core/bracken/build/main.nf @@ -0,0 +1,48 @@ +process BRACKEN_BUILD { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bracken:2.9--py38h2494328_0': + 'biocontainers/bracken:2.9--py38h2494328_0' }" + + input: + tuple val(meta), path(kraken2db) + + output: + tuple val(meta), path(kraken2db , includeInputs: true), emit: db + tuple val(meta), path("${kraken2db}/database*", includeInputs: true), emit: bracken_files + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + bracken-build \\ + $args \\ + -t $task.cpus \\ + -d $kraken2db + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bracken: \$(echo \$(bracken -v) | cut -f2 -d'v') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${kraken2db}/database100mers.kmer_distrib + touch ${kraken2db}/database100mers.kraken + touch ${kraken2db}/database.kraken + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bracken: \$(echo \$(bracken -v) | cut -f2 -d'v') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bracken/build/meta.yml b/modules/nf-core/bracken/build/meta.yml new file mode 100644 index 0000000..b921941 --- /dev/null +++ b/modules/nf-core/bracken/build/meta.yml @@ -0,0 +1,58 @@ +name: "bracken_build" +description: Extends a Kraken2 database to be compatible with Bracken +keywords: + - kraken2 + - bracken + - database + - build +tools: + - "bracken": + description: "Bracken (Bayesian Reestimation of Abundance with KrakEN) is a highly + accurate statistical method that computes the abundance of species in DNA sequences + from a metagenomics sample." + homepage: "https://ccb.jhu.edu/software/bracken/" + documentation: "https://ccb.jhu.edu/software/bracken/" + tool_dev_url: "https://github.com/jenniferlu717/Bracken/" + doi: "10.7717/peerj-cs.104 " + licence: ["GPL v3"] + identifier: biotools:bracken + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - kraken2db: + type: directory + description: A Kraken2 database directory + pattern: "*/" +output: + - db: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "kraken2db , includeInputs: true": + type: directory + description: A Kraken2 database directory with required bracken files in side + pattern: "*/" + - bracken_files: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - '${kraken2db}/database*", includeInputs: true': + type: directory + description: Bracken files required to extend the Kraken2 database + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/bracken/build/tests/main.nf.test b/modules/nf-core/bracken/build/tests/main.nf.test new file mode 100644 index 0000000..781c151 --- /dev/null +++ b/modules/nf-core/bracken/build/tests/main.nf.test @@ -0,0 +1,82 @@ +nextflow_process { + + name "Test Process BRACKEN_BUILD" + script "../main.nf" + process "BRACKEN_BUILD" + + tag "modules" + tag "modules_nfcore" + tag "bracken" + tag "bracken/build" + tag "untar" + + test("kraken2 - db") { + + setup { + run ("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = [[id: 'db'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2_intermediate.tar.gz', checkIfExists: true)] + """ + } + } + } + + when { + process { + """ + input[0] = UNTAR.out.untar + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file("${process.out.db[0][1]}/database100mers.kmer_distrib").name, + file("${process.out.db[0][1]}/database100mers.kraken").name, + file("${process.out.db[0][1]}/database.kraken").name, + 
file("${process.out.bracken_files[0][1]}/database100mers.kmer_distrib").name, + file("${process.out.bracken_files[0][1]}/database100mers.kraken").name, + file("${process.out.bracken_files[0][1]}/database.kraken").name, + process.out.versions + ).match() + } + ) + } + + } + + test("kraken2 - db - stub") { + + options "-stub" + + setup { + run ("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = [[id: 'db'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2_intermediate.tar.gz', checkIfExists: true)] + """ + } + } + } + + when { + process { + """ + input[0] = UNTAR.out.untar + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/bracken/build/tests/main.nf.test.snap b/modules/nf-core/bracken/build/tests/main.nf.test.snap new file mode 100644 index 0000000..910e314 --- /dev/null +++ b/modules/nf-core/bracken/build/tests/main.nf.test.snap @@ -0,0 +1,129 @@ +{ + "kraken2 - db": { + "content": [ + "database100mers.kmer_distrib", + "database100mers.kraken", + "database.kraken", + "database100mers.kmer_distrib", + "database100mers.kraken", + "database.kraken", + [ + "versions.yml:md5,925c6ae1387eaf6dbd14656125bc6d9b" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T13:45:17.37462" + }, + "kraken2 - db - stub": { + "content": [ + { + "0": [ + [ + { + "id": "db" + }, + [ + "database.kraken:md5,d41d8cd98f00b204e9800998ecf8427e", + "database100mers.kmer_distrib:md5,d41d8cd98f00b204e9800998ecf8427e", + "database100mers.kraken:md5,d41d8cd98f00b204e9800998ecf8427e", + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + [ + "G46z5ZvKEd.fna:md5,d41d8cd98f00b204e9800998ecf8427e", + "G46z5ZvKEd.fna.masked:md5,d41d8cd98f00b204e9800998ecf8427e", + "prelim_map.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "prelim_map_MtGz4nUfR3.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"prelim_map_eNakvrOVZm.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "z_4A5lulyr.fna:md5,d41d8cd98f00b204e9800998ecf8427e", + "z_4A5lulyr.fna.masked:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "seqid2taxid.map:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + "names.dmp:md5,d41d8cd98f00b204e9800998ecf8427e", + "nodes.dmp:md5,d41d8cd98f00b204e9800998ecf8427e", + "nucl_gb.accession2taxid:md5,d41d8cd98f00b204e9800998ecf8427e", + "prelim_map.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "unmapped.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + { + "id": "db" + }, + [ + "database.kraken:md5,d41d8cd98f00b204e9800998ecf8427e", + "database100mers.kmer_distrib:md5,d41d8cd98f00b204e9800998ecf8427e", + "database100mers.kraken:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "2": [ + "versions.yml:md5,925c6ae1387eaf6dbd14656125bc6d9b" + ], + "bracken_files": [ + [ + { + "id": "db" + }, + [ + "database.kraken:md5,d41d8cd98f00b204e9800998ecf8427e", + "database100mers.kmer_distrib:md5,d41d8cd98f00b204e9800998ecf8427e", + "database100mers.kraken:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "db": [ + [ + { + "id": "db" + }, + [ + "database.kraken:md5,d41d8cd98f00b204e9800998ecf8427e", + "database100mers.kmer_distrib:md5,d41d8cd98f00b204e9800998ecf8427e", + "database100mers.kraken:md5,d41d8cd98f00b204e9800998ecf8427e", + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + [ + "G46z5ZvKEd.fna:md5,d41d8cd98f00b204e9800998ecf8427e", + "G46z5ZvKEd.fna.masked:md5,d41d8cd98f00b204e9800998ecf8427e", + "prelim_map.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "prelim_map_MtGz4nUfR3.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "prelim_map_eNakvrOVZm.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "z_4A5lulyr.fna:md5,d41d8cd98f00b204e9800998ecf8427e", + "z_4A5lulyr.fna.masked:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + 
"opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "seqid2taxid.map:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + "names.dmp:md5,d41d8cd98f00b204e9800998ecf8427e", + "nodes.dmp:md5,d41d8cd98f00b204e9800998ecf8427e", + "nucl_gb.accession2taxid:md5,d41d8cd98f00b204e9800998ecf8427e", + "prelim_map.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "unmapped.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,925c6ae1387eaf6dbd14656125bc6d9b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T13:45:26.452181" + } +} \ No newline at end of file diff --git a/modules/nf-core/bracken/build/tests/tags.yml b/modules/nf-core/bracken/build/tests/tags.yml new file mode 100644 index 0000000..92d7c26 --- /dev/null +++ b/modules/nf-core/bracken/build/tests/tags.yml @@ -0,0 +1,2 @@ +bracken/build: + - "modules/nf-core/bracken/build/**" diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/cat/cat/environment.yml similarity index 65% rename from modules/nf-core/fastqc/environment.yml rename to modules/nf-core/cat/cat/environment.yml index 691d4c7..9b01c86 100644 --- a/modules/nf-core/fastqc/environment.yml +++ b/modules/nf-core/cat/cat/environment.yml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::fastqc=0.12.1 + - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf new file mode 100644 index 0000000..2862c64 --- /dev/null +++ b/modules/nf-core/cat/cat/main.nf @@ -0,0 +1,78 @@ +process CAT_CAT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+        'https://depot.galaxyproject.org/singularity/pigz:2.3.4' :
+        'biocontainers/pigz:2.3.4' }"
+
+    input:
+    tuple val(meta), path(files_in)
+
+    output:
+    tuple val(meta), path("${prefix}"), emit: file_out
+    path "versions.yml"               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    def file_list = files_in.collect { it.toString() }
+
+    // Pick the concatenation command (and optional compressor) from the input/output compression state:
+
+    // | input     | output     | command1 | command2 |
+    // |-----------|------------|----------|----------|
+    // | gzipped   | gzipped    | cat      |          |
+    // | ungzipped | ungzipped  | cat      |          |
+    // | gzipped   | ungzipped  | zcat     |          |
+    // | ungzipped | gzipped    | cat      | pigz     |
+
+    // Default output name is <meta.id> + suffix of the FIRST input (ext.prefix overrides); only file_list[0] decides in_zip
+    prefix   = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}"
+    out_zip  = prefix.endsWith('.gz')
+    in_zip   = file_list[0].endsWith('.gz')
+    command1 = (in_zip && !out_zip) ? 'zcat' : 'cat'
+    command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : ''
+    if(file_list.contains(prefix.trim())) {
+        error "The name of the input file can't be the same as for the output prefix in the " +
+        "module CAT_CAT (currently `$prefix`). Please choose a different one."
+    }
+    """
+    $command1 \\
+        $args \\
+        ${file_list.join(' ')} \\
+        $command2 \\
+        > ${prefix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
+    END_VERSIONS
+    """
+
+    stub:
+    def file_list = files_in.collect { it.toString() }
+    prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" // same default as script:, so stub and real runs emit the same file name
+    if(file_list.contains(prefix.trim())) {
+        error "The name of the input file can't be the same as for the output prefix in the " +
+        "module CAT_CAT (currently `$prefix`). Please choose a different one."
+    }
+    """
+    touch $prefix
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
+    END_VERSIONS
+    """
+}
+
+// Suffix of `filename`, keeping the extension(s) in front of a trailing .gz (e.g. .fasta.gz); '' when the name has no dot
+def getFileSuffix(filename) {
+    def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/
+    return match ? match[0][1] : (filename.contains('.') ? filename.substring(filename.lastIndexOf('.')) : '')
+}
diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml
new file mode 100644
index 0000000..81778a0
--- /dev/null
+++ b/modules/nf-core/cat/cat/meta.yml
@@ -0,0 +1,44 @@
+name: cat_cat
+description: A module for concatenation of gzipped or uncompressed files
+keywords:
+  - concatenate
+  - gzip
+  - cat
+tools:
+  - cat:
+      description: Just concatenation
+      documentation: https://man7.org/linux/man-pages/man1/cat.1.html
+      licence: ["GPL-3.0-or-later"]
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - files_in:
+        type: file
+        description: List of compressed / uncompressed files
+        pattern: "*"
+output:
+  - file_out:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}:
+          type: file
+          description: Concatenated file. Will be gzipped if file_out ends with ".gz"
+          pattern: "${file_out}"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@erikrikarddaniel"
+  - "@FriederikeHanssen"
+maintainers:
+  - "@erikrikarddaniel"
+  - "@FriederikeHanssen"
diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test
new file mode 100644
index 0000000..9cb1617
--- /dev/null
+++ b/modules/nf-core/cat/cat/tests/main.nf.test
@@ -0,0 +1,191 @@
+nextflow_process {
+
+    name "Test Process CAT_CAT"
+    script "../main.nf"
+    process "CAT_CAT"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "cat"
+    tag "cat/cat"
+
+    test("test_cat_name_conflict") {
+        when {
+            params {
+                outdir = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'genome', single_end:true ],
+                        [
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert !process.success },
+                { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") },
+                { assert snapshot(process.out.versions).match() }
+            )
+        }
+    }
+
+    test("test_cat_unzipped_unzipped") {
+        when {
+            params {
+                outdir = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+
+    test("test_cat_zipped_zipped") {
+        when {
+            params {
+                outdir = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } + + test("test_cat_zipped_unzipped") { + config './nextflow_zipped_unzipped.config' + + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_cat_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } + + test("test_cat_one_file_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap new file mode 100644 index 0000000..b7623ee --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -0,0 +1,147 @@ +{ + "test_cat_unzipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2023-10-16T14:32:18.500464399" + }, + "test_cat_zipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2023-10-16T14:32:49.642741302" + }, + "test_cat_zipped_zipped": { + "content": [ + [ + "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", + "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1", + 
"MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", + "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" + ], + 78, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:46.802978" + }, + "test_cat_name_conflict": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:29.45394" + }, + "test_cat_one_file_unzipped_zipped": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ], + 374, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:52:02.774016" + }, + "test_cat_unzipped_zipped": { + "content": [ + [ + ">MT192765.1 Severe acute 
respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ], + 375, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:57.581523" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config new file mode 100644 index 0000000..ec26b0f --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config @@ -0,0 +1,6 @@ + +process { + withName: CAT_CAT { + ext.prefix = 'cat.txt.gz' + } +} diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config new file mode 100644 index 0000000..fbc7978 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config @@ -0,0 +1,8 @@ + +process { + + withName: CAT_CAT { + ext.prefix = 'cat.txt' + } + +} diff --git a/modules/nf-core/cat/cat/tests/tags.yml b/modules/nf-core/cat/cat/tests/tags.yml new file mode 100644 index 0000000..37b578f --- /dev/null +++ b/modules/nf-core/cat/cat/tests/tags.yml @@ -0,0 +1,2 @@ +cat/cat: + - modules/nf-core/cat/cat/** diff --git a/modules/nf-core/centrifuge/build/environment.yml b/modules/nf-core/centrifuge/build/environment.yml new file mode 100644 index 0000000..4259289 --- /dev/null +++ b/modules/nf-core/centrifuge/build/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: 
+  - bioconda::centrifuge=1.0.4.1
diff --git a/modules/nf-core/centrifuge/build/main.nf b/modules/nf-core/centrifuge/build/main.nf
new file mode 100644
index 0000000..c741e53
--- /dev/null
+++ b/modules/nf-core/centrifuge/build/main.nf
@@ -0,0 +1,62 @@
+// Builds a Centrifuge index (<prefix>*.cf) from a FASTA file plus its conversion, taxonomy-tree and name tables.
+process CENTRIFUGE_BUILD {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4.1--hdcf5f25_1' :
+        'biocontainers/centrifuge:1.0.4.1--hdcf5f25_1' }"
+
+    input:
+    tuple val(meta), path(fasta)
+    path conversion_table
+    path taxonomy_tree
+    path name_table
+    path size_table
+
+    output:
+    tuple val(meta), path("*.cf") , emit: cf
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Extra centrifuge-build options from the pipeline config (ext.args); previously read but never passed on
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // size_table is optional: a falsy input (e.g. []) drops the flag from the command line
+    def size_table_cmd = size_table ? "--size-table ${size_table}" : ""
+    """
+    centrifuge-build \\
+        $args \\
+        -p $task.cpus \\
+        $fasta \\
+        ${prefix} \\
+        --conversion-table $conversion_table \\
+        --taxonomy-tree $taxonomy_tree \\
+        --name-table $name_table \\
+        ${size_table_cmd}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //')
+    END_VERSIONS
+    """
+
+    stub:
+    // Stub only touches empty placeholder index files; the version is still queried from the real binary
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.1.cf
+    touch ${prefix}.2.cf
+    touch ${prefix}.3.cf
+    touch ${prefix}.4.cf
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/centrifuge/build/meta.yml b/modules/nf-core/centrifuge/build/meta.yml
new file mode 100644
index 0000000..65a34f6
--- /dev/null
+++ b/modules/nf-core/centrifuge/build/meta.yml
@@ -0,0 +1,66 @@
+name: centrifuge_build +description: Build centrifuge database for taxonomic profiling +keywords: + - database + - metagenomics + - build + - db + - fasta +tools: + - centrifuge: + description: Classifier for metagenomic sequences + homepage: https://ccb.jhu.edu/software/centrifuge/ + documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml + doi: 10.1101/gr.210641.116 + licence: ["GPL v3"] + identifier: biotools:centrifuge + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Fasta file containing sequences to be used in centrifuge database. + pattern: "*.{fasta,fna}" + - - conversion_table: + type: file + description: A tab-separated file with sequence ID to taxonomy ID mapping + pattern: "*.{map}" + - - taxonomy_tree: + type: file + description: A \t|\t-separated file mapping taxonomy. Typically nodes.dmp from + the NCBI taxonomy dump. Links taxonomy IDs to their parents + pattern: "*.{dmp}" + - - name_table: + type: file + description: A '|'-separated file mapping taxonomy IDs to a name. Typically + names.dmp from the NCBI taxonomy dump. Links taxonomy IDs to their scientific + name + pattern: "*.{dmp}" + - - size_table: + type: file + description: Optional list of taxonomic IDs and lengths of the sequences belonging + to the same taxonomic IDs. + pattern: "*" +output: + - cf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.cf": + type: file + description: Index files for the centrifuge database + pattern: "*.{cf}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@sarah-buddle" + - "@jfy133" diff --git a/modules/nf-core/centrifuge/build/tests/main.nf.test b/modules/nf-core/centrifuge/build/tests/main.nf.test new file mode 100644 index 0000000..07d584f --- /dev/null +++ b/modules/nf-core/centrifuge/build/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process CENTRIFUGE_BUILD" + script "../main.nf" + process "CENTRIFUGE_BUILD" + + tag "modules" + tag "modules_nfcore" + tag "centrifuge" + tag "centrifuge/build" + + test("sarscov2 - fasta") { + + when { + process { + """ + input[0] = [ [id: 'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/seqid2taxid.map', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/nodes.dmp', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/names.dmp', checkIfExists: true) + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [id: 'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/seqid2taxid.map', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/nodes.dmp', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/names.dmp', 
checkIfExists: true) + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/centrifuge/build/tests/main.nf.test.snap b/modules/nf-core/centrifuge/build/tests/main.nf.test.snap new file mode 100644 index 0000000..fa7d1dc --- /dev/null +++ b/modules/nf-core/centrifuge/build/tests/main.nf.test.snap @@ -0,0 +1,88 @@ +{ + "sarscov2 - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "test.1.cf:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.2.cf:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.3.cf:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.4.cf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,87203c73096e9db92fe7555781a90d93" + ], + "cf": [ + [ + { + "id": "test" + }, + [ + "test.1.cf:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.2.cf:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.3.cf:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.4.cf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,87203c73096e9db92fe7555781a90d93" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T10:10:35.22038034" + }, + "sarscov2 - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "test.1.cf:md5,07d811cd4e350d56267183d2ac7023a5", + "test.2.cf:md5,23551529b34af3585d35f0e36461d9df", + "test.3.cf:md5,445a974b5d64518944cb539df4149850", + "test.4.cf:md5,c609dc3c247d5f3ac5426079de7fbe36" + ] + ] + ], + "1": [ + "versions.yml:md5,87203c73096e9db92fe7555781a90d93" + ], + "cf": [ + [ + { + "id": "test" + }, + [ + "test.1.cf:md5,07d811cd4e350d56267183d2ac7023a5", + "test.2.cf:md5,23551529b34af3585d35f0e36461d9df", + "test.3.cf:md5,445a974b5d64518944cb539df4149850", + "test.4.cf:md5,c609dc3c247d5f3ac5426079de7fbe36" + ] + ] + ], + "versions": [ + "versions.yml:md5,87203c73096e9db92fe7555781a90d93" + ] + } + ], + 
"meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T10:10:19.439629103" + } +} \ No newline at end of file diff --git a/modules/nf-core/centrifuge/build/tests/tags.yml b/modules/nf-core/centrifuge/build/tests/tags.yml new file mode 100644 index 0000000..eeef98a --- /dev/null +++ b/modules/nf-core/centrifuge/build/tests/tags.yml @@ -0,0 +1,2 @@ +centrifuge/build: + - "modules/nf-core/centrifuge/build/**" diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 0000000..9d79af9 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::multiqc=1.20 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf new file mode 100644 index 0000000..105f926 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -0,0 +1,24 @@ +process CUSTOM_DUMPSOFTWAREVERSIONS { + label 'process_single' + + // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.20--pyhdfd78af_0' : + 'biocontainers/multiqc:1.20--pyhdfd78af_0' }" + + input: + path versions + + output: + path "software_versions.yml" , emit: yml + path "software_versions_mqc.yml", emit: mqc_yml + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + template 'dumpsoftwareversions.py' +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml new file mode 100644 index 0000000..dc1e412 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -0,0 +1,43 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: custom_dumpsoftwareversions +description: Custom module used to dump software versions within the nf-core pipeline + template +keywords: + - custom + - dump + - version +tools: + - custom: + description: Custom module used to dump software versions within the nf-core pipeline + template + homepage: https://github.com/nf-core/tools + documentation: https://github.com/nf-core/tools + licence: ["MIT"] + identifier: "" +input: + - - versions: + type: file + description: YML file containing software versions + pattern: "*.yml" +output: + - yml: + - software_versions.yml: + type: file + description: Standard YML file containing software versions + pattern: "software_versions.yml" + - mqc_yml: + - software_versions_mqc.yml: + type: file + description: MultiQC custom content YML file containing software versions + pattern: "software_versions_mqc.yml" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py 
b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
new file mode 100755
index 0000000..b83b32c
--- /dev/null
+++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+
+
+"""Provide functions to merge multiple versions.yml files."""
+
+import platform
+from textwrap import dedent
+
+import yaml
+
+
+def _make_versions_html(versions):
+    """Generate a tabular HTML output of all versions for MultiQC."""
+    # NOTE(review): backslashes are doubled, presumably because Nextflow renders this template before Python runs it.
+    html = [
+        dedent(
+            """\\
+            <style>
+            #nf-core-versions tbody:nth-child(even) {
+                background-color: #f2f2f2;
+            }
+            </style>
+            <table class="table" style="width:100%" id="nf-core-versions">
+                <thead>
+                    <tr>
+                        <th> Process Name </th>
+                        <th> Software </th>
+                        <th> Version </th>
+                    </tr>
+                </thead>
+            """
+        )
+    ]
+    for process, tmp_versions in sorted(versions.items()):
+        # One <tbody> per process, which is the unit the nth-child(even) rule above stripes.
+        html.append("<tbody>")
+        for i, (tool, version) in enumerate(sorted(tmp_versions.items())):
+            # The process name is printed on the first row of its group only.
+            html.append(
+                dedent(
+                    f"""\\
+                    <tr>
+                        <td><samp>{process if (i == 0) else ''}</samp></td>
+                        <td><samp>{tool}</samp></td>
+                        <td><samp>{version}</samp></td>
+                    </tr>
+                    """
+                )
+            )
+        html.append("</tbody>")
+    html.append("</table>")
+    return "\\n".join(html)
+
+
+def main():
+    """Load all version files and generate merged output."""
+    versions_this_module = {}
+    versions_this_module["${task.process}"] = {
+        "python": platform.python_version(),
+        "yaml": yaml.__version__,
+    }
+
+    # BaseLoader reads every scalar as a plain string, so version numbers are never coerced to floats.
+    with open("$versions") as f:
+        versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module
+
+    # aggregate versions by the module name (derived from fully-qualified process name)
+    versions_by_module = {}
+    for process, process_versions in versions_by_process.items():
+        module = process.split(":")[-1]
+        try:
+            if versions_by_module[module] != process_versions:
+                raise AssertionError(
+                    "We assume that software versions are the same between all modules. "
+                    "If you see this error-message it means you discovered an edge-case "
+                    "and should open an issue in nf-core/tools. "
+                )
+        except KeyError:
+            versions_by_module[module] = process_versions
+
+    versions_by_module["Workflow"] = {
+        "Nextflow": "$workflow.nextflow.version",
+        "$workflow.manifest.name": "$workflow.manifest.version",
+    }
+
+    versions_mqc = {
+        "id": "software_versions",
+        "section_name": "${workflow.manifest.name} Software Versions",
+        "section_href": "https://github.com/${workflow.manifest.name}",
+        "plot_type": "html",
+        "description": "are collected at run time from the software output.",
+        "data": _make_versions_html(versions_by_module),
+    }
+
+    with open("software_versions.yml", "w") as f:
+        yaml.dump(versions_by_module, f, default_flow_style=False)
+    with open("software_versions_mqc.yml", "w") as f:
+        yaml.dump(versions_mqc, f, default_flow_style=False)
+
+    with open("versions.yml", "w") as f:
+        yaml.dump(versions_this_module, f, default_flow_style=False)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
new file mode 100644
index 0000000..b1e1630
--- /dev/null
+++
b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,43 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.mqc_yml[0]).readLines()[0..10], + file(process.out.yml[0]).readLines()[0..7] + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 0000000..5f59a93 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,33 @@ +{ + "Should run without failures": { + "content": [ + [ + "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" + ], + [ + "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", + " \\n\\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n \\n \\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n\\n\\n \\n\\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\" + ], + [ + "CUSTOM_DUMPSOFTWAREVERSIONS:", + " python: 3.11.7", + " yaml: 5.4.1", + "TOOL1:", + " tool1: 0.11.9", + "TOOL2:", + " tool2: '1.9'", + "Workflow:" + ] + ], + "timestamp": "2024-01-09T23:01:18.710682" + } +} \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 0000000..405aa24 --- /dev/null +++ 
b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/diamond/makedb/environment.yml b/modules/nf-core/diamond/makedb/environment.yml new file mode 100644 index 0000000..950c3c5 --- /dev/null +++ b/modules/nf-core/diamond/makedb/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::diamond=2.1.8 diff --git a/modules/nf-core/diamond/makedb/main.nf b/modules/nf-core/diamond/makedb/main.nf new file mode 100644 index 0000000..94011cf --- /dev/null +++ b/modules/nf-core/diamond/makedb/main.nf @@ -0,0 +1,65 @@ +process DIAMOND_MAKEDB { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/diamond:2.1.8--h43eeafb_0' : + 'biocontainers/diamond:2.1.8--h43eeafb_0' }" + + input: + tuple val(meta), path(fasta) + path taxonmap + path taxonnodes + path taxonnames + + output: + tuple val(meta), path("*.dmnd"), emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def is_compressed = fasta.getExtension() == "gz" ? true : false + def fasta_name = is_compressed ? fasta.getBaseName() : fasta + def insert_taxonmap = taxonmap ? "--taxonmap $taxonmap" : "" + def insert_taxonnodes = taxonnodes ? "--taxonnodes $taxonnodes" : "" + def insert_taxonnames = taxonnames ? 
"--taxonnames $taxonnames" : "" + + """ + if [ "${is_compressed}" == "true" ]; then + gzip -c -d ${fasta} > ${fasta_name} + fi + + diamond \\ + makedb \\ + --threads ${task.cpus} \\ + --in ${fasta_name} \\ + -d ${prefix} \\ + ${args} \\ + ${insert_taxonmap} \\ + ${insert_taxonnodes} \\ + ${insert_taxonnames} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + diamond: \$(diamond --version 2>&1 | tail -n 1 | sed 's/^diamond version //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.dmnd + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + diamond: \$(diamond --version 2>&1 | tail -n 1 | sed 's/^diamond version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/diamond/makedb/meta.yml b/modules/nf-core/diamond/makedb/meta.yml new file mode 100644 index 0000000..71de32e --- /dev/null +++ b/modules/nf-core/diamond/makedb/meta.yml @@ -0,0 +1,61 @@ +name: diamond_makedb +description: Builds a DIAMOND database +keywords: + - fasta + - diamond + - index + - database +tools: + - diamond: + description: Accelerated BLAST compatible local sequence aligner + homepage: https://github.com/bbuchfink/diamond + documentation: https://github.com/bbuchfink/diamond/wiki + tool_dev_url: https://github.com/bbuchfink/diamond + doi: "10.1038/s41592-021-01101-x" + licence: ["GPL v3.0"] + identifier: biotools:diamond +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input fasta file + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + - - taxonmap: + type: file + description: Optional mapping file of NCBI protein accession numbers to taxon + ids (gzip compressed), required for taxonomy functionality. + pattern: "*.gz" + - - taxonnodes: + type: file + description: Optional NCBI taxonomy nodes.dmp file, required for taxonomy functionality. 
+ pattern: "*.dmp" + - - taxonnames: + type: file + description: Optional NCBI taxonomy names.dmp file, required for taxonomy functionality. + pattern: "*.dmp" +output: + - db: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.dmnd": + type: file + description: File of the indexed DIAMOND database + pattern: "*.dmnd" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@spficklin" +maintainers: + - "@spficklin" + - "@vagkaratzas" + - "@jfy133" diff --git a/modules/nf-core/diamond/makedb/tests/main.nf.test b/modules/nf-core/diamond/makedb/tests/main.nf.test new file mode 100644 index 0000000..f27e142 --- /dev/null +++ b/modules/nf-core/diamond/makedb/tests/main.nf.test @@ -0,0 +1,86 @@ +nextflow_process { + + name "Test Process DIAMOND_MAKEDB" + script "../main.nf" + process "DIAMOND_MAKEDB" + tag "modules" + tag "modules_nfcore" + tag "diamond" + tag "diamond/makedb" + + test("Should build a DIAMOND db file from a fasta file without taxonomic information") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should build a DIAMOND db file from a zipped fasta file without taxonomic information") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta.gz', checkIfExists: true) ] ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + 
test("Should build a DIAMOND db file from a fasta file with taxonomic information") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] ] + input[1] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot.accession2taxid.gz', checkIfExists: true) ] + input[2] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_nodes.dmp', checkIfExists: true) ] + input[3] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_names.dmp', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/diamond/makedb/tests/main.nf.test.snap b/modules/nf-core/diamond/makedb/tests/main.nf.test.snap new file mode 100644 index 0000000..5abefce --- /dev/null +++ b/modules/nf-core/diamond/makedb/tests/main.nf.test.snap @@ -0,0 +1,101 @@ +{ + "Should build a DIAMOND db file from a fasta file with taxonomic information": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.dmnd:md5,9d57aa88cd1766adfda8360876fc0e4f" + ] + ], + "1": [ + "versions.yml:md5,29a8cea287d2206b9a837d2750de00c4" + ], + "db": [ + [ + { + "id": "test" + }, + "test.dmnd:md5,9d57aa88cd1766adfda8360876fc0e4f" + ] + ], + "versions": [ + "versions.yml:md5,29a8cea287d2206b9a837d2750de00c4" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-29T14:35:11.221381" + }, + "Should build a DIAMOND db file from a fasta file without taxonomic information": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.dmnd:md5,6039420745dd4db6e761244498460ae1" + ] + ], + "1": [ + "versions.yml:md5,29a8cea287d2206b9a837d2750de00c4" + ], + "db": [ + [ + { + "id": "test" + }, + "test.dmnd:md5,6039420745dd4db6e761244498460ae1" + ] + 
], + "versions": [ + "versions.yml:md5,29a8cea287d2206b9a837d2750de00c4" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-29T14:35:00.595693" + }, + "Should build a DIAMOND db file from a zipped fasta file without taxonomic information": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.dmnd:md5,6039420745dd4db6e761244498460ae1" + ] + ], + "1": [ + "versions.yml:md5,29a8cea287d2206b9a837d2750de00c4" + ], + "db": [ + [ + { + "id": "test" + }, + "test.dmnd:md5,6039420745dd4db6e761244498460ae1" + ] + ], + "versions": [ + "versions.yml:md5,29a8cea287d2206b9a837d2750de00c4" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-29T14:35:05.494933" + } +} \ No newline at end of file diff --git a/modules/nf-core/diamond/makedb/tests/tags.yml b/modules/nf-core/diamond/makedb/tests/tags.yml new file mode 100644 index 0000000..6fc7762 --- /dev/null +++ b/modules/nf-core/diamond/makedb/tests/tags.yml @@ -0,0 +1,2 @@ +diamond/makedb: + - modules/nf-core/diamond/makedb/** diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf deleted file mode 100644 index d8989f4..0000000 --- a/modules/nf-core/fastqc/main.nf +++ /dev/null @@ -1,64 +0,0 @@ -process FASTQC { - tag "$meta.id" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : - 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // Make list of old name and new name pairs to use for renaming in the bash while loop - def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } - def rename_to = old_new_pairs*.join(' ').join(' ') - def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') - - // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory) - // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 - // Dividing the task.memory by task.cpu allows to stick to requested amount of RAM in the label - def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') / task.cpus - // FastQC memory value allowed range (100 - 10000) - def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 
100 : memory_in_mb) - - """ - printf "%s %s\\n" $rename_to | while read old_name new_name; do - [ -f "\${new_name}" ] || ln -s \$old_name \$new_name - done - - fastqc \\ - $args \\ - --threads $task.cpus \\ - --memory $fastqc_memory \\ - $renamed_files - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.html - touch ${prefix}.zip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml deleted file mode 100644 index 4827da7..0000000 --- a/modules/nf-core/fastqc/meta.yml +++ /dev/null @@ -1,66 +0,0 @@ -name: fastqc -description: Run FastQC on sequenced reads -keywords: - - quality control - - qc - - adapters - - fastq -tools: - - fastqc: - description: | - FastQC gives general quality metrics about your reads. - It provides information about the quality score distribution - across your reads, the per base sequence content (%A/C/G/T). - You get information about adapter contamination and other - overrepresented sequences. - homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ - documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ - licence: ["GPL-2.0-only"] - identifier: biotools:fastqc -input: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. -output: - - html: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - "*.html": - type: file - description: FastQC report - pattern: "*_{fastqc.html}" - - zip: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - "*.zip": - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" -maintainers: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test deleted file mode 100644 index e9d79a0..0000000 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ /dev/null @@ -1,309 +0,0 @@ -nextflow_process { - - name "Test Process FASTQC" - script "../main.nf" - process "FASTQC" - - tag "modules" - tag "modules_nfcore" - tag "fastqc" - - test("sarscov2 single-end [fastq]") { - - when { - process { - """ - input[0] = Channel.of([ - [ id: 'test', single_end:true ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. - // looks like this:
Mon 2 Oct 2023
test.gz
- // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 paired-end [fastq]") { - - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, - { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, - { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, - { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, - { assert path(process.out.html[0][1][0]).text.contains("") }, - { assert path(process.out.html[0][1][1]).text.contains("") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 interleaved [fastq]") { - - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 paired-end [bam]") { - - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 multiple [fastq]") { - - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, - { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, - { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, - { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, - { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, - { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, - { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, - { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, - { assert path(process.out.html[0][1][0]).text.contains("") }, - { assert path(process.out.html[0][1][1]).text.contains("") }, - { assert path(process.out.html[0][1][2]).text.contains("") }, - { assert path(process.out.html[0][1][3]).text.contains("") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - 
test("sarscov2 custom_prefix") { - - when { - process { - """ - input[0] = Channel.of([ - [ id:'mysample', single_end:true ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 single-end [fastq] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [ id: 'test', single_end:true ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 paired-end [fastq] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 interleaved [fastq] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 paired-end [bam] - stub") { - 
- options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 multiple [fastq] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 custom_prefix - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [ id:'mysample', single_end:true ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } -} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap deleted file mode 100644 index d5db309..0000000 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ /dev/null @@ -1,392 +0,0 @@ -{ - "sarscov2 custom_prefix": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": 
"2024-07-22T11:02:16.374038" - }, - "sarscov2 single-end [fastq] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": true - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": true - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "test", - "single_end": true - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "test", - "single_end": true - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:24.993809" - }, - "sarscov2 custom_prefix - stub": { - "content": [ - { - "0": [ - [ - { - "id": "mysample", - "single_end": true - }, - "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "mysample", - "single_end": true - }, - "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "mysample", - "single_end": true - }, - "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "mysample", - "single_end": true - }, - "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:03:10.93942" - }, - "sarscov2 interleaved [fastq]": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:01:42.355718" - }, - "sarscov2 paired-end [bam]": { - "content": [ - [ - 
"versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:01:53.276274" - }, - "sarscov2 multiple [fastq]": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:05.527626" - }, - "sarscov2 paired-end [fastq]": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:01:31.188871" - }, - "sarscov2 paired-end [fastq] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:34.273566" - }, - "sarscov2 multiple [fastq] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], 
- "zip": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:03:02.304411" - }, - "sarscov2 single-end [fastq]": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:01:19.095607" - }, - "sarscov2 interleaved [fastq] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:44.640184" - }, - "sarscov2 paired-end [bam] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": 
"0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:53.550742" - } -} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml deleted file mode 100644 index 7834294..0000000 --- a/modules/nf-core/fastqc/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -fastqc: - - modules/nf-core/fastqc/** diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml new file mode 100644 index 0000000..c779485 --- /dev/null +++ b/modules/nf-core/gunzip/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf new file mode 100644 index 0000000..5e67e3b --- /dev/null +++ b/modules/nf-core/gunzip/main.nf @@ -0,0 +1,55 @@ +process GUNZIP { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$gunzip"), emit: gunzip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" + """ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ + $args \\ + $archive \\ + > $gunzip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml new file mode 100644 index 0000000..9066c03 --- /dev/null +++ b/modules/nf-core/gunzip/meta.yml @@ -0,0 +1,47 @@ +name: gunzip +description: Compresses and decompresses files. +keywords: + - gunzip + - compression + - decompression +tools: + - gunzip: + description: | + gzip is a file format and a software application used for file compression and decompression. 
+ documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" +output: + - gunzip: + - meta: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - $gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" + - "@gallvp" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test new file mode 100644 index 0000000..776211a --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -0,0 +1,121 @@ +nextflow_process { + + name "Test Process GUNZIP" + script "../main.nf" + process "GUNZIP" + tag "gunzip" + tag "modules_nfcore" + tag "modules" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix") { + + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + 
} + + test("Should run without failures - stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix - stub") { + + options '-stub' + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap new file mode 100644 index 0000000..069967e --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -0,0 +1,134 @@ +{ + "Should run without failures - prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:10.861293" + }, + "Should run without failures - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ 
+ "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:05.857145" + }, + "Should run without failures": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2023-10-17T15:35:37.690477896" + }, + "Should run without failures - prefix": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:33:32.921739" + } +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/nextflow.config b/modules/nf-core/gunzip/tests/nextflow.config new file mode 100644 index 0000000..dec7764 --- /dev/null +++ b/modules/nf-core/gunzip/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GUNZIP { + ext.prefix = { "${meta.id}.xyz" } + } +} diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml new file mode 100644 index 0000000..fd3f691 --- /dev/null +++ b/modules/nf-core/gunzip/tests/tags.yml @@ -0,0 +1,2 @@ +gunzip: + - modules/nf-core/gunzip/** diff --git a/modules/nf-core/kaiju/mkfmi/environment.yml b/modules/nf-core/kaiju/mkfmi/environment.yml new file mode 100644 index 0000000..3bb316c --- /dev/null +++ 
b/modules/nf-core/kaiju/mkfmi/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::kaiju=1.10.0 diff --git a/modules/nf-core/kaiju/mkfmi/main.nf b/modules/nf-core/kaiju/mkfmi/main.nf new file mode 100644 index 0000000..d08be63 --- /dev/null +++ b/modules/nf-core/kaiju/mkfmi/main.nf @@ -0,0 +1,48 @@ +process KAIJU_MKFMI { + tag "$meta.id" + label 'process_high' + + conda "bioconda::kaiju=1.10.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/kaiju:1.10.0--h43eeafb_0': + 'biocontainers/kaiju:1.10.0--h43eeafb_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.fmi"), emit: fmi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + kaiju-mkbwt \\ + $args \\ + -n $task.cpus \\ + -o ${prefix} \\ + ${fasta} + kaiju-mkfmi ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.fmi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' )) + END_VERSIONS + """ +} diff --git a/modules/nf-core/kaiju/mkfmi/meta.yml b/modules/nf-core/kaiju/mkfmi/meta.yml new file mode 100644 index 0000000..1653270 --- /dev/null +++ b/modules/nf-core/kaiju/mkfmi/meta.yml @@ -0,0 +1,46 @@ +name: "kaiju_mkfmi" +description: Make Kaiju FMI-index file from a protein FASTA file +keywords: + - classify + - metagenomics + - fastq + - taxonomic profiling + - database + - index +tools: + - "kaiju": + description: "Fast and sensitive taxonomic classification for metagenomics" + homepage: 
"https://bioinformatics-centre.github.io/kaiju/" + documentation: "https://github.com/bioinformatics-centre/kaiju/blob/master/README.md" + tool_dev_url: "https://github.com/bioinformatics-centre/kaiju" + doi: "10.1038/ncomms11257" + licence: ["GNU GPL v3"] + identifier: biotools:kaiju +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - fasta: + type: file + description: Uncompressed Protein FASTA file (mandatory) + pattern: "*.{fa,faa,fasta}" +output: + - fmi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.fmi": + type: file + description: Kaiju FM-index file + pattern: "*.{fmi}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@alxndrdiaz" diff --git a/modules/nf-core/kaiju/mkfmi/tests/main.nf.test b/modules/nf-core/kaiju/mkfmi/tests/main.nf.test new file mode 100644 index 0000000..d8062b0 --- /dev/null +++ b/modules/nf-core/kaiju/mkfmi/tests/main.nf.test @@ -0,0 +1,58 @@ +nextflow_process { + + name "Test Process KAIJU_MKFMI" + script "../main.nf" + process "KAIJU_MKFMI" + + tag "modules" + tag "modules_nfcore" + tag "kaiju" + tag "kaiju/mkfmi" + + test("sarscov2 - proteome - fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(file(process.out.fmi[0][1]).name).match() } + ) + } + + } + +} diff --git a/modules/nf-core/kaiju/mkfmi/tests/main.nf.test.snap b/modules/nf-core/kaiju/mkfmi/tests/main.nf.test.snap new file mode 100644 index 0000000..9120a16 --- /dev/null +++ b/modules/nf-core/kaiju/mkfmi/tests/main.nf.test.snap @@ -0,0 +1,39 @@ +{ + "sarscov2 - fasta - stub": { + "content": [ + "test.fmi" + ], + "timestamp": "2024-01-20T16:27:00.670884904" + }, + "sarscov2 - proteome - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fmi:md5,54fd89f5e4eab61af30175e8aa389598" + ] + ], + "1": [ + "versions.yml:md5,3cbd427d0187ffee188347830d33dc12" + ], + "fmi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fmi:md5,54fd89f5e4eab61af30175e8aa389598" + ] + ], + "versions": [ + "versions.yml:md5,3cbd427d0187ffee188347830d33dc12" + ] + } + ], + "timestamp": "2024-01-20T16:26:48.062489887" + } +} \ No newline at end of file diff --git a/modules/nf-core/kaiju/mkfmi/tests/nextflow.config b/modules/nf-core/kaiju/mkfmi/tests/nextflow.config new file mode 100644 index 0000000..ae99671 --- /dev/null +++ b/modules/nf-core/kaiju/mkfmi/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: KAIJU_MKFMI { + ext.args = '-a ACDEFGHIKLMNPQRSTVWY' + } +} \ No newline at end of file diff --git a/modules/nf-core/kaiju/mkfmi/tests/tags.yml b/modules/nf-core/kaiju/mkfmi/tests/tags.yml new file mode 100644 index 0000000..ee1305f --- /dev/null +++ b/modules/nf-core/kaiju/mkfmi/tests/tags.yml @@ -0,0 +1,2 @@ +kaiju/mkfmi: + - "modules/nf-core/kaiju/mkfmi/**" diff --git a/modules/nf-core/kraken2/add/environment.yml b/modules/nf-core/kraken2/add/environment.yml new file mode 100644 index 0000000..9e63d98 --- /dev/null +++ b/modules/nf-core/kraken2/add/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda 
+dependencies: + - "bioconda::kraken2=2.1.3" + - "coreutils=9.4" + - "pigz=2.8" diff --git a/modules/nf-core/kraken2/add/main.nf b/modules/nf-core/kraken2/add/main.nf new file mode 100644 index 0000000..36d3abb --- /dev/null +++ b/modules/nf-core/kraken2/add/main.nf @@ -0,0 +1,56 @@ +process KRAKEN2_ADD { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-8706a1dd73c6cc426e12dd4dd33a5e917b3989ae:c8cbdc8ff4101e6745f8ede6eb5261ef98bdaff4-0' : + 'biocontainers/mulled-v2-8706a1dd73c6cc426e12dd4dd33a5e917b3989ae:c8cbdc8ff4101e6745f8ede6eb5261ef98bdaff4-0' }" + + input: + tuple val(meta), path(fasta) + path taxonomy_names, stageAs: 'taxonomy/names.dmp' + path taxonomy_nodes, stageAs: 'taxonomy/nodes.dmp' + path accession2taxid, stageAs: 'taxonomy/*' + + output: + tuple val(meta), path("$prefix"), emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir ${prefix} + mv "taxonomy" ${prefix} + + echo ${fasta} |\\ + tr -s " " "\\012" |\\ + xargs -I {} -n1 kraken2-build \\ + --add-to-library {} \\ + --db ${prefix} \\ + --threads $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir "$prefix" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//') + END_VERSIONS + """ + +} diff --git a/modules/nf-core/kraken2/add/meta.yml b/modules/nf-core/kraken2/add/meta.yml new file mode 100644 index 0000000..e1ad7d9 --- /dev/null 
+++ b/modules/nf-core/kraken2/add/meta.yml @@ -0,0 +1,61 @@ +name: kraken2_add +description: Adds fasta files to a Kraken2 taxonomic database +keywords: + - metagenomics + - db + - classification + - build + - kraken2 + - add +tools: + - kraken2: + description: "Kraken2 is a system for assigning taxonomic labels to short DNA + sequences, usually obtained through metagenomic studies." + homepage: https://ccb.jhu.edu/software/kraken2/ + documentation: https://github.com/DerrickWood/kraken2/wiki/Manual + tool_dev_url: "https://github.com/DerrickWood/kraken2" + doi: 10.1186/s13059-019-1891-0 + licence: ["MIT"] + identifier: biotools:kraken2 +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - fasta: + type: file + description: fasta file that will be added to the database + pattern: "*.{fa,fasta,fna,ffn}" + - - taxonomy_names: + type: file + description: used for associating sequences with taxonomy IDs + pattern: "*.dmp" + - - taxonomy_nodes: + type: file + description: tree nodes using NCBI taxonomy nomenclature + pattern: "*.dmp" + - - accession2taxid: + type: file + description: associates sequence accession IDs to taxonomy IDs + pattern: "*.accession2taxid" +output: + - db: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - $prefix: + type: directory + description: contains required files to build the database + pattern: "*/" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@alxndrdiaz" +maintainers: + - "@alxndrdiaz" diff --git a/modules/nf-core/kraken2/add/tests/main.nf.test b/modules/nf-core/kraken2/add/tests/main.nf.test new file mode 100644 index 0000000..18e1a9e --- /dev/null +++ b/modules/nf-core/kraken2/add/tests/main.nf.test @@ -0,0 +1,92 @@ +nextflow_process { + + name "Test Process KRAKEN2_ADD" + script "../main.nf" + process "KRAKEN2_ADD" + tag "kraken2" + tag "kraken2/add" + tag "gunzip" + tag "modules" + tag "modules_nfcore" + + setup { + run("GUNZIP") { + script "modules/nf-core/gunzip/main.nf" + process { + """ + input[0] = Channel.of([ + [], + file( + params.modules_testdata_base_path + "genomics/sarscov2/metagenome/prot.accession2taxid.gz", + checkIfExists: true + ) + ]) + """ + } + } + } + + test("sarscov2 protein_db") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/proteome.fasta", checkIfExists: true) + ] + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/metagenome/prot_names.dmp", checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + "genomics/sarscov2/metagenome/prot_nodes.dmp", checkIfExists: true) + input[3] = GUNZIP.out.gunzip.map{ it[1] } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.db.get(0).get(1) ==~ ".*/test" }, + { assert snapshot ( + path("${process.out.db[0][1]}/library/added/").list().size(), + path("${process.out.db[0][1]}/taxonomy/") + ).match() + } + ) + } + + } + +test("sarscov2 protein_db stub") { + + options "-stub" + + when { + process 
{ + """ + input[0] = [ + [ id:'test' ], + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/proteome.fasta", checkIfExists: true) + ] + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/metagenome/prot_names.dmp", checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + "genomics/sarscov2/metagenome/prot_nodes.dmp", checkIfExists: true) + input[3] = GUNZIP.out.gunzip.map{ it[1] } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot (process.out).match() + } + ) + } + + } +} diff --git a/modules/nf-core/kraken2/add/tests/main.nf.test.snap b/modules/nf-core/kraken2/add/tests/main.nf.test.snap new file mode 100644 index 0000000..5050d69 --- /dev/null +++ b/modules/nf-core/kraken2/add/tests/main.nf.test.snap @@ -0,0 +1,54 @@ +{ + "sarscov2 protein_db": { + "content": [ + 6, + [ + "names.dmp:md5,130f9132095562e09c732679c562f5e9", + "nodes.dmp:md5,c471c27a4ce85ae74d2c63633c9ce1e3", + "prot.accession2taxid:md5,c0f96ba5dbb00150b4b805ba6dab7bea" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T20:20:05.09765" + }, + "sarscov2 protein_db stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + + ] + ] + ], + "1": [ + "versions.yml:md5,9a7b40921622d8c873fb3bfd8bac4c3d" + ], + "db": [ + [ + { + "id": "test" + }, + [ + + ] + ] + ], + "versions": [ + "versions.yml:md5,9a7b40921622d8c873fb3bfd8bac4c3d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-20T09:07:41.484004342" + } +} \ No newline at end of file diff --git a/modules/nf-core/kraken2/add/tests/tags.yml b/modules/nf-core/kraken2/add/tests/tags.yml new file mode 100644 index 0000000..3ee6390 --- /dev/null +++ b/modules/nf-core/kraken2/add/tests/tags.yml @@ -0,0 +1,3 @@ +kraken2/add: + - 
modules/nf-core/kraken2/add/** + - modules/nf-core/gunzip/** diff --git a/modules/nf-core/kraken2/build/environment.yml b/modules/nf-core/kraken2/build/environment.yml new file mode 100644 index 0000000..9e63d98 --- /dev/null +++ b/modules/nf-core/kraken2/build/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::kraken2=2.1.3" + - "coreutils=9.4" + - "pigz=2.8" diff --git a/modules/nf-core/kraken2/build/main.nf b/modules/nf-core/kraken2/build/main.nf new file mode 100644 index 0000000..caba65f --- /dev/null +++ b/modules/nf-core/kraken2/build/main.nf @@ -0,0 +1,51 @@ +process KRAKEN2_BUILD { + tag "$meta.id" + label 'process_medium' + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-8706a1dd73c6cc426e12dd4dd33a5e917b3989ae:c8cbdc8ff4101e6745f8ede6eb5261ef98bdaff4-0': + 'biocontainers/mulled-v2-8706a1dd73c6cc426e12dd4dd33a5e917b3989ae:c8cbdc8ff4101e6745f8ede6eb5261ef98bdaff4-0' }" + + input: + tuple val(meta), path(db) + val cleaning + + output: + tuple val(meta), path("$prefix"), emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + runclean = cleaning ? 
"kraken2-build --clean --db ${db}" : "" + """ + kraken2-build \\ + --build \\ + $args \\ + --threads ${task.cpus} \\ + --db ${db} + $runclean + if [[ \$(basename ${db}) != "${prefix}" ]]; then + mv ${db}/* ${prefix} + fi + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p "$prefix" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/kraken2/build/meta.yml b/modules/nf-core/kraken2/build/meta.yml new file mode 100644 index 0000000..6d7da1d --- /dev/null +++ b/modules/nf-core/kraken2/build/meta.yml @@ -0,0 +1,53 @@ +name: "kraken2_build" +description: Builds Kraken2 database +keywords: + - metagenomics + - db + - classification + - build + - kraken2 +tools: + - kraken2: + description: "Kraken2 is a system for assigning taxonomic labels to short DNA + sequences, usually obtained through metagenomic studies." + homepage: https://ccb.jhu.edu/software/kraken2/ + documentation: https://github.com/DerrickWood/kraken2/wiki/Manual + tool_dev_url: "https://github.com/DerrickWood/kraken2" + doi: 10.1186/s13059-019-1891-0 + licence: ["MIT"] + args_id: "$args" + identifier: biotools:kraken2 +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - db: + type: directory + description: contains required files to build the database + pattern: "*/" + - - cleaning: + type: boolean + description: activate or deactivate (true or false) cleaning of intermediate + files +output: + - db: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - $prefix: + type: directory + description: contains the database that can be used to perform taxonomic classification + pattern: "*/" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@alxndrdiaz" +maintainers: + - "@alxndrdiaz" diff --git a/modules/nf-core/kraken2/build/tests/main.nf.test b/modules/nf-core/kraken2/build/tests/main.nf.test new file mode 100644 index 0000000..9a8d454 --- /dev/null +++ b/modules/nf-core/kraken2/build/tests/main.nf.test @@ -0,0 +1,101 @@ +nextflow_process { + + name "Test Process KRAKEN2_BUILD" + script "../main.nf" + process "KRAKEN2_BUILD" + config "./nextflow.config" + tag "kraken2" + tag "kraken2/build" + tag "kraken2/add" + tag "gunzip" + tag "modules" + tag "modules_nfcore" + + setup { + + run("GUNZIP") { + script "modules/nf-core/gunzip/main.nf" + process { + """ + input[0] = Channel.of([ + [], + file( + params.modules_testdata_base_path + "genomics/sarscov2/metagenome/prot.accession2taxid.gz", + checkIfExists: true + ) + ]) + """ + } + } + + run("KRAKEN2_ADD") { + script "modules/nf-core/kraken2/add/main.nf" + process { + """ + input[0] = [ + [ id:'test' ], + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/proteome.fasta", checkIfExists: true) + ] + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/metagenome/prot_names.dmp", checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + "genomics/sarscov2/metagenome/prot_nodes.dmp", checkIfExists: true) + input[3] = GUNZIP.out.gunzip.map{ it[1] } + """ + } + } + + } + + test("sarscov2 protein_db") { + + when { + process { + """ + input[0] = KRAKEN2_ADD.out.db + input[1] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.db.get(0).get(1) ==~ 
".*/test" }, + { assert snapshot ( + path("${process.out.db[0][1]}/hash.k2d"), + path("${process.out.db[0][1]}/taxo.k2d"), + file("${process.out.db[0][1]}/opts.k2d").name, + file("${process.out.db[0][1]}/unmapped.txt").name + ).match() + } + ) + } + + } + + test("sarscov2 protein_db stub") { + + options "-stub" + + when { + process { + """ + input[0] = KRAKEN2_ADD.out.db + input[1] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot (process.out).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/kraken2/build/tests/main.nf.test.snap b/modules/nf-core/kraken2/build/tests/main.nf.test.snap new file mode 100644 index 0000000..fb87c51 --- /dev/null +++ b/modules/nf-core/kraken2/build/tests/main.nf.test.snap @@ -0,0 +1,52 @@ +{ + "sarscov2 protein_db": { + "content": [ + "hash.k2d:md5,e9984a5e98f87c0488cb5e7618d5bbe0", + "taxo.k2d:md5,29d65b1796e09191fd7bdcaa24130459", + "opts.k2d", + "unmapped.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-22T08:10:48.644001909" + }, + "sarscov2 protein_db stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + + ] + ] + ], + "1": [ + "versions.yml:md5,df044948ec1fdb342a1d6fadde84b334" + ], + "db": [ + [ + { + "id": "test" + }, + [ + + ] + ] + ], + "versions": [ + "versions.yml:md5,df044948ec1fdb342a1d6fadde84b334" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-19T17:41:57.715748" + } +} \ No newline at end of file diff --git a/modules/nf-core/kraken2/build/tests/nextflow.config b/modules/nf-core/kraken2/build/tests/nextflow.config new file mode 100644 index 0000000..11ac39e --- /dev/null +++ b/modules/nf-core/kraken2/build/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: KRAKEN2_BUILD { + ext.args = '--protein' + } + +} diff --git 
a/modules/nf-core/kraken2/build/tests/tags.yml b/modules/nf-core/kraken2/build/tests/tags.yml new file mode 100644 index 0000000..4a5e61d --- /dev/null +++ b/modules/nf-core/kraken2/build/tests/tags.yml @@ -0,0 +1,4 @@ +kraken2/build: + - modules/nf-core/kraken2/build/** + - modules/nf-core/kraken2/add/** + - modules/nf-core/gunzip/** diff --git a/modules/nf-core/krakenuniq/build/environment.yml b/modules/nf-core/krakenuniq/build/environment.yml new file mode 100644 index 0000000..bbf85c3 --- /dev/null +++ b/modules/nf-core/krakenuniq/build/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::krakenuniq=1.0.4 diff --git a/modules/nf-core/krakenuniq/build/krakenuniq-build.diff b/modules/nf-core/krakenuniq/build/krakenuniq-build.diff new file mode 100644 index 0000000..9fd794c --- /dev/null +++ b/modules/nf-core/krakenuniq/build/krakenuniq-build.diff @@ -0,0 +1,14 @@ +Changes in module 'nf-core/krakenuniq/build' +--- modules/nf-core/krakenuniq/build/main.nf ++++ modules/nf-core/krakenuniq/build/main.nf +@@ -8,7 +8,7 @@ + 'biocontainers/krakenuniq:1.0.4--pl5321h6dccd9a_2' }" + + input: +- tuple val(meta), path(custom_library_dir, stageAs: "library/*"), path(custom_taxonomy_dir, stageAs: "taxonomy"), path(custom_seqid2taxid) ++ tuple val(meta), path(custom_library_dir, stageAs: "library/*"), path(custom_taxonomy_dir, stageAs: "taxonomy/*"), path(custom_seqid2taxid) + + output: + tuple val(meta), path("$prefix/"), emit: db + +************************************************************ diff --git a/modules/nf-core/krakenuniq/build/main.nf b/modules/nf-core/krakenuniq/build/main.nf new file mode 100644 index 0000000..c55dd4d --- /dev/null +++ b/modules/nf-core/krakenuniq/build/main.nf @@ -0,0 +1,37 @@ +process KRAKENUNIQ_BUILD { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/krakenuniq:1.0.4--pl5321h6dccd9a_2': + 'biocontainers/krakenuniq:1.0.4--pl5321h6dccd9a_2' }" + + input: + tuple val(meta), path(custom_library_dir, stageAs: "library/*"), path(custom_taxonomy_dir, stageAs: "taxonomy/*"), path(custom_seqid2taxid) + + output: + tuple val(meta), path("$prefix/"), emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + custom_db = custom_library_dir ? "mkdir $prefix && mv library taxonomy $custom_seqid2taxid $prefix" : "" + """ + $custom_db + + krakenuniq-build \\ + $args \\ + --threads ${task.cpus} \\ + --db ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/krakenuniq/build/meta.yml b/modules/nf-core/krakenuniq/build/meta.yml new file mode 100644 index 0000000..bafc3cf --- /dev/null +++ b/modules/nf-core/krakenuniq/build/meta.yml @@ -0,0 +1,56 @@ +name: "krakenuniq_build" +description: Download and build (custom) KrakenUniq databases +keywords: + - metagenomics + - krakenuniq + - database + - build + - ncbi +tools: + - "krakenuniq": + description: "Metagenomics classifier with unique k-mer counting for more specific + results" + homepage: https://github.com/fbreitwieser/krakenuniq + documentation: https://github.com/fbreitwieser/krakenuniq + tool_dev_url: https://github.com/fbreitwieser/krakenuniq + doi: 10.1186/s13059-018-1568-0 + licence: ["MIT"] + identifier: biotools:KrakenUniq +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - custom_library_dir: + type: directory + description: Optional directory containing custom fasta files for database build + pattern: "*" + - custom_taxonomy_dir: + type: directory + description: Optional directory containing custom taxonomy files for database + build + pattern: "*" + - custom_seqid2taxid: + type: file + description: custom seqid2taxid +output: + - db: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - $prefix/: + type: directory + description: Directory containing KrakenUniq database + pattern: "*/" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/malt/build/environment.yml b/modules/nf-core/malt/build/environment.yml new file mode 100644 index 0000000..15a7750 --- /dev/null +++ b/modules/nf-core/malt/build/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::malt=0.61 diff --git a/modules/nf-core/malt/build/main.nf b/modules/nf-core/malt/build/main.nf new file mode 100644 index 0000000..6f05e9e --- /dev/null +++ b/modules/nf-core/malt/build/main.nf @@ -0,0 +1,54 @@ +process MALT_BUILD { + + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/malt:0.61--hdfd78af_0' : + 'biocontainers/malt:0.61--hdfd78af_0' }" + + input: + path fastas + path gff + path mapping_db + + output: + path "malt_index/" , emit: index + path "versions.yml" , emit: versions + path "malt-build.log", emit: log + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def igff = gff ? 
"-igff ${gff}" : "" + + """ + malt-build \\ + -v \\ + --input ${fastas.join(' ')} \\ + $igff \\ + -d 'malt_index/' \\ + -t $task.cpus \\ + $args \\ + -mdb ${mapping_db}/*.db |&tee malt-build.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + malt: \$(malt-build --help |& tail -n 3 | head -n 1 | cut -f 2 -d'(' | cut -f 1 -d ',' | cut -d ' ' -f 2) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + """ + touch malt-build.log + mkdir malt_index/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + malt: \$(malt-run --help 2>&1 | grep -o 'version.* ' | cut -f 1 -d ',' | cut -f2 -d ' ') + END_VERSIONS + """ +} diff --git a/modules/nf-core/malt/build/meta.yml b/modules/nf-core/malt/build/meta.yml new file mode 100644 index 0000000..da1fc6c --- /dev/null +++ b/modules/nf-core/malt/build/meta.yml @@ -0,0 +1,55 @@ +name: malt_build +description: MALT, an acronym for MEGAN alignment tool, is a sequence alignment and + analysis tool designed for processing high-throughput sequencing data, especially + in the context of metagenomics. 
+keywords: + - malt + - alignment + - metagenomics + - ancient DNA + - aDNA + - palaeogenomics + - archaeogenomics + - microbiome + - database +tools: + - malt: + description: A tool for mapping metagenomic data + homepage: https://www.wsi.uni-tuebingen.de/lehrstuehle/algorithms-in-bioinformatics/software/malt/ + documentation: https://software-ab.cs.uni-tuebingen.de/download/malt/manual.pdf + doi: "10.1038/s41559-017-0446-6" + licence: ["GPL v3"] + identifier: "" +input: + - - fastas: + type: file + description: Directory of, or list of FASTA reference files for indexing + pattern: "*/|*.fasta" + - - gff: + type: file + description: Directory of, or GFF3 files of input FASTA files + pattern: "*/|*.gff|*.gff3" + - - mapping_db: + type: file + description: MEGAN .db file from https://software-ab.cs.uni-tuebingen.de/download/megan6/welcome.html + pattern: "*.db" +output: + - index: + - malt_index/: + type: directory + description: Directory containing MALT database index directory + pattern: "malt_index/" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + - log: + - malt-build.log: + type: file + description: Log file from STD out of malt-build + pattern: "malt-build.log" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/malt/build/tests/main.nf.test b/modules/nf-core/malt/build/tests/main.nf.test new file mode 100644 index 0000000..2294602 --- /dev/null +++ b/modules/nf-core/malt/build/tests/main.nf.test @@ -0,0 +1,84 @@ +nextflow_process { + + name "Test Process MALT_BUILD" + script "../main.nf" + process "MALT_BUILD" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "malt" + tag "malt/build" + tag "unzip" + + setup { + run("UNZIP") { + script "../../../unzip/main.nf" + process { + """ + input[0] = [[], file("s3://ngi-igenomes/test-data/createtaxdb/taxonomy/megan-nucl-Feb2022.db.zip", checkIfExists: true)] + """ + } + } + } + + 
test("sarscov2 - fastq") { + + when { + process { + """ + input[0] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + input[1] = [] + input[2] = UNZIP.out.unzipped_archive.map { it[1] } + """ + } + } + + then { + + assertAll( + { assert process.success }, + { assert snapshot( + path("${process.out.index[0]}/index0.idx"), + path("${process.out.index[0]}/ref.db"), + path("${process.out.index[0]}/ref.idx"), + path("${process.out.index[0]}/ref.inf"), + path("${process.out.index[0]}/taxonomy.idx"), + path("${process.out.index[0]}/taxonomy.map"), + path("${process.out.index[0]}/taxonomy.tre"), + process.out.versions + ) + .match() + }, + { assert path(process.out.log[0]).readLines().last().contains("Peak memory") }, + { assert path("${process.out.index[0]}/table0.db").exists() }, + { assert path("${process.out.index[0]}/table0.idx").exists() }, + ) + } + + } + + test("sarscov2 - fastq - stub") { + + options "-stub" + + when { + process { + """ + input[0] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + input[1] = [] + input[2] = UNZIP.out.unzipped_archive.map { it[1] } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/malt/build/tests/main.nf.test.snap b/modules/nf-core/malt/build/tests/main.nf.test.snap new file mode 100644 index 0000000..421a154 --- /dev/null +++ b/modules/nf-core/malt/build/tests/main.nf.test.snap @@ -0,0 +1,54 @@ +{ + "sarscov2 - fastq - stub": { + "content": [ + { + "0": [ + [ + + ] + ], + "1": [ + "versions.yml:md5,52c299d59c90219b9b442ee54f1acc97" + ], + "2": [ + "malt-build.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "index": [ + [ + + ] + ], + "log": [ + "malt-build.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,52c299d59c90219b9b442ee54f1acc97" + ] + } + ], + "meta": { + "nf-test": 
"0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-01-26T11:49:19.685017271" + }, + "sarscov2 - fastq": { + "content": [ + "index0.idx:md5,1954f2c00b418d00112829b0a6adb8ce", + "ref.db:md5,772a09aeb162515485b037604399f2bd", + "ref.idx:md5,7dea362b3fac8e00956a4952a3d4f474", + "ref.inf:md5,b146842067cf278ef1d23e6c2e7c0c35", + "taxonomy.idx:md5,bb335e7c378a5bd85761b6eeed16d984", + "taxonomy.map:md5,5bb3f2192e925bca2e61e4b54f1671e0", + "taxonomy.tre:md5,f76fb2d5aa9b0d637234d48175841e0e", + [ + "versions.yml:md5,52c299d59c90219b9b442ee54f1acc97" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-30T19:04:45.72181253" + } +} \ No newline at end of file diff --git a/modules/nf-core/malt/build/tests/nextflow.config b/modules/nf-core/malt/build/tests/nextflow.config new file mode 100644 index 0000000..c538bb5 --- /dev/null +++ b/modules/nf-core/malt/build/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + + + withName: MALT_BUILD { + ext.args = "--sequenceType DNA" + } + +} diff --git a/modules/nf-core/malt/build/tests/tags.yml b/modules/nf-core/malt/build/tests/tags.yml new file mode 100644 index 0000000..4e3c172 --- /dev/null +++ b/modules/nf-core/malt/build/tests/tags.yml @@ -0,0 +1,2 @@ +malt/build: + - "modules/nf-core/malt/build/**" diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index f1cd99b..6f5b867 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::multiqc=1.24.1 + - bioconda::multiqc=1.25.1 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index b9ccebd..ddecc6c 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,24 +1,23 @@ process MULTIQC { label 'process_single' - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.25--pyhdfd78af_0' : - 'biocontainers/multiqc:1.25--pyhdfd78af_0' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/multiqc:1.25.1--pyhdfd78af_0' + : 'biocontainers/multiqc:1.25.1--pyhdfd78af_0'}" input: - path multiqc_files, stageAs: "?/*" - path(multiqc_config) - path(extra_multiqc_config) - path(multiqc_logo) - path(replace_names) - path(sample_names) + path multiqc_files, stageAs: "?/*" + path multiqc_config + path extra_multiqc_config + path multiqc_logo + path replace_names + path sample_names output: path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional:true, emit: plots - path "versions.yml" , emit: versions + path "*_data", emit: data + path "*_plots", optional: true, emit: plots + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -26,21 +25,21 @@ process MULTIQC { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' - def config = multiqc_config ? "--config $multiqc_config" : '' - def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def config = multiqc_config ? "--config ${multiqc_config}" : '' + def extra_config = extra_multiqc_config ? "--config ${extra_multiqc_config}" : '' def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' def replace = replace_names ? "--replace-names ${replace_names}" : '' def samples = sample_names ? "--sample-names ${sample_names}" : '' """ multiqc \\ --force \\ - $args \\ - $config \\ - $prefix \\ - $extra_config \\ - $logo \\ - $replace \\ - $samples \\ + ${args} \\ + ${config} \\ + ${prefix} \\ + ${extra_config} \\ + ${logo} \\ + ${replace} \\ + ${samples} \\ . 
cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index b779e46..261dc0f 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -2,14 +2,14 @@ "multiqc_versions_single": { "content": [ [ - "versions.yml:md5,8c8724363a5efe0c6f43ab34faa57efd" + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-07-10T12:41:34.562023" + "timestamp": "2024-10-02T17:51:46.317523" }, "multiqc_stub": { "content": [ @@ -17,25 +17,25 @@ "multiqc_report.html", "multiqc_data", "multiqc_plots", - "versions.yml:md5,8c8724363a5efe0c6f43ab34faa57efd" + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-07-10T11:27:11.933869532" + "timestamp": "2024-10-02T17:52:20.680978" }, "multiqc_versions_config": { "content": [ [ - "versions.yml:md5,8c8724363a5efe0c6f43ab34faa57efd" + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-07-10T11:26:56.709849369" + "timestamp": "2024-10-02T17:52:09.185842" } } diff --git a/modules/nf-core/pigz/compress/environment.yml b/modules/nf-core/pigz/compress/environment.yml new file mode 100644 index 0000000..5016d22 --- /dev/null +++ b/modules/nf-core/pigz/compress/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "pigz=2.8" diff --git a/modules/nf-core/pigz/compress/main.nf b/modules/nf-core/pigz/compress/main.nf new file mode 100644 index 0000000..152e700 --- 
/dev/null +++ b/modules/nf-core/pigz/compress/main.nf @@ -0,0 +1,45 @@ +process PIGZ_COMPRESS { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pigz:2.8': + 'biocontainers/pigz:2.8' }" + + input: + tuple val(meta), path(raw_file) + + output: + tuple val(meta), path("$archive"), emit: archive + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + archive = raw_file.toString() + ".gz" + """ + # Note: needs --stdout for pigz to avoid the following issue: + # pigz: skipping: ${raw_file} is a symbolic link + pigz --processes $task.cpus --stdout --force ${args} ${raw_file} > ${archive} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz:\$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + archive = raw_file.toString() + ".gz" + """ + touch ${archive} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz:\$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/pigz/compress/meta.yml b/modules/nf-core/pigz/compress/meta.yml new file mode 100644 index 0000000..0966e65 --- /dev/null +++ b/modules/nf-core/pigz/compress/meta.yml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "pigz_compress" +description: Compresses files with pigz. +keywords: + - compress + - gzip + - parallelized +tools: + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" + + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - raw_file: + type: file + description: File to be compressed + pattern: "*.*" +output: + - archive: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - $archive: + type: file + description: The compressed file + pattern: "*.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@leoisl" +maintainers: + - "@leoisl" diff --git a/modules/nf-core/pigz/compress/tests/main.nf.test b/modules/nf-core/pigz/compress/tests/main.nf.test new file mode 100644 index 0000000..b3cb25e --- /dev/null +++ b/modules/nf-core/pigz/compress/tests/main.nf.test @@ -0,0 +1,53 @@ +nextflow_process { + name "Test Process PIGZ_COMPRESS" + script "../main.nf" + process "PIGZ_COMPRESS" + + tag "modules" + tag "modules_nfcore" + tag "pigz" + tag "pigz/compress" + + test("sarscov2 - genome - fasta") { + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - genome - fasta - stub") { + options "-stub-run" + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.archive[0][1]).name, + process.out.versions + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/pigz/compress/tests/main.nf.test.snap b/modules/nf-core/pigz/compress/tests/main.nf.test.snap new file mode 100644 index 0000000..4d8df9f --- /dev/null +++ b/modules/nf-core/pigz/compress/tests/main.nf.test.snap @@ -0,0 +1,48 @@ +{ + "sarscov2 - genome - 
fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" + ] + ], + "1": [ + "versions.yml:md5,ca30e9e1ffa1394ba7eefdac8cf3a3ad" + ], + "archive": [ + [ + { + "id": "test" + }, + "genome.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" + ] + ], + "versions": [ + "versions.yml:md5,ca30e9e1ffa1394ba7eefdac8cf3a3ad" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2023-12-11T22:39:53.350546" + }, + "sarscov2 - genome - fasta - stub": { + "content": [ + "genome.fasta.gz", + [ + "versions.yml:md5,ca30e9e1ffa1394ba7eefdac8cf3a3ad" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-30T12:18:32.339508" + } +} \ No newline at end of file diff --git a/modules/nf-core/pigz/compress/tests/tags.yml b/modules/nf-core/pigz/compress/tests/tags.yml new file mode 100644 index 0000000..42c46bf --- /dev/null +++ b/modules/nf-core/pigz/compress/tests/tags.yml @@ -0,0 +1,2 @@ +pigz/compress: + - "modules/nf-core/pigz/compress/**" diff --git a/modules/nf-core/unzip/environment.yml b/modules/nf-core/unzip/environment.yml new file mode 100644 index 0000000..e93c649 --- /dev/null +++ b/modules/nf-core/unzip/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::p7zip=16.02 diff --git a/modules/nf-core/unzip/main.nf b/modules/nf-core/unzip/main.nf new file mode 100644 index 0000000..a0c0210 --- /dev/null +++ b/modules/nf-core/unzip/main.nf @@ -0,0 +1,49 @@ +process UNZIP { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/p7zip:16.02' : + 'biocontainers/p7zip:16.02' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("${prefix}/"), emit: unzipped_archive + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + if ( archive instanceof List && archive.name.size > 1 ) { error "[UNZIP] error: 7za only accepts a single archive as input. Please check module input." } + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName) + """ + 7za \\ + x \\ + -o"${prefix}"/ \\ + $args \\ + $archive + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + 7za: \$(echo \$(7za --help) | sed 's/.*p7zip Version //; s/(.*//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + if ( archive instanceof List && archive.name.size > 1 ) { error "[UNZIP] error: 7za only accepts a single archive as input. Please check module input." } + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName) + """ + mkdir "${prefix}" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + 7za: \$(echo \$(7za --help) | sed 's/.*p7zip Version //; s/(.*//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/unzip/meta.yml b/modules/nf-core/unzip/meta.yml new file mode 100644 index 0000000..426fccb --- /dev/null +++ b/modules/nf-core/unzip/meta.yml @@ -0,0 +1,46 @@ +name: unzip +description: Unzip ZIP archive files +keywords: + - unzip + - decompression + - zip + - archiving +tools: + - unzip: + description: p7zip is a quick port of 7z.exe and 7za.exe (command line version + of 7zip, see www.7-zip.org) for Unix. 
+ homepage: https://sourceforge.net/projects/p7zip/ + documentation: https://sourceforge.net/projects/p7zip/ + tool_dev_url: https://sourceforge.net/projects/p7zip + licence: ["LGPL-2.1-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: ZIP file + pattern: "*.zip" +output: + - unzipped_archive: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/: + type: directory + description: Directory contents of the unzipped archive + pattern: "${archive.baseName}/" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/unzip/tests/main.nf.test b/modules/nf-core/unzip/tests/main.nf.test new file mode 100644 index 0000000..238b68d --- /dev/null +++ b/modules/nf-core/unzip/tests/main.nf.test @@ -0,0 +1,54 @@ +nextflow_process { + + name "Test Process UNZIP" + script "../main.nf" + process "UNZIP" + + tag "modules" + tag "modules_nfcore" + tag "unzip" + + test("generic [tar] [tar_gz]") { + + when { + process { + """ + input[0] = [ + [ id: 'hello' ], + file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("generic [tar] [tar_gz] stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id: 'hello' ], + file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/unzip/tests/main.nf.test.snap
b/modules/nf-core/unzip/tests/main.nf.test.snap new file mode 100644 index 0000000..cdd2ab1 --- /dev/null +++ b/modules/nf-core/unzip/tests/main.nf.test.snap @@ -0,0 +1,76 @@ +{ + "generic [tar] [tar_gz] stub": { + "content": [ + { + "0": [ + [ + { + "id": "hello" + }, + [ + + ] + ] + ], + "1": [ + "versions.yml:md5,52c55ce814e8bc9edc5a6c625ed794b8" + ], + "unzipped_archive": [ + [ + { + "id": "hello" + }, + [ + + ] + ] + ], + "versions": [ + "versions.yml:md5,52c55ce814e8bc9edc5a6c625ed794b8" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-30T19:16:37.11550986" + }, + "generic [tar] [tar_gz]": { + "content": [ + { + "0": [ + [ + { + "id": "hello" + }, + [ + "hello.tar:md5,80c66db79a773bc87b3346035ff9593e" + ] + ] + ], + "1": [ + "versions.yml:md5,52c55ce814e8bc9edc5a6c625ed794b8" + ], + "unzipped_archive": [ + [ + { + "id": "hello" + }, + [ + "hello.tar:md5,80c66db79a773bc87b3346035ff9593e" + ] + ] + ], + "versions": [ + "versions.yml:md5,52c55ce814e8bc9edc5a6c625ed794b8" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-30T19:16:25.120242571" + } +} \ No newline at end of file diff --git a/modules/nf-core/unzip/tests/tags.yml b/modules/nf-core/unzip/tests/tags.yml new file mode 100644 index 0000000..7f5647e --- /dev/null +++ b/modules/nf-core/unzip/tests/tags.yml @@ -0,0 +1,2 @@ +unzip: + - "modules/nf-core/unzip/**" diff --git a/nextflow.config b/nextflow.config index 862ebf2..3c4f616 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,10 +12,7 @@ params { // TODO nf-core: Specify your pipeline's command line flags // Input options input = null - // References - genome = null - igenomes_base = 's3://ngi-igenomes/igenomes/' - igenomes_ignore = false + // MultiQC options multiqc_config = null multiqc_title = null @@ -46,7 +43,28 @@ params { config_profile_url = null // Schema validation default options validate_params = true - + + // General parameters + 
dbname = null + save_concatenated_fastas = false + + accession2taxid = null + prot2taxid = null + nucl2taxid = null + nodesdmp = null + namesdmp = null + malt_mapdb = null + + // tool specific options + build_bracken = false + build_diamond = false + build_kaiju = false + build_malt = false + malt_sequencetype = "DNA" + build_centrifuge = false + build_kraken2 = false + kraken2_keepintermediate = false + build_krakenuniq = false } // Load base.config by default for all pipelines @@ -153,6 +171,8 @@ profiles { } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } + test_nothing { includeConfig 'conf/test_nothing.config' } + } // Load nf-core custom profiles from different Institutions @@ -170,8 +190,6 @@ podman.registry = 'quay.io' singularity.registry = 'quay.io' charliecloud.registry = 'quay.io' -// Load igenomes.config if required -includeConfig !params.igenomes_ignore ? 'conf/igenomes.config' : 'conf/igenomes_ignored.config' // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -249,10 +267,10 @@ validation { """ afterText = """${manifest.doi ? "* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? 
"\n" : ""} * The nf-core framework - https://doi.org/10.1038/s41587-020-0439-x + https://doi.org/10.1038/s41587-020-0439-x * Software dependencies - https://github.com/${manifest.name}/blob/master/CITATIONS.md + https://github.com/${manifest.name}/blob/master/CITATIONS.md """ } summary { diff --git a/nextflow_schema.json b/nextflow_schema.json index e3a38d3..42dc4b6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": ["input", "outdir", "dbname"], "properties": { "input": { "type": "string", @@ -20,7 +20,7 @@ "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/createtaxdb/usage#samplesheet-input).", + "help_text": "You will need to create a design file with information about the reference genomes you wish to build into a metagenomic profiling database. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row. See [usage docs](https://nf-co.re/createtaxdb/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" }, "outdir": { @@ -40,47 +40,124 @@ "type": "string", "description": "MultiQC report title. 
Printed as page header, used for filename if not otherwise specified.", "fa_icon": "fas fa-file-signature" + }, + "dbname": { + "type": "string", + "description": "Specify name that resulting databases will be prefixed with.", + "fa_icon": "fas fa-id-badge" + }, + "accession2taxid": { + "type": "string", + "description": "NCBI-style four-column accession to taxonomy ID map file", + "help_text": "\nAn NCBI-style four-column tab-separated file with `accession`, `accession.version`, `taxid` and `gi` number. The first refers to an accession ID in each FASTA entry header. The second refers to the accession ID but with the accession version number appended to the end (e.g. `.1`). The third refers to the taxonomy ID of the organism the sequence belongs to, as listed in `nodes.dmp`. The fourth refers to the old-style NCBI gi number of the sequence.\n\nIf building with typical NCBI data, the most typical file is the `nucl_gb.accession2taxid` file from the [NCBI taxonomy FTP server](https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/).", + "fa_icon": "fas fa-address-book", + "format": "file-path" + }, + "prot2taxid": { + "type": "string", + "format": "path", + "exists": true, + "fa_icon": "fas fa-address-card", + "help_text": "A two-column tab-separated file with `accession.version` and `taxid`. The first refers to an accession ID in each FASTA entry header. The second refers to the taxonomy ID of the organism the sequence belongs to, as listed in `nodes.dmp`.", + "description": "Two column protein sequence accession ID to taxonomy map file." + }, + "nucl2taxid": { + "type": "string", + "format": "file-path", + "exists": true, + "fa_icon": "far fa-address-card", + "description": "Two column nucleotide sequence accession ID to taxonomy map file.", + "help_text": "A two-column tab-separated file with `accession.version` and `taxid`. The first refers to an accession ID in each FASTA entry header.
The second refers to the taxonomy ID of the organism the sequence belongs to, as listed in `nodes.dmp`." + }, + "nodesdmp": { + "type": "string", + "format": "file-path", + "exists": true, + "fa_icon": "fas fa-circle", + "description": "Path to NCBI-style taxonomy node dmp file.", + "help_text": "A tab/pipe/tab separated table file. See nodes.dmp section of [NCBI taxdump README](https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/taxdump_readme.txt) for column file structure." + }, + "namesdmp": { + "type": "string", + "format": "file-path", + "exists": true, + "fa_icon": "fas fa-tag", + "description": "Path to NCBI-style taxonomy names dmp file.", + "help_text": "A tab/pipe/tab separated table file. See names.dmp section of [NCBI taxdump README](https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/taxdump_readme.txt) for column file structure." + }, + "malt_mapdb": { + "type": "string", + "format": "file-path", + "exists": true, + "description": "Path to MEGAN6/MALT mapping db file", + "help_text": "A mapping DB file containing taxonomy information for building MALT databases. This file can be downloaded from the [MEGAN6 website](https://software-ab.cs.uni-tuebingen.de/download/megan6/welcome.html). Can be zipped (the pipeline will unzip this for you if necessary). Typically the `megan-nucl-Feb2022.db.zip` is used with MALT.", + "fa_icon": "fas fa-database" + }, + "save_concatenated_fastas": { + "type": "boolean", + "description": "Save concatenated input FASTAs", + "help_text": "Some tools require a single input FASTA of all reference genomes. The pipeline will do this concatenation for you, and by supplying this flag the resulting single FASTA will be saved in the results directory under `cat/`.
", + "fa_icon": "fas fa-save" } } }, - "reference_genome_options": { - "title": "Reference genome options", + "database_building_options": { + "title": "Database Building Options", "type": "object", - "fa_icon": "fas fa-dna", - "description": "Reference genome related files and options required for the workflow.", + "description": "", + "default": "", "properties": { - "genome": { - "type": "string", - "description": "Name of iGenomes reference.", - "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + "build_bracken": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Turn on extending of Kraken2 database to include Bracken files. Requires nucleotide FASTA File input.", + "help_text": "Bracken2 databases are simply just a Kraken2 database with two additional files.\n\nNote however this requires a Kraken2 database _with_ intermediate files still in it, thus can result in large database directories." }, - "fasta": { - "type": "string", - "format": "file-path", - "exists": true, - "mimetype": "text/plain", - "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", - "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", - "fa_icon": "far fa-file-code" + "build_centrifuge": { + "type": "boolean", + "description": "Turn on building of Centrifuge database. 
Requires nucleotide FASTA file input.", + "fa_icon": "fas fa-toggle-on" }, - "igenomes_ignore": { + "build_diamond": { "type": "boolean", - "description": "Do not load the iGenomes reference config.", - "fa_icon": "fas fa-ban", - "hidden": true, - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + "fa_icon": "fas fa-toggle-on", + "description": "Turn on building of DIAMOND database. Requires amino-acid FASTA file input." + }, + "build_kaiju": { + "type": "boolean", + "description": "Turn on building of Kaiju database. Requires amino-acid FASTA file input.", + "fa_icon": "fas fa-toggle-on" }, - "igenomes_base": { + "build_malt": { + "type": "boolean", + "fa_icon": "fas fa-toggle-on", + "description": "Turn on building of MALT database. Requires nucleotide FASTA file input." + }, + "malt_sequencetype": { "type": "string", - "format": "directory-path", - "description": "The base path to the igenomes reference files", - "fa_icon": "fas fa-ban", - "hidden": true, - "default": "s3://ngi-igenomes/igenomes/" + "default": "DNA", + "description": "Specify type of input sequence being given to MALT", + "enum": ["DNA", "Protein"], + "help_text": "Use to specify whether the reference sequences are DNA or Protein sequences. (For RNA sequences, use the DNA setting) - from [MALT manual](https://software-ab.cs.uni-tuebingen.de/download/malt/).\n\n> Modifies tool(s) parameter(s)\n> - malt-build: `--sequenceType` ", + "fa_icon": "fas fa-dna" + }, + "build_kraken2": { + "type": "boolean", + "description": "Turn on building of Kraken2 database. Requires nucleotide FASTA file input.", + "fa_icon": "fas fa-toggle-on" + }, + "kraken2_keepintermediate": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Retain intermediate Kraken2 build files for inspection." 
+ }, + "build_krakenuniq": { + "type": "boolean", + "fa_icon": "fas fa-toggle-on", + "description": "Turn on building of KrakenUniq database. Requires nucleotide FASTA file input." } - } + }, + "fa_icon": "fas fa-database" }, "institutional_config_options": { "title": "Institutional config options", @@ -227,7 +304,7 @@ "$ref": "#/$defs/input_output_options" }, { - "$ref": "#/$defs/reference_genome_options" + "$ref": "#/$defs/database_building_options" }, { "$ref": "#/$defs/institutional_config_options" diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 0000000..f65ae9b --- /dev/null +++ b/nf-test.config @@ -0,0 +1,16 @@ +config { + // location for all nf-tests + testsDir "tests/" + + // nf-test directory including temporary files for each test + workDir ".nf-test" + + // location of library folder that is added automatically to the classpath + libDir "lib/" + + // location of an optional nextflow.config file specific for executing tests + configFile "nextflow.config" + + // run all test with the defined docker profile from the main nextflow.config + profile "" +} diff --git a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf index 1e1b84d..ab4fa6a 100644 --- a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf @@ -8,23 +8,22 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { samplesheetToList } from 'plugin/nf-schema' -include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' -include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' -include { imNotification } from '../../nf-core/utils_nfcore_pipeline' -include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' 
-include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { samplesheetToList } from 'plugin/nf-schema' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SUBWORKFLOW TO INITIALISE PIPELINE -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ workflow PIPELINE_INITIALISATION { - take: version // boolean: Display version and exit validate_params // boolean: Boolean whether to validate parameters against the schema at runtime @@ -40,28 +39,28 @@ workflow PIPELINE_INITIALISATION { // // Print version and exit if required and dump pipeline parameters to JSON file // - UTILS_NEXTFLOW_PIPELINE ( + UTILS_NEXTFLOW_PIPELINE( version, true, outdir, workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 ) - + // // Validate parameters and generate parameter summary to stdout // - UTILS_NFSCHEMA_PLUGIN ( + UTILS_NFSCHEMA_PLUGIN( workflow, validate_params, null ) - + // // Check config provided to the pipeline // - UTILS_NFCORE_PIPELINE ( + UTILS_NFCORE_PIPELINE( nextflow_cli_args ) // @@ -74,23 +73,7 @@ workflow PIPELINE_INITIALISATION { // Channel - .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) - .map { - meta, fastq_1, fastq_2 -> 
- if (!fastq_2) { - return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] - } else { - return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] - } - } - .groupTuple() - .map { samplesheet -> - validateInputSamplesheet(samplesheet) - } - .map { - meta, fastqs -> - return [ meta, fastqs.flatten() ] - } + .fromSamplesheet("input") .set { ch_samplesheet } emit: @@ -99,18 +82,16 @@ workflow PIPELINE_INITIALISATION { } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SUBWORKFLOW FOR PIPELINE COMPLETION -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ workflow PIPELINE_COMPLETION { - take: email // string: email address email_on_fail // string: email address sent on pipeline failure plaintext_email // boolean: Send plain-text email instead of HTML - outdir // path: Path to output directory where results will be published monochrome_logs // boolean: Disable ANSI colour codes in log output hook_url // string: hook URL for notifications @@ -142,20 +123,24 @@ workflow PIPELINE_COMPLETION { } workflow.onError { - log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + log.error("Pipeline failed. 
Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting") } } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FUNCTIONS -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // // Check and validate pipeline parameters // def validateInputParameters() { - genomeExistsError() + + // Validate DIAMOND parameter combinations + if (params.build_diamond && [!params.prot2taxid, !params.nodesdmp, !params.namesdmp].any()) { + error('[nf-core/createtaxdb] Supplied --build_diamond, but missing at least one of: --prot2taxid, --nodesdmp, or --namesdmp (all are mandatory for DIAMOND)') + } } // @@ -165,20 +150,20 @@ def validateInputSamplesheet(input) { def (metas, fastqs) = input[1..2] // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end - def endedness_ok = metas.collect{ meta -> meta.single_end }.unique().size == 1 + def endedness_ok = metas.collect { meta -> meta.single_end }.unique().size == 1 if (!endedness_ok) { error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") } - return [ metas[0], fastqs ] + return [metas[0], fastqs] } // // Get attribute from genome config file e.g. 
fasta // def getGenomeAttribute(attribute) { if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] + if (params.genomes[params.genome].containsKey(attribute)) { + return params.genomes[params.genome][attribute] } } return null @@ -189,11 +174,7 @@ def getGenomeAttribute(attribute) { // def genomeExistsError() { if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + " Currently, the available genome keys are:\n" + " ${params.genomes.keySet().join(", ")}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" error(error_string) } } @@ -205,11 +186,17 @@ def toolCitationText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "." - ].join(' ').trim() + "Tools used in the workflow included:", + params.build_bracken ? "Bracken (Lu et al. 2017)," : "", + params.build_centrifuge ? "Centrifuge (Kim et al. 2016)," : "", + params.build_diamond ? "DIAMOND (Buchfink et al. 2015)," : "", + params.build_kaiju ? 
"Kaiju (Menzel et al. 2016)," : "", + params.build_kraken2 ? "Kraken2 (Wood et al. 2019)," : "", + params.build_krakenuniq ? "KrakenUniq (Breitwieser et al. 2018)," : "", + params.build_malt ? "MALT (Vågene et al. 2018)," : "", + "and MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() return citation_text } @@ -219,9 +206,15 @@ def toolBibliographyText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def reference_text = [ - "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", - "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>" - ].join(' ').trim() + params.build_bracken ? "<li>Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: estimating species abundance in metagenomics data. PeerJ. Computer Science, 3(e104), e104. 10.7717/peerj-cs.104</li>" : "", + params.build_centrifuge ? "<li>Kim, D., Song, L., Breitwieser, F. P., & Salzberg, S. L. (2016). Centrifuge: rapid and sensitive classification of metagenomic sequences. Genome Research, 26(12), 1721–1729. 10.1101/gr.210641.116</li>" : "", + params.build_diamond ? "<li>Buchfink, B., Xie, C., & Huson, D. H. (2015). Fast and sensitive protein alignment using DIAMOND. Nature Methods, 12(1), 59–60. 10.1038/nmeth.3176</li>" : "", + params.build_kaiju ? "<li>Menzel, P., Ng, K. L., & Krogh, A. (2016). Fast and sensitive taxonomic classification for metagenomics with Kaiju. Nature Communications, 7, 11257. 10.1038/ncomms11257</li>" : "", + params.build_kraken2 ? "<li>Wood, D. E., Lu, J., & Langmead, B. (2019). Improved metagenomic analysis with Kraken 2. Genome Biology, 20(1), 257. 10.1186/s13059-019-1891-0</li>" : "", + params.build_krakenuniq ? "<li>Breitwieser, F. P., Baker, D. N., & Salzberg, S. L. (2018). KrakenUniq: confident and fast metagenomics classification using unique k-mer counts. Genome Biology, 19(1), 198. 10.1186/s13059-018-1568-0</li>" : "", + params.build_malt ? "<li>Vågene, Å. J., Herbig, A., Campana, M. G., Robles García, N. M., Warinner, C., Sabin, S., Spyrou, M. A., Andrades Valtueña, A., Huson, D., Tuross, N., Bos, K. I., & Krause, J. (2018). Salmonella enterica genomes from victims of a major sixteenth-century epidemic in Mexico. Nature Ecology & Evolution, 2(3), 520–528. 10.1038/s41559-017-0446-6</li>" : "", + "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>" + ].join(' ').trim() return reference_text } @@ -243,23 +236,24 @@ def methodsDescriptionText(mqc_methods_yaml) { temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " } meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) - } else meta["doi_text"] = "" + } + else { + meta["doi_text"] = "" + } meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>
" // Tool references meta["tool_citations"] = "" meta["tool_bibliography"] = "" - // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - // meta["tool_bibliography"] = toolBibliographyText() + meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + meta["tool_bibliography"] = toolBibliographyText() def methods_text = mqc_methods_yaml.text - def engine = new groovy.text.SimpleTemplateEngine() + def engine = new groovy.text.SimpleTemplateEngine() def description_html = engine.createTemplate(methods_text).make(meta) return description_html.toString() } - diff --git a/subworkflows/nf-core/fasta_build_add_kraken2_bracken/main.nf b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/main.nf new file mode 100644 index 0000000..cab2f6b --- /dev/null +++ b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/main.nf @@ -0,0 +1,41 @@ +include { KRAKEN2_ADD } from '../../../modules/nf-core/kraken2/add/main' +include { KRAKEN2_BUILD } from '../../../modules/nf-core/kraken2/build/main' +include { BRACKEN_BUILD } from '../../../modules/nf-core/bracken/build/main' + +workflow FASTA_BUILD_ADD_KRAKEN2_BRACKEN { + + take: + ch_fasta // channel: [ val(meta), [ fasta1, fasta2, fasta3] ] + ch_taxonomy_names // channel: [ names.dmp ] + ch_taxonomy_nodes // channel: [ nodes.dmp ] + ch_accession2taxid // channel: [ acc2taxidfile ] + val_cleanintermediates // value: [ true | false ] + val_runbrackenbuild // value: [ true | false ] + + main: + + if ( val_cleanintermediates && val_runbrackenbuild ) { error("Cannot perform Kraken2 cleanup and build Bracken database. Bracken requires intermediate files") } + val_cleanup = [ val_cleanintermediates && !val_runbrackenbuild ].any() ? 
true : false + + ch_versions = Channel.empty() + + KRAKEN2_ADD ( ch_fasta, ch_taxonomy_names, ch_taxonomy_nodes, ch_accession2taxid ) + ch_versions = ch_versions.mix( KRAKEN2_ADD.out.versions.first() ) + + KRAKEN2_BUILD ( KRAKEN2_ADD.out.db, val_cleanup ) + ch_versions = ch_versions.mix( KRAKEN2_BUILD.out.versions.first() ) + + if ( val_runbrackenbuild ) { + BRACKEN_BUILD ( KRAKEN2_BUILD.out.db ) + ch_final_db = BRACKEN_BUILD.out.db + ch_versions = ch_versions.mix( BRACKEN_BUILD.out.versions.first() ) + } + else { + ch_final_db = KRAKEN2_BUILD.out.db + ch_versions = ch_versions.mix( KRAKEN2_BUILD.out.versions.first() ) + } + + emit: + db = ch_final_db // channel: [ val(meta), [ db ] ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fasta_build_add_kraken2_bracken/meta.yml b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/meta.yml new file mode 100644 index 0000000..8125c60 --- /dev/null +++ b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/meta.yml @@ -0,0 +1,71 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fasta_build_add_kraken2_bracken" +description: KRAKEN2 and BRACKEN build custom database subworkflow +keywords: + - metagenomics + - kraken2 + - database + - build + - custom + - bracken +components: + - kraken2/add + - kraken2/build + - bracken/build +input: + - ch_fasta: + type: file + description: | + Channel containing a meta with a list of FASTAs to be built + Structure: [ val(meta), [ fasta1, fasta2, fasta3 ] ] + pattern: "*.{fasta,fa,fna}" + - ch_taxonomy_names: + type: file + description: | + Channel containing a NCBI-style taxdump names file + Structure: [ names.dmp ] + pattern: "names.dmp" + - ch_taxonomy_nodes: + type: file + description: | + Channel containing a NCBI-style taxdump nodes file + Structure: [ nodes.dmp ] + pattern: "nodes.dmp" + - ch_accession2taxid: + type: file + description: | + Channel containing 
a NCBI-style taxdump accession2taxid (acc2tax) file + Structure: [ accession2taxid_file ] + pattern: "*.accession2taxid" + - val_cleanintermediates: + type: boolean + description: | + Boolean flag whether to clean up intermediate files after build or not. + If val_runbrackenbuild set, will be ignored as BRACKEN requires intermediate files. + Structure: [ val_cleanintermediate ] + pattern: "true|false" + - val_runbrackenbuild: + type: boolean + description: | + Boolean flag whether to additionally insert required BRACKEN database files into KRAKEN2 directory. + Note any changes for k-mer or read lengths must come via Nextflow config `ext.args`. + Structure: [ val_runbrackenbuild ] + pattern: "true|false" +output: + - db: + type: directory + description: | + Channel containing KRAKEN2 (and BRACKEN) database directory files. + Use `ext.prefix` in a modules.conf file to change default name + Structure: [ val(meta), path(db) ] + pattern: "*/" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/main.nf.test b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/main.nf.test new file mode 100644 index 0000000..94bc184 --- /dev/null +++ b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/main.nf.test @@ -0,0 +1,160 @@ +nextflow_workflow { + + name "Test Subworkflow FASTA_BUILD_ADD_KRAKEN2_BRACKEN" + script "../main.nf" + workflow "FASTA_BUILD_ADD_KRAKEN2_BRACKEN" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fasta_build_add_kraken2_bracken" + tag "gunzip" + tag "kraken2" + tag "kraken2/add" + tag "kraken2/build" + tag "bracken/build" + + test("metagenome - nocleanup - nobracken - fasta") { + + setup { + run("GUNZIP") { + script "modules/nf-core/gunzip/main.nf" + process { + """ + input[0] = Channel.of([ + [id:'db'], + 
file(params.modules_testdata_base_path + '/genomics/prokaryotes/metagenome/fasta/haemophilus_influenzae.fna.gz', checkIfExists: true) + ] + ) + """ + } + } + } + + when { + workflow { + """ + input[0] = Channel.of([[id:'db'], file(params.modules_testdata_base_path + '/genomics/prokaryotes/metagenome/fasta/sarscov2.fasta', checkIfExists: true)]).mix(GUNZIP.out.gunzip).groupTuple() + input[1] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/taxdmp/names.dmp', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/taxdmp/nodes.dmp', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/accession2taxid/nucl_gb.accession2taxid', checkIfExists: true) + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot ( + workflow.out.versions, + path("${workflow.out.db[0][1]}/hash.k2d"), + path("${workflow.out.db[0][1]}/taxo.k2d"), + file("${workflow.out.db[0][1]}/opts.k2d").name, + ).match() + }, + { assert path("${workflow.out.db[0][1]}/library/").exists() }, + { assert path("${workflow.out.db[0][1]}/taxonomy/").exists() } + ) + } + } + + test("metagenome - withcleanup - nobracken - fasta") { + + setup { + run("GUNZIP") { + script "modules/nf-core/gunzip/main.nf" + process { + """ + input[0] = Channel.of([\ + [id:'db'], + file(params.modules_testdata_base_path + '/genomics/prokaryotes/metagenome/fasta/haemophilus_influenzae.fna.gz', checkIfExists: true) + ] + ) + """ + } + } + } + + when { + workflow { + """ + input[0] = Channel.of([[id:'db'], file(params.modules_testdata_base_path + '/genomics/prokaryotes/metagenome/fasta/sarscov2.fasta', checkIfExists: true)]).mix(GUNZIP.out.gunzip).groupTuple() + input[1] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/taxdmp/names.dmp', checkIfExists: true) + input[2] = 
file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/taxdmp/nodes.dmp', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/accession2taxid/nucl_gb.accession2taxid', checkIfExists: true) + input[4] = true + input[5] = false + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert workflow.out.db.get(0).get(1) ==~ ".*/db" }, + { assert snapshot ( + workflow.out.versions, + path("${workflow.out.db[0][1]}/hash.k2d"), + path("${workflow.out.db[0][1]}/taxo.k2d"), + file("${workflow.out.db[0][1]}/opts.k2d").name, + file("${workflow.out.db[0][1]}/unmapped.txt").name + ).match() + }, + { assert !path("${workflow.out.db[0][1]}/library/").exists() }, + { assert !path("${workflow.out.db[0][1]}/taxonomy/").exists() } + ) + } + } + +test("metagenome - nocleanup - withbracken - fasta") { + + setup { + run("GUNZIP") { + script "modules/nf-core/gunzip/main.nf" + process { + """ + input[0] = Channel.of([\ + [id:'db'], + file(params.modules_testdata_base_path + '/genomics/prokaryotes/metagenome/fasta/haemophilus_influenzae.fna.gz', checkIfExists: true) + ] + ) + """ + } + } + } + + when { + workflow { + """ + input[0] = Channel.of([[id:'db'], file(params.modules_testdata_base_path + '/genomics/prokaryotes/metagenome/fasta/sarscov2.fasta', checkIfExists: true)]).mix(GUNZIP.out.gunzip).groupTuple() + input[1] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/taxdmp/names.dmp', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/taxdmp/nodes.dmp', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/accession2taxid/nucl_gb.accession2taxid', checkIfExists: true) + input[4] = false + input[5] = true + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert workflow.out.db.get(0).get(1) ==~ 
".*/db" }, + { assert path("${workflow.out.db[0][1]}/library/").exists() }, + { assert path("${workflow.out.db[0][1]}/taxonomy/").exists() }, + { assert snapshot ( + workflow.out.versions, + path("${workflow.out.db[0][1]}/hash.k2d"), + path("${workflow.out.db[0][1]}/taxo.k2d"), + file("${workflow.out.db[0][1]}/opts.k2d").name, + file("${workflow.out.db[0][1]}/unmapped.txt").name, + file("${workflow.out.db[0][1]}/database100mers.kmer_distrib").name, + file("${workflow.out.db[0][1]}/database100mers.kraken").name, + file("${workflow.out.db[0][1]}/database.kraken").name + ).match() + } + ) + } + } +} diff --git a/subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/main.nf.test.snap b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/main.nf.test.snap new file mode 100644 index 0000000..9ad0c78 --- /dev/null +++ b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/main.nf.test.snap @@ -0,0 +1,58 @@ +{ + "metagenome - nocleanup - nobracken - fasta": { + "content": [ + [ + "versions.yml:md5,b5f92f68a6af1f422ccc1a5c75178793", + "versions.yml:md5,f815f0afa0f648fb6532bf6d780ce0ae", + "versions.yml:md5,f815f0afa0f648fb6532bf6d780ce0ae" + ], + "hash.k2d:md5,4717689f8ba88d4cae51ecc7c9d9b372", + "taxo.k2d:md5,24338e2d78f803f48bcc5653c6e51816", + "opts.k2d" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-18T09:04:48.196774778" + }, + "metagenome - withcleanup - nobracken - fasta": { + "content": [ + [ + "versions.yml:md5,b5f92f68a6af1f422ccc1a5c75178793", + "versions.yml:md5,f815f0afa0f648fb6532bf6d780ce0ae", + "versions.yml:md5,f815f0afa0f648fb6532bf6d780ce0ae" + ], + "hash.k2d:md5,4717689f8ba88d4cae51ecc7c9d9b372", + "taxo.k2d:md5,24338e2d78f803f48bcc5653c6e51816", + "opts.k2d", + "unmapped.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-18T08:42:24.87325051" + }, + "metagenome - nocleanup - withbracken - fasta": { + "content": [ + [ + 
"versions.yml:md5,130d220d293e4f75863b6c0756bb8324", + "versions.yml:md5,b5f92f68a6af1f422ccc1a5c75178793", + "versions.yml:md5,f815f0afa0f648fb6532bf6d780ce0ae" + ], + "hash.k2d:md5,4717689f8ba88d4cae51ecc7c9d9b372", + "taxo.k2d:md5,24338e2d78f803f48bcc5653c6e51816", + "opts.k2d", + "unmapped.txt", + "database100mers.kmer_distrib", + "database100mers.kraken", + "database.kraken" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-18T10:58:01.065026262" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/tags.yml b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/tags.yml new file mode 100644 index 0000000..40273bf --- /dev/null +++ b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/fasta_build_add_kraken2_bracken: + - subworkflows/nf-core/fasta_build_add_kraken2_bracken/** diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 0000000..c19b1ad --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,5 @@ +/* +======================================================================================== + Nextflow config file for running tests +======================================================================================== +*/ diff --git a/tests/tags.yml b/tests/tags.yml new file mode 100644 index 0000000..1e63ed9 --- /dev/null +++ b/tests/tags.yml @@ -0,0 +1,2 @@ +nfcore_createtaxdb: + - ./** diff --git a/tests/test.nf.test b/tests/test.nf.test new file mode 100644 index 0000000..76a073c --- /dev/null +++ b/tests/test.nf.test @@ -0,0 +1,51 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_CREATETAXDB" + script "main.nf" + tag "pipeline" + tag "nfcore_createtaxdb" + tag "test" + + test("test_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + 
file("$outputDir/bracken/database/database100mers.kmer_distrib").name, + file("$outputDir/bracken/database/database100mers.kraken").name, + file("$outputDir/bracken/database/database.kraken").name, + path("$outputDir/centrifuge/"), + path("$outputDir/diamond/database.dmnd"), + path("$outputDir/kaiju/database.fmi"), + path("$outputDir/kraken2/database/hash.k2d"), + file("$outputDir/kraken2/database/opts.k2d").name, + path("$outputDir/kraken2/database/taxo.k2d"), + path("$outputDir/krakenuniq/database/database-build.log").readLines().last().contains('database.idx'), + file("$outputDir/krakenuniq/database/database.idx").name, + file("$outputDir/krakenuniq/database/database.kdb"), + file("$outputDir/krakenuniq/database/taxDB"), + path("$outputDir/malt/malt-build.log").readLines().last().contains('Peak memory'), + path("$outputDir/malt/malt_index/index0.idx"), + path("$outputDir/malt/malt_index/ref.db"), + path("$outputDir/malt/malt_index/ref.idx"), + path("$outputDir/malt/malt_index/ref.inf"), + path("$outputDir/malt/malt_index/taxonomy.idx"), + path("$outputDir/malt/malt_index/taxonomy.map"), + path("$outputDir/malt/malt_index/taxonomy.tre") + ).match() + }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + { assert path("$outputDir/malt/malt_index/table0.db").exists() }, + { assert path("$outputDir/malt/malt_index/table0.idx").exists() }, + ) + } + } +} diff --git a/tests/test.nf.test.snap b/tests/test.nf.test.snap new file mode 100644 index 0000000..8e3bb52 --- /dev/null +++ b/tests/test.nf.test.snap @@ -0,0 +1,37 @@ +{ + "test_profile": { + "content": [ + "database100mers.kmer_distrib", + "database100mers.kraken", + "database.kraken", + [ + "database.1.cf:md5,1481615ab90b5573f6d9e57f97890178", + "database.2.cf:md5,d50fa66e215e80284314ff6521dcd4a4", + "database.3.cf:md5,beafa92166ba461f9bda1dac0b640f45", + 
"database.4.cf:md5,2902ec5df0db6da41a91b40d2f46b30d" + ], + "database.dmnd:md5,b2ea49ef5490c526e2c56cae19bcb462", + "database.fmi:md5,54fd89f5e4eab61af30175e8aa389598", + "hash.k2d:md5,01122a04dcef29ceb3baa68a9f6e6ef5", + "opts.k2d", + "taxo.k2d:md5,cd8170a8c5a1b763a9ac1ffa2107cc88", + true, + "database.idx", + "database.kdb:md5,a24fce43bedbc6c420f6e36d10c112a3", + "taxDB:md5,1aed1afa948daffc236deba1c5d635db", + true, + "index0.idx:md5,876139dc930e68992cd2625e08bba48a", + "ref.db:md5,377073f58a9f9b85acca59fcf21744a9", + "ref.idx:md5,676393d0f4826dac3f47aa5290632570", + "ref.inf:md5,446275f4f879e2b115e983db7c9ced18", + "taxonomy.idx:md5,1e2e8fdc703a6d2707e7cbefd2b6d93f", + "taxonomy.map:md5,5bb3f2192e925bca2e61e4b54f1671e0", + "taxonomy.tre:md5,f76fb2d5aa9b0d637234d48175841e0e" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-30T10:54:40.551963562" + } +} \ No newline at end of file diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index b90cc85..b230059 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -3,12 +3,29 @@ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_createtaxdb_pipeline' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { 
methodsDescriptionText } from '../subworkflows/local/utils_nfcore_createtaxdb_pipeline' + + +// Preprocessing +include { GUNZIP as GUNZIP_DNA } from '../modules/nf-core/gunzip/main' +include { PIGZ_COMPRESS as PIGZ_COMPRESS_DNA } from '../modules/nf-core/pigz/compress/main' +include { PIGZ_COMPRESS as PIGZ_COMPRESS_AA } from '../modules/nf-core/pigz/compress/main' +include { CAT_CAT as CAT_CAT_DNA } from '../modules/nf-core/cat/cat/main' +include { CAT_CAT as CAT_CAT_AA } from '../modules/nf-core/cat/cat/main' + +// Database building (with specific auxiliary modules) +include { CENTRIFUGE_BUILD } from '../modules/nf-core/centrifuge/build/main' +include { DIAMOND_MAKEDB } from '../modules/nf-core/diamond/makedb/main' +include { KAIJU_MKFMI } from '../modules/nf-core/kaiju/mkfmi/main' +include { KRAKENUNIQ_BUILD } from '../modules/nf-core/krakenuniq/build/main' +include { UNZIP } from '../modules/nf-core/unzip/main' +include { MALT_BUILD } from '../modules/nf-core/malt/build/main' + +include { FASTA_BUILD_ADD_KRAKEN2_BRACKEN } from '../subworkflows/nf-core/fasta_build_add_kraken2_bracken/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -17,21 +34,169 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_crea */ workflow CREATETAXDB { - take: - ch_samplesheet // channel: samplesheet read in from --input + ch_samplesheet // channel: samplesheet read in from --input + ch_taxonomy_namesdmp // channel: taxonomy names file + ch_taxonomy_nodesdmp // channel: taxonomy nodes file + ch_accession2taxid // channel: accession2taxid file + ch_nucl2taxid // channel: nucl2taxid file + ch_prot2taxid // channel: prot2taxid file + ch_malt_mapdb // channel: maltmap file + main: ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - // - // MODULE: Run FastQC - // - FASTQC ( - ch_samplesheet - ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) - ch_versions = 
ch_versions.mix(FASTQC.out.versions.first()) + + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + DATA PREPARATION + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + + // PREPARE: Prepare input for single file inputs modules + + if ([params.build_malt, params.build_centrifuge, params.build_kraken2, params.build_bracken, params.build_krakenuniq].any()) { + // Pull just DNA sequences + + ch_dna_refs_for_singleref = ch_samplesheet + .map { meta, fasta_dna, fasta_aa -> [[id: params.dbname], fasta_dna] } + .filter { meta, fasta_dna -> + fasta_dna + } + + ch_dna_for_unzipping = ch_dna_refs_for_singleref.branch { meta, fasta -> + zipped: fasta.extension == 'gz' + unzipped: true + } + + GUNZIP_DNA(ch_dna_for_unzipping.zipped) + ch_prepped_dna_fastas = GUNZIP_DNA.out.gunzip.mix(ch_dna_for_unzipping.unzipped).groupTuple() + ch_versions = ch_versions.mix(GUNZIP_DNA.out.versions.first()) + + // Place in single file + ch_singleref_for_dna = CAT_CAT_DNA(ch_prepped_dna_fastas) + ch_versions = ch_versions.mix(CAT_CAT_DNA.out.versions.first()) + } + + // TODO: Possibly need to have a modification step to get header correct to actually run with kaiju... + // TEST first! + // docs: https://github.com/bioinformatics-centre/kaiju#custom-database + // docs: https://github.com/nf-core/test-datasets/tree/taxprofiler#kaiju + // idea: try just appending `_` to end of each sequence header using a local sed module... 
it might be sufficient + if ([params.build_kaiju, params.build_diamond].any()) { + + ch_aa_refs_for_singleref = ch_samplesheet + .map { meta, fasta_dna, fasta_aa -> [[id: params.dbname], fasta_aa] } + .filter { meta, fasta_aa -> + fasta_aa + } + + ch_aa_for_zipping = ch_aa_refs_for_singleref.branch { meta, fasta -> + zipped: fasta.extension == 'gz' + unzipped: true + } + + PIGZ_COMPRESS_AA(ch_aa_for_zipping.unzipped) + ch_prepped_aa_fastas = PIGZ_COMPRESS_AA.out.archive.mix(ch_aa_for_zipping.zipped).groupTuple() + //ch_versions = ch_versions.mix( PIGZ_COMPRESS_AA.versions.first() ) + + ch_singleref_for_aa = CAT_CAT_AA(ch_prepped_aa_fastas) + ch_versions = ch_versions.mix(CAT_CAT_AA.out.versions.first()) + } + + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + DATABASE BUILDING + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + + // Module: Run CENTRIFUGE/BUILD + + if (params.build_centrifuge) { + CENTRIFUGE_BUILD(CAT_CAT_DNA.out.file_out, ch_nucl2taxid, ch_taxonomy_nodesdmp, ch_taxonomy_namesdmp, []) + ch_versions = ch_versions.mix(CENTRIFUGE_BUILD.out.versions.first()) + ch_centrifuge_output = CENTRIFUGE_BUILD.out.cf + } + else { + ch_centrifuge_output = Channel.empty() + } + + // MODULE: Run DIAMOND/MAKEDB + + if (params.build_diamond) { + DIAMOND_MAKEDB(CAT_CAT_AA.out.file_out, ch_prot2taxid, ch_taxonomy_nodesdmp, ch_taxonomy_namesdmp) + ch_versions = ch_versions.mix(DIAMOND_MAKEDB.out.versions.first()) + ch_diamond_output = DIAMOND_MAKEDB.out.db + } + else { + ch_diamond_output = Channel.empty() + } + + // MODULE: Run KAIJU/MKFMI + + if (params.build_kaiju) { + KAIJU_MKFMI(CAT_CAT_AA.out.file_out) + ch_versions = ch_versions.mix(KAIJU_MKFMI.out.versions.first()) + ch_kaiju_output = KAIJU_MKFMI.out.fmi + } + else { + ch_kaiju_output = Channel.empty() + } + + // SUBWORKFLOW: Kraken2 and Bracken + // Bracken requires intermediate files, if build_bracken=true then 
kraken2_keepintermediate=true, otherwise an error will be raised + // Condition is inverted because subworkflow asks if you want to 'clean' (true) or not, but pipeline says to 'keep' + if (params.build_kraken2 || params.build_bracken) { + def k2_keepintermediates = params.kraken2_keepintermediate || params.build_bracken ? false : true + FASTA_BUILD_ADD_KRAKEN2_BRACKEN(CAT_CAT_DNA.out.file_out, ch_taxonomy_namesdmp, ch_taxonomy_nodesdmp, ch_accession2taxid, k2_keepintermediates, params.build_bracken) + ch_versions = ch_versions.mix(FASTA_BUILD_ADD_KRAKEN2_BRACKEN.out.versions.first()) + ch_kraken2_bracken_output = FASTA_BUILD_ADD_KRAKEN2_BRACKEN.out.db + } + else { + ch_kraken2_bracken_output = Channel.empty() + } + + // SUBWORKFLOW: Run KRAKENUNIQ/BUILD + if (params.build_krakenuniq) { + + ch_taxdmpfiles_for_krakenuniq = Channel.of(ch_taxonomy_namesdmp).combine(Channel.of(ch_taxonomy_nodesdmp)).map { [it] } + ch_input_for_krakenuniq = ch_prepped_dna_fastas.combine(ch_taxdmpfiles_for_krakenuniq).map { meta, reads, taxdump -> [meta, reads, taxdump, ch_nucl2taxid] }.dump(tag: 'input_to_ku') + + KRAKENUNIQ_BUILD(ch_input_for_krakenuniq) + ch_versions = ch_versions.mix(KRAKENUNIQ_BUILD.out.versions.first()) + ch_krakenuniq_output = KRAKENUNIQ_BUILD.out.db + } + else { + ch_krakenuniq_output = Channel.empty() + } + + // Module: Run MALT/BUILD + + if (params.build_malt) { + + // The map DB file comes zipped (for some reason) from MEGAN6 website + if (file(params.malt_mapdb).extension == 'zip') { + ch_malt_mapdb = UNZIP([[], params.malt_mapdb]).unzipped_archive.map { meta, file -> [file] } + } + else { + ch_malt_mapdb = file(params.malt_mapdb) + } + + if (params.malt_sequencetype == 'Protein') { + ch_input_for_malt = ch_prepped_aa_fastas.map { meta, file -> file } + } + else { + ch_input_for_malt = ch_prepped_dna_fastas.map { meta, file -> file } + } + + MALT_BUILD(ch_input_for_malt, [], ch_malt_mapdb) + ch_versions = ch_versions.mix(MALT_BUILD.out.versions.first()) + 
ch_malt_output = MALT_BUILD.out.index + } + else { + ch_malt_output = Channel.empty() + } // // Collate and save software versions @@ -39,36 +204,43 @@ workflow CREATETAXDB { softwareVersionsToYAML(ch_versions) .collectFile( storeDir: "${params.outdir}/pipeline_info", - name: 'nf_core_' + 'pipeline_software_' + 'mqc_' + 'versions.yml', + name: 'nf_core_' + 'pipeline_software_' + 'mqc_' + 'versions.yml', sort: true, newLine: true - ).set { ch_collated_versions } + ) + .set { ch_collated_versions } // // MODULE: MultiQC // - ch_multiqc_config = Channel.fromPath( - "$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? - Channel.fromPath(params.multiqc_config, checkIfExists: true) : - Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? - Channel.fromPath(params.multiqc_logo, checkIfExists: true) : - Channel.empty() - - - summary_params = paramsSummaryMap( - workflow, parameters_schema: "nextflow_schema.json") + ch_multiqc_config = Channel.fromPath( + "${projectDir}/assets/multiqc_config.yml", + checkIfExists: true + ) + ch_multiqc_custom_config = params.multiqc_config + ? Channel.fromPath(params.multiqc_config, checkIfExists: true) + : Channel.empty() + ch_multiqc_logo = params.multiqc_logo + ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) + : Channel.fromPath("${workflow.projectDir}/docs/images/nf-core-createtaxdb_logo_light_tax.png", checkIfExists: true) + + + summary_params = paramsSummaryMap( + workflow, + parameters_schema: "nextflow_schema.json" + ) ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) ch_multiqc_files = ch_multiqc_files.mix( - ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
- file(params.multiqc_methods_description, checkIfExists: true) : - file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value( - methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') + ) + + ch_multiqc_custom_methods_description = params.multiqc_methods_description + ? file(params.multiqc_methods_description, checkIfExists: true) + : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description) + ) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) ch_multiqc_files = ch_multiqc_files.mix( @@ -78,7 +250,7 @@ workflow CREATETAXDB { ) ) - MULTIQC ( + MULTIQC( ch_multiqc_files.collect(), ch_multiqc_config.toList(), ch_multiqc_custom_config.toList(), @@ -86,14 +258,16 @@ workflow CREATETAXDB { [], [] ) + multiqc_report = MULTIQC.out.report.toList() - emit:multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [ path(versions.yml) ] - + emit: + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + centrifuge_database = ch_centrifuge_output + diamond_database = ch_diamond_output + kaiju_database = ch_kaiju_output + kraken2_bracken_database = ch_kraken2_bracken_output + krakenuniq_database = ch_krakenuniq_output + malt_database = ch_malt_output } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/
    Process Name \\", + " \\ Software Version
    CUSTOM_DUMPSOFTWAREVERSIONSpython3.11.7
    yaml5.4.1
    TOOL1tool10.11.9
    TOOL2tool21.9
    WorkflowNextflow
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls