From a195ec92c2e98b96d7e57239e4cb05066cee1c87 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 5 Dec 2023 21:49:42 +0100 Subject: [PATCH 01/20] Add first two modules (diamond and kaiju), missing docs and tests --- assets/schema_input.json | 52 ++++-- assets/test.csv | 3 + conf/modules.config | 12 -- docs/output.md | 20 +-- modules.json | 20 +++ modules/nf-core/cat/cat/environment.yml | 7 + modules/nf-core/cat/cat/main.nf | 62 +++++++ modules/nf-core/cat/cat/meta.yml | 36 +++++ modules/nf-core/cat/cat/tests/main.nf.test | 153 ++++++++++++++++++ .../nf-core/cat/cat/tests/main.nf.test.snap | 121 ++++++++++++++ .../cat/tests/nextflow_unzipped_zipped.config | 6 + .../cat/tests/nextflow_zipped_unzipped.config | 8 + modules/nf-core/cat/cat/tests/tags.yml | 2 + .../nf-core/diamond/makedb/environment.yml | 7 + modules/nf-core/diamond/makedb/main.nf | 65 ++++++++ modules/nf-core/diamond/makedb/meta.yml | 57 +++++++ .../nf-core/diamond/makedb/tests/main.nf.test | 89 ++++++++++ .../diamond/makedb/tests/main.nf.test.snap | 41 +++++ modules/nf-core/diamond/makedb/tests/tags.yml | 2 + modules/nf-core/kaiju/mkfmi/main.nf | 36 +++++ modules/nf-core/kaiju/mkfmi/meta.yml | 44 +++++ modules/nf-core/malt/build/environment.yml | 7 + modules/nf-core/malt/build/main.nf | 43 +++++ modules/nf-core/malt/build/meta.yml | 49 ++++++ nextflow.config | 17 +- nextflow_schema.json | 38 ++++- workflows/createtaxdb.nf | 73 ++++++--- 27 files changed, 1008 insertions(+), 62 deletions(-) create mode 100644 assets/test.csv create mode 100644 modules/nf-core/cat/cat/environment.yml create mode 100644 modules/nf-core/cat/cat/main.nf create mode 100644 modules/nf-core/cat/cat/meta.yml create mode 100644 modules/nf-core/cat/cat/tests/main.nf.test create mode 100644 modules/nf-core/cat/cat/tests/main.nf.test.snap create mode 100644 modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config create mode 100644 modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config create mode 100644 
modules/nf-core/cat/cat/tests/tags.yml create mode 100644 modules/nf-core/diamond/makedb/environment.yml create mode 100644 modules/nf-core/diamond/makedb/main.nf create mode 100644 modules/nf-core/diamond/makedb/meta.yml create mode 100644 modules/nf-core/diamond/makedb/tests/main.nf.test create mode 100644 modules/nf-core/diamond/makedb/tests/main.nf.test.snap create mode 100644 modules/nf-core/diamond/makedb/tests/tags.yml create mode 100644 modules/nf-core/kaiju/mkfmi/main.nf create mode 100644 modules/nf-core/kaiju/mkfmi/meta.yml create mode 100644 modules/nf-core/malt/build/environment.yml create mode 100644 modules/nf-core/malt/build/main.nf create mode 100644 modules/nf-core/malt/build/meta.yml diff --git a/assets/schema_input.json b/assets/schema_input.json index cec9d78..caa0ff8 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -7,30 +7,60 @@ "items": { "type": "object", "properties": { - "sample": { + "id": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "unique": true, + "errorMessage": "Sequence reference name must be provided and cannot contain spaces", + "meta": ["id"], + "anyOf": [ + { + "dependentRequired": ["fasta_dna"] + }, + { + "dependentRequired": ["fasta_aa"] + } + ] }, - "fastq_1": { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "taxid": { + "type": "integer", + "unique": true, + "errorMessage": "Please provide a valid taxonomic ID in integer format", + "meta": ["taxid"] }, - "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", + "fasta_dna": { "anyOf": [ { "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" + "pattern": "^\\S+\\.(fasta|fas|fa|fna)(\\.gz)?$" }, { "type": "string", "maxLength": 0 } - ] + ], + "unique": true, + 
"errorMessage": "FASTA file for nucleotide sequence cannot contain spaces and must have a valid FASTA extension (fasta, fas, fa, fna), optionally gzipped", + "exists": true, + "format": "file-path" + }, + "fasta_aa": { + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.(fasta|fas|fa|faa)(\\.gz)?$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "unique": true, + "errorMessage": "FASTA file for amino acid reference sequence cannot contain spaces and must have a valid FASTA extension (fasta, fas, fa, faa), optionally gzipped", + "exists": true, + "format": "file-path" } }, - "required": ["sample", "fastq_1"] + "required": ["id", "taxid"] } } diff --git a/assets/test.csv b/assets/test.csv new file mode 100644 index 0000000..b7af347 --- /dev/null +++ b/assets/test.csv @@ -0,0 +1,3 @@ +id,taxid,fasta_dna,fasta_aa +Severe_acute_respiratory_syndrome_coronavirus_2,2697049,/home/james/Downloads/createtaxdb/sarscov2.fasta,/home/james/Downloads/createtaxdb/sarscov2.faa +Haemophilus_influenzae,727,/home/james/Downloads/createtaxdb/haemophilus_infuenzae.fna.gz, diff --git a/conf/modules.config b/conf/modules.config index 39e8138..4fa7d76 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,18 +18,6 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: SAMPLESHEET_CHECK { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - - withName: FASTQC { - ext.args = '--quiet' - } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, diff --git a/docs/output.md b/docs/output.md index ccf3960..4d5a517 100644 --- a/docs/output.md +++ b/docs/output.md @@ -12,32 +12,22 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -- [FastQC](#fastqc) - Raw read QC - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution -### FastQC +### Kaiju
Output files -- `fastqc/` - - `*_fastqc.html`: FastQC report containing quality metrics. - - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. +- `kaiju/` + - `.fmi`: Kaiju FMI file
-[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). +[Kaiju](https://bioinformatics-centre.github.io/kaiju/) is a fast and sensitive taxonomic classifier for metagenomics utilising nucleotide-to-protein translations. -![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png) - -![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png) - -![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png) - -:::note -The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. -::: +The `fmi` file can be given to kaiju itself with `kaiju -f ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def file_list = files_in.collect { it.toString() } + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + """ + touch $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml new file mode 100644 index 0000000..00a8db0 --- /dev/null +++ b/modules/nf-core/cat/cat/meta.yml @@ -0,0 +1,36 @@ +name: cat_cat +description: A module for concatenation of gzipped or uncompressed files +keywords: + - concatenate + - gzip + - cat +tools: + - cat: + description: Just concatenation + documentation: https://man7.org/linux/man-pages/man1/cat.1.html + licence: 
["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - file_out: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" +authors: + - "@erikrikarddaniel" + - "@FriederikeHanssen" +maintainers: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test new file mode 100644 index 0000000..5766daa --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -0,0 +1,153 @@ +nextflow_process { + + name "Test Process CAT_CAT" + script "../main.nf" + process "CAT_CAT" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/cat" + + test("test_cat_unzipped_unzipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + + test("test_cat_zipped_zipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert 
snapshot(lines[0..5]).match("test_cat_zipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_zipped_zipped_size")} + ) + } + } + + test("test_cat_zipped_unzipped") { + config './nextflow_zipped_unzipped.config' + + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_cat_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_unzipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_unzipped_zipped_size")} + ) + } + } + + test("test_cat_one_file_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_one_file_unzipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_one_file_unzipped_zipped_size")} + ) + } + } +} + diff --git 
a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap new file mode 100644 index 0000000..423571b --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -0,0 +1,121 @@ +{ + "test_cat_unzipped_zipped_size": { + "content": [ + 375 + ], + "timestamp": "2023-10-16T14:33:08.049445686" + }, + "test_cat_unzipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2023-10-16T14:32:18.500464399" + }, + "test_cat_zipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2023-10-16T14:32:49.642741302" + }, + "test_cat_zipped_zipped_lines": { + "content": [ + [ + "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", + "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1", + "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by 
-1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", + "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" + ] + ], + "timestamp": "2023-10-16T14:32:33.629048645" + }, + "test_cat_unzipped_zipped_lines": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ] + ], + "timestamp": "2023-10-16T14:33:08.038830506" + }, + "test_cat_one_file_unzipped_zipped_lines": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ] + ], + "timestamp": "2023-10-16T14:33:21.39642399" + }, + "test_cat_zipped_zipped_size": { + "content": [ + 78 + ], + "timestamp": "2023-10-16T14:32:33.641869244" + }, + "test_cat_one_file_unzipped_zipped_size": { + "content": [ + 374 + ], + "timestamp": "2023-10-16T14:33:21.4094373" + } +} \ No 
newline at end of file diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config new file mode 100644 index 0000000..ec26b0f --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config @@ -0,0 +1,6 @@ + +process { + withName: CAT_CAT { + ext.prefix = 'cat.txt.gz' + } +} diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config new file mode 100644 index 0000000..fbc7978 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config @@ -0,0 +1,8 @@ + +process { + + withName: CAT_CAT { + ext.prefix = 'cat.txt' + } + +} diff --git a/modules/nf-core/cat/cat/tests/tags.yml b/modules/nf-core/cat/cat/tests/tags.yml new file mode 100644 index 0000000..37b578f --- /dev/null +++ b/modules/nf-core/cat/cat/tests/tags.yml @@ -0,0 +1,2 @@ +cat/cat: + - modules/nf-core/cat/cat/** diff --git a/modules/nf-core/diamond/makedb/environment.yml b/modules/nf-core/diamond/makedb/environment.yml new file mode 100644 index 0000000..0196bd6 --- /dev/null +++ b/modules/nf-core/diamond/makedb/environment.yml @@ -0,0 +1,7 @@ +name: diamond_makedb +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::diamond=2.1.8 diff --git a/modules/nf-core/diamond/makedb/main.nf b/modules/nf-core/diamond/makedb/main.nf new file mode 100644 index 0000000..94011cf --- /dev/null +++ b/modules/nf-core/diamond/makedb/main.nf @@ -0,0 +1,65 @@ +process DIAMOND_MAKEDB { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/diamond:2.1.8--h43eeafb_0' : + 'biocontainers/diamond:2.1.8--h43eeafb_0' }" + + input: + tuple val(meta), path(fasta) + path taxonmap + path taxonnodes + path taxonnames + + output: + tuple val(meta), path("*.dmnd"), emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def is_compressed = fasta.getExtension() == "gz" ? true : false + def fasta_name = is_compressed ? fasta.getBaseName() : fasta + def insert_taxonmap = taxonmap ? "--taxonmap $taxonmap" : "" + def insert_taxonnodes = taxonnodes ? "--taxonnodes $taxonnodes" : "" + def insert_taxonnames = taxonnames ? "--taxonnames $taxonnames" : "" + + """ + if [ "${is_compressed}" == "true" ]; then + gzip -c -d ${fasta} > ${fasta_name} + fi + + diamond \\ + makedb \\ + --threads ${task.cpus} \\ + --in ${fasta_name} \\ + -d ${prefix} \\ + ${args} \\ + ${insert_taxonmap} \\ + ${insert_taxonnodes} \\ + ${insert_taxonnames} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + diamond: \$(diamond --version 2>&1 | tail -n 1 | sed 's/^diamond version //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.dmnd + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + diamond: \$(diamond --version 2>&1 | tail -n 1 | sed 's/^diamond version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/diamond/makedb/meta.yml b/modules/nf-core/diamond/makedb/meta.yml new file mode 100644 index 0000000..fdcf032 --- /dev/null +++ b/modules/nf-core/diamond/makedb/meta.yml @@ -0,0 +1,57 @@ +name: diamond_makedb +description: Builds a DIAMOND database +keywords: + - fasta + - diamond + - index + - database +tools: + - diamond: + description: Accelerated BLAST compatible local sequence aligner + homepage: https://github.com/bbuchfink/diamond + 
documentation: https://github.com/bbuchfink/diamond/wiki + tool_dev_url: https://github.com/bbuchfink/diamond + doi: "10.1038/s41592-021-01101-x" + licence: ["GPL v3.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input fasta file + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + - taxonmap: + type: file + description: Optional mapping file of NCBI protein accession numbers to taxon ids (gzip compressed), required for taxonomy functionality. + pattern: "*.gz" + - taxonnodes: + type: file + description: Optional NCBI taxonomy nodes.dmp file, required for taxonomy functionality. + pattern: "*.dmp" + - taxonnames: + type: file + description: Optional NCBI taxonomy names.dmp file, required for taxonomy functionality. + pattern: "*.dmp" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - db: + type: file + description: File of the indexed DIAMOND database + pattern: "*.dmnd" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@spficklin" +maintainers: + - "@spficklin" + - "@vagkaratzas" + - "@jfy133" diff --git a/modules/nf-core/diamond/makedb/tests/main.nf.test b/modules/nf-core/diamond/makedb/tests/main.nf.test new file mode 100644 index 0000000..e9f7df2 --- /dev/null +++ b/modules/nf-core/diamond/makedb/tests/main.nf.test @@ -0,0 +1,89 @@ +nextflow_process { + + name "Test Process DIAMOND_MAKEDB" + script "../main.nf" + process "DIAMOND_MAKEDB" + tag "modules" + tag "modules_nfcore" + tag "diamond" + tag "diamond/makedb" + + test("Should build a DIAMOND db file from a fasta file without taxonomic information") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] ] + input[1] = 
[] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.db).match("db") }, + { assert process.out.versions } + ) + } + + } + + test("Should build a DIAMOND db file from a zipped fasta file without taxonomic information") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], [ file(params.test_data['sarscov2']['genome']['proteome_fasta_gz'], checkIfExists: true) ] ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.db).match("gz_db") }, + { assert process.out.versions } + ) + } + + } + + test("Should build a DIAMOND db file from a fasta file with taxonomic information") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] ] + input[1] = [ file(params.test_data['sarscov2']['metagenome']['prot_accession2taxid_gz'], checkIfExists: true) ] + input[2] = [ file(params.test_data['sarscov2']['metagenome']['prot_nodes_dmp'], checkIfExists: true) ] + input[3] = [ file(params.test_data['sarscov2']['metagenome']['prot_names_dmp'], checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.db).match("db_tax") }, + { assert process.out.versions } + ) + } + + } + +} diff --git a/modules/nf-core/diamond/makedb/tests/main.nf.test.snap b/modules/nf-core/diamond/makedb/tests/main.nf.test.snap new file mode 100644 index 0000000..a8fe065 --- /dev/null +++ b/modules/nf-core/diamond/makedb/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "db_tax": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.dmnd:md5,9d57aa88cd1766adfda8360876fc0e4f" + ] + ] + ], + "timestamp": "2023-12-05T14:28:48.616684409" + }, + "db": { + "content": [ + [ + [ + { + "id": "test" + }, + 
"test.dmnd:md5,6039420745dd4db6e761244498460ae1" + ] + ] + ], + "timestamp": "2023-12-05T14:22:02.696726038" + }, + "gz_db": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.dmnd:md5,6039420745dd4db6e761244498460ae1" + ] + ] + ], + "timestamp": "2023-12-05T14:22:07.430248018" + } +} \ No newline at end of file diff --git a/modules/nf-core/diamond/makedb/tests/tags.yml b/modules/nf-core/diamond/makedb/tests/tags.yml new file mode 100644 index 0000000..6fc7762 --- /dev/null +++ b/modules/nf-core/diamond/makedb/tests/tags.yml @@ -0,0 +1,2 @@ +diamond/makedb: + - modules/nf-core/diamond/makedb/** diff --git a/modules/nf-core/kaiju/mkfmi/main.nf b/modules/nf-core/kaiju/mkfmi/main.nf new file mode 100644 index 0000000..bd9bad6 --- /dev/null +++ b/modules/nf-core/kaiju/mkfmi/main.nf @@ -0,0 +1,36 @@ +process KAIJU_MKFMI { + tag "$meta.id" + label 'process_high' + + conda "bioconda::kaiju=1.9.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/kaiju:1.9.2--h5b5514e_0': + 'biocontainers/kaiju:1.9.2--h5b5514e_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.fmi"), emit: fmi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + kaiju-mkbwt \\ + $args \\ + -n $task.cpus \\ + -o ${prefix} \\ + ${fasta} + kaiju-mkfmi ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' )) + END_VERSIONS + """ +} diff --git a/modules/nf-core/kaiju/mkfmi/meta.yml b/modules/nf-core/kaiju/mkfmi/meta.yml new file mode 100644 index 0000000..fe06f35 --- /dev/null +++ b/modules/nf-core/kaiju/mkfmi/meta.yml @@ -0,0 +1,44 @@ +name: "kaiju_mkfmi" +description: Make Kaiju FMI-index file from a protein FASTA file +keywords: + - classify + - metagenomics + - fastq + - taxonomic profiling + - database + - index +tools: + - "kaiju": + description: "Fast and sensitive taxonomic classification for metagenomics" + homepage: "https://bioinformatics-centre.github.io/kaiju/" + documentation: "https://github.com/bioinformatics-centre/kaiju/blob/master/README.md" + tool_dev_url: "https://github.com/bioinformatics-centre/kaiju" + doi: "10.1038/ncomms11257" + licence: ["GNU GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - fasta: + type: file + description: Uncompressed Protein FASTA file (mandatory) + pattern: "*.{fa,faa,fasta}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fmi: + type: file + description: Kaiju FM-index file + pattern: "*.{fmi}" + +authors: + - "@alxndrdiaz" diff --git a/modules/nf-core/malt/build/environment.yml b/modules/nf-core/malt/build/environment.yml new file mode 100644 index 0000000..a3b37a6 --- /dev/null +++ b/modules/nf-core/malt/build/environment.yml @@ -0,0 +1,7 @@ +name: malt_build +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::malt=0.61 diff --git a/modules/nf-core/malt/build/main.nf b/modules/nf-core/malt/build/main.nf new file mode 100644 index 0000000..f55aeee --- /dev/null +++ b/modules/nf-core/malt/build/main.nf @@ -0,0 +1,43 @@ +process MALT_BUILD { + + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/malt:0.61--hdfd78af_0' : + 'biocontainers/malt:0.61--hdfd78af_0' }" + + input: + path fastas + path gff + path mapping_db + + output: + path "malt_index/" , emit: index + path "versions.yml" , emit: versions + path "malt-build.log", emit: log + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def igff = gff ? 
"-igff ${gff}" : "" + + """ + malt-build \\ + -v \\ + --input ${fastas.join(' ')} \\ + $igff \\ + -d 'malt_index/' \\ + -t $task.cpus \\ + $args \\ + -mdb ${mapping_db}/*.db |&tee malt-build.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + malt: \$(malt-build --help |& tail -n 3 | head -n 1 | cut -f 2 -d'(' | cut -f 1 -d ',' | cut -d ' ' -f 2) + END_VERSIONS + """ +} diff --git a/modules/nf-core/malt/build/meta.yml b/modules/nf-core/malt/build/meta.yml new file mode 100644 index 0000000..b95fc52 --- /dev/null +++ b/modules/nf-core/malt/build/meta.yml @@ -0,0 +1,49 @@ +name: malt_build +description: MALT, an acronym for MEGAN alignment tool, is a sequence alignment and analysis tool designed for processing high-throughput sequencing data, especially in the context of metagenomics. +keywords: + - malt + - alignment + - metagenomics + - ancient DNA + - aDNA + - palaeogenomics + - archaeogenomics + - microbiome + - database +tools: + - malt: + description: A tool for mapping metagenomic data + homepage: https://www.wsi.uni-tuebingen.de/lehrstuehle/algorithms-in-bioinformatics/software/malt/ + documentation: https://software-ab.cs.uni-tuebingen.de/download/malt/manual.pdf + doi: "10.1038/s41559-017-0446-6" + licence: ["GPL v3"] +input: + - fastas: + type: file + description: Directory of, or list of FASTA reference files for indexing + pattern: "*/|*.fasta" + - gff: + type: file + description: Directory of, or GFF3 files of input FASTA files + pattern: "*/|*.gff|*.gff3" + - mapping_db: + type: file + description: MEGAN .db file from https://software-ab.cs.uni-tuebingen.de/download/megan6/welcome.html + pattern: "*.db" +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - index: + type: directory + description: Directory containing MALT database index directory + pattern: "malt_index/" + - log: + type: file + description: Log file from STD out of malt-build + pattern: "malt-build.log" +authors: 
+ - "@jfy133" +maintainers: + - "@jfy133" diff --git a/nextflow.config b/nextflow.config index 7d3737e..0a11985 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,7 +16,7 @@ params { genome = null igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false - + // MultiQC options multiqc_config = null @@ -43,7 +43,7 @@ params { custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" config_profile_contact = null config_profile_url = null - + // Max resource options // Defaults only, expecting to be overwritten @@ -58,6 +58,19 @@ params { validationShowHiddenParams = false validate_params = true + // General parameters + dbname = "database" + + prot2taxid = null + nuc2taxid = null + nodesdmp = null + namesdmp = null + + // tool specific options + build_kaiju = false + + build_diamond = false + } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index cea6bc0..4fdc189 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -20,7 +20,8 @@ "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/createtaxdb/usage#samplesheet-input).", - "fa_icon": "fas fa-file-csv" + "fa_icon": "fas fa-file-csv", + "schema": "assets/schema_input.json" }, "outdir": { "type": "string", @@ -266,6 +267,35 @@ "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." 
} } + }, + "new_group_1": { + "title": "New Group 1", + "type": "object", + "description": "", + "default": "", + "properties": { + "dbname": { + "type": "string", + "default": "database", + "description": "Specify name that resulting databases will be prefixed with.", + "fa_icon": "fas fa-id-badge" + } + }, + "fa_icon": "fas fa-globe" + }, + "new_group_2": { + "title": "New Group 2", + "type": "object", + "description": "", + "default": "", + "properties": { + "build_kaiju": { + "type": "boolean", + "description": "Turn on building of KAIJU database. Requires protein files.", + "fa_icon": "fas fa-toggle-on" + } + }, + "fa_icon": "fas fa-hard-hat" } }, "allOf": [ @@ -283,6 +313,12 @@ }, { "$ref": "#/definitions/generic_options" + }, + { + "$ref": "#/definitions/new_group_1" + }, + { + "$ref": "#/definitions/new_group_2" } ] } diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index ff56f63..34690aa 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -4,7 +4,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation' def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) def citation = '\n' + WorkflowMain.citation(workflow) + '\n' @@ -15,6 +15,19 @@ log.info logo + paramsSummaryLog(workflow) + citation WorkflowCreatetaxdb.initialise(params, log) +// Validate input files parameters (from Sarek) +def checkPathParamList = [ + params.prot2taxid, + params.nuc2taxid, + params.nodesdmp, + params.namesdmp, +] + +for (param in checkPathParamList) if (param) file(param, checkIfExists: true) + +// Validate parameter combinations +if ( params.build_diamond && ![params.prot2taxid, params.nodesdmp, params.namesdmp,].any() ) { error('[nf-core/createtaxdb] Supplied --build_diamond, but missing at least one of: --prot2taxid, --nodesdmp, 
or --namesdmp') } + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES @@ -32,11 +45,6 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// -include { INPUT_CHECK } from '../subworkflows/local/input_check' - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT NF-CORE MODULES/SUBWORKFLOWS @@ -46,10 +54,13 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' // // MODULE: Installed directly from nf-core/modules // -include { FASTQC } from '../modules/nf-core/fastqc/main' + include { MULTIQC } from '../modules/nf-core/multiqc/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { CAT_CAT as CAT_CAT_AA } from '../modules/nf-core/cat/cat/main' +include { KAIJU_MKFMI } from '../modules/nf-core/kaiju/mkfmi/main' +include { DIAMOND_MAKEDB } from '../modules/nf-core/diamond/makedb/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -64,23 +75,44 @@ workflow CREATETAXDB { ch_versions = Channel.empty() // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files + // INPUT: Read in samplesheet, validate and stage input files // - INPUT_CHECK ( - file(params.input) - ) - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") - // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ - // ! 
There is currently no tooling to help you write a sample sheet schema + ch_input = Channel.fromSamplesheet("input") + + // Prepare input for single file inputs modules + + // TODO: Need to have a modification step to get header correct to actually run with kaiju... + // docs: https://github.com/bioinformatics-centre/kaiju#custom-database + // docs: https://github.com/nf-core/test-datasets/tree/taxprofiler#kaiju + // idea: try just appending `_` to end of each sequence header using a local sed module... it might be sufficient + if ( [params.build_kaiju].any() ) { + + // Pull just AA sequences + ch_refs_for_singleref = ch_input.dump(tag: 'premap') + .map{meta, fasta_dna, fasta_aa -> [[id: params.dbname], fasta_aa]} + .filter{meta, fasta_aa -> + fasta_aa + } + .groupTuple().dump(tag: "cat_input") + + // Place in single file + ch_singleref_for_aa = CAT_CAT_AA ( ch_refs_for_singleref ) + ch_versions = ch_versions.mix(CAT_CAT_AA.out.versions.first()) + } // - // MODULE: Run FastQC + // MODULE: Run KAIJU/MKFMI // - FASTQC ( - INPUT_CHECK.out.reads - ) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + + if ( params.build_kaiju ) { + KAIJU_MKFMI ( CAT_CAT_AA.out.file_out ) + ch_versions = ch_versions.mix(KAIJU_MKFMI.out.versions.first()) + } + + if ( params.build_diamond ) { + DIAMOND_MAKEDB ( CAT_CAT_AA.out.file_out, params.prot2taxid, params.nodesdmp, params.namesdmp ) + ch_versions = ch_versions.mix(DIAMOND_MAKEDB.out.versions.first()) + } CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') @@ -99,7 +131,6 @@ workflow CREATETAXDB { ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = 
ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) MULTIQC ( ch_multiqc_files.collect(), From 4c4de257cd480c3160c68764ff79c08c71f11a8a Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 5 Dec 2023 22:03:06 +0100 Subject: [PATCH 02/20] Add DIAMOND output description and update CITATIONS.md --- CITATIONS.md | 8 ++++++++ docs/output.md | 20 +++++++++++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/CITATIONS.md b/CITATIONS.md index 4dfe4c1..4f201cb 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -39,3 +39,11 @@ - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. + +- [DIAMOND](https://doi.org/10.1038/nmeth.3176) + + > Buchfink, B., Xie, C., & Huson, D. H. (2015). Fast and sensitive protein alignment using DIAMOND. Nature Methods, 12(1), 59–60. https://doi.org/10.1038/nmeth.3176 + +- [Kaiju](https://doi.org/10.1038/ncomms11257) + +> Menzel, P., Ng, K. L., & Krogh, A. (2016). Fast and sensitive taxonomic classification for metagenomics with Kaiju. Nature Communications, 7, 11257. https://doi.org/10.1038/ncomms11257 diff --git a/docs/output.md b/docs/output.md index 4d5a517..b817d92 100644 --- a/docs/output.md +++ b/docs/output.md @@ -20,14 +20,28 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
Output files +- `diamond/` + - `.dmnd`: DIAMOND dmnd database file + +
+ +[DIAMOND](https://github.com/bbuchfink/diamond) is an accelerated, BLAST-compatible local sequence aligner particularly used for protein alignment. + +The `dmnd` file can be given to one of the DIAMOND alignment commands with `diamond blast -d .dmnd` etc. + +### Kaiju + +
+Output files + - `kaiju/` - - `.fmi`: Kaiju FMI file + - `.fmi`: Kaiju FMI index file
-[Kaiju](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) is a fast and sensitive taxonomic classification for metagenomics utilising nucletoide to protein translations. +[Kaiju](https://bioinformatics-centre.github.io/kaiju/) is a fast and sensitive taxonomic classification for metagenomics utilising nucletoide to protein translations. -The `fmi` file can be given to kaiju itself with `kaiju -f .fmi` etc. ### MultiQC From 7e77a295a7d452e3c4e4f0a25fefa77cbb810cdc Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 14 Dec 2023 11:01:17 +0100 Subject: [PATCH 03/20] Fix validation to require at least either DNA or AA Fasta --- assets/schema_input.json | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index caa0ff8..8a9c010 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -12,15 +12,7 @@ "pattern": "^\\S+$", "unique": true, "errorMessage": "Sequence reference name must be provided and cannot contain spaces", - "meta": ["id"], - "anyOf": [ - { - "dependentRequired": ["fasta_dna"] - }, - { - "dependentRequired": ["fasta_aa"] - } - ] + "meta": ["id"] }, "taxid": { "type": "integer", @@ -61,6 +53,14 @@ "format": "file-path" } }, - "required": ["id", "taxid"] + "required": ["id", "taxid"], + "anyOf": [ + { + "required": ["fasta_dna"] + }, + { + "required": ["fasta_aa"] + } + ] } } From c060918909209bdcb74726483c5e48c217c448d5 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 14 Dec 2023 11:26:17 +0100 Subject: [PATCH 04/20] Add test config --- conf/test.config | 13 ++++++++----- lib/WorkflowCreatetaxdb.groovy | 6 +++--- workflows/createtaxdb.nf | 10 +++++++--- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/conf/test.config b/conf/test.config index b75cfc8..02c5cb4 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,10 +20,13 @@ params { max_time = '6.h' // Input data - // TODO nf-core: Specify the paths to 
your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' - // Genome references - genome = 'R64-1-1' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/samplesheets/test.csv' + + build_kaiju = true + build_diamond = true + + prot2taxid = 'https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/taxonomy/prot.accession2taxid.gz' + nodesdmp = 'https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/taxonomy/prot_nodes.dmp' + namesdmp = 'https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/taxonomy/prot_names.dmp' } diff --git a/lib/WorkflowCreatetaxdb.groovy b/lib/WorkflowCreatetaxdb.groovy index 63827ee..988392e 100755 --- a/lib/WorkflowCreatetaxdb.groovy +++ b/lib/WorkflowCreatetaxdb.groovy @@ -15,9 +15,9 @@ class WorkflowCreatetaxdb { genomeExistsError(params, log) - if (!params.fasta) { - Nextflow.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." - } + // if (!params.fasta) { + // Nextflow.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." 
+ // } } // diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index 34690aa..74d5803 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -26,7 +26,7 @@ def checkPathParamList = [ for (param in checkPathParamList) if (param) file(param, checkIfExists: true) // Validate parameter combinations -if ( params.build_diamond && ![params.prot2taxid, params.nodesdmp, params.namesdmp,].any() ) { error('[nf-core/createtaxdb] Supplied --build_diamond, but missing at least one of: --prot2taxid, --nodesdmp, or --namesdmp') } +if ( params.build_diamond && [!params.prot2taxid, !params.nodesdmp, !params.namesdmp,].any() ) { error('[nf-core/createtaxdb] Supplied --build_diamond, but missing at least one of: --prot2taxid, --nodesdmp, or --namesdmp (all are mandatory for DIAMOND)') } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -88,12 +88,12 @@ workflow CREATETAXDB { if ( [params.build_kaiju].any() ) { // Pull just AA sequences - ch_refs_for_singleref = ch_input.dump(tag: 'premap') + ch_refs_for_singleref = ch_input .map{meta, fasta_dna, fasta_aa -> [[id: params.dbname], fasta_aa]} .filter{meta, fasta_aa -> fasta_aa } - .groupTuple().dump(tag: "cat_input") + .groupTuple() // Place in single file ch_singleref_for_aa = CAT_CAT_AA ( ch_refs_for_singleref ) @@ -109,6 +109,10 @@ workflow CREATETAXDB { ch_versions = ch_versions.mix(KAIJU_MKFMI.out.versions.first()) } + // TODO + // - Schema build + // - Test data + // - WorkflowCreatetaxdb thing if ( params.build_diamond ) { DIAMOND_MAKEDB ( CAT_CAT_AA.out.file_out, params.prot2taxid, params.nodesdmp, params.namesdmp ) ch_versions = ch_versions.mix(DIAMOND_MAKEDB.out.versions.first()) From 93f465136e8a504dd26af4ebe33ff1403dfd7b79 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 14 Dec 2023 12:14:19 +0100 Subject: [PATCH 05/20] Minor formatting --- nextflow.config | 1 - nextflow_schema.json | 95 ++++++++++++++++++++++++---------------- 
workflows/createtaxdb.nf | 5 +-- 3 files changed, 59 insertions(+), 42 deletions(-) diff --git a/nextflow.config b/nextflow.config index 0a11985..e6be530 100644 --- a/nextflow.config +++ b/nextflow.config @@ -68,7 +68,6 @@ params { // tool specific options build_kaiju = false - build_diamond = false } diff --git a/nextflow_schema.json b/nextflow_schema.json index 4fdc189..6114641 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -19,7 +19,7 @@ "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/createtaxdb/usage#samplesheet-input).", + "help_text": "You will need to create a design file with information about the reference genomes you wish to build into a metagenomic profiling database. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row. See [usage docs](https://nf-co.re/createtaxdb/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv", "schema": "assets/schema_input.json" }, @@ -40,9 +40,58 @@ "type": "string", "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", "fa_icon": "fas fa-file-signature" + }, + "dbname": { + "type": "string", + "default": "database", + "description": "Specify name that resulting databases will be prefixed with.", + "fa_icon": "fas fa-id-badge" + }, + "prot2taxid": { + "type": "string", + "fa_icon": "fas fa-address-card", + "description": "Path to (NCBI-style) protein accession2taxid file.", + "help_text": "A two column file tab-separated file with `accession.version` and `taxid`. 
The first refers to an accession ID in each FASTA entry header. The second refers to the taxonomy ID of the organism the sequence belongs to, as listed in `nodes.dmp`." + }, + "nuc2taxid": { + "type": "string", + "fa_icon": "far fa-address-card", + "description": "Path to (NCBI-style) nucleotide accession2taxid file.", + "help_text": "A two column file tab-separated file with `accession.version` and `taxid`. The first refers to an accession ID in each FASTA entry header. The second refers to the taxonomy ID of the organism the sequence belongs to, as listed in `nodes.dmp`." + }, + "nodesdmp": { + "type": "string", + "fa_icon": "fas fa-circle", + "description": "Path to NCBI-style taxonomy node dmp file.", + "help_text": "A tab/pipe/tab separated table file. See nodes.dmp section of [NCBI taxdump README](https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/taxdump_readme.txt) for column file structure." + }, + "namesdmp": { + "type": "string", + "fa_icon": "fas fa-tag", + "description": "Path to NCBI-style taxonomy names dmp file.", + "help_text": "A tab/pipe/tab separated table file. See names.dmp section of [NCBI taxdump README](https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/taxdump_readme.txt) for column file structure." } } }, + "database_building_options": { + "title": "Database Building Options", + "type": "object", + "description": "", + "default": "", + "properties": { + "build_diamond": { + "type": "boolean", + "fa_icon": "fas fa-toggle-on", + "description": "Turn on building of DIAMOND database. Requires amino-acid FASTA file input." + }, + "build_kaiju": { + "type": "boolean", + "description": "Turn on building of Kaiju database. 
Requires amino-acid FASTA file input.", + "fa_icon": "fas fa-toggle-on" + } + }, + "fa_icon": "fas fa-database" + }, "reference_genome_options": { "title": "Reference genome options", "type": "object", @@ -53,7 +102,8 @@ "type": "string", "description": "Name of iGenomes reference.", "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.", + "hidden": true }, "fasta": { "type": "string", @@ -63,7 +113,8 @@ "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", - "fa_icon": "far fa-file-code" + "fa_icon": "far fa-file-code", + "hidden": true }, "igenomes_ignore": { "type": "boolean", @@ -267,41 +318,15 @@ "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." 
} } - }, - "new_group_1": { - "title": "New Group 1", - "type": "object", - "description": "", - "default": "", - "properties": { - "dbname": { - "type": "string", - "default": "database", - "description": "Specify name that resulting databases will be prefixed with.", - "fa_icon": "fas fa-id-badge" - } - }, - "fa_icon": "fas fa-globe" - }, - "new_group_2": { - "title": "New Group 2", - "type": "object", - "description": "", - "default": "", - "properties": { - "build_kaiju": { - "type": "boolean", - "description": "Turn on building of KAIJU database. Requires protein files.", - "fa_icon": "fas fa-toggle-on" - } - }, - "fa_icon": "fas fa-hard-hat" } }, "allOf": [ { "$ref": "#/definitions/input_output_options" }, + { + "$ref": "#/definitions/database_building_options" + }, { "$ref": "#/definitions/reference_genome_options" }, @@ -313,12 +338,6 @@ }, { "$ref": "#/definitions/generic_options" - }, - { - "$ref": "#/definitions/new_group_1" - }, - { - "$ref": "#/definitions/new_group_2" } ] } diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index 74d5803..e806efb 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -82,6 +82,7 @@ workflow CREATETAXDB { // Prepare input for single file inputs modules // TODO: Need to have a modification step to get header correct to actually run with kaiju... + // TEST first! // docs: https://github.com/bioinformatics-centre/kaiju#custom-database // docs: https://github.com/nf-core/test-datasets/tree/taxprofiler#kaiju // idea: try just appending `_` to end of each sequence header using a local sed module... 
it might be sufficient @@ -110,9 +111,7 @@ workflow CREATETAXDB { } // TODO - // - Schema build - // - Test data - // - WorkflowCreatetaxdb thing + // - nf-test if ( params.build_diamond ) { DIAMOND_MAKEDB ( CAT_CAT_AA.out.file_out, params.prot2taxid, params.nodesdmp, params.namesdmp ) ch_versions = ch_versions.mix(DIAMOND_MAKEDB.out.versions.first()) From fc076b9c49908da010d22c2a0e237ece3ca9c185 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 14 Dec 2023 12:15:35 +0100 Subject: [PATCH 06/20] Start adding nf-test --- .gitignore | 2 ++ nf-test.config | 16 ++++++++++++++++ tests/nextflow.config | 5 +++++ 3 files changed, 23 insertions(+) create mode 100644 nf-test.config create mode 100644 tests/nextflow.config diff --git a/.gitignore b/.gitignore index 5124c9a..f704e54 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ results/ testing/ testing* *.pyc +.nf-test* +test.xml diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 0000000..cb656b9 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,16 @@ +config { + // location for all nf-tests + testsDir "." 
+ + // nf-test directory including temporary files for each test + workDir "/tmp" + + // location of library folder that is added automatically to the classpath + libDir "lib/" + + // location of an optional nextflow.config file specific for executing tests + configFile "nextflow.config" + + // run all test with the defined docker profile from the main nextflow.config + profile "" +} diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 0000000..c19b1ad --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,5 @@ +/* +======================================================================================== + Nextflow config file for running tests +======================================================================================== +*/ From 9e0cf906145cce106f89fe1ab79838505e01af86 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 14 Dec 2023 12:28:11 +0100 Subject: [PATCH 07/20] Start adding basic nf-test structrue --- nf-test.config | 2 +- tests/tags.yml | 2 ++ tests/test.nf.test | 29 +++++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 tests/tags.yml create mode 100644 tests/test.nf.test diff --git a/nf-test.config b/nf-test.config index cb656b9..e92f96b 100644 --- a/nf-test.config +++ b/nf-test.config @@ -1,6 +1,6 @@ config { // location for all nf-tests - testsDir "." 
+ testsDir "tests/" // nf-test directory including temporary files for each test workDir "/tmp" diff --git a/tests/tags.yml b/tests/tags.yml new file mode 100644 index 0000000..1e63ed9 --- /dev/null +++ b/tests/tags.yml @@ -0,0 +1,2 @@ +nfcore_createtaxdb: + - ./** diff --git a/tests/test.nf.test b/tests/test.nf.test new file mode 100644 index 0000000..0bec0d5 --- /dev/null +++ b/tests/test.nf.test @@ -0,0 +1,29 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_CREATETAXDB" + script "main.nf" + tag "pipeline" + tag "nfcore_createtaxdb" + tag "test" + + test("test_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + path("${params.outdir}/diamond/database.dmnd"), + path("${params.outdir}/kaiju/database.fmi"), + path("${params.outdir}/pipeline_info/software_versions.yml") + ).match() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() } + ) + } + } +} From 08f0c1ffe6f57fdbf59ed158b71a8f06bb604636 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 14 Dec 2023 12:32:14 +0100 Subject: [PATCH 08/20] Add pipeline-level CI testing with nf-test in GHA --- .github/workflows/ci.yml | 74 ++++++++++++++++++++++++++++++++-------- tests/test.nf.test.snap | 10 ++++++ 2 files changed, 69 insertions(+), 15 deletions(-) create mode 100644 tests/test.nf.test.snap diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 83ce954..c070b0b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,43 +1,87 @@ -name: nf-core CI # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +name: nf-core CI on: push: branches: - - dev + - "dev" pull_request: + branches: + - "dev" + - "master" release: - types: [published] + types: + - "published" env: NXF_ANSI_LOG: false + NFTEST_VER: "0.7.3" concurrency: - group: "${{ github.workflow }}-${{ github.event.pull_request.number || 
github.ref }}" + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true jobs: + define_nxf_versions: + name: Choose nextflow versions to test against depending on target branch + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.nxf_versions.outputs.matrix }} + steps: + - id: nxf_versions + run: | + if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.base_ref }}" == "dev" && "${{ matrix.NXF_VER }}" != "latest-everything" ]]; then + echo matrix='["latest-everything"]' | tee -a $GITHUB_OUTPUT + else + echo matrix='["latest-everything", "23.04.0"]' | tee -a $GITHUB_OUTPUT + fi + test: - name: Run pipeline with test data - # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/createtaxdb') }}" + name: nf-test + needs: define_nxf_versions runs-on: ubuntu-latest strategy: + fail-fast: false matrix: - NXF_VER: - - "23.04.0" - - "latest-everything" + NXF_VER: ${{ fromJson(needs.define_nxf_versions.outputs.matrix) }} + tags: + - "test" + profile: + - "docker" + steps: - name: Check out pipeline code uses: actions/checkout@v3 + - name: Check out test data + uses: actions/checkout@v3 + with: + repository: nf-core/test-datasets + ref: createtaxdb + path: test-datasets/ + fetch-depth: 1 + - name: Install Nextflow uses: nf-core/setup-nextflow@v1 with: version: "${{ matrix.NXF_VER }}" - - name: Run pipeline with test data - # TODO nf-core: You can customise CI pipeline run tests as required - # For example: adding multiple test runs with different parameters - # Remember that you can parallelise this by using strategy.matrix + - name: Install nf-test + run: | + wget -qO- https://code.askimed.com/install/nf-test | bash -s $NFTEST_VER + sudo mv nf-test /usr/local/bin/ + + - name: Run nf-test run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + nf-test test --tag ${{ 
matrix.tags }} --profile ${{ matrix.tags }},${{ matrix.profile }} --junitxml=test.xml + + - name: Output log on failure + if: failure() + run: | + sudo apt install bat > /dev/null + batcat --decorations=always --color=always ${{ github.workspace }}/.nf-test/tests/*/output/pipeline_info/software_versions.yml + + - name: Publish Test Report + uses: mikepenz/action-junit-report@v3 + if: always() # always run even if the previous step fails + with: + report_paths: "*.xml" diff --git a/tests/test.nf.test.snap b/tests/test.nf.test.snap new file mode 100644 index 0000000..d10489c --- /dev/null +++ b/tests/test.nf.test.snap @@ -0,0 +1,10 @@ +{ + "test_profile": { + "content": [ + "database.dmnd:md5,9d57aa88cd1766adfda8360876fc0e4f", + "database.fmi:md5,54fd89f5e4eab61af30175e8aa389598", + "software_versions.yml:md5,e807fbfb5969b28495024decc5a639a1" + ], + "timestamp": "2023-12-14T12:29:56.030615203" + } +} \ No newline at end of file From b5df7fad31d20b810c8fefd2571b66d3a0a0f1cd Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 14 Dec 2023 12:34:40 +0100 Subject: [PATCH 09/20] Update CHANGELOG --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7706b5c..233fe7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Initial release of nf-core/createtaxdb, created with the [nf-core](https://nf-co.re/) template. 
+Adds database building support for: + +- DIAMOND (added by @jfy133) +- Kaiju (added by @jfy133) + ### `Added` ### `Fixed` From 383346d92f130168ea7f50e20d2d4c0ed48ce7f9 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 14 Dec 2023 12:44:42 +0100 Subject: [PATCH 10/20] Ignore actions-ci due to version specificaiton change for nf-test --- .nf-core.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc8..e58b442 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,2 @@ repository_type: pipeline +actions_ci: False From 9678df6a9a1c27bbb3e062a28855b8a82a3a8804 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 14 Dec 2023 12:45:20 +0100 Subject: [PATCH 11/20] Fix nf-core linting --- .github/CONTRIBUTING.md | 3 +++ .github/PULL_REQUEST_TEMPLATE.md | 1 + .github/workflows/linting.yml | 14 +++++++------- lib/NfcoreTemplate.groovy | 32 ++++++++++++++++++-------------- 4 files changed, 29 insertions(+), 21 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 29abf21..75d81e7 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -27,6 +27,9 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to +receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir `. + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. 
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 2a1855d..2cfb529 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -19,6 +19,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/crea - [ ] If necessary, also make a PR on the nf-core/createtaxdb _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index b8bdd21..5079018 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,9 +14,9 @@ jobs: EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install editorconfig-checker run: npm install -g editorconfig-checker @@ -27,9 +27,9 @@ jobs: Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install Prettier run: npm install -g prettier @@ -40,7 +40,7 @@ jobs: PythonBlack: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Check code lints with Black uses: psf/black@stable @@ -71,14 +71,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 - uses: actions/setup-python@v4 with: - python-version: "3.11" + python-version: 
"3.12" architecture: "x64" - name: Install dependencies diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 01b8653..e248e4c 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -4,6 +4,7 @@ import org.yaml.snakeyaml.Yaml import groovy.json.JsonOutput +import nextflow.extension.FilesEx class NfcoreTemplate { @@ -141,12 +142,14 @@ class NfcoreTemplate { try { if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } [ 'sendmail', '-t' ].execute() << sendmail_html log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" } catch (all) { // Catch failures and try with plaintext def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) { mail_cmd += [ '-A', mqc_report ] } mail_cmd.execute() << email_html @@ -155,14 +158,16 @@ class NfcoreTemplate { } // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") + FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), 
"${params.outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() } // @@ -227,15 +232,14 @@ class NfcoreTemplate { // Dump pipeline parameters in a json file // public static void dump_parameters(workflow, params) { - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def output_pf = new File(output_d, "params_${timestamp}.json") + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") def jsonStr = JsonOutput.toJson(params) - output_pf.text = JsonOutput.prettyPrint(jsonStr) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() } // From 046e1daf74ae4e4a272bd2fb853a41321a4c6dca Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 14 Dec 2023 12:46:18 +0100 Subject: [PATCH 12/20] Ignore actions_ci lint --- .nf-core.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.nf-core.yml b/.nf-core.yml index e58b442..04e9f03 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,2 +1,4 @@ repository_type: pipeline -actions_ci: False +## TODO: re-activate once nf-test ci.yml structure updated +lint: + actions_ci: False From cc6c2d7010ee2f1e7e4e25eb1e8a02962f6af40a Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 14 Dec 2023 12:51:25 +0100 Subject: [PATCH 13/20] Bump minimum version --- .github/workflows/ci.yml | 2 +- lib/WorkflowCreatetaxdb.groovy | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c070b0b..807c35c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,7 +32,7 @@ jobs: if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.base_ref }}" == "dev" && "${{ matrix.NXF_VER }}" != "latest-everything" ]]; then echo 
matrix='["latest-everything"]' | tee -a $GITHUB_OUTPUT else - echo matrix='["latest-everything", "23.04.0"]' | tee -a $GITHUB_OUTPUT + echo matrix='["latest-everything", "23.10.0"]' | tee -a $GITHUB_OUTPUT fi test: diff --git a/lib/WorkflowCreatetaxdb.groovy b/lib/WorkflowCreatetaxdb.groovy index 988392e..5814e2a 100755 --- a/lib/WorkflowCreatetaxdb.groovy +++ b/lib/WorkflowCreatetaxdb.groovy @@ -58,7 +58,6 @@ class WorkflowCreatetaxdb { // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ "Tools used in the workflow included:", - "FastQC (Andrews 2010),", "MultiQC (Ewels et al. 2016)", "." ].join(' ').trim() @@ -72,7 +71,6 @@ class WorkflowCreatetaxdb { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def reference_text = [ - "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " ].join(' ').trim() From e8fb47bd64856093b8d1c715d392f58fe86f07e0 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 14 Dec 2023 12:57:10 +0100 Subject: [PATCH 14/20] Move software versions out to just exists because of NXF version changes (I think) --- tests/test.nf.test | 2 +- tests/test.nf.test.snap | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/test.nf.test b/tests/test.nf.test index 0bec0d5..cd9eac3 100644 --- a/tests/test.nf.test +++ b/tests/test.nf.test @@ -20,8 +20,8 @@ nextflow_pipeline { { assert snapshot( path("${params.outdir}/diamond/database.dmnd"), path("${params.outdir}/kaiju/database.fmi"), - path("${params.outdir}/pipeline_info/software_versions.yml") ).match() }, + { assert new File("$outputDir/pipeline_info/software_versions.yml").exists() }, { assert new File("$outputDir/multiqc/multiqc_report.html").exists() } ) } diff --git a/tests/test.nf.test.snap b/tests/test.nf.test.snap index d10489c..21c8776 100644 --- a/tests/test.nf.test.snap +++ b/tests/test.nf.test.snap @@ -2,9 +2,8 @@ "test_profile": { "content": [ "database.dmnd:md5,9d57aa88cd1766adfda8360876fc0e4f", - "database.fmi:md5,54fd89f5e4eab61af30175e8aa389598", - "software_versions.yml:md5,e807fbfb5969b28495024decc5a639a1" + "database.fmi:md5,54fd89f5e4eab61af30175e8aa389598" ], - "timestamp": "2023-12-14T12:29:56.030615203" + "timestamp": "2023-12-14T12:55:54.070245428" } } \ No newline at end of file From d2d0702fdc6ec84a03149c48791cd64d3c73860d Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 14 Dec 2023 12:02:53 +0000 Subject: [PATCH 15/20] Fix linting (back to tools 2.10 template) --- .github/CONTRIBUTING.md | 3 --- .github/PULL_REQUEST_TEMPLATE.md | 1 - .github/workflows/linting.yml | 14 +++++++------- lib/NfcoreTemplate.groovy | 32 ++++++++++++++------------------ 4 files changed, 21 insertions(+), 29 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 75d81e7..29abf21 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -27,9 +27,6 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests -You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to -receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir `. - When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 2cfb529..2a1855d 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -19,7 +19,6 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/crea - [ ] If necessary, also make a PR on the nf-core/createtaxdb _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). -- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. 
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 5079018..b8bdd21 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,9 +14,9 @@ jobs: EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v3 - - uses: actions/setup-node@v4 + - uses: actions/setup-node@v3 - name: Install editorconfig-checker run: npm install -g editorconfig-checker @@ -27,9 +27,9 @@ jobs: Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v3 - - uses: actions/setup-node@v4 + - uses: actions/setup-node@v3 - name: Install Prettier run: npm install -g prettier @@ -40,7 +40,7 @@ jobs: PythonBlack: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v3 - name: Check code lints with Black uses: psf/black@stable @@ -71,14 +71,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v4 + uses: actions/checkout@v3 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 - uses: actions/setup-python@v4 with: - python-version: "3.12" + python-version: "3.11" architecture: "x64" - name: Install dependencies diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index e248e4c..01b8653 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -4,7 +4,6 @@ import org.yaml.snakeyaml.Yaml import groovy.json.JsonOutput -import nextflow.extension.FilesEx class NfcoreTemplate { @@ -142,14 +141,12 @@ class NfcoreTemplate { try { if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail - def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") - sendmail_tf.withWriter { w -> w << sendmail_html } [ 'sendmail', '-t' ].execute() << sendmail_html log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" } 
catch (all) { // Catch failures and try with plaintext def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { mail_cmd += [ '-A', mqc_report ] } mail_cmd.execute() << email_html @@ -158,16 +155,14 @@ class NfcoreTemplate { } // Write summary e-mail HTML to a file - def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + def output_d = new File("${params.outdir}/pipeline_info/") + if (!output_d.exists()) { + output_d.mkdirs() + } + def output_hf = new File(output_d, "pipeline_report.html") output_hf.withWriter { w -> w << email_html } - FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html"); - output_hf.delete() - - // Write summary e-mail TXT to a file - def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + def output_tf = new File(output_d, "pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } - FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt"); - output_tf.delete() } // @@ -232,14 +227,15 @@ class NfcoreTemplate { // Dump pipeline parameters in a json file // public static void dump_parameters(workflow, params) { + def output_d = new File("${params.outdir}/pipeline_info/") + if (!output_d.exists()) { + output_d.mkdirs() + } + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def filename = "params_${timestamp}.json" - def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def output_pf = new File(output_d, "params_${timestamp}.json") def jsonStr = JsonOutput.toJson(params) - temp_pf.text = JsonOutput.prettyPrint(jsonStr) - - FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json") - temp_pf.delete() + output_pf.text = JsonOutput.prettyPrint(jsonStr) } // From 
b08d719f6513650ffd40e8cef06d0a547ee5b6b9 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 14 Dec 2023 13:10:27 +0100 Subject: [PATCH 16/20] Add citation text. --- lib/WorkflowCreatetaxdb.groovy | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/lib/WorkflowCreatetaxdb.groovy b/lib/WorkflowCreatetaxdb.groovy index 5814e2a..ace0fe0 100755 --- a/lib/WorkflowCreatetaxdb.groovy +++ b/lib/WorkflowCreatetaxdb.groovy @@ -58,7 +58,9 @@ class WorkflowCreatetaxdb { // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ "Tools used in the workflow included:", - "MultiQC (Ewels et al. 2016)", + params.build_diamond ? "DIAMOND (Buchfink et al. 2015)," : "", + params.build_kaiju ? "Kaiju (Menzel et al. 2016)," : "", + "and MultiQC (Ewels et al. 2016)", "." ].join(' ').trim() @@ -67,10 +69,11 @@ class WorkflowCreatetaxdb { public static String toolBibliographyText(params) { - // TODO Optionally add bibliographic entries to this list. // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def reference_text = [ + params.build_diamond ? "
  • Buchfink, B., Xie, C., & Huson, D. H. (2015). Fast and sensitive protein alignment using DIAMOND. Nature Methods, 12(1), 59–60. 10.1038/nmeth.3176
  • " : "", + params.build_kaiju ? "
  • Menzel, P., Ng, K. L., & Krogh, A. (2016). Fast and sensitive taxonomic classification for metagenomics with Kaiju. Nature Communications, 7, 11257. 10.1038/ncomms11257
  • " : "", "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " ].join(' ').trim() @@ -91,9 +94,8 @@ class WorkflowCreatetaxdb { meta["tool_citations"] = "" meta["tool_bibliography"] = "" - // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - //meta["tool_bibliography"] = toolBibliographyText(params) + meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + meta["tool_bibliography"] = toolBibliographyText(params) def methods_text = mqc_methods_yaml.text From 1a0e5ba02c261a086845bf4634b2baf87ab261a0 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 5 Jan 2024 17:11:38 +0100 Subject: [PATCH 17/20] Apply suggestions from code review Co-authored-by: Joon Klaps <61584065+Joon-Klaps@users.noreply.github.com> --- assets/test.csv | 4 ++-- docs/output.md | 2 +- workflows/createtaxdb.nf | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/assets/test.csv b/assets/test.csv index b7af347..3fde3a4 100644 --- a/assets/test.csv +++ b/assets/test.csv @@ -1,3 +1,3 @@ id,taxid,fasta_dna,fasta_aa -Severe_acute_respiratory_syndrome_coronavirus_2,2697049,/home/james/Downloads/createtaxdb/sarscov2.fasta,/home/james/Downloads/createtaxdb/sarscov2.faa -Haemophilus_influenzae,727,/home/james/Downloads/createtaxdb/haemophilus_infuenzae.fna.gz, + Severe_acute_respiratory_syndrome_coronavirus_2,2697049,https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/fasta/sarscov2.fasta,https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/fasta/sarscov2.faa +Haemophilus_influenzae,727,https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/fasta/haemophilus_influenzae.fna.gz, diff --git a/docs/output.md b/docs/output.md index b817d92..1254ad4 100644 --- a/docs/output.md +++ b/docs/output.md @@ -15,7 +15,7 @@ The pipeline is 
built using [Nextflow](https://www.nextflow.io/) and processes d - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution -### Kaiju +### Diamond
    Output files diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index e806efb..b27b79a 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -86,7 +86,7 @@ workflow CREATETAXDB { // docs: https://github.com/bioinformatics-centre/kaiju#custom-database // docs: https://github.com/nf-core/test-datasets/tree/taxprofiler#kaiju // idea: try just appending `_` to end of each sequence header using a local sed module... it might be sufficient - if ( [params.build_kaiju].any() ) { + if ( [params.build_kaiju, params.build_diamond].any() ) { // Pull just AA sequences ch_refs_for_singleref = ch_input From a59f2a2b2369bf0b649932179d98452881f263fd Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 5 Jan 2024 17:26:57 +0100 Subject: [PATCH 18/20] First set of changes after review --- assets/test.csv | 2 +- modules.json | 31 +++-- modules/nf-core/fastqc/environment.yml | 7 -- modules/nf-core/fastqc/main.nf | 55 --------- modules/nf-core/fastqc/meta.yml | 57 --------- modules/nf-core/fastqc/tests/main.nf.test | 109 ------------------ .../nf-core/fastqc/tests/main.nf.test.snap | 10 -- modules/nf-core/fastqc/tests/tags.yml | 2 - 8 files changed, 20 insertions(+), 253 deletions(-) delete mode 100644 modules/nf-core/fastqc/environment.yml delete mode 100644 modules/nf-core/fastqc/main.nf delete mode 100644 modules/nf-core/fastqc/meta.yml delete mode 100644 modules/nf-core/fastqc/tests/main.nf.test delete mode 100644 modules/nf-core/fastqc/tests/main.nf.test.snap delete mode 100644 modules/nf-core/fastqc/tests/tags.yml diff --git a/assets/test.csv b/assets/test.csv index 3fde3a4..52ac082 100644 --- a/assets/test.csv +++ b/assets/test.csv @@ -1,3 +1,3 @@ id,taxid,fasta_dna,fasta_aa - Severe_acute_respiratory_syndrome_coronavirus_2,2697049,https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/fasta/sarscov2.fasta,https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/fasta/sarscov2.faa 
+Severe_acute_respiratory_syndrome_coronavirus_2,2697049,https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/fasta/sarscov2.fasta,https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/fasta/sarscov2.faa Haemophilus_influenzae,727,https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/fasta/haemophilus_influenzae.fna.gz, diff --git a/modules.json b/modules.json index 0fdafe4..1e9a475 100644 --- a/modules.json +++ b/modules.json @@ -8,40 +8,47 @@ "cat/cat": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "diamond/makedb": { "branch": "master", "git_sha": "b29f6beb86d1d24d680277fb1a3f4de7b8b8a92c", - "installed_by": ["modules"] - }, - "fastqc": { - "branch": "master", - "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "kaiju/mkfmi": { "branch": "master", "git_sha": "7365564c402cbd01e9407810730efd10039997a3", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "malt/build": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "4ab13872435962dadc239979554d13709e20bf29", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml deleted file mode 100644 index 1787b38..0000000 --- a/modules/nf-core/fastqc/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: fastqc -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::fastqc=0.12.1 diff --git 
a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf deleted file mode 100644 index 9e19a74..0000000 --- a/modules/nf-core/fastqc/main.nf +++ /dev/null @@ -1,55 +0,0 @@ -process FASTQC { - tag "$meta.id" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : - 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // Make list of old name and new name pairs to use for renaming in the bash while loop - def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } - def rename_to = old_new_pairs*.join(' ').join(' ') - def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') - """ - printf "%s %s\\n" $rename_to | while read old_name new_name; do - [ -f "\${new_name}" ] || ln -s \$old_name \$new_name - done - - fastqc \\ - $args \\ - --threads $task.cpus \\ - $renamed_files - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.html - touch ${prefix}.zip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml deleted file mode 100644 index 
ee5507e..0000000 --- a/modules/nf-core/fastqc/meta.yml +++ /dev/null @@ -1,57 +0,0 @@ -name: fastqc -description: Run FastQC on sequenced reads -keywords: - - quality control - - qc - - adapters - - fastq -tools: - - fastqc: - description: | - FastQC gives general quality metrics about your reads. - It provides information about the quality score distribution - across your reads, the per base sequence content (%A/C/G/T). - You get information about adapter contamination and other - overrepresented sequences. - homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ - documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ - licence: ["GPL-2.0-only"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - html: - type: file - description: FastQC report - pattern: "*_{fastqc.html}" - - zip: - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" -maintainers: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test deleted file mode 100644 index b9e8f92..0000000 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ /dev/null @@ -1,109 +0,0 @@ -nextflow_process { - - name "Test Process FASTQC" - script "../main.nf" - process "FASTQC" - tag "modules" - tag "modules_nfcore" - tag "fastqc" - - test("Single-Read") { - - when { - params { - outdir = "$outputDir" - } - process { - """ - input[0] = [ - [ id: 'test', single_end:true ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) - ] - ] - """ - } - } - - then { - assertAll ( - { assert process.success }, - // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. - // looks like this:
    Mon 2 Oct 2023
    test.gz
    - // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, - { assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match("versions") }, - { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } - ) - } - } -// TODO -// // -// // Test with paired-end data -// // -// workflow test_fastqc_paired_end { -// input = [ -// [id: 'test', single_end: false], // meta map -// [ -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) -// ] -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with interleaved data -// // -// workflow test_fastqc_interleaved { -// input = [ -// [id: 'test', single_end: false], // meta map -// file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with bam data -// // -// workflow test_fastqc_bam { -// input = [ -// [id: 'test', single_end: false], // meta map -// file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with multiple samples -// // -// workflow test_fastqc_multiple { -// input = [ -// [id: 'test', single_end: false], // meta map -// [ -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) -// ] -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with custom prefix -// // -// workflow test_fastqc_custom_prefix { 
-// input = [ -// [ id:'mysample', single_end:true ], // meta map -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } -} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap deleted file mode 100644 index 636a32c..0000000 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ /dev/null @@ -1,10 +0,0 @@ -{ - "versions": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "timestamp": "2023-10-09T23:40:54+0000" - } -} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml deleted file mode 100644 index 7834294..0000000 --- a/modules/nf-core/fastqc/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -fastqc: - - modules/nf-core/fastqc/** From ad9a61bdcdd35c53dbf0dbedeb92a714d0b335f8 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 5 Jan 2024 17:29:35 +0100 Subject: [PATCH 19/20] Standardise test output dir variable --- tests/test.nf.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test.nf.test b/tests/test.nf.test index cd9eac3..6b4e9a8 100644 --- a/tests/test.nf.test +++ b/tests/test.nf.test @@ -18,8 +18,8 @@ nextflow_pipeline { assertAll( { assert workflow.success }, { assert snapshot( - path("${params.outdir}/diamond/database.dmnd"), - path("${params.outdir}/kaiju/database.fmi"), + path("$outputDir/diamond/database.dmnd"), + path("$outputDir/kaiju/database.fmi"), ).match() }, { assert new File("$outputDir/pipeline_info/software_versions.yml").exists() }, { assert new File("$outputDir/multiqc/multiqc_report.html").exists() } From a317cbe76ea5cd142ab08d3c8b464c910a85c876 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Fri, 5 Jan 2024 17:34:52 +0100 Subject: [PATCH 20/20] prettier --- modules.json | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/modules.json b/modules.json index 1e9a475..b1dffc7 100644 --- a/modules.json +++ b/modules.json @@ -8,47 +8,35 @@ "cat/cat": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "diamond/makedb": { "branch": "master", "git_sha": "b29f6beb86d1d24d680277fb1a3f4de7b8b8a92c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "kaiju/mkfmi": { "branch": "master", "git_sha": "7365564c402cbd01e9407810730efd10039997a3", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "malt/build": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "4ab13872435962dadc239979554d13709e20bf29", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } } } } -} \ No newline at end of file +}