From 612b68129ba458993fba0759500fe9f000aa8751 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Fri, 15 Sep 2023 13:40:45 +0200 Subject: [PATCH 1/5] Add bin_sizes parameter --- CHANGELOG.md | 5 +++++ conf/modules.config | 1 + conf/test.config | 2 ++ nextflow.config | 3 ++- nextflow_schema.json | 5 +++++ tests/main.nf.test | 4 +++- workflows/cmgg-wisecondorx.nf | 5 +++++ 7 files changed, 23 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 40dd837..c8745bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v1.1.0 - Naive Junior - [19 June 2023] + +## New features +1. Added a new parameter `--bin_sizes` that takes a comma-delimited list of bin sizes to create references for. This will make it possible to create references for multiple bin sizes at once. + ## v1.0.1 - Helpful Apprentice - [19 June 2023] ### New features diff --git a/conf/modules.config b/conf/modules.config index 971ccc3..238e210 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -17,6 +17,7 @@ process { ] withName: WISECONDORX_NEWREF { + ext.prefix = {"${meta.id}.${meta.bin_size}kbp"} publishDir = [ enabled: true, path: { "${params.outdir}" }, diff --git a/conf/test.config b/conf/test.config index bcffe00..295741a 100644 --- a/conf/test.config +++ b/conf/test.config @@ -21,6 +21,8 @@ params { genomes_ignore = true + bin_sizes = "10,5" + fasta = params.test_data["homo_sapiens"]["genome"]["genome_fasta"] fai = null //params.test_data["homo_sapiens"]["genome"]["genome_fasta_fai"] diff --git a/nextflow.config b/nextflow.config index 183095d..5b72ff2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -15,6 +15,7 @@ params { // Other options no_metrics = false + bin_sizes = "1000,500,50,30,15,10,5,1" // References genome = "GRCh38" @@ -234,7 +235,7 @@ manifest { description = 
"""A nextflow pipeline for creating references for WisecondorX""" mainScript = 'main.nf' nextflowVersion = '!>=22.10.1' - version = '1.0.1' + version = '1.1.0' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 655ad48..f7cad18 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -56,6 +56,11 @@ "no_metrics": { "type": "boolean", "description": "Don't create the metrics text file" + }, + "bin_sizes": { + "type": "string", + "description": "A comma-delimited list of bin sizes to use for the analysis", + "default": "1000,500,50,30,15,10,5,1" } } }, diff --git a/tests/main.nf.test b/tests/main.nf.test index ed64654..694db1a 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -9,12 +9,14 @@ nextflow_pipeline { when { params { prefix = "test_reference" + bin_sizes = "10,5" } } then { assert workflow.success - assert file("${outputDir}/test_reference.npz").exists() + assert file("${outputDir}/test_reference.10kbp.npz").exists() + assert file("${outputDir}/test_reference.5kbp.npz").exists() assert file("${outputDir}/metrics.txt").exists() assert file("${outputDir}/multiqc_reports/multiqc_report.html").exists() } diff --git a/workflows/cmgg-wisecondorx.nf b/workflows/cmgg-wisecondorx.nf index 36c3557..ddea5c8 100644 --- a/workflows/cmgg-wisecondorx.nf +++ b/workflows/cmgg-wisecondorx.nf @@ -184,6 +184,11 @@ workflow CMGGWISECONDORX { [ new_meta, npz ] } .groupTuple() // All files should be present here, so no size is needed + .combine(params.bin_sizes.split(",")) + .map { meta, npz, bin_size -> + new_meta = meta + [bin_size:bin_size] + [ new_meta, npz ] + } .set { ch_newref_input } WISECONDORX_NEWREF(ch_newref_input) From 5d58dba3e341d6915aa13456c7a34ce1b13fa1ec Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Fri, 15 Sep 2023 13:42:23 +0200 Subject: [PATCH 2/5] prettier --- .prettierignore | 1 + CHANGELOG.md | 1 + 2 files changed, 2 insertions(+) diff --git a/.prettierignore b/.prettierignore index 437d763..486e0f9 
100644 --- a/.prettierignore +++ b/.prettierignore @@ -10,3 +10,4 @@ testing/ testing* *.pyc bin/ +.nf-test/ diff --git a/CHANGELOG.md b/CHANGELOG.md index c8745bd..d04b3da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## v1.1.0 - Naive Junior - [19 June 2023] ## New features + 1. Added a new parameter `--bin_sizes` that takes a comma-delimited list of bin sizes to create references for. This will make it possible to create references for multiple bin sizes at once. ## v1.0.1 - Helpful Apprentice - [19 June 2023] From 5a67139b03cf63b6e79ce60a81125b217ea3b0b2 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Fri, 15 Sep 2023 13:59:18 +0200 Subject: [PATCH 3/5] fix linting --- .github/CONTRIBUTING.md | 1 - lib/NfcoreTemplate.groovy | 2 +- modules.json | 6 ++--- .../custom/dumpsoftwareversions/main.nf | 6 ++--- modules/nf-core/multiqc/main.nf | 6 ++--- modules/nf-core/samtools/faidx/main.nf | 16 +++++++++---- modules/nf-core/samtools/faidx/meta.yml | 14 +++++++++-- nextflow.config | 7 +++--- nextflow_schema.json | 24 +++++++++++++++---- workflows/cmgg-wisecondorx.nf | 5 +++- 10 files changed, 59 insertions(+), 28 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 4eae233..4025bd8 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -110,4 +110,3 @@ To get started: Devcontainer specs: - [DevContainer config](.devcontainer/devcontainer.json) -- [Dockerfile](.devcontainer/Dockerfile) diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 2023f45..000bb92 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -128,7 +128,7 @@ class NfcoreTemplate { def email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] def sf = new File("$projectDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) diff --git a/modules.json b/modules.json index 6e0c299..308898e 100644 --- a/modules.json +++ b/modules.json @@ -7,12 +7,12 @@ "nf-core": { "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "05c280924b6c768d484c7c443dad5e605c4ff4b4", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", "installed_by": ["modules"] }, "ngsbits/samplegender": { @@ -23,7 +23,7 @@ }, "samtools/faidx": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", "installed_by": ["modules"] }, "samtools/index": { diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index ebc8727..c9d014b 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.14" + conda "bioconda::multiqc=1.15" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : + 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 1fc387b..65d7dd0 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.14" + conda "bioconda::multiqc=1.15" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : + 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index 4dd0e5b..59ed308 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -9,11 +9,13 @@ process SAMTOOLS_FAIDX { input: tuple val(meta), path(fasta) + tuple val(meta2), path(fai) output: - tuple val(meta), path ("*.fai"), emit: fai - tuple val(meta), path ("*.gzi"), emit: gzi, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path ("*.{fa,fasta}") , emit: fa , optional: true + tuple val(meta), path ("*.fai") , emit: fai, optional: true + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -23,8 +25,8 @@ process SAMTOOLS_FAIDX { """ samtools \\ faidx \\ - $args \\ - $fasta + $fasta \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -33,8 +35,12 @@ process SAMTOOLS_FAIDX { """ stub: + def match = (task.ext.args =~ 
/-o(?:utput)?\s(.*)\s?/).findAll() + def fastacmd = match[0] ? "touch ${match[0][1]}" : '' """ + ${fastacmd} touch ${fasta}.fai + cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml index fe2fe9a..957b25e 100644 --- a/modules/nf-core/samtools/faidx/meta.yml +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -3,6 +3,7 @@ description: Index FASTA file keywords: - index - fasta + - faidx tools: - samtools: description: | @@ -17,12 +18,21 @@ input: - meta: type: map description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] + Groovy Map containing reference information + e.g. [ id:'test' ] - fasta: type: file description: FASTA file pattern: "*.{fa,fasta}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" output: - meta: type: map diff --git a/nextflow.config b/nextflow.config index 5b72ff2..18e5bce 100644 --- a/nextflow.config +++ b/nextflow.config @@ -43,10 +43,9 @@ params { help = false version = false validate_params = true - show_hidden_params = false - validationSchemaIgnoreParams = 'genomes,test_data,schema_ignore_params' - schema_ignore_params = 'genomes,test_data' - + validationSchemaIgnoreParams = 'genomes,test_data,igenomes_base' + validationShowHiddenParams = false + validate_params = true // Config options custom_config_version = 'master' diff --git a/nextflow_schema.json b/nextflow_schema.json index f7cad18..2fa2094 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -301,17 +301,31 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { + "validationSchemaIgnoreParams": { + "type": "string", + "default": "genomes,test_data", + "hidden": true + }, + "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all 
params when using `--help`", "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." }, - "validationSchemaIgnoreParams": { - "type": "string", - "default": "genomes,test_data,schema_ignore_params", - "hidden": true + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warning." + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient mode.", + "hidden": true, + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." 
} } } diff --git a/workflows/cmgg-wisecondorx.nf b/workflows/cmgg-wisecondorx.nf index ddea5c8..38d506a 100644 --- a/workflows/cmgg-wisecondorx.nf +++ b/workflows/cmgg-wisecondorx.nf @@ -76,7 +76,10 @@ workflow CMGGWISECONDORX { .collect() if(!params.fai) { - SAMTOOLS_FAIDX(ch_fasta) + SAMTOOLS_FAIDX( + ch_fasta, + [[],[]] + ) ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) SAMTOOLS_FAIDX.out.fai From 375e45ef2fb49e75a78c1c177e59dfc1c5cd0b7a Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Fri, 15 Sep 2023 14:10:40 +0200 Subject: [PATCH 4/5] fix bin_size --- conf/modules.config | 3 ++- conf/test.config | 2 +- nextflow.config | 2 +- nextflow_schema.json | 2 +- tests/main.nf.test | 4 ++-- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 238e210..9325b91 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -17,7 +17,8 @@ process { ] withName: WISECONDORX_NEWREF { - ext.prefix = {"${meta.id}.${meta.bin_size}kbp"} + ext.prefix = {"${meta.id}_${meta.bin_size as Integer > 1000 ? meta.bin_size as Integer/1000 : meta.bin_size}${meta.bin_size as Integer > 1000 ? 
"k" : ""}bp"} + ext.args = { "--binsize ${meta.bin_size}" } publishDir = [ enabled: true, path: { "${params.outdir}" }, diff --git a/conf/test.config b/conf/test.config index 295741a..2adb674 100644 --- a/conf/test.config +++ b/conf/test.config @@ -21,7 +21,7 @@ params { genomes_ignore = true - bin_sizes = "10,5" + bin_sizes = "10000,5" fasta = params.test_data["homo_sapiens"]["genome"]["genome_fasta"] fai = null //params.test_data["homo_sapiens"]["genome"]["genome_fasta_fai"] diff --git a/nextflow.config b/nextflow.config index 18e5bce..4d25b17 100644 --- a/nextflow.config +++ b/nextflow.config @@ -15,7 +15,7 @@ params { // Other options no_metrics = false - bin_sizes = "1000,500,50,30,15,10,5,1" + bin_sizes = "1000000,500000,50000,30000,15000,10000,5000,1000" // References genome = "GRCh38" diff --git a/nextflow_schema.json b/nextflow_schema.json index 2fa2094..f61d761 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -60,7 +60,7 @@ "bin_sizes": { "type": "string", "description": "A comma-delimited list of bin sizes to use for the analysis", - "default": "1000,500,50,30,15,10,5,1" + "default": "1000000,500000,50000,30000,15000,10000,5000,1000" } } }, diff --git a/tests/main.nf.test b/tests/main.nf.test index 694db1a..7a83b6f 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -9,14 +9,14 @@ nextflow_pipeline { when { params { prefix = "test_reference" - bin_sizes = "10,5" + bin_sizes = "10000,5" } } then { assert workflow.success assert file("${outputDir}/test_reference.10kbp.npz").exists() - assert file("${outputDir}/test_reference.5kbp.npz").exists() + assert file("${outputDir}/test_reference.5bp.npz").exists() assert file("${outputDir}/metrics.txt").exists() assert file("${outputDir}/multiqc_reports/multiqc_report.html").exists() } From 344d965e2f27e7a71012fef3ecaacf5f2b2712f7 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Fri, 15 Sep 2023 14:42:28 +0200 Subject: [PATCH 5/5] fix tests --- tests/main.nf.test | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/main.nf.test b/tests/main.nf.test index 7a83b6f..02eba75 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -15,8 +15,8 @@ nextflow_pipeline { then { assert workflow.success - assert file("${outputDir}/test_reference.10kbp.npz").exists() - assert file("${outputDir}/test_reference.5bp.npz").exists() + assert file("${outputDir}/test_reference_10kbp.npz").exists() + assert file("${outputDir}/test_reference_5bp.npz").exists() assert file("${outputDir}/metrics.txt").exists() assert file("${outputDir}/multiqc_reports/multiqc_report.html").exists() }