From 6838c985118b0183dd2ff5ea7cd2d2b082bd4d14 Mon Sep 17 00:00:00 2001
From: Peter Kruczkiewicz
Date: Wed, 2 Aug 2023 16:04:45 -0500
Subject: [PATCH] Remove gfflu/snpeff processes from PR#29 for #35 bugfix

---
 CHANGELOG.md                  |  6 +++
 assets/multiqc_config.yaml    |  7 ----
 modules/local/gfflu.nf        | 29 --------------
 modules/local/multiqc.nf      |  1 -
 modules/local/snpeff_ann.nf   | 72 -----------------------------------
 modules/local/snpeff_build.nf | 53 --------------------------
 workflows/nanopore.nf         | 23 -----------
 7 files changed, 6 insertions(+), 185 deletions(-)
 delete mode 100644 modules/local/gfflu.nf
 delete mode 100644 modules/local/snpeff_ann.nf
 delete mode 100644 modules/local/snpeff_build.nf

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 54fa4fe..17e7961 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,12 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [[3.3.1](https://github.com/CFIA-NCFAD/nf-flu/releases/tag/3.3.1)] - 2023-08-02
+
+### Fixes
+
+* Conda/Mamba env creation when using `conda`/`mamba` profile (#35)
+
 ## [[3.3.0](https://github.com/CFIA-NCFAD/nf-flu/releases/tag/3.3.0)] - 2023-07-11
 
 This release migrates to more recently updated Influenza virus sequences since the last update for the [NCBI Influenza DB FTP data](https://ftp.ncbi.nih.gov/genomes/INFLUENZA/) was in 2020-10-13. By default, all Orthomyxoviridae virus sequences were parsed from the daily updated NCBI Viruses [`AllNucleotide.fa`](https://ftp.ncbi.nlm.nih.gov/genomes/Viruses/AllNucleotide/) and [`AllNuclMetadata.csv.gz`](https://ftp.ncbi.nlm.nih.gov/genomes/Viruses/AllNuclMetadata/AllNuclMetadata.csv.gz) and uploaded to [Figshare](https://figshare.com/articles/dataset/2023-06-14_-_NCBI_Viruses_-_Orthomyxoviridae/23608782) as Zstd compressed files. nf-flu no longer uses the [influenza.fna.gz](https://ftp.ncbi.nih.gov/genomes/INFLUENZA/influenza.fna.gz) and [genomeset.dat.gz](https://ftp.ncbi.nih.gov/genomes/INFLUENZA/genomeset.dat.gz) files for Influenza sequences and metadata, respectively.
diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml
index da622ef..11b18c7 100644
--- a/assets/multiqc_config.yaml
+++ b/assets/multiqc_config.yaml
@@ -34,12 +34,6 @@ module_order:
       info: 'This section of the report shows Bcftools calculated statistics after variant calling using Clair3/Medaka and filter frameshift'
       path_filters:
         - './bcftools/*'
-  - snpeff:
-      name: 'SnpEff'
-      anchor: 'snpeff'
-      info: SnpEff variant effect analysis results summary
-      path_filters:
-        - './snpeff/*'
 
 extra_fn_clean_exts:
   - type: remove
@@ -65,7 +59,6 @@ run_modules:
   - samtools
   - mosdepth
   - bcftools
-  - snpeff
 
 sp:
   mosdepth/global_dist:
diff --git a/modules/local/gfflu.nf b/modules/local/gfflu.nf
deleted file mode 100644
index c808b33..0000000
--- a/modules/local/gfflu.nf
+++ /dev/null
@@ -1,29 +0,0 @@
-process GFFLU {
-  conda "bioconda::gfflu=0.0.1"
-  if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
-    container ''
-  } else {
-    container ''
-  }
-
-  input:
-  tuple val(seqid), path(fasta, stageAs: "input*/*")
-
-  output:
-  tuple val(seqid), path(fasta), path("gfflu/${seqid}.gff"), emit: gff
-  tuple val(seqid), path(fasta), path("gfflu/${seqid}.gbk"), emit: gbk
-  path('gfflu/'), emit: outdir
-  path('versions.yml'), emit: versions
-
-  script:
-  input_fasta = "${seqid}.fasta"
-  """
-  ln -s $fasta $input_fasta
-  gfflu -v -o gfflu $input_fasta
-
-  cat <<-END_VERSIONS > versions.yml
-  "${task.process}":
-    gfflu: \$(gfflu --version | sed 's/gfflu version //')
-  END_VERSIONS
-  """
-}
diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf
index b01d45c..c8489b7 100644
--- a/modules/local/multiqc.nf
+++ b/modules/local/multiqc.nf
@@ -13,7 +13,6 @@ process MULTIQC {
   path('samtools/*')
   path('mosdepth/*')
   path('bcftools/*')
-  path('snpeff/*')
   path('software_versions/*')
  path(workflow_summary)
 
diff --git a/modules/local/snpeff_ann.nf b/modules/local/snpeff_ann.nf
deleted file mode 100644
index 8f08c69..0000000
--- a/modules/local/snpeff_ann.nf
+++ /dev/null
@@ -1,72 +0,0 @@
-include { fluPrefix } from './functions'
-
-process SNPEFF_ANN {
-  tag "$sample|$segment|$ref_id"
-  label 'process_low'
-
-  conda 'bioconda::snpeff=5.0'
-  if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
-    container 'https://depot.galaxyproject.org/singularity/snpeff:5.0--0'
-  } else {
-    container 'quay.io/biocontainers/snpeff:5.0--0'
-  }
-
-  input:
-  tuple val(sample),
-        val(segment),
-        path(vcf),
-        val(ref_id),
-        path(fasta),
-        path(db),
-        path(config)
-
-  output:
-  tuple val(sample),
-        val(segment),
-        val(ref_id),
-        path(fasta),
-        path("*.vcf"), emit: vcf
-  tuple val(sample),
-        val(segment),
-        val(ref_id),
-        path(fasta),
-        path("*.csv"), emit: csv
-  tuple val(sample),
-        val(segment),
-        val(ref_id),
-        path(fasta),
-        path("*.genes.txt"), emit: txt
-  tuple val(sample),
-        val(segment),
-        val(ref_id),
-        path(fasta),
-        path("*.html"), emit: html
-  path('versions.yml'), emit: versions
-
-  script:
-  def args = task.ext.args ?: ""
-  def prefix = fluPrefix(sample, segment, ref_id)
-  def avail_mem = 4
-  if (!task.memory) {
-    log.info '[snpEff] Available memory not known - defaulting to 4GB. Specify process memory requirements to change this.'
-  } else {
-    avail_mem = task.memory.giga
-  }
-  """
-  snpEff \\
-    -Xmx${avail_mem}g \\
-    ${ref_id} \\
-    -config $config \\
-    -dataDir $db \\
-    $args \\
-    $vcf \\
-    -csvStats ${prefix}.snpeff.csv \\
-    > ${prefix}.snpeff.vcf
-  mv snpEff_summary.html ${prefix}.snpeff.summary.html
-
-  cat <<-END_VERSIONS > versions.yml
-  "${task.process}":
-    snpeff: \$(snpEff -version 2>&1 | sed 's/^.*SnpEff //; s/ .*\$//')
-  END_VERSIONS
-  """
-}
diff --git a/modules/local/snpeff_build.nf b/modules/local/snpeff_build.nf
deleted file mode 100644
index 856a08b..0000000
--- a/modules/local/snpeff_build.nf
+++ /dev/null
@@ -1,53 +0,0 @@
-process SNPEFF_BUILD {
-  tag "$fasta"
-  label 'process_low'
-
-  conda 'bioconda::snpeff=5.0'
-  if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
-    container 'https://depot.galaxyproject.org/singularity/snpeff:5.0--0'
-  } else {
-    container 'quay.io/biocontainers/snpeff:5.0--0'
-  }
-
-  input:
-  tuple val(seqid), path(fasta), path(gff)
-
-  output:
-  tuple val(seqid), path(fasta), path('snpeff_db'), path('*.config'), emit: db
-  path('*.config'), emit: config
-  path('versions.yml'), emit: versions
-
-  script:
-  def basename = seqid
-  def avail_mem = 4
-  if (!task.memory) {
-    log.info '[snpEff] Available memory not known - defaulting to 4GB. Specify process memory requirements to change this.'
-  } else {
-    avail_mem = task.memory.giga
-  }
-  """
-  mkdir -p snpeff_db/genomes/
-  cd snpeff_db/genomes/
-  ln -s ../../$fasta ${basename}.fa
-  cd ../../
-  mkdir -p snpeff_db/${basename}/
-  cd snpeff_db/${basename}/
-  ln -s ../../$gff genes.gff
-  cd ../../
-  echo "${basename}.genome : ${basename}" > snpeff.config
-
-  snpEff \\
-    -Xmx${avail_mem}g \\
-    build \\
-    -config snpeff.config \\
-    -dataDir ./snpeff_db \\
-    -gff3 \\
-    -v \\
-    ${basename}
-
-  cat <<-END_VERSIONS > versions.yml
-  "${task.process}":
-    snpeff: \$(snpEff -version 2>&1 | sed 's/^.*SnpEff //; s/ .*\$//')
-  END_VERSIONS
-  """
-}
diff --git a/workflows/nanopore.nf b/workflows/nanopore.nf
index 5b4d261..be3f1e9 100644
--- a/workflows/nanopore.nf
+++ b/workflows/nanopore.nf
@@ -23,9 +23,6 @@ include { ZSTD_DECOMPRESS as ZSTD_DECOMPRESS_FASTA; ZSTD_DECOMPRESS as ZSTD_DECO
 include { CAT_DB } from '../modules/local/misc'
 include { CAT_CONSENSUS } from '../modules/local/misc'
 include { SEQTK_SEQ } from '../modules/local/seqtk_seq'
-include { GFFLU } from '../modules/local/gfflu'
-include { SNPEFF_BUILD } from '../modules/local/snpeff_build'
-include { SNPEFF_ANN } from '../modules/local/snpeff_ann'
 include { CHECK_SAMPLE_SHEET } from '../modules/local/check_sample_sheet'
 include { CHECK_REF_FASTA } from '../modules/local/check_ref_fasta'
 // using modified BLAST_MAKEBLASTDB from nf-core/modules to only move/publish BLAST DB files
@@ -215,25 +212,6 @@ workflow NANOPORE {
     ch_vcf_filter = BCF_FILTER_MEDAKA.out.vcf
   }
 
-  ch_refseqs = SEQTK_SEQ.out.sample_info.map { [it[2], it[3]] }
-  GFFLU(ch_refseqs)
-
-  SNPEFF_BUILD(GFFLU.out.gff)
-  ch_snpeff_ann_input = SEQTK_SEQ.out.sample_info
-    .map { sample, segment, ref_id, fasta, reads ->
-      return [ref_id, fasta, sample, segment]
-    }
-    .combine(SNPEFF_BUILD.out.db, by: 0) // seqid, ref_fasta, sample, segment, db, config
-    .map { ref_id, fasta, sample, segment, _, db, config ->
-      return [sample, segment, ref_id, fasta, db, config]
-    }
-    .combine(ch_vcf_filter, by: [0, 1, 2]) // sample, segment, ref_id, ref_fasta, vcf
-    .map { sample, segment, ref_id, fasta, db, config, _, vcf ->
-      return [sample, segment, vcf, ref_id, fasta, db, config]
-    }
-
-  SNPEFF_ANN(ch_snpeff_ann_input)
-
   VCF_FILTER_FRAMESHIFT(ch_vcf_filter)
   ch_versions = ch_versions.mix(VCF_FILTER_FRAMESHIFT.out.versions)
 
@@ -291,7 +269,6 @@ workflow NANOPORE {
     MINIMAP2.out.stats.collect().ifEmpty([]),
     MOSDEPTH_GENOME.out.mqc.collect().ifEmpty([]),
    BCFTOOLS_STATS.out.stats.collect().ifEmpty([]),
-    SNPEFF_ANN.out.csv.collect{it[4]}.ifEmpty([]),
     SOFTWARE_VERSIONS.out.mqc_yml.collect(),
     ch_workflow_summary.collectFile(name: "workflow_summary_mqc.yaml")
   )