From fb0e4bf79f1ed3e74f5677d518f99b399dd91698 Mon Sep 17 00:00:00 2001
From: Peter Kruczkiewicz
Date: Thu, 25 Jul 2024 09:32:45 -0500
Subject: [PATCH] feat: create files summarizing annotation issues and failures

---
Reviewer notes (commentary only; not part of the commit message or the diff):

* modules/local/vadr.nf gains a VADR_SUMMARIZE_ISSUES process. It runs with
  executor 'local' and memory 100.MB, stages its inputs under "input*/*",
  and writes two files:
    - vadr-annotation-issues.txt: every file matched by
      input*/**/*.alt.list is concatenated; the first line of the combined
      stream is kept, and after that only lines not starting with '#'.
    - vadr-annotation-failed-sequences.txt: every file matched by
      input*/**/*.fail.list, concatenated unchanged.
* workflows/illumina.nf and workflows/nanopore.nf import the new process and
  call it once with VADR.out.vadr_outdir.map { [it[1]] }.collect(), i.e. the
  second element of every vadr_outdir item gathered into a single input.
* conf/modules_illumina.config and conf/modules_nanopore.config publish the
  process outputs to "${params.outdir}/annotation" using
  params.publish_dir_mode; the saveAs closure drops 'versions.yml'.

* NOTE(review): '**' only recurses when bash 'globstar' is enabled; otherwise
  it matches like '*'. Presumably a single directory level is all that is
  needed given the "input*/*" staging - confirm.
* NOTE(review): the process as written declares no versions.yml output, so
  the saveAs filter has nothing to drop for it - presumably kept to mirror
  the neighbouring withName blocks; confirm.
* NOTE(review): the leading whitespace of the lines below was reconstructed
  (the original indentation and any alignment padding were not available).
  Context lines may therefore need a whitespace-tolerant apply; verify
  against the target tree before relying on an exact match.

 conf/modules_illumina.config | 11 +++++++++++
 conf/modules_nanopore.config | 11 +++++++++++
 modules/local/vadr.nf        | 18 ++++++++++++++++++
 workflows/illumina.nf        |  3 ++-
 workflows/nanopore.nf        |  3 ++-
 5 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config
index 4aa6041..eee9939 100644
--- a/conf/modules_illumina.config
+++ b/conf/modules_illumina.config
@@ -37,6 +37,17 @@ process {
         ]
     }
 
+    withName: 'VADR_SUMMARIZE_ISSUES' {
+        ext.args = ''
+        publishDir = [
+            [
+                path: { "${params.outdir}/annotation" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            ]
+        ]
+    }
+
     withName: 'POST_TABLE2ASN' {
         ext.args = ''
         publishDir = [
diff --git a/conf/modules_nanopore.config b/conf/modules_nanopore.config
index eba39e9..712e756 100644
--- a/conf/modules_nanopore.config
+++ b/conf/modules_nanopore.config
@@ -255,6 +255,17 @@ process {
         ]
     }
 
+    withName: 'VADR_SUMMARIZE_ISSUES' {
+        ext.args = ''
+        publishDir = [
+            [
+                path: { "${params.outdir}/annotation" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            ]
+        ]
+    }
+
     withName: 'POST_TABLE2ASN' {
         ext.args = ''
         publishDir = [
diff --git a/modules/local/vadr.nf b/modules/local/vadr.nf
index c11e93e..933a4ea 100644
--- a/modules/local/vadr.nf
+++ b/modules/local/vadr.nf
@@ -29,3 +29,21 @@ process VADR {
   END_VERSIONS
   """
 }
+
+process VADR_SUMMARIZE_ISSUES {
+  executor 'local'
+  memory 100.MB
+
+  input:
+  path(vadr_output, stageAs: "input*/*")
+
+  output:
+  path('vadr-annotation-issues.txt'), emit: issues
+  path('vadr-annotation-failed-sequences.txt'), emit: failed
+
+  script:
+  """
+  cat input*/**/*.alt.list | awk 'NR == 1 || \$0 !~ /^#/' > vadr-annotation-issues.txt
+  cat input*/**/*.fail.list > vadr-annotation-failed-sequences.txt
+  """
+}
diff --git a/workflows/illumina.nf b/workflows/illumina.nf
index 8c92b2a..5137593 100644
--- a/workflows/illumina.nf
+++ b/workflows/illumina.nf
@@ -20,7 +20,7 @@ include { BLAST_MAKEBLASTDB } from '../modules/local/blast_makeblastdb'
 include { BLAST_BLASTN } from '../modules/local/blastn'
 include { CAT_ILLUMINA_FASTQ } from '../modules/local/cat_illumina_fastq'
 include { ZSTD_DECOMPRESS as ZSTD_DECOMPRESS_FASTA; ZSTD_DECOMPRESS as ZSTD_DECOMPRESS_CSV } from '../modules/local/zstd_decompress'
-include { VADR } from '../modules/local/vadr'
+include { VADR; VADR_SUMMARIZE_ISSUES } from '../modules/local/vadr'
 include { PRE_TABLE2ASN; TABLE2ASN; POST_TABLE2ASN } from '../modules/local/table2asn'
 include { CUSTOM_DUMPSOFTWAREVERSIONS as SOFTWARE_VERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main'
 
@@ -92,6 +92,7 @@ workflow ILLUMINA {
   VADR.out.feature_table
     .combine(VADR.out.pass_fasta, by: 0)
     .set { ch_pre_table2asn }
+  VADR_SUMMARIZE_ISSUES(VADR.out.vadr_outdir.map { [it[1]] }.collect())
   PRE_TABLE2ASN(ch_pre_table2asn)
   ch_versions = ch_versions.mix(PRE_TABLE2ASN.out.versions)
   TABLE2ASN(PRE_TABLE2ASN.out.table2asn_input)
diff --git a/workflows/nanopore.nf b/workflows/nanopore.nf
index 91641ae..96a82bc 100644
--- a/workflows/nanopore.nf
+++ b/workflows/nanopore.nf
@@ -32,7 +32,7 @@ include { BLAST_BLASTN as BLAST_BLASTN_IRMA } from '../modules
 include { BLAST_BLASTN as BLAST_BLASTN_CONSENSUS } from '../modules/local/blastn'
 include { BLAST_BLASTN as BLAST_BLASTN_CONSENSUS_REF_DB } from '../modules/local/blastn'
 include { CUSTOM_DUMPSOFTWAREVERSIONS as SOFTWARE_VERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main'
-include { VADR } from '../modules/local/vadr'
+include { VADR; VADR_SUMMARIZE_ISSUES } from '../modules/local/vadr'
 include { PRE_TABLE2ASN; TABLE2ASN; POST_TABLE2ASN } from '../modules/local/table2asn'
 include { MULTIQC } from '../modules/local/multiqc'
 
@@ -247,6 +247,7 @@ workflow NANOPORE {
   VADR.out.feature_table
     .combine(VADR.out.pass_fasta, by: 0)
     .set { ch_pre_table2asn }
+  VADR_SUMMARIZE_ISSUES(VADR.out.vadr_outdir.map { [it[1]] }.collect())
   PRE_TABLE2ASN(ch_pre_table2asn)
   ch_versions = ch_versions.mix(PRE_TABLE2ASN.out.versions)
   TABLE2ASN(PRE_TABLE2ASN.out.table2asn_input)