From 53a97948affb07725328a1bba2fec79b331374a6 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Sat, 24 Feb 2024 21:38:33 +0000 Subject: [PATCH] Reorder rnaseq preprocessing, fix minor issues, test sortmerna (#4982) * Trimming should come first in preprocessing * Update tests to run sortmerna * sortmerna working in subworkflow * Don't need test data updates * Appease eclint --- .../nf-core/preprocess_rnaseq/main.nf | 52 ++++++++++--------- .../preprocess_rnaseq/tests/main.nf.test | 28 +++++++--- .../preprocess_rnaseq/tests/main.nf.test.snap | 20 +++---- 3 files changed, 57 insertions(+), 43 deletions(-) diff --git a/subworkflows/nf-core/preprocess_rnaseq/main.nf b/subworkflows/nf-core/preprocess_rnaseq/main.nf index 4ea083a219e..8e0b7b0d9e2 100644 --- a/subworkflows/nf-core/preprocess_rnaseq/main.nf +++ b/subworkflows/nf-core/preprocess_rnaseq/main.nf @@ -88,26 +88,6 @@ workflow PREPROCESS_RNASEQ { ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null)) - // - // MODULE: Remove ribosomal RNA reads - // - if (remove_ribo_rna) { - ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()) - .map { row -> file(row, checkIfExists: true) } - .collect() - - SORTMERNA ( - ch_filtered_reads, - ch_sortmerna_fastas - ) - .reads - .set { ch_filtered_reads } - - ch_multiqc_files = ch_multiqc_files.mix(SORTMERNA.out.log.map{ it[1] }) - - ch_versions = ch_versions.mix(SORTMERNA.out.versions.first()) - } - // // SUBWORKFLOW: Read QC, extract UMI and trim adapters with TrimGalore! // @@ -128,7 +108,6 @@ workflow PREPROCESS_RNASEQ { ch_multiqc_files = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip .mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_zip) .mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_log) - .map{ it[1] } .mix(ch_multiqc_files) } @@ -155,7 +134,6 @@ workflow PREPROCESS_RNASEQ { ch_multiqc_files = FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_raw_zip .mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_trim_zip) .mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_json.map{tuple(it[0], [it[1]])}) - .map{ it[1] } .mix(ch_multiqc_files) } @@ -196,11 +174,35 @@ workflow PREPROCESS_RNASEQ { [ [], [] ], false ) - .primary_fastq - .set { ch_filtered_reads } + + BBMAP_BBSPLIT.out.primary_fastq + .set { ch_filtered_reads } + ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions.first()) } + // + // MODULE: Remove ribosomal RNA reads + // + if (remove_ribo_rna) { + ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()) + .map { row -> file(row, checkIfExists: true) } + .collect() + + SORTMERNA ( + ch_filtered_reads, + ch_sortmerna_fastas + ) + + SORTMERNA.out.reads + .set { ch_filtered_reads } + + ch_multiqc_files = ch_multiqc_files + .mix(SORTMERNA.out.log) + + ch_versions = ch_versions.mix(SORTMERNA.out.versions.first()) + } + // Branch FastQ channels if 'auto' specified to infer strandedness ch_filtered_reads .branch { @@ -248,7 +250,7 @@ workflow PREPROCESS_RNASEQ { reads = ch_strand_inferred_fastq trim_read_count = ch_trim_read_count - multiqc_files = ch_multiqc_files + multiqc_files = ch_multiqc_files.transpose().map{it[1]} versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/nf-core/preprocess_rnaseq/tests/main.nf.test b/subworkflows/nf-core/preprocess_rnaseq/tests/main.nf.test index 78c0ce74a7c..1b49859b151 100644 --- a/subworkflows/nf-core/preprocess_rnaseq/tests/main.nf.test +++ b/subworkflows/nf-core/preprocess_rnaseq/tests/main.nf.test @@ -18,24 +18,31 @@ nextflow_workflow { tag "subworkflows/fastq_fastqc_umitools_fastp" tag "subworkflows/fastq_subsample_fq_salmon" + + test("homo_sapiens paired-end [fastq] fastp") { when { workflow { """ - input[0] = Channel.of([ + ch_reads = Channel.of([ [ id:'test', single_end:false, strandedness:'auto' ], // meta map [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) ] - ]) // ch_reads + ]) + + ch_ribo_db = file('ribo_db.txt') + ch_ribo_db.append('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta') + + input[0] = ch_reads input[1] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) // ch_fasta input[2] = Channel.of(file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/transcriptome.fasta", checkIfExists: true)) // ch_transcript_fasta input[3] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)) // ch_gtf input[4] = [] // ch_salmon_index input[5] = [] // ch_bbsplit_index - input[6] = [] // ch_ribo_db + input[6] = ch_ribo_db // ch_ribo_db input[7] = true // skip_bbsplit input[8] = false // skip_fastqc input[9] = false // skip_trimming @@ -44,7 +51,7 @@ nextflow_workflow { input[12] = 'fastp' // trimmer input[13] = 10 // min_trimmed_reads input[14] = true // save_trimmed - input[15] = false // remove_ribo_rna + input[15] = true // remove_ribo_rna input[16] = false // with_umi input[17] = 0 // umi_discard_read """ @@ -72,19 +79,24 @@ nextflow_workflow { when { workflow { """ - input[0] = Channel.of([ + ch_reads = Channel.of([ [ id:'test', single_end:false, strandedness:'auto' ], // meta map [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) ] - ]) // ch_reads + ]) + + ch_ribo_db = file('ribo_db.txt') + ch_ribo_db.append('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta') + + input[0] = ch_reads input[1] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) // ch_fasta input[2] = Channel.of(file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/transcriptome.fasta", checkIfExists: true)) // ch_transcript_fasta input[3] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)) // ch_gtf input[4] = [] // ch_salmon_index input[5] = [] // ch_bbsplit_index - input[6] = [] // ch_ribo_db + input[6] = ch_ribo_db // ch_ribo_db input[7] = true // skip_bbsplit input[8] = false // skip_fastqc input[9] = false // skip_trimming @@ -93,7 +105,7 @@ nextflow_workflow { input[12] = 'fastp' // trimmer input[13] = 10 // min_trimmed_reads input[14] = true // save_trimmed - input[15] = false // remove_ribo_rna + input[15] = true // remove_ribo_rna input[16] = false // with_umi input[17] = 0 // umi_discard_read """ diff --git a/subworkflows/nf-core/preprocess_rnaseq/tests/main.nf.test.snap b/subworkflows/nf-core/preprocess_rnaseq/tests/main.nf.test.snap index 691e07ac0b2..21bdb2492e3 100644 --- a/subworkflows/nf-core/preprocess_rnaseq/tests/main.nf.test.snap +++ b/subworkflows/nf-core/preprocess_rnaseq/tests/main.nf.test.snap @@ -16,7 +16,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-07T11:05:51.060371" + "timestamp": "2024-02-24T16:19:13.057802" }, "trimgalore_test_pe_reads_2_lines": { "content": "eccf3e9e74589ff01c77fce7f4548e41", @@ -24,7 +24,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-07T11:16:44.427598" + "timestamp": "2024-02-24T17:44:07.667653" }, "fastp_test_pe_reads_1_size": { "content": [ @@ -34,7 +34,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-07T11:05:51.019935" + "timestamp": "2024-02-24T17:43:46.173892" }, "trimgalore_test_pe_reads_1_size": { "content": [ @@ -44,7 +44,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-07T11:16:44.398923" + "timestamp": "2024-02-24T17:44:07.642318" }, "trimgalore_test_pe_reads_1_lines": { "content": "3868fc1caf09367141d2bbf47e158823", @@ -52,7 +52,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-07T11:16:44.395858" + "timestamp": "2024-02-24T17:44:07.641186" }, "fastp_test_pe_reads_2_lines": { "content": "eccf3e9e74589ff01c77fce7f4548e41", @@ -60,7 +60,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-07T11:05:51.05632" + "timestamp": "2024-02-24T17:43:46.235022" }, "fastp_test_pe_reads_2_size": { "content": [ @@ -70,7 +70,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-07T11:05:51.058326" + "timestamp": "2024-02-24T17:43:46.242006" }, "trimgalore_test_pe_reads_2_size": { "content": [ @@ -80,7 +80,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-07T11:16:44.430226" + "timestamp": "2024-02-24T17:44:07.668644" }, "fastp_test_pe_reads_1_lines": { "content": "3868fc1caf09367141d2bbf47e158823", @@ -88,7 +88,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-07T11:05:51.015562" + "timestamp": "2024-02-24T17:43:46.161535" }, "trimgalore_read_count": { "content": [ @@ -107,6 +107,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-07T11:16:44.432645" + "timestamp": "2024-02-24T17:44:07.669435" } } \ No newline at end of file