-
Notifications
You must be signed in to change notification settings - Fork 734
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' into fix-nf-test-config
- Loading branch information
Showing
5 changed files
with
558 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
// | ||
// BAM deduplication with UMI processing | ||
// | ||
|
||
include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE as BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse' | ||
include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools' | ||
include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE as BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse' | ||
include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools' | ||
include { BAM_SORT_STATS_SAMTOOLS } from '../../../subworkflows/nf-core/bam_sort_stats_samtools' | ||
include { UMITOOLS_PREPAREFORRSEM } from '../../../modules/nf-core/umitools/prepareforrsem' | ||
include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' | ||
|
||
//
// Deduplicate genome and transcriptome BAMs using UMIs, with either
// umicollapse or umitools. The genome BAM is deduplicated directly. The
// transcriptome BAM is coordinate sorted, deduplicated, name sorted again and
// (for paired-end samples) passed through UMITOOLS_PREPAREFORRSEM.
//
workflow BAM_DEDUP_UMI {
    take:
    ch_genome_bam         // channel: [ val(meta), path(bam), path(bai) ]
    ch_fasta              // channel: [ val(meta), path(fasta) ]
    umi_dedup_tool        // string: 'umicollapse' or 'umitools'
    umitools_dedup_stats  // boolean: whether to generate UMI-tools dedup stats
    bam_csi_index         // boolean: whether to generate CSI index
    ch_transcriptome_bam  // channel: [ val(meta), path(bam) ]
    ch_transcript_fasta   // channel: [ val(meta), path(fasta) ]

    main:
    ch_versions = Channel.empty()

    // Fail fast on an unsupported tool so that the branches below can assume
    // exactly one of the two dedup subworkflows has been selected.
    if (umi_dedup_tool != "umicollapse" && umi_dedup_tool != "umitools"){
        error("Unknown umi_dedup_tool '${umi_dedup_tool}'")
    }

    // Genome BAM deduplication
    if (umi_dedup_tool == "umicollapse") {
        BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME (
            ch_genome_bam
        )
        UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME
        ch_dedup_log = UMI_DEDUP_GENOME.out.dedup_stats

    } else if (umi_dedup_tool == "umitools") {
        BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME (
            ch_genome_bam,
            umitools_dedup_stats
        )
        UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME
        ch_dedup_log = UMI_DEDUP_GENOME.out.deduplog
    }

    // Keep a genome-only handle on the dedup log: MultiQC should only see the
    // genome results (see the note on ch_multiqc_files below), while the
    // emitted dedup_log also carries the transcriptome logs.
    ch_genome_dedup_log = ch_dedup_log

    // Co-ordinate sort, index and run stats on transcriptome BAM. This takes
    // some preparation - we have to coordinate sort the BAM, run the
    // deduplication, then restore name sorting and run a script from umitools
    // to prepare for rsem or salmon

    // 1. Coordinate sort

    BAM_SORT_STATS_SAMTOOLS (
        ch_transcriptome_bam,
        ch_transcript_fasta
    )
    ch_sorted_transcriptome_bam = BAM_SORT_STATS_SAMTOOLS.out.bam
        .join(BAM_SORT_STATS_SAMTOOLS.out.bai)

    // 2. Transcriptome BAM deduplication
    if (umi_dedup_tool == "umicollapse") {
        BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME (
            ch_sorted_transcriptome_bam
        )
        UMI_DEDUP_TRANSCRIPTOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME
        // Add the transcriptome log (previously the genome log was mixed in a
        // second time, duplicating it and dropping the transcriptome log).
        ch_dedup_log = ch_dedup_log.mix(UMI_DEDUP_TRANSCRIPTOME.out.dedup_stats)

    } else if (umi_dedup_tool == "umitools") {
        BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME (
            ch_sorted_transcriptome_bam,
            umitools_dedup_stats
        )
        UMI_DEDUP_TRANSCRIPTOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME
        ch_dedup_log = ch_dedup_log.mix(UMI_DEDUP_TRANSCRIPTOME.out.deduplog)
    }

    // 3. Restore name sorting
    // NOTE(review): this passes the genome FASTA (ch_fasta) alongside a
    // transcriptome BAM, whereas step 1 uses ch_transcript_fasta - confirm
    // which reference SAMTOOLS_SORT should receive here.
    SAMTOOLS_SORT (
        UMI_DEDUP_TRANSCRIPTOME.out.bam,
        ch_fasta
    )

    // 4. Run prepare_for_rsem.py on paired-end BAM files
    // This fixes paired-end reads in name sorted BAM files
    // See: https://github.com/nf-core/rnaseq/issues/828
    ended_transcriptome_dedup_bam = SAMTOOLS_SORT.out.bam
        .branch {
            meta, bam ->
                single_end: meta.single_end
                    return [ meta, bam ]
                paired_end: !meta.single_end
                    return [ meta, bam ]
        }

    // The empty list fills the third element of the input tuple expected by
    // UMITOOLS_PREPAREFORRSEM (presumably the BAM index - verify against the
    // module definition).
    UMITOOLS_PREPAREFORRSEM (
        ended_transcriptome_dedup_bam.paired_end
            .map { meta, bam -> [ meta, bam, [] ] }
    )

    // Single-end BAMs bypass the fix-up step and are re-united with the
    // processed paired-end BAMs.
    ch_dedup_transcriptome_bam = ended_transcriptome_dedup_bam.single_end
        .mix(UMITOOLS_PREPAREFORRSEM.out.bam)

    // Collect files useful for MultiQC into one helpful emission. Don't
    // automatically add transcriptome stats - difficult to separate in multiqc
    // without a bit more work

    ch_multiqc_files = ch_genome_dedup_log
        .mix(UMI_DEDUP_GENOME.out.stats)
        .mix(UMI_DEDUP_GENOME.out.flagstat)
        .mix(UMI_DEDUP_GENOME.out.idxstats)
        .transpose()
        .map{it[1]}

    // Record versions
    // NOTE(review): SAMTOOLS_SORT versions are not collected here; add them if
    // the module exposes a `versions` output.

    ch_versions = ch_versions
        .mix(UMI_DEDUP_GENOME.out.versions)
        .mix(BAM_SORT_STATS_SAMTOOLS.out.versions)
        .mix(UMI_DEDUP_TRANSCRIPTOME.out.versions)
        .mix(UMITOOLS_PREPAREFORRSEM.out.versions)

    emit:
    bam               = UMI_DEDUP_GENOME.out.bam                                                // channel: [ val(meta), path(bam) ]
    bai               = bam_csi_index ? UMI_DEDUP_GENOME.out.csi : UMI_DEDUP_GENOME.out.bai     // channel: [ val(meta), path(bai) ]
    dedup_log         = ch_dedup_log                                                            // channel: [ val(meta), path(log) ]
    stats             = UMI_DEDUP_GENOME.out.stats.mix(UMI_DEDUP_TRANSCRIPTOME.out.stats)       // channel: [ val(meta), path(stats)]
    flagstat          = UMI_DEDUP_GENOME.out.flagstat.mix(UMI_DEDUP_TRANSCRIPTOME.out.flagstat) // channel: [ val(meta), path(flagstat)]
    idxstats          = UMI_DEDUP_GENOME.out.idxstats.mix(UMI_DEDUP_TRANSCRIPTOME.out.idxstats) // channel: [ val(meta), path(idxstats)]
    multiqc_files     = ch_multiqc_files                                                        // channel: file
    transcriptome_bam = ch_dedup_transcriptome_bam                                              // channel: [ val(meta), path(bam) ]
    versions          = ch_versions                                                             // channel: [ path(versions.yml) ]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
# Metadata for the BAM_DEDUP_UMI subworkflow. The input and output entries
# below mirror the workflow's `take:` and `emit:` blocks.
name: "bam_dedup_umi"
description: BAM deduplication with UMI processing for both genome and transcriptome alignments
keywords:
  - deduplication
  - UMI
  - BAM
  - genome
  - transcriptome
  - umicollapse
  - umitools

components:
  - umitools/prepareforrsem
  - samtools/sort
  - bam_dedup_stats_samtools_umicollapse
  - bam_dedup_stats_samtools_umitools
  - bam_sort_stats_samtools

input:
  - ch_genome_bam:
      description: Channel with genome BAM files
      structure:
        - meta:
            type: map
            description: Metadata map
        - bam:
            type: file
            description: BAM file
            pattern: "*.bam"
        - bai:
            type: file
            description: BAM index file
            pattern: "*.bai"
  - ch_fasta:
      description: Channel with genome FASTA file
      structure:
        - meta:
            type: map
            description: Metadata map
        - fasta:
            type: file
            description: Genome FASTA file
            pattern: "*.{fa,fasta}"
  - umi_dedup_tool:
      description: UMI deduplication tool to use
      structure:
        - value:
            type: string
            description: Either 'umicollapse' or 'umitools'
  - umitools_dedup_stats:
      description: Whether to generate UMI-tools deduplication stats
      structure:
        - value:
            type: boolean
            description: True or False
  - bam_csi_index:
      description: Whether to generate CSI index
      structure:
        - value:
            type: boolean
            description: True or False
  - ch_transcriptome_bam:
      description: Channel with transcriptome BAM files
      structure:
        - meta:
            type: map
            description: Metadata map
        - bam:
            type: file
            description: BAM file
            pattern: "*.bam"
  - ch_transcript_fasta:
      description: Channel with transcript FASTA file
      structure:
        - meta:
            type: map
            description: Metadata map
        - fasta:
            type: file
            description: Transcript FASTA file
            pattern: "*.{fa,fasta}"

output:
  - bam:
      description: Channel containing deduplicated genome BAM files
      structure:
        - meta:
            type: map
            description: Metadata map
        - bam:
            type: file
            description: Deduplicated BAM file
            pattern: "*.bam"
  # The workflow has no separate `csi` output: the `bai` channel carries the
  # CSI index when bam_csi_index is true and the BAI index otherwise.
  - bai:
      description: Channel containing genome BAM index files (CSI if bam_csi_index is true, otherwise BAI)
      structure:
        - meta:
            type: map
            description: Metadata map
        - bai:
            type: file
            description: BAM index file (BAI or CSI)
            pattern: "*.{bai,csi}"
  - dedup_log:
      description: Channel containing deduplication log files
      structure:
        - meta:
            type: map
            description: Metadata map
        - log:
            type: file
            description: Deduplication log file
            pattern: "*.log"
  - stats:
      description: Channel containing BAM statistics files
      structure:
        - meta:
            type: map
            description: Metadata map
        - stats:
            type: file
            description: BAM statistics file
            pattern: "*.stats"
  - flagstat:
      description: Channel containing flagstat files
      structure:
        - meta:
            type: map
            description: Metadata map
        - flagstat:
            type: file
            description: Flagstat file
            pattern: "*.flagstat"
  - idxstats:
      description: Channel containing idxstats files
      structure:
        - meta:
            type: map
            description: Metadata map
        - idxstats:
            type: file
            description: Idxstats file
            pattern: "*.idxstats"
  - multiqc_files:
      description: Channel containing files for MultiQC
      structure:
        - file:
            type: file
            description: File for MultiQC
  - transcriptome_bam:
      description: Channel containing deduplicated transcriptome BAM files
      structure:
        - meta:
            type: map
            description: Metadata map
        - bam:
            type: file
            description: Deduplicated transcriptome BAM file
            pattern: "*.bam"
  - versions:
      description: Channel containing software versions file
      structure:
        - versions:
            type: file
            description: File containing versions of the software used
            pattern: "versions.yml"

authors:
  - "@pinin4fjords"
maintainers:
  - "@pinin4fjords"
Oops, something went wrong.