Skip to content

Commit

Permalink
Consensus sequence workflow
Browse files Browse the repository at this point in the history
Also fixed issue with multiple headers being
written to splitcode config file when resuming
pipeline
  • Loading branch information
mcmero committed Jun 21, 2024
1 parent 0ec414a commit 0a86dc9
Show file tree
Hide file tree
Showing 10 changed files with 216 additions and 5 deletions.
12 changes: 10 additions & 2 deletions .test/test.config
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ params {
// set this to a higher value if using a reference with a large number of
// repetitive sequences
minimap_f = 0.0002

// whether to generate consensus sequence per sample
consensus = true

// model to use for medaka polishing
medaka_model = "r1041_e82_400bps_sup_v4.2.0"
}

//SQL DB Plugin
Expand Down Expand Up @@ -101,7 +107,9 @@ profiles {
stubRun = true
cleanup = false
}
log {
includeConfig 'logger.config'
debug {
dumpHashes = true
process.beforeScript = 'echo $HOSTNAME'
cleanup = false
}
}
8 changes: 8 additions & 0 deletions envs/racon-medaka.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Conda environment used by the Racon and Medaka processes of the consensus
# workflow (referenced as envs/racon-medaka.yaml from both process definitions).
# NOTE(review): neither package is version-pinned here, so the resolved versions
# may drift between installs — confirm that the resolved medaka release supports
# the model configured in params.medaka_model.
name: racon-medaka
channels:
- conda-forge
- bioconda
dependencies:
- racon
- medaka

22 changes: 20 additions & 2 deletions main.nf
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ include { SplitCode } from './modules/demux.nf'
include { IndexGuides } from './modules/count.nf'
include { CountGuides } from './modules/count.nf'
include { CollateCounts } from './modules/count.nf'
include { Consensus } from './subworkflows/consensus'
if (params.use_db) {
include { fromQuery } from 'plugin/nf-sqldb'
}
Expand Down Expand Up @@ -89,7 +90,7 @@ workflow {

return "$group\t$id\t$sequence\t$distances\t$nextTag\t1\t1\t$locations"
}
.collectFile(name: 'config.txt', newLine: true).set{config_ch}
.collectFile(name: 'config_tmp.txt', newLine: true).set{config_ch}
} else {
// build the config file from the index template
def indexes = []
Expand All @@ -108,7 +109,7 @@ workflow {
}
}
Channel.from( indexes )
.collectFile(name: 'config.txt', newLine: true).set{config_ch}
.collectFile(name: 'config_tmp.txt', newLine: true).set{config_ch}
}
CreateConfigFile(config_ch).set{configFile}
GenerateSelectFile(file(params.index_template_file)).set{selectTxt}
Expand All @@ -126,5 +127,22 @@ workflow {
IndexGuides(params.guides_fasta).set{index_ch}
CountGuides(index_ch.done, demux_ch, file("${params.outdir}/${guidesIndex}")).set{count_ch}
CollateCounts(count_ch.counts.collect())

if (params.consensus) {
    // Reformat the CountGuides `alignments` output for the Consensus
    // sub-workflow: one tuple (sampleName, bamFile, fastqFile) per BAM.
    //
    // Nextflow emits a bare path (not a list) when a `path` glob or input
    // holds exactly one file, so both elements are normalised to lists
    // before they are indexed; otherwise a sample with a single BAM/FASTQ
    // would fail on `.indices`.
    // NOTE(review): BAMs and FASTQs are paired by position, as before —
    // confirm that both collections are ordered consistently upstream.
    count_ch.alignments
        .flatMap { sample ->
            def (sampleName, bamFiles, fastqFiles) = sample
            def bams = bamFiles instanceof Collection ? (bamFiles as List) : [bamFiles]
            def fastqs = fastqFiles instanceof Collection ? (fastqFiles as List) : [fastqFiles]
            return bams.indices.collect { index ->
                [sampleName, bams[index], fastqs[index]]
            }
        }
        // filter out unmapped and out files from splitcode
        .filter { sampleName, bamFile, fastqFile ->
            !bamFile.getName().startsWith("unmapped.bam") &&
            !bamFile.getName().startsWith("out.bam")
        }
        .set { bam_ch }

    Consensus(bam_ch, file(params.guides_fasta), params.medaka_model)
}
}
}
22 changes: 22 additions & 0 deletions modules/bam2sam.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
 * PrepareForConsensus
 *
 * Converts a BAM file to SAM (header included) and re-compresses the matching
 * FASTQ with bgzip.
 *
 * Input : tuple (sampleName, bam, fastq)
 * Output: tuple (sampleName, <bam simple name>.sam, <bam simple name>.fastq.bz)
 */
process PrepareForConsensus {
    // Process directives are method calls and take no '='; written as an
    // assignment the label is not registered as a directive.
    label 'PrepareForConsensus'

    publishDir "${params.outdir}/count/${sampleName}"

    // Use a pre-built environment when conda_env_location is set, otherwise
    // fall back to the environment spec shipped with the pipeline.
    conda "${ params.conda_env_location != null && params.conda_env_location != '' ?
              params.conda_env_location + '/minimap-samtools' :
              projectDir + '/envs/minimap-samtools.yaml' }"

    input:
    tuple val(sampleName), path(bam), path(fastq)

    output:
    tuple val(sampleName), path("*.sam"), path("*.bz")

    script:
    // output files are named after the BAM file, minus its extensions
    def sample = bam.getSimpleName()
    """
    samtools view -h ${bam} > ${sample}.sam
    zcat ${fastq} | bgzip -c - > ${sample}.fastq.bz
    """
}
2 changes: 1 addition & 1 deletion modules/count.nf
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ process CountGuides {
path guides_index

output:
path "*.bam*"
tuple val(sampleName), path("*.bam"), path(fastqs), emit: alignments
path "*.txt", emit: counts

script:
Expand Down
49 changes: 49 additions & 0 deletions modules/medaka.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// based on https://github.com/nf-core/modules/blob/master/modules/nf-core/medaka/main.nf

/*
 * Medaka
 *
 * Runs `medaka_consensus` on a draft assembly using the supplied reads and
 * model, then renames the resulting consensus.fasta to <prefix>.fa and gzips it.
 *
 * Input : tuple (sampleName, reads, assembly), medaka_model
 * Output: assembly -> tuple (sampleName, <prefix>.fa.gz)
 *         versions -> versions.yml recording the medaka version
 */
process Medaka {
    label 'Medaka'
    tag "${reads.getSimpleName()}"

    publishDir "${params.outdir}/consensus/${sampleName}", mode: 'copy'

    // Fall back to the bundled environment spec when no pre-built conda
    // environment location has been configured.
    conda "${ params.conda_env_location == null || params.conda_env_location == '' ?
              projectDir + '/envs/racon-medaka.yaml' :
              params.conda_env_location + '/racon-medaka' }"

    // NOTE(review): this image pins medaka 1.4.4 while the default
    // params.medaka_model is r1041_e82_400bps_sup_v4.2.0 — confirm that the
    // pinned release actually ships that model.
    container "${ workflow.containerEngine != 'singularity' || task.ext.singularity_pull_docker_container ?
                  'biocontainers/medaka:1.4.4--py38h130def0_0' :
                  'https://depot.galaxyproject.org/singularity/medaka:1.4.4--py38h130def0_0' }"

    input:
    tuple val(sampleName), path(reads), path(assembly)
    val(medaka_model)

    output:
    tuple val(sampleName), path("*.fa.gz"), emit: assembly
    path "versions.yml"                   , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // optional extra command-line arguments and output prefix come from task.ext
    def outPrefix = task.ext.prefix ? task.ext.prefix : reads.getSimpleName()
    def extraArgs = task.ext.args ? task.ext.args : ''
    """
    medaka_consensus \\
        -t $task.cpus \\
        $extraArgs \\
        -i $reads \\
        -d $assembly \\
        -m $medaka_model \\
        -o ./
    mv consensus.fasta ${outPrefix}.fa
    gzip -n ${outPrefix}.fa
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' )
    END_VERSIONS
    """
}
44 changes: 44 additions & 0 deletions modules/racon.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// adapted from https://github.com/nf-core/modules/blob/master/modules/nf-core/racon/main.nf

/*
 * Racon
 *
 * Runs `racon` with the reads, the SAM alignments and the assembly, writing
 * its standard output to <prefix>_racon_consensus.fasta.
 *
 * Input : tuple (sampleName, sam, reads), assembly
 * Output: racon_consensus -> tuple (sampleName, reads, *_racon_consensus.fasta)
 *         versions        -> versions.yml recording the racon version
 */
process Racon {
    label 'Racon'
    tag "${sam.getSimpleName()}"

    publishDir "${params.outdir}/consensus/${sampleName}", mode: 'copy'

    // Fall back to the bundled environment spec when no pre-built conda
    // environment location has been configured.
    conda "${ params.conda_env_location == null || params.conda_env_location == '' ?
              projectDir + '/envs/racon-medaka.yaml' :
              params.conda_env_location + '/racon-medaka' }"

    container "${ workflow.containerEngine != 'singularity' || task.ext.singularity_pull_docker_container ?
                  'biocontainers/racon:1.4.20--h9a82719_1' :
                  'https://depot.galaxyproject.org/singularity/racon:1.4.20--h9a82719_1' }"

    input:
    tuple val(sampleName), path(sam), path(reads)
    path(assembly)

    output:
    // the reads are passed through unchanged alongside the new consensus
    tuple val(sampleName), path(reads), path('*_racon_consensus.fasta'), emit: racon_consensus
    path "versions.yml"                                                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // optional extra command-line arguments and output prefix come from task.ext
    def outPrefix = task.ext.prefix ? task.ext.prefix : sam.getSimpleName()
    def extraArgs = task.ext.args ? task.ext.args : ''
    """
    racon -t "$task.cpus" \\
        "${reads}" \\
        "${sam}" \\
        $extraArgs \\
        "${assembly}" > \\
        ${outPrefix}_racon_consensus.fasta
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        racon: \$( racon --version 2>&1 | sed 's/^.*v//' )
    END_VERSIONS
    """
}
11 changes: 11 additions & 0 deletions nextflow.config
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ params {
// set this to a higher value if using a reference with a large number of
// repetitive sequences
minimap_f = 0.0002

// whether to generate consensus sequence per sample
consensus = false

// model to use for medaka polishing
medaka_model = "r1041_e82_400bps_sup_v4.2.0"
}

//SQL DB Plugin
Expand Down Expand Up @@ -99,6 +105,11 @@ profiles {
stubRun = true
cleanup = false
}
debug {
dumpHashes = true
process.beforeScript = 'echo $HOSTNAME'
cleanup = false
}
}

def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
Expand Down
5 changes: 5 additions & 0 deletions nextflow_schema.json
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,11 @@
"type": "boolean",
"default":false,
"description": "If true, only count reads and skip trimming and demultiplexing."
},
"consensus": {
"type": "boolean",
"default": false,
"description": "If true, generate consensus sequences from guide alignments."
}
}
},
Expand Down
46 changes: 46 additions & 0 deletions subworkflows/consensus.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
include { Racon } from '../modules/racon'
include { Medaka } from '../modules/medaka'

/*
 * PrepareForConsensus
 *
 * Converts the BAM file to SAM for racon and re-zips the FASTQ file using
 * bgzip for medaka.
 *
 * Input : tuple (sampleName, bam, fastq)
 * Output: racon_input -> tuple (sampleName, <bam simple name>.sam,
 *                               rezip_<bam simple name>.fastq.gz)
 */
process PrepareForConsensus {
    // Process directives are method calls and take no '='; written as an
    // assignment the label is not registered as a directive.
    label 'PrepareForConsensus'

    publishDir "${params.outdir}/count/${sampleName}"

    // Use a pre-built environment when conda_env_location is set, otherwise
    // fall back to the environment spec shipped with the pipeline.
    conda "${ params.conda_env_location != null && params.conda_env_location != '' ?
              params.conda_env_location + '/minimap-samtools' :
              projectDir + '/envs/minimap-samtools.yaml' }"

    input:
    tuple val(sampleName), path(bam), path(fastq)

    output:
    tuple val(sampleName), path("*.sam"), path("rezip_*.fastq.gz"), emit: racon_input

    script:
    // output files are named after the BAM file, minus its extensions
    def sample = bam.getSimpleName()
    """
    samtools view -h ${bam} > ${sample}.sam
    zcat ${fastq} | bgzip -c - > rezip_${sample}.fastq.gz
    """
}

/*
 * Consensus sub-workflow
 *
 * Chains PrepareForConsensus -> Racon -> Medaka.
 *
 * take:
 *   consensus_input - channel passed straight to PrepareForConsensus
 *   reference       - passed to Racon as its assembly input
 *   medaka_model    - model name handed to Medaka
 * emit:
 *   consensus_sequences - the `assembly` output of Medaka
 */
workflow Consensus {
    take:
    consensus_input
    reference
    medaka_model

    main:
    // step 1: prepare the per-sample SAM and re-zipped reads
    PrepareForConsensus(consensus_input)
    prepared_ch = PrepareForConsensus.out.racon_input

    // step 2: racon consensus against the reference
    Racon(prepared_ch, reference)
    draft_ch = Racon.out.racon_consensus

    // step 3: medaka polishing of the racon consensus
    Medaka(draft_ch, medaka_model)
    consensus_sequences = Medaka.out.assembly

    emit:
    consensus_sequences
}

0 comments on commit 0a86dc9

Please sign in to comment.