Skip to content

Commit

Permalink
Added demultiplexing option
Browse files Browse the repository at this point in the history
Added shared conda env location
  • Loading branch information
mcmero committed Feb 26, 2024
1 parent b518aff commit 1ea7c9a
Show file tree
Hide file tree
Showing 7 changed files with 289 additions and 16 deletions.
5 changes: 5 additions & 0 deletions .test/data/config.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
groups ids tags distances next minFindsG maxFindsG locations
Fwd Fwd_01 TAGATCGC 0 {{Rev}} 1 1 0:0:12
Fwd Fwd_02 CTCTCTAT 0 {{Rev}} 1 1 0:0:12
Rev Rev_01 TCGCCTTA 0 - 1 1 0:-12:0
Rev Rev_02 CTAGTACG 0 - 1 1 0:-12:0
4 changes: 4 additions & 0 deletions .test/data/indexes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Fwd_01
Fwd_02
Rev_01
Rev_02
54 changes: 52 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,13 @@ println "* genomicsrnd@wehi.edu.au *"
println "* *"
println "*********************************************************"

include { trim_primer } from './modules/trim.nf'
include { TrimPrimer } from './modules/trim.nf'
include { GenerateSelectFile } from './modules/demux.nf'
include { CreateConfigFile } from './modules/demux.nf'
include { SplitCode } from './modules/demux.nf'
if (!workflow.stubRun) {
include { fromQuery } from 'plugin/nf-sqldb'
}

workflow {

Expand All @@ -22,6 +28,50 @@ workflow {
""")
}.set{input_ch}

trim_primer(input_ch, params.fwd_primer, params.rev_primer, params.mismatches, params.barcode_length, params.output_untrimmed)
trim_ch = TrimPrimer(input_ch,
params.fwd_primer,
params.rev_primer,
params.primer_mismatches,
params.barcode_length,
params.output_untrimmed)

if (params.demultiplex) {
if (!params.is_config_file_provided) {
def where_ch = []
// Construct the where clause for the query
new File(params.index_template_file).readLines().each { line ->
if (line.trim() != 'index_name') {
where_ch << "'${line.trim()}'"
}
}
def where_clause = where_ch.join(",")
def query = """SELECT index_name,
index_sequence,
index_sequence_rc,
index_direction
FROM amplicon_index
WHERE index_name IN (${where_clause});"""

Channel.fromQuery(query, db: 'my-db', batchSize:100)
.map { index ->
def id = index[0]
def direction = index[3]
def group = direction == "F" ? "Fwd" : "Rev"
def tag = direction == "F" ? index[1] : index[2] // index_sequence or index_sequence_rc
def distances = direction == 'F' ? "${params.idx_5p_mismatch}" : "${params.idx_3p_mismatch}"
def next = direction == 'F' ? '{{Rev}}' : '-'
def locations = direction == 'F' ? "0:0:${params.bases_num_r1}" : "0:${params.bases_num_r2}:0"

return "$group\t$id\t$tag\t$distances\t$next\t1\t1\t$locations"
}
.collectFile(name: 'config.txt', newLine: true).set{config_ch}
CreateConfigFile(config_ch).set{configFile}
} else {
Channel.fromPath("${params.input_dir}/config.txt").set { configFile }
}
GenerateSelectFile(file(params.index_template_file)).set{selectTxt}
SplitCode(trim_ch.trimmed_ch,
configFile,
selectTxt)
}
}
100 changes: 100 additions & 0 deletions modules/demux.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// This module was written by WEHI Research Computing
// see https://github.com/WEHIGenomicsRnD/demultiplex-paired-end-library

// Builds the splitcode --keep "select" file: one line per Fwd/Rev index
// pairing, mapping the pair "Fwd_xx,Rev_yy" to the output name "Fwd_xx-Rev_yy".
process GenerateSelectFile {
    label 'CreateSFile'
    queue 'regular'
    cpus 2
    memory 1.GB
    time '1h'
    publishDir "${params.outdir}/", mode: 'copy'

    input:
    path primer_template_file

    output:
    path "select.txt"


    script:
    // NOTE(review): the original also assigned an unused `primer_template`
    // variable holding the whole file; it has been removed.
    """
    #!/usr/bin/env bash
    # Strip Windows carriage returns, keep the first comma-separated field,
    # then partition the index names into forward and reverse sets.
    fwd_primers=\$(cat ${primer_template_file} |tr -d '\r'| awk -F ',' '{print \$1}' | grep Fwd)
    rev_primers=\$(cat ${primer_template_file} |tr -d '\r'| awk -F ',' '{print \$1}' | grep Rev)
    > select.txt
    # Emit every Fwd x Rev combination as "<fwd>,<rev>\t<fwd>-<rev>".
    for fwd_primer in \${fwd_primers[@]}; do
        for rev_primer in \${rev_primers[@]}; do
            echo -e "\${fwd_primer},\${rev_primer}\t\${fwd_primer}-\${rev_primer}" >> select.txt
        done
    done
    """
}

// Prepends the splitcode column header to the generated per-index config
// lines and publishes the result as config.txt.
process CreateConfigFile {

    label 'CreateCFile'
    queue 'regular'
    cpus 2
    memory 1.GB
    time '1h'

    // was: tag "${sampleId}" — sampleId is not defined anywhere in this
    // process (the only input is configtxt) and would fail at runtime.
    tag "${configtxt}"
    publishDir "${params.outdir}/", mode: 'copy'

    input:
    // The upstream collectFile names this file 'config.txt', which collides
    // with the declared output of the same name; stage it under a different
    // name so the output redirection does not write through the staged link.
    path(configtxt, stageAs: 'config_body.txt')

    output:
    path('config.txt')

    script:
    """
    echo -e "groups\tids\ttags\tdistances\tnext\tminFindsG\tmaxFindsG\tlocations\n\$(cat ${configtxt})" > config.txt
    """

}
// Runs splitcode to demultiplex the trimmed reads using the generated
// config and select files, then renames splitcode's "_0" single-FASTQ
// outputs to plain ".fastq.gz" names.
process SplitCode {
    label 'SplitCode'

    // Skip scheduler/resource directives on stub runs so `-stub` can execute
    // without a cluster.
    if ( "${workflow.stubRun}" == "false" ) {
        queue 'regular'
        cpus 16
        memory { 8.GB * task.attempt }
        errorStrategy { 'retry' }
        maxRetries 5
        time '24h'
    }

    tag "${reads.getSimpleName()}"
    publishDir "${params.outdir}/split/${reads.getSimpleName()}", mode: 'copy'
    container 'oras://ghcr.io/wehi-researchcomputing/splitcode_container:latest'


    input:
    path(reads)
    path(config)
    path(select)


    output:
    path "*.fastq*"
    path "*.txt"


    script:
    """
    splitcode -c ${config} --keep=${select} -t ${task.cpus} --nFastqs=1 \
        --assign --summary summary.txt -o out.fastq --gzip \
        --no-outb --mapping mapping.txt --seq-names \
        --mod-names --com-names --unassigned=unmapped.fastq.gz \
        ${reads}
    # Drop the "_0" suffix splitcode appends when --nFastqs=1.
    # nullglob: without it an unmatched pattern is passed literally to mv
    # and the task fails even though splitcode succeeded.
    shopt -s nullglob
    for file in *_0.fastq.gz; do
        newname="\${file/_0.fastq.gz/.fastq.gz}"
        mv "\$file" "\$newname"
    done
    """
}
11 changes: 7 additions & 4 deletions modules/trim.nf
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
process trim_primer {
label = 'trim_primer'
process TrimPrimer {
label = 'TrimPrimer'

publishDir params.outdir, mode: 'copy'

conda "${projectDir}/envs/biopython.yaml"
conda "${ params.conda_env_location != null && params.conda_env_location != '' ?
params.conda_env_location + '/biopython' :
projectDir + '/envs/biopython.yaml' }"

input:
path fastq
Expand All @@ -14,7 +16,8 @@ process trim_primer {
val output_untrimmed

output:
path "*.fastq*"
path "*_trimmed.fastq*", emit: trimmed_ch
path "*_untrimmed.fastq*", emit: untrimmed_ch, optional: true
path "*.txt"

script:
Expand Down
56 changes: 50 additions & 6 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,66 @@ params {

rev_primer = "CCTAATATACGGACGCAATC"

mismatches = 1
primer_mismatches = 1

barcode_length = 12

output_untrimmed = true

index_template_file = "$projectDir/.test/data/indexes.txt"

demultiplex = false

//number of bases to search for index 1 at start of read
bases_num_r1 = 12
//number of bases to search for index 2 at end of read
//note that this number should be negative
bases_num_r2 = -12

idx_5p_mismatch = 1

idx_3p_mismatch = 1

//if false, the database is queried to create one
//if true, config.txt should be in the input directory
is_config_file_provided = true

// if specified, use this location to load prebuilt conda environments
conda_env_location = ""
}

//SQL DB Plugin
plugins {
id 'nf-sqldb@0.1.0'
}

sql {
db {
'my-db' {
url = 'jdbc:mysql://pipelinepilot.hpc.wehi.edu.au:3306/genomics?useLegacyDatetimeCode=false&serverTimezone=Australia/Melbourne'
user = 'genomics'
password = secrets.DB
}
}
}

profiles {
milton {
conda.enabled = true
cleanup = true
process.cpus = 1
process.memory = { 8.GB * task.attempt }
process.time = '12h'
process.errorStrategy = { 'retry' }
process.maxRetries = 5
apptainer.enabled = false
apptainer.autoMounts = false
singularity.enabled = false
process {
cpus = 1
memory = { 8.GB * task.attempt }
time = '12h'
errorStrategy = { 'retry' }
maxRetries = 4
withLabel:SplitCode {
module = 'splitcode/0.28.3'
}
}
}
test {
conda.enabled = true
Expand Down
75 changes: 71 additions & 4 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
}
}
},
"other_options": {
"title": "Other Options",
"primer_options": {
"title": "Primer Options",
"type": "object",
"description": "",
"default": "",
Expand All @@ -45,7 +45,7 @@
"default": "",
"description": "Reverse primer sequence"
},
"mismatches": {
"primer_mismatches": {
"type": "integer",
"default": 3,
"description": "number of mismatches allowed in fwd or rev primers"
Expand All @@ -67,14 +67,81 @@
"mismatches",
"barcode_length"
]
},
"demultiplex_options": {
"title": "Demultiplexing Options",
"type": "object",
"description": "",
"default": "",
"properties": {
"demultiplex" : {
"type": "boolean",
"default": false,
"description": "Whether to demultiplex the reads."
},
"bases_num_r1": {
"type": "integer",
"default": 10,
"description": "Number of bases to search for forward index from start of read."
},
"bases_num_r2": {
"type": "integer",
"default": 13,
"description": "Number of bases to search for reverse index from end of read (number should be negative)."
},
"idx_5p_mismatch": {
"type": "integer",
"default": 0,
"description": "Number of mismatches allowed in 5' index."
},
"idx_3p_mismatch": {
"type": "integer",
"default": 0,
"description": "Number of mismatches allowed in 3' index."
},
"index_template_file": {
"type": "string",
"fa_icon": "fas fa-file-csv",
"format": "file-path",
"description": "Text file containing index names used (must match database or config file IDs)."
},
"is_config_file_provided": {
"type": "boolean",
"default": false,
"description": "Optional ready-made config file for splitcode. Use this to skip the databse index lookup. Must be called config.txt and put in input directory."
}
}
},
"generic_options": {
"title": "Generic options",
"type": "object",
"fa_icon": "fas fa-file-import",
"description": "Less common options for the pipeline, typically set in a config file.",
"help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.",
"properties": {
"conda_env_location": {
"type": "string",
"format": "directory-path",
"description": "Use this location for pre-build conda environments (if blank, new conda environments will be created).",
"fa_icon": "fas fa-cog",
"default": "",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/input_output_options"
},
{
"$ref": "#/definitions/other_options"
"$ref": "#/definitions/primer_options"
},
{
"$ref": "#/definitions/demultiplex_options"
},
{
"$ref": "#/definitions/generic_options"
}
]
}

0 comments on commit 1ea7c9a

Please sign in to comment.