Skip to content

Commit

Permalink
nextclade primers and no sample sheet
Browse files Browse the repository at this point in the history
  • Loading branch information
cjw85 committed Feb 24, 2021
1 parent b511347 commit 8387904
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 15 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [v0.0.7]
## Changed
- Sample sheet is no longer required.
- Nextclade visual will display overlap to primer scheme selected by user.

## [v0.0.6]
### Added
Expand Down
31 changes: 31 additions & 0 deletions bin/scheme_to_nextclade.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env python

import argparse
import os
import pysam


def main():
parser = argparse.ArgumentParser()
parser.add_argument('bed')
parser.add_argument('reference')
parser.add_argument('output')
args = parser.parse_args()

pysam.FastaFile(args.reference)
refs = {r.name:r.sequence for r in pysam.FastxFile(args.reference)}
scheme = os.path.splitext(os.path.basename(args.bed))[0]
#MN908947.3 30 54 nCoV-2019_1_LEFT 1 +
with open(args.bed) as fh, open(args.output, 'w') as out_fh:
out_fh.write("Country (Institute),Target,Oligonucleotide,Sequence\n")
for line in fh.readlines():
rname, start, end, name, pool, orient = line.split('\t')
pname = '{}:{}-{}'.format(rname, start, end)
seq = refs[rname][int(start):int(end)]
out_fh.write(','.join((scheme, pname, name, seq)))
out_fh.write('\n')


if __name__ == '__main__':
main()

59 changes: 44 additions & 15 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ Usage:
Options:
--fastq DIR Path to FASTQ directory (required)
--samples FILE CSV file with columns named `barcode` and `sample_name` (required)
--samples FILE CSV file with columns named `barcode` and `sample_name`
(or simply a sample name for non-multiplexed data).
--out_dir DIR Path for output (default: $params.out_dir)
--medaka_model STR Medaka model name (default: $params.medaka_model)
--min_len INT Minimum read length (default: set by scheme)
Expand All @@ -23,6 +24,12 @@ Options:
barcodes and sample names. (default: $params.scheme_version)
Notes:
If directories named "barcode*" are found under the `--fastq` directory the
data is assumed to be multiplex and each barcode directory will be processed
independently. If `.fastq(.gz)` files are found under the `--fastq` directory
the sample is assumed to not be multiplexed. In this second case `--samples`
should be a simple name rather than a CSV file.
Minimum and maximum rad length filters are applied based on the amplicon scheme.
These can be overridden using the `--min_len` and `--max_len` options.
"""
Expand Down Expand Up @@ -95,7 +102,11 @@ process runArtic {
}
# name everything by the sample rather than barcode
ln -s $directory $sample_name
if [[ "$directory" != "$sample_name" ]]; then
echo "Moving input: '$directory' to '$sample_name'"
mv $directory $sample_name
fi
artic guppyplex --skip-quality-check \
--min-length $params._min_len --max-length $params._max_len \
--directory $sample_name --prefix $sample_name \
Expand Down Expand Up @@ -348,11 +359,15 @@ process nextclade {
cpus 1
input:
file "consensus.fasta"
file "reference.fasta"
file scheme_bed
output:
file "nextclade.json"
"""
which nextclade
nextclade --input-fasta 'consensus.fasta' --output-json 'nextclade.json'
scheme_to_nextclade.py $scheme_bed reference.fasta primers.csv
nextclade \
--input-fasta consensus.fasta --input-pcr-primers primers.csv \
--output-json nextclade.json
"""
}

Expand All @@ -379,6 +394,8 @@ workflow pipeline {
take:
samples
scheme_directory
reference
primers
main:
read_summaries = preArticQC(samples)
runArtic(samples, scheme_directory)
Expand All @@ -387,11 +404,10 @@ workflow pipeline {
tmp = runArtic.out[1].toList().transpose().toList() // surely theres another way?
all_variants = allVariants(tmp)
// nextclade
clades = nextclade(all_consensus)
clades = nextclade(all_consensus, reference, primers)
// report
html_doc = report(
runArtic.out[2].collect(), read_summaries.collect(), clades.collect())


results = all_consensus.concat(all_variants, html_doc)
emit:
Expand All @@ -401,8 +417,8 @@ workflow pipeline {
// entrypoint workflow
workflow {

if (!params.samples or !params.fastq) {
println("`--samples and `--fastq` are required")
if (!params.fastq) {
println("`--fastq` is required")
helpMessage()
exit 1
}
Expand Down Expand Up @@ -443,16 +459,30 @@ workflow {
params.full_scheme_name = params.scheme_name + "/" + params.scheme_version
schemes = projectDir + '/data/primer_schemes'
scheme_directory = file(schemes, type: 'dir', checkIfExists:true)
reference = file(
"${scheme_directory}/${params.full_scheme_name}/${params.scheme_name}.reference.fasta",
type:'file', checkIfExists:true)
primers = file(
"${scheme_directory}/${params.full_scheme_name}/${params.scheme_name}.scheme.bed",
type:'file', checkIfExists:true)

// resolve whether we have demultiplexed data or single sample
barcode_dirs = file("$params.fastq/barcode*", type: 'dir', maxdepth: 1)
not_barcoded = file("$params.fastq/*.fastq", type: 'file', maxdepth: 1)
not_barcoded = file("$params.fastq/*.fastq*", type: 'file', maxdepth: 1)
if (barcode_dirs) {
println("Found barcode directories")
sample_sheet = Channel
.fromPath(params.samples, checkIfExists: true)
.splitCsv(header: true)
.map { row -> tuple(row.barcode, row.sample_name) }
if (params.samples) {
sample_sheet = Channel
.fromPath(params.samples, checkIfExists: true)
.splitCsv(header: true)
.map { row -> tuple(row.barcode, row.sample_name) }
} else {
// just map directory name to self
sample_sheet = Channel
.fromPath(barcode_dirs)
.filter(~/.*barcode[0-9]{1,3}$/) // up to 192
.map { path -> tuple(path.baseName, path.baseName) }
}
Channel
.fromPath(barcode_dirs)
.filter(~/.*barcode[0-9]{1,3}$/) // up to 192
Expand All @@ -468,7 +498,6 @@ workflow {
.join(sample_sheet)
.set{ samples }
}
//samples.view()
results = pipeline(samples, scheme_directory)
results = pipeline(samples, scheme_directory, reference, primers)
output(results)
}

0 comments on commit 8387904

Please sign in to comment.