-
Notifications
You must be signed in to change notification settings - Fork 0
/
Snakefile
55 lines (50 loc) · 1.56 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from os.path import join as pjoin
# Number of cores granted to this Snakemake invocation; the rules below
# derive their per-job thread counts from it.
NT = workflow.cores
# Root directory under which one sub-directory per `pr_idx` value is used.
WD = config["wd"]
# Path to the sample sheet: one record per line, exactly five
# comma-separated fields (pr_idx, i, sra_idx, seqt, paired).
csv = config["csv"]

# Maps sra_idx (the accession handed to fasterq-dump) -> pr_idx (the
# sub-directory of WD that receives its files). Only records whose last
# field is exactly "PAIRED" are kept.
DATA = {}
with open(csv) as sheet:  # context manager so the handle is always closed
    for line in sheet:
        # Strip both "\n" and "\r" so a CRLF-terminated sheet still matches
        # "PAIRED" in the last column.
        line = line.rstrip("\r\n")
        if not line:
            # Tolerate blank lines (e.g. a trailing empty line at EOF)
            # instead of failing on the five-way unpack below.
            continue
        pr_idx, i, sra_idx, seqt, paired = line.split(",")
        if paired != "PAIRED":
            continue
        DATA[sra_idx] = pr_idx
# Aggregate target: requests the shovill contigs of every run recorded in
# DATA. `zip` pairs each sra_idx with its own pr_idx instead of building the
# full cross product.
rule run:
    input:
        expand(
            pjoin(WD, "{pr_idx}", "{sra_idx}-shovill", "contigs.fa"),
            zip,
            pr_idx=list(DATA.values()),
            sra_idx=list(DATA),
        ),
# Download one SRA run with fasterq-dump into WD/<pr_idx>/ as a pair of
# FASTQ files. Both outputs are temp(), so Snakemake removes them once no
# remaining job needs them.
rule fetch:
    output:
        temp(pjoin(WD, "{pr_idx}", "{sra_idx}_1.fastq")),
        temp(pjoin(WD, "{pr_idx}", "{sra_idx}_2.fastq")),
    params:
        # Used both as fasterq-dump's scratch directory and as its output directory.
        odir=pjoin(WD, "{pr_idx}"),
    log:
        pjoin(WD, "{pr_idx}", "{sra_idx}.log"),
    # conda: "envs/sra.yml"
    resources:
        # Custom resource; concurrency is bounded by whatever
        # `--resources limit_space=N` is passed on the command line.
        limit_space=1,
    # Integer division keeps the thread count an int; max() guards against
    # a zero-thread job when fewer than 2 cores are available.
    threads: max(1, NT // 2)
    shell:
        # Pass the scheduler-assigned {threads} so the tool uses exactly the
        # cores that were reserved for this job.
        """
        fasterq-dump --split-3 --skip-technical --progress --threads {threads} --temp {params.odir} --outdir {params.odir} {wildcards.sra_idx} &> {log}
        """
# Assemble one paired-end run with shovill into
# WD/<pr_idx>/<sra_idx>-shovill/contigs.fa. Timing and memory statistics
# from /usr/bin/time are written next to the log as <log>.time.
rule assemble:
    input:
        fq1=pjoin(WD, "{pr_idx}", "{sra_idx}_1.fastq"),
        fq2=pjoin(WD, "{pr_idx}", "{sra_idx}_2.fastq"),
    output:
        pjoin(WD, "{pr_idx}", "{sra_idx}-shovill", "contigs.fa"),
    params:
        odir=pjoin(WD, "{pr_idx}", "{sra_idx}-shovill"),
    log:
        pjoin(WD, "{pr_idx}", "{sra_idx}-shovill.log"),
    # conda: "envs/shovill.yml"
    # Scheduled ahead of default-priority jobs such as further downloads.
    priority: 100
    # Integer division keeps the thread count an int; max() guards against
    # a zero value (shovill treats --cpus 0 as "use all CPUs") when fewer
    # than 8 cores are available.
    threads: max(1, NT // 8)
    shell:
        # --cpus follows the scheduler-assigned {threads} so concurrent
        # assemblies cannot oversubscribe the machine. The input reads are
        # not removed here: they are declared temp() by the producing rule,
        # so Snakemake deletes them itself (and keeps them under --notemp).
        """
        /usr/bin/time -vo {log}.time shovill --force --outdir {params.odir} --R1 {input.fq1} --R2 {input.fq2} --cpus {threads} &> {log}
        """