-
Notifications
You must be signed in to change notification settings - Fork 6
/
config.yml
115 lines (95 loc) · 4.08 KB
/
config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#####################
# Editing mandatory #
#####################
# Run details. These primarily determine where the output ends up (see output_root below):
# results are written to <output_root>/<sample>/<stype>/<version>/.
# NOTE(review): the SKIP_BINS section further down is keyed by these same three values
# (sample -> stype -> version), so it presumably needs a matching entry when any of them changes.
sample: '25m'
stype: '1D'
version: 'v2'
# Paths of the reads file (*.fasta) and the sequencing summary file you want to analyze.
# The defaults are relative paths into the test/ directory.
# NOTE(review): presumably resolved relative to the directory the pipeline is launched from - confirm.
input_fasta: test/reads.fasta
input_summary: test/read_summary.txt
# Path where you would like pipeline results to be written.
# Run outputs will end up in the directory: <output_root>/<sample>/<stype>/<version>/
output_root: test/test-output/  # with the run details above, outputs go to test/test-output/25m/1D/v2/
max_threads: 32 # Maximum number of threads the pipeline will attempt to use
# Read pre-filter. Options listed for this setting:
#   DTR       - only process reads with DTR
#   None      - process all reads
#   Virsorter - filter with virsorter (purely experimental at this point)
pre_filter: DTR # Only process reads with DTR (None: all reads, Virsorter: virsorter)
# virsorter is purely experimental at this point
# Settings for Medaka, used to polish genomes.
# The Medaka model depends on whether a MinION or PromethION was used for sequencing and on which
# Guppy version was used for basecalling. Please see the Medaka documentation for the list of
# available models.
MEDAKA:
# NOTE(review): by Medaka's naming scheme this looks like R9.4.1 pore / MinION / high-accuracy /
# Guppy 3.0.3 - confirm it matches how the input reads were basecalled.
model: 'r941_min_high_g303'
threads: 4 # Should be << max_threads so that multiple genomes can be polished in parallel
# Settings for the (optional) Kaiju step.
KAIJU:
# NOTE(review): absolute, machine-specific path - must be edited for each installation.
db: /media/sfodata/databases/kaiju/nr_euk # dbs can be downloaded from http://kaiju.binf.ku.dk/server
switch: '-a greedy -e 5' # greedy mode (allows substitutions), max 5 substitutions; slower but more sensitive than default params
run: True # Set to False to skip kaiju
# will also be skipped if DB is missing
# Specify which k-mer bins to skip for each sample/run/version. HDBSCAN assigns all unbinned reads to bin_id = -1,
# so this bin should always be skipped. Occasionally other 'bad' bins also require skipping if they generate
# downstream errors in the pipeline.
# Layout: <sample> -> <stype> -> <version> -> list of bin ids to skip.
# NOTE(review): the keys below mirror the sample / stype / version values set at the top of this file;
# presumably an entry must exist for the combination being run - confirm against the workflow.
SKIP_BINS:
25m:
1D:
v1: [-1]
v2: [-1]
####################
# Editing optional #
####################
# Settings for the step that finds DTR-containing reads.
DTR_FIND:
ovlp: 20 # Percent of read start/end to check for alignment
# NOTE(review): chunksize is undocumented here; presumably the number of reads handled per batch - confirm.
chunksize: 5000
# Settings for VirSorter. Per the pre_filter comment in this file, the Virsorter pre-filter
# is purely experimental at this point.
VIRSORTER:
db: 1 # Either "1" (DEFAULT Refseqdb) or "2" (Viromedb)
diamond: True # False: blastp, True: diamond
virome: False # True: input is a virome, don't train on data
prophage: False # False: only use phage, True: use prophage, too
# NOTE(review): absolute, machine-specific path - must be edited for each installation.
data_dir: '/mnt/arginine/galaxydata/seqdbs/virsorter-data'
# location of virsorter-data
# Settings for computing the per-read k-mer profiles used for k-mer binning.
KMER_CALC:
k: 5 # k-mer size to use for k-mer binning
cli: '-c' # Use raw k-mer counts instead of normalized k-mer frequencies (best for genome-spanning reads)
# Settings for the UMAP step: read filtering, UMAP parameters, bin creation and plot appearance.
UMAP:
min_rl: 5000 # Minimum read length to consider in analysis
max_cb_rl: 60000 # Maximum read length to include in colorbar for UMAP scatterplot annotated by read length
min_d: 0.1 # min_dist parameter for UMAP
neighbors: 15 # n_neighbors parameter for UMAP
min_q: 9 # Filtering out low qscore reads prior to running UMAP improves resolution of bins
bin_min_reads: 10 # Don't create a bin if the bin contains < bin_min_reads
scatter_size: 7 # Marker size for UMAP scatter plots
scatter_alpha: 0.2 # Marker alpha for UMAP scatter plots
# Settings for the per-bin alignment step and the clustering that follows it.
# NOTE(review): "AVA" presumably stands for all-vs-all, matching minimap2's ava-ont preset - confirm.
BIN_AVA_ALIGN:
MINIMAP2:
cli: '-x ava-ont --no-long-join -r100' # minimap2 options; ava-ont is its Nanopore read-overlap preset
threads: 1
CLUST:
# NOTE(review): the two CLUST thresholds are undocumented here. Presumably min_score_frac is a
# minimum alignment-score fraction and min_reads a minimum cluster size - confirm in the clustering code.
min_score_frac: 0.8
min_reads: 5
# NOTE(review): the default of -1 is itself non-zero, so "non-zero" presumably means a positive
# value, with -1 leaving Python's limit unchanged - confirm in the code that reads this key.
max_recursion: -1 # set to non-zero value to override python's max recursion depth
# Settings for aligning the start and end of each read.
DTR_ALIGN:
ovlp: 20 # Percent of read start/end to check for alignment
threads: 1 # Single thread is optimum for aligning single read start/end
# Thresholds used when deduplicating polished genomes.
NUCMER:
mincov: 98.0 # Genome-genome percent coverage threshold for deduplicating polished genomes
minpct: 98.0 # Genome-genome percent identity threshold for deduplicating polished genomes
# Standard minimap2 settings.
# NOTE(review): presumably used for the read-to-genome mapping steps (e.g. before polishing) - confirm.
MINIMAP2:
STANDARD:
switches: '-x map-ont' # minimap2 preset for mapping Nanopore reads to a reference
threads: 4
# Settings for Racon.
# NOTE(review): apart from `repeat`, the key names mirror racon's command-line options of the same
# name and the descriptions below follow racon's own help text; presumably each is passed through
# as --<key> - confirm in the workflow.
RACON:
repeat: 3 # Number of iterations to run
include-unpolished: True # Also output unpolished target sequences
window-length: 500 # Size of the window on which POA is performed
quality-threshold: 9 # Threshold for average base quality of windows used in POA
error-threshold: 0.3 # Maximum allowed error rate used for filtering overlaps
match: 5 # Score for matching bases
mismatch: -4 # Score for mismatching bases
gap: -8 # Gap penalty (must be negative)
threads: 4
fragment-correction: True # Fragment correction instead of contig polishing; overlaps should not contain dual/self overlaps
# Settings for Porechop.
PORECHOP:
switches: '--no_split' # Porechop option: do not split reads on adapters found in the middle of a read
# Settings for the Prodigal step.
PRODIGAL:
# NOTE(review): confirm how the workflow uses this value (threads for the rule vs. parallel Prodigal runs).
threads: 4
# NOTE(review): this section is undocumented. Judging by the names only, LIN_CONCAT presumably
# configures detection of linear concatemers, ovlp_len an overlap length in bases and minlen a
# minimum sequence length in bases - confirm all three against the code that reads them.
LIN_CONCAT:
ovlp_len: 3000
minlen: 15000