-
Notifications
You must be signed in to change notification settings - Fork 0
/
project_config.yaml
68 lines (54 loc) · 1.96 KB
/
project_config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
## contact
# e-mail address for this project
# NOTE(review): presumably where cluster job notifications are sent - confirm
email: 'address@nih.gov'

## pipeline checkout
# local clone of this repository
scriptdir: '/path/to/virome-pipeline'
# cluster configuration file (in this template it sits inside the clone)
clusterfile: '/path/to/virome-pipeline/locus.cluster_config.yaml'

## directories
workdir: '/path/to/workingdir'
# indir and outdir are relative to workdir.
# indir must contain the assembly FASTA and BAM files; give a full path
# if the input directory lives outside the working directory.
indir: 'input'
outdir: 'virome_output'
### rule-specific parameters that may differ from project to project

## geNomad
# current location of the geNomad database
genomaddb: '/hpcdata/bio_data/genomad/genomad_db/'
# post-classification filtering preset: default | relaxed | conservative
# details: https://portal.nersc.gov/genomad/post_classification_filtering.html
genomad_filter: 'default'

## CheckV
# location of the CheckV database
checkvdb: '/hpcdata/bio_data/checkv/checkv-db-v1.5'
# CheckV quality tier of the sequences used for vOTU clustering:
# all | medium | high | complete
checkv_q: 'all'

# minimum contig length for DRAM-v and vOTU clustering
vs_min_length: 5000
## amgs
# Run the AMG branch? true/false
# When enabled, virsorter2 filters the contigs (this removes a lot) and the
# remaining sequences are passed through DRAM-v to identify AMGs.
# Switches in this section use canonical YAML booleans: YAML 1.1 loaders read
# yes/no as the same booleans, but YAML 1.2 loaders read yes/no as strings.
run_amgs: true

## iphop
# Run iPHoP? true/false
# Enabling it significantly increases run time and requires a lot of memory.
run_iphop: false
# Minimum confidence score. Value between 75 and 100. Default is 90.
iphop_min_score: 90
# location of the iPHoP database
iphopdb: /hpcdata/bio_data/iphop/Sept_2021_pub_rw/

## diamond
# Annotate genes against a DIAMOND database for eventual pathway inference
# (we use nr)? true/false
run_diamond: false
# location of the DIAMOND database
diamonddb: /hpcdata/bio_data/DIAMOND_ncbi_db/nr.diamond.dmnd
### input files
# Input files are expected to be named {indir}/{sample prefix}{assembly_suffix}.
# Change the suffixes to whatever makes sense for your data.
assembly_suffix: .scaffolds.fasta
# BAM files are expected at {indir}/{sample prefix}{bam_suffix}
bam_suffix: .bam

# Optional tab-separated mapping file - will be added to the biom file.
# Explicit null means "no mapping file"; replace it with a path to use one.
mapping_file: null

# Sample prefixes to be used throughout the pipeline.
# Keep the names indented under the key: at column 0 they are not part of the
# `samples` value and the document fails to parse. Written this way they load
# as one whitespace-separated string ("Sample1 Sample2").
# NOTE(review): if the Snakefile expects a list instead, use `- Sample1` items
# - confirm against the consumer.
samples:
  Sample1
  Sample2