forked from functional-dark-side/agnostos-wf
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.yaml
100 lines (86 loc) · 3.93 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# Global configuration for the workflow: everything needed to configure a run
# on a global scale belongs in this file.
# For sample-based data, complement it with a samples.tsv file that contains
# one row per sample (easily parsed with pandas).
#
# Base directories.
# NOTE(review): the key names suggest wdir = working directory and
# rdir = results directory -- confirm against the Snakefile.
wdir: "/vol/cloud/agnostos-wf/db_update"
rdir: "/vol/cloud/db_update"
# Data directory
data: "/vol/cloud/data"
# Choose a name for your dataset
new_data_name: "tara_038"
# Path to the workflow's environment file (workflow.yml); presumably a conda
# environment definition, going by the key name.
conda_env: "/vol/cloud/agnostos-wf/envs/workflow.yml"
# Cluster update configuration
# Set update to "TRUE" to run the cluster-update version of the Snakemake
# workflow (otherwise "FALSE" or empty).
# NOTE(review): the value is a quoted string, not a YAML boolean -- keep the
# quotes unless the consuming code is confirmed to accept real booleans.
update: "TRUE"
# Path to the original cluster DB (sequence and cluster DBs)
ordir: "/vol/cloud/agnostosDB"
# Path to the sequences that need to be integrated
# Accepted file names: name_contigs.fasta or name_genes.fasta
new_data: "/vol/cloud/data/new_genes.fasta"
# Stage of the input data: either "genes" or "contigs"
new_data_stage: "genes"  # "genes" or "contigs"
# If you already have the gene predictions, provide the path to the
# gene completeness information
new_data_partial: "/vol/cloud/data/new_genes_partial_info.tsv"
# Threads configuration
# Thread counts used by the workflow.
# NOTE(review): which rules read each key is not visible in this file --
# check the Snakefile/rules before tuning.
threads_default: 28
threads_collect: 28
threads_cat_ref: 4
threads_res: 14
# Databases
# Paths to the reference databases; all of them live under db_dir (below).
pfam_db: "/vol/cloud/agnostos-wf/databases/Pfam-A.hmm"
pfam_clan: "/vol/cloud/agnostos-wf/databases/Pfam-A.clans.tsv.gz"
antifam_db: "/vol/cloud/agnostos-wf/databases/AntiFam.hmm"
uniref90_db: "/vol/cloud/agnostos-wf/databases/uniref90.fasta.gz"
nr_db: "/vol/cloud/agnostos-wf/databases/nr.fasta.gz"
# No file extension: presumably a database name prefix rather than a single
# file -- confirm against the rule that uses it.
uniclust_db: "/vol/cloud/agnostos-wf/databases/uniclust30_2018_08/uniclust30_2018_08"
# Disabled entry, kept for reference:
#uniprot_db: "/vol/cloud/agnostos-wf/databases/uniprotKB.fasta.gz"
pfam_hh_db: "/vol/cloud/agnostos-wf/databases/pfam"
DPD: "/vol/cloud/agnostos-wf/databases/dpd_uniprot_sprot.fasta.gz"
# Directory containing the databases listed above (note the trailing slash)
db_dir: "/vol/cloud/agnostos-wf/databases/"
# NOTE(review): a database name rather than a path; presumably selects the
# database used for taxonomy -- confirm against the rules.
taxdb: "UniProtKB"
# Files retrieved from the databases
# List of shared reduced Pfam domain names (downloadable from Figshare)
pfam_shared_terms: "/vol/cloud/agnostos-wf/databases/Pfam-31_names_mod_01122019.tsv"
# Created using the protein accessions and the descriptions found in the FASTA headers
uniref90_prot: "/vol/cloud/agnostos-wf/databases/uniref90.proteins.tsv.gz"
nr_prot: "/vol/cloud/agnostos-wf/databases/nr.proteins.tsv.gz"
# Information downloaded from Dataset-S1 of the DPD paper
dpd_info: "/vol/cloud/agnostos-wf/databases/dpd_ids_all_info.tsv.gz"
# Local temporary folder (same path as mmseqs_local_tmp)
local_tmp: "/vol/scratch/tmp"
# MPI runner (de.NBI cloud, SLURM): command prefix used to launch MPI programs
mpi_runner: "srun --mpi=pmi2"
# Gene prediction
# Bare command name (no directory), unlike hmmer_bin below
prodigal_bin: "prodigal"
# Annotation
# Absolute path to the hmmsearch executable
hmmer_bin: "/vol/cloud/agnostos-wf/bin/hmmsearch"
# Clustering config
# Executables used for clustering
ffindex_apply: "/vol/cloud/agnostos-wf/bin/ffindex_apply_mpi"
mmseqs_bin: "/vol/cloud/agnostos-wf/bin/mmseqs"
# Temporary directories for MMseqs2: one under wdir, one on local scratch
# (the latter is the same path as local_tmp)
mmseqs_tmp: "/vol/cloud/agnostos-wf/db_update/tmp"
mmseqs_local_tmp: "/vol/scratch/tmp"
# NOTE(review): presumably forwarded to the MMseqs2 split options (memory
# limit and number of splits) -- confirm against the clustering rule.
mmseqs_split_mem: "100G"
mmseqs_split: 10
# Clustering results config
# Bare command name (no directory)
seqtk_bin: "seqtk"
# Spurious and shadows config
# Absolute path to the hmmpress executable
hmmpress_bin: "/vol/cloud/agnostos-wf/bin/hmmpress"
# Compositional validation config
# Executables: bare names are given without a directory; the others are
# absolute paths into the workflow's bin directory.
datamash_bin: "datamash"
famsa_bin: "/vol/cloud/agnostos-wf/bin/famsa"
odseq_bin: "/vol/cloud/agnostos-wf/bin/OD-seq"
parasail_bin: "/vol/cloud/agnostos-wf/bin/parasail_aligner"
parallel_bin: "parallel"
# Shell snippets that prepend a library directory to LD_LIBRARY_PATH.
# ${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} already supplies the ":" separator and
# expands to nothing when the variable is unset or empty, so no literal ":"
# goes in front of it. An extra colon would leave an empty path entry, which
# the dynamic linker treats as the current working directory.
igraph_lib: "export LD_LIBRARY_PATH=/vol/cloud/agnostos-wf/bin/igraph/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
parasail_lib: "export LD_LIBRARY_PATH=/vol/cloud/agnostos-wf/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
# Cluster classification config
# Bare command names (no directory)
seqkit_bin: "seqkit"
filterbyname: "filterbyname.sh"
# Absolute path to hhconsensus inside the hh-suite installation
hhcons_bin: "/vol/cloud/agnostos-wf/bin/hh-suite/bin/hhconsensus"
# Cluster category refinement
# Root of the hh-suite installation; the executables below live in its bin/ directory
hhsuite: "/vol/cloud/agnostos-wf/bin/hh-suite"
hhblits_bin_mpi: "/vol/cloud/agnostos-wf/bin/hh-suite/bin/hhblits_mpi"
hhmake: "/vol/cloud/agnostos-wf/bin/hh-suite/bin/hhmake"
# NOTE(review): numeric thresholds whose exact use is not visible in this
# file; hhblits_prob is presumably a probability cutoff for hhblits hits and
# hypo_filt a filter threshold -- confirm against the refinement rules.
hhblits_prob: 90
hypo_filt: 1.0
# Cluster communities
# Absolute paths to the hhblits and hhsearch executables inside the hh-suite installation
hhblits_bin: "/vol/cloud/agnostos-wf/bin/hh-suite/bin/hhblits"
hhsearch_bin: "/vol/cloud/agnostos-wf/bin/hh-suite/bin/hhsearch"