-
Notifications
You must be signed in to change notification settings - Fork 3
/
build_models.py
executable file
·108 lines (83 loc) · 3.93 KB
/
build_models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import tempfile
import os
import sys
import shutil
from threading import Thread
from argparse import ArgumentParser
import logging
from hommod.controllers.fasta import parse_fasta
from hommod.models.template import TemplateID
from hommod.controllers.domain import domain_aligner
from hommod.controllers.model import modeler
from hommod.services.uniprot import uniprot
from hommod.services.interpro import interpro
from hommod.controllers.kmad import kmad_aligner
from hommod.controllers.clustal import clustal_aligner
from hommod.controllers.storage import model_storage
from hommod.controllers.blast import blaster
from hommod.controllers.blacklist import blacklister
from hommod.services.dssp import dssp
from hommod.services.helpers.cache import cache_manager as cm
import hommod.default_settings as settings
class ModelThread(Thread):
    """Worker thread that builds a single model and copies it to a directory.

    One instance handles one domain alignment; the actual modeling is
    delegated to ``modeler.build_model``.
    """

    def __init__(self, sequence, species_id, domain_alignment, output_dir):
        """Store the modeling job and mark the thread as a daemon.

        Args:
            sequence: target sequence, passed unchanged to the modeler.
            species_id: target species id, passed unchanged to the modeler.
            domain_alignment: alignment to model, passed unchanged to the
                modeler.
            output_dir: destination handed to ``shutil.copy`` for the
                resulting model file.
        """
        super(ModelThread, self).__init__()
        # Must be set after the Thread initializer has run and before start().
        self.daemon = True

        self.output_dir = output_dir
        self.domain_alignment = domain_alignment
        self.species_id = species_id
        self.sequence = sequence

    def run(self):
        """Build the model, log the returned path and copy it to output_dir."""
        model_path = modeler.build_model(
            self.sequence,
            self.species_id,
            self.domain_alignment
        )
        _log.info(model_path)
        shutil.copy(model_path, self.output_dir)
# Send all log records (DEBUG and above) to stdout so that the progress of a
# command line run is visible on the console.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
_log = logging.getLogger(__name__)


def resolve_output_dir(requested_dir, default_dir):
    """Pick the directory that finished models are copied to.

    Args:
        requested_dir: value of the ``--output-dir`` option; None or empty
            when the option was not given.
        default_dir: directory to use when ``requested_dir`` is not given.

    Returns:
        ``requested_dir`` if it is truthy, otherwise ``default_dir``.

    Raises:
        ValueError: if the chosen path is not an existing directory.
    """
    output_dir = requested_dir if requested_dir else default_dir
    if not os.path.isdir(output_dir):
        raise ValueError("Not a directory: {}".format(output_dir))
    return output_dir


def split_template_spec(template_spec):
    """Split an underscore separated ``<pdbid>_<chain>`` string.

    Args:
        template_spec: value of the ``--template`` option.

    Returns:
        A ``(pdbid, chain_id)`` tuple of strings.

    Raises:
        ValueError: if the string does not consist of exactly two non-empty
            parts separated by a single underscore.
    """
    parts = template_spec.split('_')
    if len(parts) != 2 or not all(parts):
        raise ValueError(
            "Expected template as <pdbid>_<chain>, got: {}".format(template_spec))
    pdbid, chain_id = parts
    return pdbid, chain_id


def main():
    """Build models for the sequence given on the command line.

    Copies the values from ``hommod.default_settings`` onto the hommod
    service and controller objects, parses the command line, requests the
    domain alignments for the first sequence returned by ``parse_fasta`` and
    runs one ``ModelThread`` per alignment. Model storage is pointed at a
    temporary directory that is removed when the run ends, whether it
    succeeded or not.

    NOTE: ``Thread.join`` does not re-raise exceptions from a worker's
    ``run``; a failing model build therefore does not abort this function.

    Raises:
        ValueError: if the output directory does not exist, the fasta file
            yields no sequence, or ``--template`` is malformed.
    """
    # `cm` is hommod's cache_manager (see the imports at the top of the file).
    cm.disable()

    # Copy the configuration values onto the objects that use them.
    uniprot.url = settings.UNIPROT_URL
    interpro.url = settings.INTERPRO_URL
    kmad_aligner.kmad_exe = settings.KMAD_EXE
    clustal_aligner.clustalw_exe = settings.CLUSTALW_EXE
    modeler.yasara_dir = settings.YASARA_DIR
    modeler.uniprot_databank = settings.UNIPROT_BLAST_DATABANK
    domain_aligner.forbidden_interpro_domains = settings.FORBIDDEN_INTERPRO_DOMAINS
    domain_aligner.similar_ranges_min_overlap_percentage = settings.SIMILAR_RANGES_MIN_OVERLAP_PERCENTAGE
    domain_aligner.similar_ranges_max_length_difference_percentage = settings.SIMILAR_RANGES_MAX_LENGTH_DIFFERENCE_PERCENTAGE
    domain_aligner.min_percentage_coverage = settings.DOMAIN_MIN_PERCENTAGE_COVERAGE
    domain_aligner.template_blast_databank = settings.TEMPLATE_BLAST_DATABANK
    domain_aligner.highly_homologous_percentage_identity = settings.HIGHLY_HOMOLOGOUS_PERCENTAGE_IDENTITY
    blaster.blastp_exe = settings.BLASTP_EXE
    blacklister.file_path = settings.BLACKLIST_FILE_PATH
    dssp.dssp_dir = settings.DSSP_DIR

    arg_parser = ArgumentParser(description="Build Models for given criteria")
    arg_parser.add_argument('--output-dir', help="output dir to put the models in")
    arg_parser.add_argument('fasta', help="fasta with input target sequence")
    arg_parser.add_argument('species', help="target species id")
    arg_parser.add_argument('--position', help="residue position that the models should cover", type=int)
    arg_parser.add_argument('--template', help="underscore separated template pdbid and chain")
    args = arg_parser.parse_args()

    # Validate the destination BEFORE creating the temporary directory, so a
    # bad --output-dir cannot leave an orphaned temp dir behind.
    final_output_dir = resolve_output_dir(args.output_dir, settings.MODEL_DIR)

    # Nothing may come between mkdtemp() and the try: everything after the
    # directory exists must be covered by the cleanup in `finally`.
    tmp_dir = tempfile.mkdtemp()
    try:
        model_storage.model_dir = tmp_dir

        sequences = list(parse_fasta(args.fasta).values())
        if not sequences:
            raise ValueError("No sequence found in fasta: {}".format(args.fasta))
        # Only the first sequence is modeled.
        sequence = sequences[0]
        species_id = args.species.upper()

        template_id = None
        if args.template:
            pdbid, chain_id = split_template_spec(args.template)
            template_id = TemplateID(pdbid, chain_id)

        domain_alignments = domain_aligner.get_domain_alignments(sequence, args.position, template_id)
        _log.info("{} domain alignments".format(len(domain_alignments)))

        # One worker per alignment; start them all, then wait for all.
        workers = [ModelThread(sequence, species_id, alignment, final_output_dir)
                   for alignment in domain_alignments]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
    finally:
        shutil.rmtree(tmp_dir)


if __name__ == "__main__":
    main()