-
Notifications
You must be signed in to change notification settings - Fork 0
/
Add_Species.py
62 lines (54 loc) · 1.74 KB
/
Add_Species.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import BF_v2
import os
import csv
import pickle
import sys
from Bio import SeqIO, SeqRecord, Seq
import search_filter
def write_file(name):
    """Add a new species name to the pickled species list.

    Loads the list stored in ``filter/FilterSpecies.txt`` (resolved relative
    to the current working directory), appends *name*, re-sorts the list and
    writes it back to the same file.

    Args:
        name: Species name to append to the list.

    Raises:
        FileNotFoundError: If ``filter/FilterSpecies.txt`` does not exist.
    """
    species_file = r'filter/FilterSpecies.txt'
    # Read inside a context manager so the handle is closed before the same
    # file is reopened for writing below.
    # NOTE(review): pickle must only ever be loaded from a trusted file.
    with open(species_file, 'rb') as fp:
        itemlist = pickle.load(fp)
    itemlist.append(name)
    # Sort key: the first two characters of whatever follows the last comma
    # in each entry (the whole entry when it contains no comma).
    itemlist.sort(key=lambda x: x.split(",")[-1][:2])
    with open(species_file, 'wb') as fp:
        pickle.dump(itemlist, fp)
def train_Core():
    """Train (concatenated) genomes into Bloom filters and save them.

    Every entry of ``filter/new_species`` whose name contains ``fna`` or
    ``fasta`` is trained into a fresh ``BF_v2.AbaumanniiBloomfilter``. The
    result is saved to ``filter/species/<name>.txt``, where ``<name>`` is the
    second-to-last dot-separated part of the file name.

    Raises:
        FileNotFoundError: If ``filter/new_species`` does not exist.
        IndexError: If a matching file name contains no dot.
    """
    # Forward slashes work on every platform; the former backslash-separated
    # literal only resolved on Windows and disagreed with the paths below.
    source_dir = 'filter/new_species'
    genome_files = [
        entry for entry in os.listdir(source_dir)
        if 'fna' in entry or 'fasta' in entry
    ]
    for genome_file in genome_files:
        # set BF-parameters
        BF = BF_v2.AbaumanniiBloomfilter(115000000)
        BF.set_arraysize(115000000)
        BF.set_clonetypes(1)
        BF.set_hashes(7)
        BF.set_k(20)
        species = genome_file.split('.')[-2]
        path = source_dir + '/' + genome_file
        result = r'filter/species/' + species + '.txt'
        BF.train_sequence(path, 0)
        BF.save_clonetypes(result)
        print("Added A. " + species + " to AspecT")
        # A new filter is built for every file, so release this one before
        # moving on to the next genome.
        BF.cleanup()
def main():
    """Command-line entry point: train filters and register new species.

    Each command-line argument after the script name is treated as a species
    name. With no arguments an error message is printed and nothing else
    happens.
    """
    # Guard clause: only the script name was supplied.
    if len(sys.argv) == 1:
        print("Error: No Species-Name entered!")
        return

    print("Training Bloom Filters...")
    train_Core()

    # saving new species names
    for species_name in sys.argv[1:]:
        write_file(species_name)

    print("Calculating new Training-Data...")
    # generate new training-data
    search_filter.pre_processing().helper()
    print("Finished!")
# Run the command-line workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main()