-
Notifications
You must be signed in to change notification settings - Fork 0
/
is_there_misassemblies.py
executable file
·82 lines (68 loc) · 2.57 KB
/
is_there_misassemblies.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
class SV:
def __init__(self, chrom, pos, pos_start, pos_end):
self.chrom = chrom
self.pos = pos
self.pos_start = pos_start
self.pos_end = pos_end
sv_dict = {}
with open("CHM1_180GB_CrG_GRCh38_phased_possorted.vcf", "r") as my_vcf:
for r in my_vcf.readlines():
if r.startswith("#"):
continue
chrom = r.split("\t")[0]
pos = int(r.split("\t")[1])
start_pos = int(r.split("\t")[11])
end_pos = int(r.split("\t")[12])
node = r.split("\t")[8]
new_sv = SV(chrom, pos, start_pos, end_pos)
if len(r.split("\t")[4]) < 300:
continue
if node not in sv_dict.keys():
sv_dict[node] = []
sv_dict[node].append(new_sv)
#print(sv_dict)
non_misassembled1 = {}
non_misassembled2 = {}
with open("CHM1.1.tsv", "r") as alignments:
lines = alignments.readlines()
for i in range(len(lines)):
r = lines[i]
if r[0].isdigit():
node = r.split("\t")[5]
if node not in sv_dict.keys():
continue
else:
for sv in sv_dict[node]:
start_pos = min(int(r.split("\t")[2]), int(r.split("\t")[3]))
end_pos = max(int(r.split("\t")[2]), int(r.split("\t")[3]))
if sv.pos_start > start_pos and sv.pos_end < end_pos:
if node not in non_misassembled1.keys():
non_misassembled1[node] = []
non_misassembled1[node].append(r)
with open("CHM1.2.tsv", "r") as alignments:
lines = alignments.readlines()
for i in range(len(lines)):
r = lines[i]
if r[0].isdigit():
node = r.split("\t")[5]
if node not in sv_dict.keys():
continue
else:
for sv in sv_dict[node]:
start_pos = min(int(r.split("\t")[2]), int(r.split("\t")[3]))
end_pos = max(int(r.split("\t")[2]), int(r.split("\t")[3]))
if sv.pos_start > start_pos and sv.pos_end < end_pos:
if node not in non_misassembled2.keys():
non_misassembled2[node] = []
non_misassembled2[node].append(r)
#print(non_misassembled1)
#print(non_misassembled2)
answer = []
for node in non_misassembled1.keys():
for r in non_misassembled1[node]:
answer.append(node)
for node in non_misassembled2.keys():
if node not in answer:
for r in non_misassembled2[node]:
answer.append(node)
print(answer)