forked from sc-zhang/bioscripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
modify_geno_with_snp_mummer.py
executable file
·53 lines (47 loc) · 1.31 KB
/
modify_geno_with_snp_mummer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/python
import sys
def modify_geno(in_geno, in_snp, col, out_geno):
snp_db = {}
with open(in_snp, 'r') as f_in:
for line in f_in:
data = line.strip().split()
if data == []:
continue
if data[0].isdigit() == False:
continue
chrn = data[-1]
if chrn not in snp_db:
snp_db[chrn] = {}
pos = int(data[3])
snp_db[chrn][pos] = data[1].replace('.', 'N') + '/'+data[1].replace('.', 'N')
cnt_ovlp_snp = 0
with open(in_geno, 'r') as f_in:
with open(out_geno, 'w') as f_out:
for line in f_in:
if line[0] == "#":
f_out.write(line)
continue
data = line.strip().split()
for i in range(0, len(data)):
if i == col:
if int(data[1]) in snp_db[data[0]]:
cnt_ovlp_snp += 1
f_out.write(snp_db[data[0]][int(data[1])])
else:
f_out.write(data[i])
else:
f_out.write(data[i])
if i < len(data)-1:
f_out.write('\t')
f_out.write('\n')
print(cnt_ovlp_snp)
if __name__ == "__main__":
if len(sys.argv) < 4:
print("Notice: modify column in geno file with snp result generated by show-snps of mummer")
print("Usage: python "+sys.argv[0]+" <in_geno> <in_snp> <col> <out_geno>")
else:
in_geno = sys.argv[1]
in_snp = sys.argv[2]
col = int(sys.argv[3])
out_geno = sys.argv[4]
modify_geno(in_geno, in_snp, col, out_geno)