forked from sc-zhang/bioscripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
split_fasta_by_chr.py
executable file
·37 lines (32 loc) · 902 Bytes
/
split_fasta_by_chr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/python
import sys, os
def _write_chr_record(out_folder, header, chunks):
    """Write one FASTA record to its own file if it is a chromosome record.

    A record is kept only when its header starts with ``>chr``
    (case-insensitive); anything else is silently skipped.
    """
    if header[:4].lower() != '>chr':
        return
    # The file name is the full stripped header line minus the leading '>'.
    out_path = os.path.join(out_folder, header[1:] + ".fasta")
    # ``with`` guarantees the handle is closed even if the write fails.
    with open(out_path, 'w') as f_out:
        f_out.write(header + "\n" + "".join(chunks))


def split_fasta_by_chr(fasta_file, out_folder):
    """Split a FASTA file into one file per chromosome record.

    Records whose header starts with ``>chr`` (case-insensitive) are each
    written to ``<out_folder>/<header without '>'>.fasta`` as the header line
    followed by the sequence joined onto a single line (no trailing newline).
    All other records, and any lines preceding the first header, are ignored.
    If the same header occurs more than once, the last occurrence wins.

    Args:
        fasta_file: Path of the FASTA file to read.
        out_folder: Directory that receives the per-chromosome files. It is
            created, together with any missing parent directories, when it
            does not exist yet.
    """
    # makedirs (rather than mkdir) so a nested, not-yet-existing output path
    # works; the isdir guard keeps this valid on Python 2 as well.
    if not os.path.isdir(out_folder):
        os.makedirs(out_folder)

    header = None  # None until the first '>' line has been seen
    chunks = []    # sequence pieces of the current record, joined on write
    with open(fasta_file, 'r') as f_fasta:
        for line in f_fasta:
            if line.startswith(">"):
                # Flush the previous record whenever one exists, even if its
                # sequence is empty, so empty records are handled the same
                # way regardless of their position in the file.
                if header is not None:
                    _write_chr_record(out_folder, header, chunks)
                header = line.strip()
                chunks = []
            else:
                chunks.append(line.strip())
    # The last record has no following header to trigger its flush.
    if header is not None:
        _write_chr_record(out_folder, header, chunks)
if __name__ == "__main__":
    # Command-line entry point: expects the input FASTA path followed by the
    # output directory. With fewer arguments, print a short help text instead.
    if len(sys.argv) >= 3:
        split_fasta_by_chr(sys.argv[1], sys.argv[2])
    else:
        print("Notice: script for spliting fasta into serval files contain single chromosome")
        print("Usage: python " + sys.argv[0] + " <in_fasta> <out_dir>")