-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvcf_bit.py
51 lines (41 loc) · 1.32 KB
/
vcf_bit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env python
import bitarray as ba
import pickle as pl
from multiprocessing import Pool, Process
from argparse import ArgumentParser
def main(sIn, n=None):
    """Convert a VCF file into one bit-vector per individual and pickle it.

    Every line of the VCF that does not start with ``#`` contributes one bit
    to each individual's ``bitarray``: ``True`` when the individual's leading
    genotype (GT) sub-field contains the character ``'1'``, ``False``
    otherwise.  The list of bitarrays is pickled to
    ``<prefix>wholeRecords.pkl`` in the current working directory, where
    ``<prefix>`` is the name of the directory containing the input file
    (empty when the path has no directory component).

    Args:
        sIn: Path to the input VCF file, using ``/`` as the separator.
        n: Number of individuals (sample columns) in the VCF.  When omitted,
            the module-level global ``N`` set by the command-line entry
            point is used, so existing ``main(path)`` calls keep working.

    Raises:
        IndexError: If a record has more sample columns than ``n``.
    """
    if n is None:
        # Backward compatible: the script entry point publishes the
        # individual count as the global ``N`` instead of passing it in.
        n = N
    records = [ba.bitarray() for _ in range(n)]

    with open(sIn, 'r') as vcf:
        for line in vcf:
            if line.startswith('#'):
                continue  # header / meta-information line
            # The first nine columns are skipped; the per-individual
            # columns start at index 9.
            samples = line.split()[9:]
            if len(samples) > n:
                raise IndexError(
                    'VCF record has %d sample columns but only %d '
                    'individuals were requested' % (len(samples), n))
            for bits, sample in zip(records, samples):
                # Look only at the leading GT sub-field so that a '1' in a
                # later colon-separated sub-field (depth, quality, ...)
                # cannot be mistaken for an alternate allele.
                genotype = sample.split(':', 1)[0]
                bits.append('1' in genotype)

    # The output prefix is the directory that holds the input file; a bare
    # filename has no such component, so fall back to an empty prefix
    # rather than failing after all the parsing work is done.
    path_parts = sIn.split('/')
    prefix = path_parts[-2] if len(path_parts) > 1 else ''
    with open(prefix + 'wholeRecords.pkl', 'wb') as output:
        pl.dump(records, output, -1)  # -1 selects the highest protocol
if __name__ == '__main__':
    # Command-line entry point: read the VCF path and the number of
    # individuals, then build and pickle the per-individual bit-vectors.
    # Example input file:
    #   /data/diybu/challengeData16/c1beacon/wholeRecords/vcfSize500/chr10_selectedInd.vcf
    parser = ArgumentParser()
    # Both options are mandatory; marking them required makes argparse
    # report a usage error instead of failing later with a TypeError.
    parser.add_argument('-f', '--filename', required=True,
                        help='path to the input VCF file')
    parser.add_argument('-n', '--N', required=True, type=int,
                        help='number of individuals in the input VCF file')
    args = parser.parse_args()
    fname = args.filename
    # main() reads the individual count from the module-level global N.
    N = args.N
    main(fname)