This repository has been archived by the owner on Mar 14, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
imputePipe.py
140 lines (127 loc) · 5.08 KB
/
imputePipe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import re
from subprocess import Popen, PIPE
import sys
import os
import argparse
def checkPath(filePre, fileSuf, chr_):
    '''Tell whether <filePre>_CHR<chr_>.<fileSuf> exists in the current working directory.

    The candidate path is always printed; a second line is printed when the
    file is found.

    Returns:
        True when the path exists, False otherwise.
    '''
    candidate = os.path.join(os.getcwd(), '{}_CHR{}.{}'.format(filePre, chr_, fileSuf))
    print(candidate)
    found = os.path.exists(candidate)
    if found:
        print('FOUND MATCHING FILE {}'.format(candidate))
    return found
def plinkSplitCall(filePre):
    '''Submit the job that splits a base bed file by chromosome and return its job ID.

    Runs ``./PLINK_SPLIT_SLURM.sh <filePre>`` through the shell from the current
    working directory and takes the first run of digits in the script's
    standard output as the job ID.

    Args:
        filePre: prefix of the base bed file; passed to the script as its only
            argument.

    Returns:
        The job ID as a string of digits.

    Raises:
        FileNotFoundError: PLINK_SPLIT_SLURM.sh is not in the current working
            directory.
        SystemExit: the script wrote anything to stderr, or its stdout
            contained no digits to use as a job ID.
    '''
    if not os.path.exists(os.path.join(os.getcwd(), 'PLINK_SPLIT_SLURM.sh')):
        raise FileNotFoundError('Check Path PLINK_SPLIT_SLURM.sh')

    plinKSplit = './PLINK_SPLIT_SLURM.sh {}'.format(filePre)
    plinkCall = Popen(plinKSplit, shell=True, stdout=PIPE, stderr=PIPE)
    stdout, stderr = plinkCall.communicate()

    failure = 'Job Submission Unsuccessful for Script {}'.format(plinKSplit)
    # Any stderr output is treated as a failed submission.
    if stderr:
        sys.exit(failure)

    # findall() returns an empty list when nothing matches, so test the list
    # before indexing; indexing first would raise IndexError and the intended
    # failure message would never be shown.
    jobIds = re.findall(r'\d+', stdout.decode())
    if not jobIds:
        sys.exit(failure)
    return jobIds[0]
def phasingCall(filePre, *args):
    '''Submit the phasing job for a base plink file prefix and return its job ID.

    Runs ``./SHAPEIT_ARRAY_TASK_SLURM.sh <filePre> [<dependency>]`` through the
    shell from the current working directory and takes the first run of digits
    in the script's standard output as the job ID. The command line is printed
    before it is run.

    Args:
        filePre: base plink file prefix; first argument to the script.
        *args: optionally, a job ID this submission depends on as the first
            extra argument. A missing or falsy value means "no dependency"
            and the script is called with the prefix only.

    Returns:
        The job ID as a string of digits.

    Raises:
        FileNotFoundError: SHAPEIT_ARRAY_TASK_SLURM.sh is not in the current
            working directory.
        SystemExit: the script wrote anything to stderr, or its stdout
            contained no digits to use as a job ID.
    '''
    # Resolve the dependency once so the command is always defined, even when
    # a caller passes an empty/None dependency explicitly.
    dependency = args[0] if args else None
    if dependency:
        print('Job Dependency {}'.format(dependency))
        shapeIt = './SHAPEIT_ARRAY_TASK_SLURM.sh {} {}'.format(filePre, dependency)
    else:
        shapeIt = './SHAPEIT_ARRAY_TASK_SLURM.sh {}'.format(filePre)
    print(shapeIt)

    if not os.path.exists(os.path.join(os.getcwd(), 'SHAPEIT_ARRAY_TASK_SLURM.sh')):
        raise FileNotFoundError('Check Path SHAPEIT_ARRAY_TASK_SLURM.sh')

    shapeitCall = Popen(shapeIt, shell=True, stdout=PIPE, stderr=PIPE)
    stdout, stderr = shapeitCall.communicate()

    # Report the command string, not the Popen object, so the message names
    # what was actually run.
    failure = 'Job Submission Unsuccessful for Script {}'.format(shapeIt)
    # Any stderr output is treated as a failed submission.
    if stderr:
        sys.exit(failure)

    # Test the match list before indexing: an empty list means no job ID was
    # printed, and indexing it would raise IndexError instead of exiting cleanly.
    jobIds = re.findall(r'\d+', stdout.decode())
    if not jobIds:
        sys.exit(failure)
    return jobIds[0]
def imputeCall(filePre, *args):
    '''Run the imputation submission script once per chromosome.

    For each entry of the chromosome table, runs
    ``./IMPUTE_LOOP_SLURM.sh <chr> <chunks> <filePre> [<dependency>]`` through
    the shell from the current working directory, prints the command line, and
    waits for the script to finish before moving on. A script run that fails
    is reported on stderr and the remaining chromosomes are still submitted.

    Args:
        filePre: file prefix; third argument to the script.
        *args: optionally, a job ID the submissions depend on as the first
            extra argument. A missing or falsy value means "no dependency"
            and the fourth script argument is omitted.

    Returns:
        None.

    Raises:
        FileNotFoundError: IMPUTE_LOOP_SLURM.sh is not in the current working
            directory.
    '''
    if not os.path.exists(os.path.join(os.getcwd(), 'IMPUTE_LOOP_SLURM.sh')):
        raise FileNotFoundError('Check Path IMPUTE_LOOP_SLURM.sh')

    # (chromosome, length) pairs, submitted in this order.
    # NOTE(review): lengths look like GRCh37/hg19 chromosome sizes in base
    # pairs - confirm against the reference panel in use.
    chrSizes = (
        ('1', '249250621'),
        ('2', '243199373'),
        ('3', '198022430'),
        ('4', '191154276'),
        ('5', '180915260'),
        ('6', '171115067'),
        ('7', '159138663'),
        ('8', '146364022'),
        ('9', '141213431'),
        ('10', '135534747'),
        ('11', '135006516'),
        ('12', '133851895'),
        ('13', '115169878'),
        ('14', '107349540'),
        ('15', '102531392'),
        ('16', '90354753'),
        ('17', '81195210'),
        ('18', '78077248'),
        ('20', '63025520'),
        ('19', '59128983'),
        ('21', '48129895'),
        ('22', '51304566'),
    )

    dependency = args[0] if args else None
    if dependency:
        print('Job Dependency {}'.format(dependency))

    for chr_, chrLength in chrSizes:
        # Whole millions in the length, plus one. Integer division gives the
        # same value as slicing the leading digits for every table entry, but
        # stays correct for lengths with any number of digits.
        # Presumably the number of 1 Mb windows the script loops over - verify
        # against IMPUTE_LOOP_SLURM.sh.
        chrSize = str(int(chrLength) // 1000000 + 1)
        impute = './IMPUTE_LOOP_SLURM.sh {} {} {}'.format(chr_, chrSize, filePre)
        if dependency:
            impute = '{} {}'.format(impute, dependency)

        imputeProc = Popen(impute, shell=True, stdout=PIPE, stderr=PIPE)
        print(impute)
        # Drain the pipes and wait: otherwise the child is never reaped, its
        # errors are lost, and this process may exit while it is still running.
        _, stderr = imputeProc.communicate()
        if stderr or imputeProc.returncode != 0:
            sys.stderr.write('Job Submission Unsuccessful for Script {}\n'.format(impute))
def main():
    '''Command-line entry point: decide which pipeline stages to submit.

    Looks in the current working directory for the chromosome 2 split bed
    file and the chromosome 2 haps file of the given prefix, then:

    * both present     -> imputation only;
    * bed present only -> phasing, then imputation depending on it;
    * bed missing      -> split, phasing depending on the split, then
      imputation depending on the phasing.

    A later stage is submitted only when the stage before it returned a job ID.
    '''
    cli = argparse.ArgumentParser(description='Imputation Pipeline Main')
    cli.add_argument('-F', help='File Prefix for the base BED file unspit by chromosomes', required=True)
    prefix = cli.parse_args().F

    # Chromosome 2 is the probe used to decide whether a stage already ran.
    haveSplitBed = checkPath(prefix, fileSuf='bed', chr_='2')
    haveHaps = checkPath(prefix, fileSuf='haps', chr_='2')

    # Split and phased files both exist: go straight to imputation.
    if haveSplitBed and haveHaps:
        imputeCall(prefix)
        return

    if haveSplitBed:
        # Split files exist but are not phased yet.
        phaseJob = phasingCall(prefix)
    else:
        # Nothing split yet: phasing has to wait for the split job.
        splitJob = plinkSplitCall(prefix)
        if not splitJob:
            return
        phaseJob = phasingCall(prefix, splitJob)

    if phaseJob:
        imputeCall(prefix, phaseJob)
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()