-
Notifications
You must be signed in to change notification settings - Fork 0
/
tcga_reheader.py
executable file
·71 lines (61 loc) · 1.97 KB
/
tcga_reheader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env python
import os
import re
import sys
import yaml
import subprocess
import tempfile
from StringIO import StringIO
def which(cmd):
cmd = ["which",cmd]
p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
res = p.stdout.readline().rstrip()
if len(res) == 0: return None
return res
def parse_rg(line):
res = re.search("^@RG\t(.*)$", line)
text = res.group(1)
fields = {}
for f in text.split("\t"):
res = re.search("^(\w+):(.*)$", f)
fields[res.group(1)] = res.group(2)
return fields
def write_rg(fields):
rgm = []
rgm.append( "ID:" + fields['ID'])
for k,v in fields.items():
if k != 'ID':
rgm.append( "%s:%s" % (k,v) )
return "@RG\t%s" % ( "\t".join( rgm ))
if __name__ == '__main__':
config_file = sys.argv[1]
in_bam = sys.argv[2]
out_bam = sys.argv[3]
#get original header
sam_tools = which("samtools")
if sam_tools is None:
raise Exception("samtools not found")
proc = subprocess.Popen( [sam_tools, "view", "-H", in_bam], stdout=subprocess.PIPE )
stdout, stderr = proc.communicate()
if proc.returncode != 0:
raise Exception("Failed to read input BAM")
with open(config_file) as handle:
config = yaml.load(handle.read())
header = StringIO()
for line in stdout.split("\n"):
if len(line):
if 'RG' in config:
if line.startswith("@RG"):
rg = parse_rg(line)
for k,v in config['RG'].items():
rg[k] = v
line = write_rg(rg)
header.write(line + "\n")
(t, tfile) = tempfile.mkstemp()
os.close(t)
with open(tfile, "w") as handle:
handle.write(header.getvalue())
proc = subprocess.Popen( "samtools reheader %s %s > %s" % (tfile, in_bam, out_bam), shell=True )
stdout, stderr = proc.communicate()
if proc.returncode != 0:
raise Exception("Failed to read input BAM")