-
Notifications
You must be signed in to change notification settings - Fork 3
/
bedClass.py
80 lines (64 loc) · 2.61 KB
/
bedClass.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/python
import os.path
import sys
class bed:
    """Sequential reader for tab-delimited bed target intervals.

    Records are read one line at a time from a file or from standard input.
    The start coordinate stored in the file is incremented by one when it is
    parsed (see getRecord), so ``start``/``end`` describe an interval for
    which ``end - start >= 0`` holds.

    Attributes:
        numberTargets: number of records read so far.
        referenceSequences: dict keyed by every reference sequence name seen.
        referenceSequenceList: the same names, in order of first appearance.
        filehandle: the open input stream, or None before openBed() is called.
        record: raw text of the most recently read line.
        referenceSequence: reference sequence name of the current record, or
            None before the first record has been read.
        start, end: integer coordinates of the current record.
        ref: reset to "" for every record; not otherwise used by this class.
    """

    def __init__(self):
        self.numberTargets = 0
        self.referenceSequences = {}
        self.referenceSequenceList = []

        # Per-record state is defined up front so that parseBed() can be
        # called before any record has been read without raising
        # AttributeError.
        self.filehandle = None
        self.record = ""
        self.referenceSequence = None
        self.ref = ""
        self.start = 0
        self.end = 0

    def openBed(self, filename):
        """Open *filename* for reading.

        The special name "stdin" selects standard input instead of a file.
        If the file cannot be opened, a message is written to stderr and the
        process exits with status 1.
        """
        if filename == "stdin":
            self.filehandle = sys.stdin
            return

        try:
            self.filehandle = open(filename, "r")
        except IOError:
            # sys.stderr.write works under both Python 2 and Python 3.
            sys.stderr.write("Failed to find file:  " + str(filename) + "\n")
            sys.exit(1)

    # Get a bed record.
    def getRecord(self):
        """Read and parse the next record from the open stream.

        Returns:
            True if a record was read and parsed, False at end of input.

        Exits with status 1 (after reporting on stderr) if the record has
        fewer than three tab-delimited fields, if the start or end field is
        not an integer, or if the resulting interval has end < start.
        """
        self.record = self.filehandle.readline()
        if not self.record:
            return False

        self.numberTargets += 1
        self.ref = ""
        self.start = 0
        self.end = 0

        entries = self.record.rstrip("\n").split("\t")

        # Reject short lines explicitly; otherwise indexing entries[1] or
        # entries[2] below would raise an unhandled IndexError.
        if len(entries) < 3:
            text = "bed record must contain at least three tab-delimited fields"
            self.generalError(text, "record", self.record.rstrip("\n"))

        self.referenceSequence = entries[0]

        # Add the reference sequence to the dictionary. If it didn't
        # previously exist, append it to the end of the list as well so that
        # the order in which reference sequences first appeared is preserved.
        if self.referenceSequence not in self.referenceSequences:
            self.referenceSequences[self.referenceSequence] = True
            self.referenceSequenceList.append(self.referenceSequence)

        # bed file should be 0-based, half-open, so the start coordinate
        # must be that in the bed file plus one.
        try:
            self.start = int(entries[1]) + 1
        except ValueError:
            text = "start position need is not an integer"
            self.generalError(text, "start", entries[1])

        try:
            self.end = int(entries[2])
        except ValueError:
            text = "end position need is not an integer"
            self.generalError(text, "end", entries[2])

        # Check that the record is a valid interval.
        if self.end - self.start < 0:
            sys.stderr.write("Invalid target interval:\n\t" + self.record + "\n")
            sys.exit(1)

        return True

    # Parse through the bed file until the correct reference sequence is
    # encountered and the end position is greater than or equal to that
    # requested.
    def parseBed(self, referenceSequence, position):
        """Advance through the stream towards *referenceSequence*/*position*.

        Records are read until the current record is on *referenceSequence*,
        and then until its end is >= *position* or the reference sequence
        changes. The current record is examined first, so nothing is read if
        it already satisfies the request.

        Returns:
            True if the scan stopped on a record, False if the end of input
            was reached first. On True the current record may belong to a
            different reference sequence when no interval on the requested
            one ends at or beyond *position*.
        """
        success = True

        # Skip records until the requested reference sequence is reached.
        while success and self.referenceSequence != referenceSequence:
            success = self.getRecord()

        # Skip intervals on that reference sequence that end before position.
        while (success and self.referenceSequence == referenceSequence
               and self.end < position):
            success = self.getRecord()

        return success

    # Close the bed file.
    def closeBed(self, filename=None):
        """Close the input stream.

        *filename* is unused; it is accepted (and now optional) only so that
        existing callers that pass it keep working.
        """
        if self.filehandle is not None:
            self.filehandle.close()

    # Define error messages for different handled errors.
    def generalError(self, text, field, fieldValue):
        """Report a read error on stderr and exit with status 1.

        Args:
            text: description of the problem.
            field: name of the offending field; "" suppresses the field line.
            fieldValue: the offending value, printed next to *field*.
        """
        sys.stderr.write("\nError encountered when attempting to read:\n")
        if field != "":
            sys.stderr.write("\t" + str(field) + " :  " + str(fieldValue) + "\n")
        sys.stderr.write("\n" + str(text) + "\n")
        sys.exit(1)