-
Notifications
You must be signed in to change notification settings - Fork 18
/
get_spans.py
117 lines (100 loc) · 3.2 KB
/
get_spans.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python
# Interrogate a sequence by finding out what features are present within a specified range
from __future__ import print_function
import sys
import argparse
from Bio import SeqIO
# Module metadata, exposed through the conventional dunder names.
__author__ = "Joe R. J. Healey"
__version__ = "1.0"
# Matches the program name passed to argparse (minus the extension).
__title__ = "get_spans"
__license__ = "GPLv3"
# One-line summary of what this script does (shown by help()/pydoc).
__doc__ = "Interrogate a sequence for the features present within a specified range"
__author_email__ = "jrj.healey@gmail.com"
# Biopython is the script's only third-party dependency. If it cannot be
# imported, print installation hints to stderr and stop with exit status 1
# rather than failing with a bare traceback.
try:
    from Bio import SeqIO
except ImportError:
    sys.stderr.write(
        """
Could not import the BioPython module which means it is probably
not installed, or at least not available in the PYTHONPATH for this
particular binary.
If you have conda (recommended) try running:
$ conda install -c anaconda biopython
or, alternatively with python/pip:
$ python -m pip install biopython
"""
    )
    raise SystemExit(1)
def get_args(argv=None):
    """Parse command line arguments.

    Args:
        argv: Optional list of argument strings to parse instead of
            ``sys.argv[1:]``. ``None`` (the default) reads the real
            command line, so existing ``get_args()`` calls are unaffected.

    Returns:
        The ``argparse.Namespace`` holding ``verbose``, ``infile``,
        ``format``, ``range`` and ``type``.

    Exits:
        With status 1 after printing the help text when no arguments at all
        are supplied. argparse itself exits with status 2 when an option is
        invalid or a required option is missing.
    """
    desc = """Probe sequence spans for features."""
    epi = """Interrogate an input sequence for any features that fall within user defined ranges."""

    parser = argparse.ArgumentParser(
        description=desc, epilog=epi, prog="get_spans.py"
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Verbose behaviour (extra information).",
    )
    # The input file and the range are both needed to do any work, so let
    # argparse report a missing one instead of failing later with a traceback.
    parser.add_argument(
        "-i",
        "--infile",
        action="store",
        required=True,
        help="Input sequence file.",
    )
    parser.add_argument(
        "-f",
        "--format",
        action="store",
        default="genbank",
        help="The format of the input file.",
    )
    parser.add_argument(
        "-r",
        "--range",
        action="store",
        required=True,
        help="The range of interest (specified as start:stop).",
    )
    parser.add_argument(
        "-t",
        "--type",
        action="store",
        default="CDS",
        help="Restrict feature detection to just this type of feature.",
    )

    supplied = sys.argv[1:] if argv is None else list(argv)
    # A bare invocation gets the full help text rather than a terse usage error.
    if not supplied:
        parser.print_help(sys.stderr)
        sys.exit(1)

    return parser.parse_args(supplied)
def _spans_overlap(first_start, first_end, second_start, second_end):
    """Return True if two half-open integer ranges share at least one position."""
    # Equivalent to intersecting set(range(...)) for each span, without
    # materialising every position; empty or reversed ranges never overlap.
    return max(first_start, second_start) < min(first_end, second_end)


def main():
    """Print every feature of the requested type that overlaps the given range.

    Reads a single record from ``args.infile`` in ``args.format``, interprets
    ``args.range`` as a half-open ``start:stop`` interval, and prints each
    feature whose type equals ``args.type`` and whose location overlaps that
    interval.

    Raises:
        ValueError: If the input cannot be read as a single record, or the
            range is not of the form ``start:stop`` with integer bounds.
        NameError: If no range was supplied.
    """
    args = get_args()

    try:
        rec = SeqIO.read(args.infile, args.format)
    except ValueError as err:
        raise ValueError(
            "Caught: {}.\n"
            "Currently this script is only capable of handling a single contiguous file "
            "not multiple records.".format(err)
        )

    try:
        start, end = args.range.split(":")
    except AttributeError as err:
        # args.range is None when -r/--range was not given.
        raise NameError(
            "Caught: {}.\n" "Did you specify a range with -r|--range?".format(err)
        )
    except ValueError as err:
        # Wrong number of ':'-separated fields.
        raise ValueError(
            "Caught: {}.\n"
            "The range must be given as start:stop.".format(err)
        )

    if args.verbose:
        print("Start: " + str(start) + ", End: " + str(end), file=sys.stderr)

    try:
        range_start, range_end = int(start), int(end)
    except ValueError as err:
        raise ValueError(
            "Caught: {}.\n"
            "The range bounds must be integers (start:stop).".format(err)
        )

    for feat in rec.features:
        if feat.type != args.type:
            continue
        # Use the public start/end properties of the location instead of its
        # private attributes; int() yields the plain coordinate.
        feat_start = int(feat.location.start)
        feat_end = int(feat.location.end)
        if _spans_overlap(range_start, range_end, feat_start, feat_end):
            print(feat)
# Run the command-line workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main()