Skip to content

Commit

Permalink
3 end quality, inframecount
Browse files Browse the repository at this point in the history
  • Loading branch information
zhpn1024 committed Nov 15, 2020
1 parent e2375a1 commit 82fb35b
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 18 deletions.
10 changes: 8 additions & 2 deletions src/run/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def set_parser(parser):
parser.add_argument("--seq", action="store_true", help="Report ORF sequences")
parser.add_argument("--aaseq", action="store_true", help="Report amino acid sequences")
parser.add_argument("--blocks", action="store_true", help="Report all exon block positions for predicted ORFs")
parser.add_argument("--inframecount", action="store_true", help="Report the sum of all counts at the in-frame positions in the ORF")
# Reads filters
parser.add_argument("--maxNH", type=int, default=5, help="Max NH value allowed for bam alignments (default: 5)")
parser.add_argument("--minMapQ", type=float, default=1, help="Min MapQ value required for bam alignments (default: 1)")
Expand All @@ -71,8 +72,9 @@ def run(args):
global tisbampaths, tisoffdict, ribobampaths, riboffdict, genomefapath, compatible, compatiblemis
global minaalen, enrichtest, slp, paras, verbose, alt, title, tis2ribo, gfilter
global tpth, fpth, minpth, fspth, framebest, framelocalbest, longest, transprofile, TIS_types #fspth
global paired, seq, aaseq, blocks # showtime
global paired, seq, aaseq, blocks, inframecount # showtime
paired, seq, aaseq, blocks = args.paired, args.seq, args.aaseq, args.blocks
inframecount = args.inframecount
ribo.maxNH, ribo.minMapQ, ribo.secondary = args.maxNH, args.minMapQ, args.secondary
tisbampaths = args.tisbampaths
ribobampaths = args.ribobampaths
Expand Down Expand Up @@ -282,6 +284,7 @@ class MyPool(multiprocessing.pool.Pool):
if seq : s += '\tSeq'
if aaseq : s += '\tAASeq'
if blocks: s += '\tBlocks'
if inframecount: s += '\tInFrameCount'
s += '\n'
outfile.write(s)

Expand All @@ -302,6 +305,7 @@ class MyPool(multiprocessing.pool.Pool):
if seq : s += '\t' + e.sq
if aaseq : s += '\t' + e.aa
if blocks: s += '\t' + e.blocks
if inframecount: s += '\t{}'.format(e.inframecount)
s += '\n'
if allout is not None : allout.write(s)
if e.q <= args.fsqth : outfile.write(s) # "%s\t%d\n" % (e, e.length)) #, e.sq))
Expand Down Expand Up @@ -422,6 +426,7 @@ def _pred_gene(ps): ### trans
if tp is not None and fsp > fspth : continue
has_stop = tsq[stop-3:stop] in orf.cstop
e = getResult(t, tis, stop, cds1, cds2, tsq, [ip, ttis.cnts[tis], tp, rp, 'N', fsp], has_stop)
if inframecount: e.inframecount = sum(tribo.cnts[tis:stop:3])
es.append(e)

else : #all possible ORFs
Expand Down Expand Up @@ -461,6 +466,7 @@ def _pred_gene(ps): ### trans
fsp, fss = stat.fisher_method([tps[i], rps[i]]) #
if fsp > fspth : continue
e = getResult(t, tis, o.stop, cds1, cds2, tsq, [ip, ttis.cnts[tis], tps[i], rps[i], rst[i], fsp], o.has_stop_codon)
if inframecount: e.inframecount = sum(tribo.cnts[tis:o.stop:3])
#tistype = tisType(tis, o.stop, cds1, cds2)
#orfstr = '{}\t{}\t{}'.format(tsq[tis:tis+3],tis,o.stop)
#tid = "%s\t%s\t%s\t%s\t%s:%d-%d:%s\t%s\t%s" % (t.gid, t.id, t.symbol, t.genetype, t.chr, t.genome_pos(tis), t.genome_pos(o.stop), t.strand, orfstr, tistype)
Expand All @@ -481,7 +487,7 @@ def _pred_gene(ps): ### trans
def getResult(t, tis, stop, cds1, cds2, tsq, values, has_stop = True):
tistype = tisType(tis, stop, cds1, cds2)
orfstr = '{}\t{}\t{}'.format(tsq[tis:tis+3], tis, stop)
gtis, gstop = t.genome_pos(tis), t.genome_pos(stop)
gtis, gstop = t.genome_pos(tis, bias=1), t.genome_pos(stop, bias=0)
if t.strand != '-' : genomestr = '{}:{}-{}:{}'.format(t.chr, gtis, gstop, t.strand)
else : genomestr = '{}:{}-{}:{}'.format(t.chr, gstop, gtis, t.strand)
tid = "%s\t%s\t%s\t%s\t%s\t%s\t%s" % (t.gid, t.id, t.symbol, t.genetype, genomestr, orfstr, TIS_types[tistype])
Expand Down
18 changes: 12 additions & 6 deletions src/run/quality.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def set_parser(parser):
parser.add_argument("--bins", type=int, default=20, help="Bins for cds profile (default: 20)")
parser.add_argument("--nom0", action="store_true", help="Do not consider reads with mismatch at position 0 as a new group")
parser.add_argument("--th", type=float, default=0.5, help="Threshold for quality (default: 0.5)")
parser.add_argument("--end3", action="store_true", help="Plot 3' end profile")
# Reads filters
parser.add_argument("--maxNH", type=int, default=1, help="Max NH value allowed for bam alignments (default: 1)")
parser.add_argument("--minMapQ", type=float, default=1, help="Min MapQ value required for bam alignments (default: 1)")
Expand Down Expand Up @@ -217,12 +218,14 @@ def frange(start=0,stop=1,step=1):
lst.append(i)
i += step
return lst
def stdisplot(ax, x, y, lab = '', hali = 'left', vali = 'top', frame = 3, f0 = 0, color = ['r','g','b'], size='medium', tcol = 'k'):
def stdisplot(ax, x, y, lab = '', hali = 'left', vali = 'top', frame = 3, f0 = 0, color = ['r','g','b'], size='medium', tcol = 'k', shift = 0):
for i in range(frame):
i0 = (f0 + i) % frame
ax.bar(x[i0::frame], y[i0::frame], width = 0.6, color = color[i], edgecolor = color[i],alpha=0.6, align='edge')
xs = [n + shift for n in x[i0::frame]]
ax.bar(xs, y[i0::frame], width = 0.6, color = color[i], edgecolor = color[i],alpha=0.6, align='edge')
if hali == 'left' : tx = min(x)
else : tx = max(x)
tx += shift
ty = nearest(max(y+[1]), up=False) ##
ax.text(tx ,max(y+[1])*0.7 , lab,horizontalalignment=hali,verticalalignment=vali, size=size, color = tcol)
ax.spines['right'].set_visible(False)
Expand Down Expand Up @@ -267,6 +270,9 @@ def plot4(gs, i, l, disf, dis1, dis2, disc, offdict, args, start = 0):
if m0 : ax0.yaxis.set_label_coords(-0.9, 0.4)
else : ax0.yaxis.set_label_coords(-0.4, 0.4)

shift = 0
if args.end3: shift = l

if args.tis :
use, tisframe, tistxt, mp = ribo.TISquality(dis1[l], dis = args.dis, threshold = args.th)
if tisframe != frame : use = False
Expand All @@ -284,16 +290,16 @@ def plot4(gs, i, l, disf, dis1, dis2, disc, offdict, args, start = 0):
if use : tcol = fbcols[frame]
ax1 = plot.subplot(gs[i, start+1:start+3])
x = list(range(*args.dis))
stdisplot(ax1, x, dis1[l], lab = txt, hali = 'left', f0 = -args.dis[0], color=fbcols, tcol = tcol)
stdisplot(ax1, x, dis1[l], lab = txt, hali = 'left', f0 = -args.dis[0], color=fbcols, tcol = tcol, shift = shift)

if use and not args.tis :
plot.hlines(th, -18, -6, color=tcol, linestyles ='dashed')
plot.hlines(th, -18+shift, -6+shift, color=tcol, linestyles ='dashed')
if args.tis :
#mp = ribo.getTIS(dis1[l], [d1,d2])
ax1.text(mp+1 ,max(dis1[l]+[1])*0.7 , tistxt, horizontalalignment='left',verticalalignment='top', size='medium', color = tcol)
ax1.text(mp+1+shift, max(dis1[l]+[1])*0.7 , tistxt, horizontalalignment='left',verticalalignment='top', size='medium', color = tcol)
ax2 = plot.subplot(gs[i, start+3:start+5])

stdisplot(ax2, x, dis2[l], lab = '', hali = 'right', f0 = -args.dis[0], color=fbcols, tcol = tcol)
stdisplot(ax2, x, dis2[l], lab = '', hali = 'right', f0 = -args.dis[0], color=fbcols, tcol = tcol, shift = shift)
ax3 = plot.subplot(gs[i, start+5])
cdsplot(ax3, disc[l])
if args.tis :
Expand Down
25 changes: 17 additions & 8 deletions src/zbio/gtf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
'''

def add_chr(chr):
if chr.isdigit() or chr in ('X','Y','M',): chr = 'chr' + chr
elif chr == 'MT' : chr = 'chrM'
if len(chr) <= 3 and (chr.isdigit() or chr in ('X','Y',)): chr = 'chr' + chr
elif chr in ('M','MT','MtDNA','mitochondrion_genome','Mito') : chr = 'chrM'
elif len(chr) == 2 and chr[0].isdigit(): chr = 'chr' + chr
elif chr in ('I','II','III','IV','V','VI','VII','VIII','IX','XI','XII','XIII','XIV','XV','XVI'): chr = 'chr' + chr
return chr
def rm_chr(chr):
if chr[3:].isdigit() or chr in ('chrX','chrY'): chr = chr[3:]
Expand Down Expand Up @@ -102,10 +104,15 @@ def tid(self):
try : return self.tid_c
except :
self.tid_c = self.attr('transcript_id')
if self.gff and self.tid_c == '':
p = self.attr('Parent')
if p.startswith('rna-'): self.tid_c = p[4:]
if p.startswith('gene-'): self.tid_c = p[5:]
if self.gff: # and self.tid_c == '':
if self.lst[2] in ('transcript', 'mRNA', 'tRNA', 'rRNA'):
self.tid_c = self.id
else:
p = self.attr('Parent')
if p.startswith('rna-'): self.tid_c = p[4:]
elif p.startswith('gene-'): self.tid_c = p[5:]
elif p.startswith('rna'): self.tid_c = p
elif p.startswith('gene'): self.tid_c = p
return self.tid_c
@property
def symbol(self):
Expand All @@ -119,6 +126,7 @@ def id(self):
try: return self._id
except:
self._id = self.attr('exon_id')
if self._id == '': self._id = self.attr('ID')
return self._id
@id.setter
def id(self, value):
Expand Down Expand Up @@ -250,7 +258,8 @@ class gtfTrans(Exon):
'''
def __init__(self, lst, gff = False, addchr = False):
Exon.__init__(self, lst, gff, addchr)
self.id = self.tid
if self.tid != '': self.id = self.tid
elif self.id != '': self.tid_c = self.id
#self.type = lst[1]
self.exons = []
self.cds = []
Expand Down Expand Up @@ -673,7 +682,7 @@ def gtfgene_iter(fin, filt = [], gff = False, addchr = False, chrs = None, verbo
genes[g.id] = g
genes2[g.id] = g
#if g.id not in gidlist: gidlist.append(g.id)
elif lst[2] == 'transcript':
elif lst[2] in ('transcript', 'mRNA', 'tRNA', 'rRNA'):
t = gtfTrans(lst, gff, addchr)
if t.gid not in genes:
g = gtfGene(lst, gff, addchr)
Expand Down
7 changes: 5 additions & 2 deletions src/zbio/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@ def splitIter(filePath, sep = '\t', gz = False, skip = 0, title = None, encoding
if filePath.split('.')[-1].lower() == 'gz' : gz = True
if gz :
import gzip
infile = gzip.open(filePath, 'rt', encoding=encoding)
else : infile = open(filePath, 'r', encoding=encoding)
try: infile = gzip.open(filePath, 'rt', encoding=encoding)
except: infile = gzip.open(filePath)
else :
try: infile = open(filePath, 'r', encoding=encoding)
except: infile = open(filePath)
else : infile = filePath
for i in range(skip):
l = next(infile)
Expand Down

0 comments on commit 82fb35b

Please sign in to comment.