Skip to content

Commit

Permalink
Merge pull request #96 from GoekeLab/dev
Browse files: browse the repository at this point in the history
Add an assertion in preprocess_tx that rejects reference k-mers containing "N" (e.g. "NNNNN"), and fix a bug in readGTF when parsing StringTie GTF files
  • Loading branch information
ploy-np authored Sep 17, 2021
2 parents 8722c06 + 0976da5 commit 8222dc1
Showing 1 changed file with 12 additions and 3 deletions.
15 changes: 12 additions & 3 deletions xpore/scripts/dataprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,11 +178,19 @@ def readGTF(gtf_path_or_url):
dict={}
for ln in gtf:
if not ln.startswith("#"):
ln=ln.split("\t")
ln=ln.strip("\n").split("\t")
if ln[2] == "transcript" or ln[2] == "exon":
chr,type,start,end=ln[0],ln[2],int(ln[3]),int(ln[4])
tx_id=ln[-1].split('; transcript_id "')[1].split('";')[0]
g_id=ln[-1].split('gene_id "')[1].split('";')[0]
attrList=ln[-1].split(";")
attrDict={}
for k in attrList:
p=k.strip().split(" ")
if len(p) == 2:
attrDict[p[0]]=p[1].strip('\"')
##tx_id=ln[-1].split('; transcript_id "')[1].split('";')[0]
##g_id=ln[-1].split('gene_id "')[1].split('";')[0]
tx_id = attrDict["transcript_id"]
g_id = attrDict["gene_id"]
if tx_id not in dict:
dict[tx_id]={'chr':chr,'g_id':g_id,'strand':ln[6]}
if type not in dict[tx_id]:
Expand Down Expand Up @@ -570,6 +578,7 @@ def preprocess_tx(tx_id,data_dict,out_paths,locks):

try:
assert len(set(reference_kmer_array)) == 1
assert list(set(reference_kmer_array))[0].count('N') == 0 ##to weed out the mapped kmers from tx_seq that contain 'N', which is not in diffmod's model_kmer
except:
asserted = False
break
Expand Down

0 comments on commit 8222dc1

Please sign in to comment.