Skip to content

Commit

Permalink
Update gtf to tss script (#46)
Browse files Browse the repository at this point in the history
- Updated the script that generates a TSS file from an Ensembl GTF file so that it correctly parses the gene type (the `gene_biotype` attribute), fixing an issue where an empty TSS file was created.
  • Loading branch information
archanaraja authored Oct 19, 2023
1 parent 3c386b6 commit 21cf116
Showing 1 changed file with 12 additions and 7 deletions.
19 changes: 12 additions & 7 deletions src/tss_from_gtf.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,21 @@
break

chrom = l[0]

# print (l[8])
gene_id = l[8].split(';')[0].split(' ')[1]
gene_id = re.sub("\"", "", gene_id)
# print (gene_id)
for i in(l[8].split(';')):
if "gene_biotype" in i :
gene_type=i.split(" ")[2]
gene_type = re.sub("\"", "", gene_type)
if not gene_type == 'protein_coding':
continue
# print (gene_type)
end = tss
start = int(tss) - 1

gene_type = l[8].split(';')[2].split(' ')[1]
if not gene_type == 'protein_coding':
continue
out.write('\t'.join([chrom, str(start), end, gene_id, '0', strand])+'\n')

end = tss
start = int(tss) - 1

out.write('\t'.join([chrom, str(start), end, gene_id, '0', strand])+'\n')

0 comments on commit 21cf116

Please sign in to comment.