Skip to content

Commit

Permalink
Merge pull request #11 from Runsheng/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
Runsheng authored Jan 25, 2023
2 parents 88b7123 + 6025fa3 commit 514e634
Show file tree
Hide file tree
Showing 10 changed files with 88 additions and 19 deletions.
29 changes: 29 additions & 0 deletions script/bigg2len.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
import os,sys,inspect

currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
sys.path.insert(0,parentdir)

from trackcluster.tracklist import read_bigg, write_bigg

def build_parser():
    """Build the command-line parser for the bigg2len script."""
    parser = argparse.ArgumentParser()
    # the input file is mandatory: without it there is nothing to read
    parser.add_argument("-b", "--biggfile", required=True,
                        help="the bigg bed file")
    parser.add_argument("-o", "--out", default="mapped_len.txt",
                        help="the output file name")
    return parser


def main(argv=None):
    """
    Write one tab-separated line per bigg record: name, exonlen, geneName, ttype.

    :param argv: list of command-line arguments; defaults to sys.argv[1:]
    :return: None, the result is written to the file given by -o/--out
    """
    if argv is None:
        argv = sys.argv[1:]

    # show the help text instead of an error when called without any argument
    args = build_parser().parse_args(argv if argv else ["--help"])

    bigg_l = read_bigg(args.biggfile)
    with open(args.out, "w") as fw:
        for bigg in bigg_l:
            # get_exon() is called before exonlen is read
            bigg.get_exon()
            fields = (bigg.name, str(bigg.exonlen), bigg.geneName, bigg.ttype)
            fw.write("\t".join(fields) + "\n")


if __name__ == "__main__":
    main()


2 changes: 1 addition & 1 deletion script/trackrun.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ def test(self):

if args.install: ## test installed packages, samtools, bedtools
if is_bin_in("samtools") and is_bin_in("bedtools") and is_bin_in("minimap2"):
logger.info("Pass")
logger.info("samtools, bedtools, minimap2, Pass")
else:
logger.info("Check if samtools, bedtools and minimap2 are in $PATH")

Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
'script/bam2bigg.py',
'script/bigg2b.py',
'script/gff2bigg.py',
'script/biggmutant.py'],
'script/biggmutant.py',
'script/bigg2len.py'],

)
2 changes: 1 addition & 1 deletion test/flow_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def test_prepare_run_gene_novel(self):
:return:
"""
# parameters
wkdir="/t1/shoudong_488/test/tracktest"
wkdir="/t1/shoudong_488/test/trackall"
prefix="488_aba_1"
gff_bed="../gene.bed_s"
nano_bed="../488_aba_1_s.bed"
Expand Down
1 change: 1 addition & 0 deletions test/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def test_is_bin_in(self):

def test_is_package_installed(self):
print(is_package_installed("Bio"))
print(is_package_installed("os"))

def test_summary(self):
logger = log_summary()
Expand Down
2 changes: 1 addition & 1 deletion trackcluster/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# For relative imports to work in Python 3.6
import os, sys
sys.path.append(os.path.dirname(os.path.realpath(__file__)))
__version__="0.1.4"
__version__="0.1.6"
__all__ = []
18 changes: 13 additions & 5 deletions trackcluster/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from trackcluster.utils import myexe, is_bin_in, get_file_prefix,del_files,parmap, name2file, file2name, list2file, file2list
from trackcluster.convert import sam_to_bigGenePred
from trackcluster.batch import process_one_junction_corrected_try, process_one_subsample_try
from trackcluster.tracklist import read_bigg, write_bigg, cat_bed, bigg_count_write_native, is_a_read
from trackcluster.tracklist import read_bigg, write_bigg, cat_bed, bigg_count_write_native, is_a_read, list_to_dic

from trackcluster.pre import wrapper_bedtools_intersect2_select, tracklist_add_gene,get_gendic_bedinter,group_bigg_by_gene, wrapper_bedtools_merge, mergedbed2bigg, wrapper_bedtools_subtract
from trackcluster.post import flow_desc, flow_class4
Expand Down Expand Up @@ -83,17 +83,25 @@ def flow_bamconvert(wkdir,bamfile,out,prefix,score=30):
def flow_add_gene(wkdir, prefix, bigg_gff_file, bigg_nano_file, f1=0.01, f2=0.05):
os.chdir(wkdir)

### get two parts, the gene part and the novel part

# make sure one read one track
bigg_raw=read_bigg(bigg_nano_file)
bigg_dedup=list(list_to_dic(bigg_raw).values())
print("raw bigg number: {}; after dedup:{}".format(len(bigg_raw), len(bigg_dedup)))
outbed=prefix+"_dedup.bed"
write_bigg(bigg_dedup, outbed)

### get two parts, the gene part and the novel part,

# the gene part
outfile = prefix + "inter.bed"
# write the outfile to disk
wrapper_bedtools_intersect2_select(bigg_nano_file, bigg_gff_file, outfile=outfile,
wrapper_bedtools_intersect2_select(outbed, bigg_gff_file, outfile=outfile,
fraction_bed1=f1, fraction_bed2=f2)
read_gene = get_gendic_bedinter(outfile)
print("read number in genes:", len(read_gene))

bigg_nano = read_bigg(bigg_nano_file)
bigg_new = tracklist_add_gene(bigg_nano, read_gene)
bigg_new = tracklist_add_gene(bigg_dedup, read_gene)

# cleanup
del_files([outfile])
Expand Down
7 changes: 5 additions & 2 deletions trackcluster/pre.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,10 @@ def wrapper_bedtools_merge(bigg_file, out):
# merge with strandness, report to col5
# bedtools 2.26, can use -s -c 4 -o count
# bedtools 2.30, must use -s -c 6 -o distinct,count
cmd="bedtools merge -nonamecheck -s -c 6 -o distinct,count -i {biggfile} > {out}".format(
cmd="bedtools sort -i {biggfile} > {biggfile}_s && bedtools merge -nonamecheck -s -c 6 -o distinct,count -i {biggfile}_s > {out}".format(
biggfile=bigg_file, out=out
)
_=myexe(cmd)
myexe(cmd)

return out

Expand Down Expand Up @@ -207,6 +207,9 @@ def tracklist_add_gene(bigg_nano, read_gene):
:param bigg_nano: read track to bed added
:param read_gene:
:return: bigg_nano whose geneName can hold multiple genes, written as gene1||gene2||gene3
The gene name is used to define the "fusion" gene, so make sure each read has only one track
"""
bigg_new=[]
for bigg in bigg_nano:
Expand Down
24 changes: 22 additions & 2 deletions trackcluster/tracklist.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,31 @@ def write_bigg(bigg_list, out="bigg_new.bed"):


def list_to_dic(bigg_list):
    """
    Remove the duplicated mappings in bigg_list, keeping one track per read name.

    If minimap2 is not forced to report one mapping region per read, a read can
    have several mapping regions, which would create too many fusion isoforms.
    For each name the track with the largest exonlen is kept; on a tie the
    first one seen wins.
    The de-duplicated list can be obtained with list(bigg_dic.values()).
    Used in flow_add_gene.

    :param bigg_list: iterable of bigg tracks (need .name, .exonlen and .get_exon())
    :return: OrderedDict of name -> bigg, in order of first appearance of each name
    """
    bigg_dic = OrderedDict()

    for bigg in bigg_list:
        # get_exon() has to run before exonlen is compared
        bigg.get_exon()
        kept = bigg_dic.get(bigg.name)
        # replace only when the new mapping is strictly longer
        if kept is None or bigg.exonlen > kept.exonlen:
            bigg_dic[bigg.name] = bigg

    return bigg_dic


Expand Down
19 changes: 13 additions & 6 deletions trackcluster/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,20 @@ def is_bin_in(cmd_name):
return False


def is_package_installed(name):
    """
    Check whether a Python package/module can be imported.

    The module is really imported, so on success it is available in sys.modules.

    :param name: importable module name as a string, dotted names are allowed
    :return: True if the import succeeds, False if it raises ImportError
    """
    # local import, only this helper needs it
    import importlib

    try:
        # import_module takes the name as a string (a plain "import name"
        # statement would look for a module literally called "name"), returns
        # already loaded modules directly, and reports a missing parent of a
        # dotted name as ModuleNotFoundError, which is an ImportError
        importlib.import_module(name)
    except ImportError:
        return False
    return True

def set_tmp(wkdir=None):
"""
Expand Down

0 comments on commit 514e634

Please sign in to comment.