Skip to content

Commit

Permalink
Modify index update functions to work with annotation files without e…
Browse files Browse the repository at this point in the history
…lements - fix #25
  • Loading branch information
emi80 committed Nov 5, 2019
1 parent 39209f4 commit 2db2243
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 7 deletions.
50 changes: 45 additions & 5 deletions annotation/annotation.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"os"
"runtime/debug"
"sort"
"strings"
"sync"

"github.com/dhconnelly/rtreego"
Expand Down Expand Up @@ -155,21 +156,60 @@ func interleaveFeatures(features []*Feature, start, end float64, element string,
return fs
}

func updateIndex(index *rtreego.Rtree, start, end float64, feature, updated string, extremes bool, elems chan rtreego.Spatial) *rtreego.Rtree {
func getGenes(index *rtreego.Rtree, start, end float64) []rtreego.Spatial {
genes := QueryIndexByElement(index, start, end, "gene")
if len(genes) == 0 {
exons := QueryIndexByElement(index, start, end, "exon")
geneMap := make(map[string]FeatureSlice)
for _, f := range NewFeatureSlice(exons) {
geneMap[f.Tag("gene_id")] = append(geneMap[f.Tag("gene_id")], f)
}
for g, l := range geneMap {
var start, end float64
start = math.MaxFloat64
for _, e := range geneMap[g] {
if e.Start() < start {
start = e.Start()
}
if e.End() > end {
end = e.End()
}
}
f, err := parseFeature(
l[0].chr,
[]byte("gene"),
start,
end,
)
if err == nil {
f.SetTags(l[0].tags)
for k := range f.tags {
if !strings.HasPrefix(k, "gene") {
delete(f.tags, k)
}
}
genes = append(genes, f)
}
}
}
return genes
}

func updateIndex(index *rtreego.Rtree, start, end float64, elems chan rtreego.Spatial) *rtreego.Rtree {
if end-start <= 0 {
return index
}

var features []rtreego.Spatial
genes := getGenes(index, start, end)

genes := QueryIndexByElement(index, start, end, feature)
for _, i := range genes {
f := i.(*Feature)
features = append(features, f)
}
mergedGenes := mergeIntervals(genes)
for _, f := range interleaveFeatures(mergedGenes, start, end, feature, []byte(updated), extremes) {
if f.Element() == updated {
for _, f := range interleaveFeatures(mergedGenes, start, end, "gene", []byte("intergenic"), true) {
if f.Element() == "intergenic" {
features = append(features, f)
if os.Getenv(dumpElementsEnv) != "" {
elems <- f
Expand Down Expand Up @@ -205,7 +245,7 @@ func createTree(trees chan *tree, chr string, length float64, feats chan rtreego
wg.Add(1)
featSlice := chan2slice(feats)
tmpIndex := rtreego.NewTree(1, 25, 50, featSlice...)
t := tree{chr, updateIndex(tmpIndex, 0, length, "gene", "intergenic", true, elems)}
t := tree{chr, updateIndex(tmpIndex, 0, length, elems)}
trees <- &t
if os.Getenv(dumpElementsEnv) != "" && length == 0 {
for _, f := range featSlice {
Expand Down
8 changes: 6 additions & 2 deletions annotation/annotation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ func TestIssue23(t *testing.T) {
chr: chr,
element: element,
location: newRect(rtreego.Point{12010}, []float64{47}, t),
},
},
&Feature{
chr: chr,
element: element,
Expand All @@ -341,7 +341,7 @@ func TestIssue23(t *testing.T) {
chr: chr,
element: element,
location: newRect(rtreego.Point{12975}, []float64{77}, t),
},
},
&Feature{
chr: chr,
element: element,
Expand Down Expand Up @@ -456,6 +456,10 @@ func TestReadFeatures(t *testing.T) {
"../data/coverage-test-shuffled.gtf.gz",
elems,
},
{
"../data/coverage-test-nogenes.gtf.gz",
elems,
},
} {
m := CreateIndex(i.f, chrLens)
index := m.Get("chr1")
Expand Down
Binary file added data/coverage-test-nogenes.gtf.gz
Binary file not shown.

0 comments on commit 2db2243

Please sign in to comment.