-
Notifications
You must be signed in to change notification settings - Fork 40
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Conflicts: README.md sam/simple-filters.go
- Loading branch information
Showing
8 changed files
with
374 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
package bed | ||
|
||
import ( | ||
"bufio" | ||
"fmt" | ||
"os" | ||
"strconv" | ||
"strings" | ||
|
||
"github.com/exascience/elprep/utils" | ||
) | ||
|
||
// splitTrackField splits a single "key=value" track line field into its key
// and its value.
//
// Only the first "=" acts as the separator, so a value that itself contains
// "=" is returned intact. A field without any "=" yields the whole field as
// the key and an empty value instead of panicking.
func splitTrackField(field string) (string, string) {
	split := strings.SplitN(field, "=", 2)
	if len(split) < 2 {
		// No "=" present: SplitN returns the field as its only element.
		return split[0], ""
	}
	return split[0], split[1]
}
|
||
/* | ||
Parses a BED file. See | ||
https://genome.ucsc.edu/FAQ/FAQformat.html#format1 | ||
*/ | ||
func ParseBed(filename string) (b *Bed, err error) { | ||
|
||
bed := NewBed() | ||
|
||
// open file | ||
file, err := os.Open(filename) | ||
if err != nil { | ||
return nil, err | ||
} | ||
defer func() { | ||
if nerr := file.Close(); err == nil { | ||
err = nerr | ||
} | ||
}() | ||
|
||
scanner := bufio.NewScanner(file) | ||
|
||
var track *BedTrack // for storing the current track | ||
|
||
for scanner.Scan() { | ||
line := scanner.Text() | ||
data := strings.Split(line, "\t") | ||
// check if the line is a new track | ||
if data[0] == "track" { | ||
// create new track, store the old one | ||
if track != nil { | ||
bed.Tracks = append(bed.Tracks, track) | ||
} | ||
// all track entries are optional | ||
// parse and collect those that are used | ||
fields := make(map[string]string) | ||
for _, field := range data[1:] { | ||
key, val := splitTrackField(field) | ||
fields[key] = val | ||
} | ||
track = NewBedTrack(fields) | ||
} else { | ||
// parse a region entry | ||
chrom := utils.Intern(data[0]) | ||
var err error | ||
start, err := strconv.Atoi(data[1]) | ||
if err != nil { | ||
return nil, fmt.Errorf("Invalid bed region start: %v ", err) | ||
} | ||
end, err := strconv.Atoi(data[2]) | ||
if err != nil { | ||
return nil, fmt.Errorf("Invalid bed region end: %v ", err) | ||
} | ||
region, err := NewBedRegion(chrom, int32(start), int32(end), data[3:]) | ||
if err != nil { | ||
return nil, fmt.Errorf("Invalid bed region: %v ", err) | ||
} | ||
AddBedRegion(bed, region) | ||
if track != nil { | ||
track.Regions = append(track.Regions, region) | ||
} | ||
} | ||
|
||
} | ||
if err := scanner.Err(); err != nil { | ||
return nil, fmt.Errorf("Error while reading bed file: %v ", err) | ||
} | ||
// Make sure bed regions are sorted. | ||
sortBedRegions(bed) | ||
return bed, nil | ||
} | ||
|
||
func printParsedBed(bed *Bed) { | ||
fmt.Println("Bed{") | ||
for k, r := range bed.RegionMap { | ||
fmt.Println("Chrom ", *k, " :") | ||
for _, v := range r { | ||
fmt.Println("BedRegion{", *v.Chrom, v.Start, v.End, " }") | ||
} | ||
} | ||
fmt.Println("}") | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
package bed | ||
|
||
import ( | ||
"fmt" | ||
"sort" | ||
"strconv" | ||
|
||
"github.com/exascience/elprep/utils" | ||
) | ||
|
||
/* | ||
A struct for representing the contents of a BED file. See | ||
https://genome.ucsc.edu/FAQ/FAQformat.html#format1 | ||
*/ | ||
type Bed struct { | ||
// Bed tracks defined in the file. | ||
Tracks []*BedTrack | ||
// Maps chromosome name onto bed regions. | ||
RegionMap map[utils.Symbol][]*BedRegion | ||
} | ||
|
||
/* | ||
A struct for representing BED tracks. See | ||
https://genome.ucsc.edu/FAQ/FAQformat.html#format1 | ||
*/ | ||
type BedTrack struct { | ||
// All track fields are optional. | ||
Fields map[string]string | ||
// The bed regions this track groups together. | ||
Regions []*BedRegion | ||
} | ||
|
||
/* | ||
An interval as defined in a BED file. See | ||
https://genome.ucsc.edu/FAQ/FAQformat.html#format1 | ||
*/ | ||
type BedRegion struct { | ||
Chrom utils.Symbol | ||
Start int32 | ||
End int32 | ||
OptionalFields []interface{} | ||
} | ||
|
||
// Symbols for optional strand field of a BedRegion. | ||
var ( | ||
// Strand forward. | ||
SF = utils.Intern("+") | ||
// Strand reverse. | ||
SR = utils.Intern("-") | ||
) | ||
|
||
/* | ||
Allocates and initializes a new BedRegion. Optional fields are given | ||
in order. If a "later" field is entered, then the "earlier" field was | ||
entered as well. See https://genome.ucsc.edu/FAQ/FAQformat.html#format1 | ||
*/ | ||
func NewBedRegion(chrom utils.Symbol, start int32, end int32, fields []string) (b *BedRegion, err error) { | ||
bedRegionFields, err := initializeBedRegionFields(fields) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return &BedRegion{ | ||
Chrom: chrom, | ||
Start: start, | ||
End: end, | ||
OptionalFields: bedRegionFields, | ||
}, nil | ||
} | ||
|
||
// Indices of the valid optional fields of a bed region, in the order in
// which they appear after the three mandatory columns. See spec.
const (
	brName        = iota // 0: name
	brScore              // 1: score
	brStrand             // 2: strand
	brThickStart         // 3: thickStart
	brThickEnd           // 4: thickEnd
	brItemRgb            // 5: itemRgb
	brBlockCount         // 6: blockCount
	brBlockSizes         // 7: blockSizes
	brBlockStarts        // 8: blockStarts
)
|
||
/* | ||
Allocates a fresh SmallMap to initialize a BedRegion's optional fields. | ||
*/ | ||
func initializeBedRegionFields(fields []string) ([]interface{}, error) { | ||
brFields := make([]interface{}, len(fields)) | ||
for i, val := range fields { | ||
switch i { | ||
case brName: | ||
brFields[brName] = val | ||
case brScore: | ||
score, err := strconv.Atoi(val) | ||
if err != nil || score < 0 || score > 1000 { | ||
return nil, fmt.Errorf("Invalid Score field : %v", err) | ||
} | ||
brFields[brScore] = score | ||
case brStrand: | ||
if val != "+" && val != "-" { | ||
return nil, fmt.Errorf("Invalid Strand field: %v", val) | ||
} | ||
brFields[brStrand] = utils.Intern(val) | ||
case brThickStart: | ||
start, err := strconv.Atoi(val) | ||
if err != nil { | ||
return nil, fmt.Errorf("Invalid ThickStart field: %v", err) | ||
} | ||
brFields[brThickStart] = start | ||
case brThickEnd: | ||
end, err := strconv.Atoi(val) | ||
if err != nil { | ||
return nil, fmt.Errorf("Invalid ThickEnd field: %v", err) | ||
} | ||
brFields[brThickEnd] = end | ||
case brItemRgb: | ||
if val == "on" { | ||
brFields[brItemRgb] = true | ||
} else { | ||
brFields[brItemRgb] = false | ||
} | ||
case brBlockCount: | ||
count, err := strconv.Atoi(val) | ||
if err != nil { | ||
return nil, fmt.Errorf("Invalid BlockCount field: %v", err) | ||
} | ||
brFields[brBlockCount] = count | ||
case brBlockSizes: | ||
sizes, err := strconv.Atoi(val) | ||
if err != nil { | ||
return nil, fmt.Errorf("Invalid BlockSizes field: %v", err) | ||
} | ||
brFields[brBlockSizes] = sizes | ||
case brBlockStarts: | ||
start, err := strconv.Atoi(val) | ||
if err != nil { | ||
return nil, fmt.Errorf("Invalid BlockStarts field: %v", err) | ||
} | ||
brFields[brBlockStarts] = start | ||
default: | ||
return nil, fmt.Errorf("Invalid optional field: %v out of 0-8.", val) | ||
} | ||
} | ||
return brFields, nil | ||
} | ||
|
||
/* | ||
Allocates and initializes a new BedTrack. | ||
*/ | ||
func NewBedTrack(fields map[string]string) *BedTrack { | ||
return &BedTrack{ | ||
Fields: fields, | ||
} | ||
} | ||
|
||
/* | ||
Allocates and initializes an empty bed. | ||
*/ | ||
func NewBed() *Bed { | ||
return &Bed{ | ||
RegionMap: make(map[utils.Symbol][]*BedRegion), | ||
} | ||
} | ||
|
||
/* | ||
Add region to the bed region map. | ||
*/ | ||
func AddBedRegion(bed *Bed, region *BedRegion) { | ||
// append the region entry | ||
bed.RegionMap[region.Chrom] = append(bed.RegionMap[region.Chrom], region) | ||
} | ||
|
||
/* | ||
A function for sorting the bed regions. | ||
*/ | ||
func sortBedRegions(bed *Bed) { | ||
for _, regions := range bed.RegionMap { | ||
sort.SliceStable(regions, func(i, j int) bool { | ||
return regions[i].Start < regions[j].Start | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
/* | ||
Package bed is a library for parsing and representing BED files. See | ||
https://genome.ucsc.edu/FAQ/FAQformat.html#format1 | ||
*/ | ||
package bed |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.