Skip to content

Commit

Permalink
bed-combine => bed-merge, added option for not splitting
Browse files Browse the repository at this point in the history
This will be much faster than bed-reduce for sorted BED files, and can
operate in a similar manner (except that score isn't used, but this
could be added easily to the internal class MultinameBedRecord)
  • Loading branch information
mbreese committed Dec 13, 2024
1 parent 1254f35 commit 8033043
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 9 deletions.
4 changes: 2 additions & 2 deletions src/java/io/compgen/ngsutils/NGSUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
import io.compgen.ngsutils.cli.bam.BinCount;
import io.compgen.ngsutils.cli.bam.PileupCli;
import io.compgen.ngsutils.cli.bed.BedCleanScore;
import io.compgen.ngsutils.cli.bed.BedCombine;
import io.compgen.ngsutils.cli.bed.BedMerge;
import io.compgen.ngsutils.cli.bed.BedCount;
import io.compgen.ngsutils.cli.bed.BedNearest;
import io.compgen.ngsutils.cli.bed.BedPEToBed;
Expand Down Expand Up @@ -232,7 +232,7 @@ public static void main(String[] args) {
.addCommand(BamToBedPE.class)
.addCommand(BedPEToBed.class)
.addCommand(BamClean.class)
.addCommand(BedCombine.class)
.addCommand(BedMerge.class)
.addCommand(VCFPeptide.class)
.addCommand(FastaGrep.class)
.addCommand(BamToFasta.class)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
import io.compgen.ngsutils.support.BufferedIteratorImpl;
import io.compgen.ngsutils.support.ListSortedBufferedIterator;

@Command(name="bed-combine", desc="Given two or more (sorted) BED(3/6) files, combine the BED annotations into one output BED file. This will produce non-overlapping regions (unlike bed-reduce). All other columns (and score) are ignored.", category="bed", experimental=true)
public class BedCombine extends AbstractOutputCommand {
@Command(name="bed-merge", desc="Given two or more (sorted) BED(3/6) files, combine the BED annotations into one output BED file. This can produce non-overlapping regions (--split) or unions (like bed-reduce). All other columns (and score) are ignored. Because this requires sorted inputs, it is more effcient than bed-reduce.", category="bed", experimental=true)
public class BedMerge extends AbstractOutputCommand {

public class MultinameIterator implements Iterator<MultinameBedRecord> {
private Iterator<BedRecord> it;
Expand All @@ -46,7 +46,6 @@ public MultinameBedRecord next() {
BedRecord rec = it.next();
return new MultinameBedRecord(rec);
}

}

public class MultinameBedRecord implements Comparable<MultinameBedRecord>{
Expand Down Expand Up @@ -114,6 +113,7 @@ public MultinameBedRecord clone(GenomeSpan newcoord) {

private boolean ignoreStrand = false;
private boolean single = false;
private boolean split = false;

@Option(name="single", desc="Use only one annotation for each position (prioritzed based on arg order)")
public void setSingle(boolean val) {
Expand Down Expand Up @@ -156,6 +156,11 @@ public void setIgnoreStrand(boolean val) {
this.ignoreStrand = val;
}

/**
 * Stores the value of the {@code split} command-line option in the
 * {@code split} field.
 *
 * @param val the value to assign to the {@code split} field
 */
@Option(name="split", desc="Split overlapping regions")
public void setSplit(boolean val) {
this.split = val;
}

@Exec
public void exec() throws IOException, CommandArgumentException {
if (bedFilenames == null) {
Expand Down Expand Up @@ -207,12 +212,12 @@ public void exec() throws IOException, CommandArgumentException {
}

}
}

}

/**
 * Convenience overload: forwards to the four-argument
 * {@code processBedFiles}, passing {@code null} for both
 * {@code firstIterator} and {@code firstRecord}.
 *
 * @param strand passed through unchanged to the four-argument overload
 * @param fai passed through unchanged to the four-argument overload
 * @throws IOException if the delegated call throws it
 */
protected void processBedFiles(Strand strand, FAIFile fai) throws IOException {
processBedFiles(strand, fai, null, null);
}

protected void processBedFiles(Strand strand, FAIFile fai, Iterator<BedRecord> firstIterator, BedRecord firstRecord) throws IOException {
List<BufferedIterator<MultinameBedRecord>> srcBeds = new ArrayList<BufferedIterator<MultinameBedRecord>>();
ListSortedBufferedIterator<MultinameBedRecord> bufList = new ListSortedBufferedIterator<MultinameBedRecord>();
Expand Down Expand Up @@ -353,16 +358,36 @@ protected void processBedFiles(Strand strand, FAIFile fai, Iterator<BedRecord> f
for (String name: second.getNames()) {
if (!first.getNames().contains(name)) {
namematch = false;
break;
}
}

if (namematch) {
for (String name: first.getNames()) {
if (!second.getNames().contains(name)) {
namematch = false;
break;
}
}
}

if (namematch || !split) {
//
// A |-------|
// A |-----|
//
// *or*
//
// A |-------|
// B |-----|
// B |-----|
//
// and not splitting...
//

MultinameBedRecord merged = first.clone(new GenomeSpan(curRef, Math.min(start1, start2), Math.max(end1, end2), strand));
if (!split && !namematch) {
merged.addName(second.getNames());
}
first = merged;

} else if (start2 > start1 && start2 < end1 && end2 > end1) {
Expand Down Expand Up @@ -423,7 +448,7 @@ protected void processBedFiles(Strand strand, FAIFile fai, Iterator<BedRecord> f
write(left);
first = merged;

} else if (start2 == start1 && start2 < end1 && end2 > end1) {
} else if (start2 == start1 && end2 > end1) {
//
// A |-------|
// B |-----------|
Expand Down

0 comments on commit 8033043

Please sign in to comment.