Skip to content

Commit

Permalink
Updated support for BamStats
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeremiah Wala committed Jul 7, 2015
1 parent f50d16f commit 7b1afc3
Show file tree
Hide file tree
Showing 10 changed files with 105 additions and 31 deletions.
9 changes: 8 additions & 1 deletion src/AlignedContig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,11 +236,18 @@ void AlignedContig::setMultiMapBreakPairs() {

bp.gr1 = SnowTools::GenomicRegion(it->m_align.ChrID(), it->gbreak2, it->gbreak2);
bp.gr2 = SnowTools::GenomicRegion((it+1)->m_align.ChrID(), (it+1)->gbreak1, (it+1)->gbreak1);

//debug
if (getContigName() == "c_19_15644356_15645003_48")
std::cerr << "Frag 1: " << (*it) << " Frag 2: " << (*(it+1)) << std::endl;

//bp.gr1.strand = it->align.IsReverseStrand() ? '-' : '+';
//bp.gr2.strand = (it+1)->align.IsReverseStrand() ? '+' : '-';
bp.gr1.strand = !it->m_align.ReverseFlag() ? '+' : '-';

bp.gr1.strand = it->m_align.ReverseFlag() ? '-' : '+';
bp.gr2.strand = (it+1)->m_align.ReverseFlag() ? '+' : '-';


bp.cpos1 = it->break2; // take the right-most breakpoint as the first
bp.cpos2 = (it+1)->break1; // take the left-most of the next one

Expand Down
38 changes: 37 additions & 1 deletion src/BamStats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
namespace SnowTools {

BamReadGroup::BamReadGroup(const std::string& name) : reads(0), supp(0), unmap(0), qcfail(0),
duplicate(0), m_name(name)
duplicate(0), mate_unmap(0), m_name(name)
{

mapq = Histogram(0,100,1);
Expand All @@ -18,8 +18,44 @@ BamReadGroup::BamReadGroup(const std::string& name) : reads(0), supp(0), unmap(0

}

/** Print the statistics of every read group as a tab-delimited table.
 *
 * A header row naming the columns is written first, followed by one row
 * per entry of qc.m_group_map (the mapped value of each entry is streamed
 * with its own operator<<).
 *
 * @param out Stream to write to
 * @param qc  Statistics object whose read groups are printed
 * @return The same stream, so that calls can be chained
 */
std::ostream& operator<<(std::ostream& out, const BamStats& qc) {
  out << "ReadGroup\tReadCount\tSupplementary\tUnmapped\tMateUnmapped\tQCFailed\tDuplicate\tMappingQuality\tNM\tInsertSize\tClippedBases\tMeanPhredScore\tReadLength" << std::endl;
  for (const auto& i : qc.m_group_map)
    out << i.second << std::endl;
  // The stream must be returned: flowing off the end of a value-returning
  // function is undefined behaviour and would break chained << calls.
  return out;
}

/** Print one read group as a single tab-delimited row (no trailing newline).
 *
 * Column order: name, read count, supp, unmap, mate_unmap, qcfail,
 * duplicate, then the mapq, nm, isize, clip, phred and len histograms,
 * each rendered with Histogram::toFileString().
 *
 * @param out Stream to write to
 * @param qc  Read group to print
 * @return The same stream, so that calls can be chained
 */
std::ostream& operator<<(std::ostream& out, const BamReadGroup& qc) {
  const std::string tab("\t");

  // identifier and scalar counters
  out << qc.m_name << tab;
  out << qc.reads << tab;
  out << qc.supp << tab;
  out << qc.unmap << tab;
  out << qc.mate_unmap << tab;
  out << qc.qcfail << tab;
  out << qc.duplicate << tab;

  // histogram columns; the last one is not followed by a separator
  out << qc.mapq.toFileString() << tab;
  out << qc.nm.toFileString() << tab;
  out << qc.isize.toFileString() << tab;
  out << qc.clip.toFileString() << tab;
  out << qc.phred.toFileString() << tab;
  out << qc.len.toFileString();

  return out;
}

void BamReadGroup::addRead(BamRead &r)
{

++reads;
if (r.SecondaryFlag())
++supp;
if (r.QCFailFlag())
++qcfail;
if (r.DuplicateFlag())
++duplicate;
if (!r.MappedFlag())
++unmap;
if (!r.MateMappedFlag())
++mate_unmap;

int mapqr = r.MapQuality();
if (mapqr >=0 && mapqr <= 100)
mapq.addElem(mapqr);
Expand Down
8 changes: 7 additions & 1 deletion src/BamWalker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,10 @@ void BamWalker::WriteAlignment(BamRead &r)
for (auto& i : m_tag_list)
r.RemoveTag(i.c_str());

sam_write1(fop, br.get(), r.raw());
if (!fop)
std::cerr << "BamWalker ERROR in writeAlignment. Did you forget to open the Bam for writing (OpenWriteBam)? Skipping write" << std::endl;
else
sam_write1(fop, br.get(), r.raw());
}

std::ostream& SnowTools::operator<<(std::ostream& out, const BamWalker& b)
Expand All @@ -307,6 +310,9 @@ std::ostream& SnowTools::operator<<(std::ostream& out, const BamWalker& b)
out << "CRAM" << std::endl;
else if (b.fop->format.format == text_format)
out << "SAM" << std::endl;
else if (b.fop == 0)
out << "NONE" << std::endl;


out << b.m_mr << std::endl;
if (b.m_region.size()) {
Expand Down
20 changes: 10 additions & 10 deletions src/BreakPoint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,8 @@ namespace SnowTools {
}

// TODO convert chr to string with treader
ss << gr1.chr+1 << sep << gr1.pos1 << sep << (gr1.strand ? '+' : '-') << sep
<< gr2.chr+1 << sep << gr2.pos1 << sep << (gr2.strand ? '+' : '-') << sep
ss << gr1.chr+1 << sep << gr1.pos1 << sep << gr1.strand << sep
<< gr2.chr+1 << sep << gr2.pos1 << sep << gr2.strand << sep
<< getSpan() << sep
<< mapq1 << sep << mapq2 << sep
<< nsplit << sep << tsplit << sep
Expand All @@ -279,9 +279,9 @@ namespace SnowTools {
num_align = 0;
dc = tdc;

gr1.pos1 = (tdc.m_reg1.strand) ? tdc.m_reg1.pos2 : tdc.m_reg1.pos1;
gr1.pos1 = (tdc.m_reg1.strand == '+') ? tdc.m_reg1.pos2 : tdc.m_reg1.pos1;
gr1.pos2 = gr1.pos1;
gr2.pos1 = (tdc.m_reg2.strand) ? tdc.m_reg2.pos2 : tdc.m_reg2.pos1;
gr2.pos1 = (tdc.m_reg2.strand == '+') ? tdc.m_reg2.pos2 : tdc.m_reg2.pos1;
gr2.pos2 = gr2.pos1;
gr1.chr = tdc.m_reg1.chr;
gr2.chr = tdc.m_reg2.chr;
Expand Down Expand Up @@ -454,10 +454,10 @@ namespace SnowTools {
switch(++count) {
case 1: gr1.chr = stoi(val) - 1; break;
case 2: gr1.pos1 = stoi(val); gr1.pos2 = gr1.pos1; break;
case 3: gr1.strand = val.at(0)=='+'; break;
case 3: gr1.strand = val.at(0); break;
case 4: gr2.chr = stoi(val) - 1; break;
case 5: gr2.pos1 = stoi(val); gr2.pos2 = gr2.pos1; break;
case 6: gr2.strand = val.at(0)=='+'; break;
case 6: gr2.strand = val.at(0); break;
//case 7: span = stoi(val); break;
case 8: mapq1 = stoi(val); break;
case 9: mapq2 = stoi(val); break;
Expand Down Expand Up @@ -757,7 +757,6 @@ namespace SnowTools {
GenomicRegion bp2 = gr2;
bp1.pad(PAD);
bp2.pad(PAD);


for (auto& d : dmap)
{
Expand All @@ -770,9 +769,10 @@ namespace SnowTools {

bool pass = bp1reg1 && bp2reg2;

//debug
if (cname=="c_1_6524299_6527299_3")
std::cerr << " HERE " << d.first << " " << d.second << " pass " << pass << std::endl;
if (cname=="c_19_15644356_15645003_48")
std::cerr << " HERE " << d.first << " " << d.second << " pass " << pass << std::endl <<
"BREAKPOINT " << (*this) << " d.second.m_reg1 " << d.second.m_reg1 << " d.second.m_reg2 " << d.second.m_reg2 <<
" bp1 " << bp1 << " bp2 " << bp2 << std::endl;

if (pass)
{
Expand Down
14 changes: 0 additions & 14 deletions src/DiscordantCluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,20 +180,6 @@ namespace SnowTools {
return mean;
}

/** Return the mean mapping quality over all mate reads and reads of this
 * cluster, or 0 when the cluster holds neither.
 */
double DiscordantCluster::getMeanMapq() const
{
  double total = 0.0;
  size_t count = 0;

  // mates are visited first, then reads
  for (auto& m : mates) {
    const int q = m.second.MapQuality();
    total += q;
    ++count;
  }
  for (auto& r : reads) {
    const int q = r.second.MapQuality();
    total += q;
    ++count;
  }

  if (count == 0)
    return 0;
  return total / count;
}

std::string DiscordantCluster::toRegionString() const
{
int pos1 = (m_reg1.strand == '+') ? m_reg1.pos2 : m_reg1.pos1;
Expand Down
11 changes: 11 additions & 0 deletions src/Histogram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,11 @@ v * 7. MISCELLANEOUS
*/

#include "SnowTools/Histogram.h"
#include "SnowTools/SnowUtils.h"
#include <fstream>
#include <cmath>
#include <algorithm>
#include <sstream>

#define BINARY_SEARCH 1

Expand Down Expand Up @@ -107,6 +109,15 @@ void Histogram::addElem(const int32_t& elem) {
++m_bins[retrieveBinID(elem)];
}

/** Serialize the histogram to a compact, single-line string.
 *
 * Every bin whose m_count is non-zero contributes one
 * "<bounds.first>_<bounds.second>_<m_count>" token, each followed by a
 * comma; the accumulated text is then passed through cutLastChar() to drop
 * the final comma.
 *
 * @return The comma-separated bin list
 */
std::string Histogram::toFileString() const {
  std::stringstream buf;
  for (auto& bin : m_bins) {
    if (!bin.m_count)
      continue; // empty bins are not reported
    buf << bin.bounds.first << "_" << bin.bounds.second << "_" << bin.m_count << ",";
  }
  return cutLastChar(buf.str());
}

size_t Histogram::retrieveBinID(const int32_t& elem) const {

if (elem < m_bins[0].bounds.first)
Expand Down
29 changes: 28 additions & 1 deletion src/SnowTools/AlignedContig.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "SnowTools/BreakPoint.h"
#include "SnowTools/DiscordantCluster.h"
#include "SnowTools/BWAWrapper.h"
#include "SnowTools/BamWalker.h"

namespace SnowTools {

Expand Down Expand Up @@ -55,8 +56,15 @@ namespace SnowTools {
*/
bool checkLocal(const GenomicRegion& window);

const BPVec& getIndelBreaks() const { return m_indel_breaks; }
const BPVec& getIndelBreaks() const { return m_indel_breaks; }

/*! Write this fragment's alignment record (m_align) to a BAM file
 * @param bw BamWalker that receives the record through WriteAlignment
 */
void writeToBAM(BamWalker& bw)
{
  bw.WriteAlignment(m_align);
}


private:

BPVec m_indel_breaks; /**< indel variants on this alignment */
Expand Down Expand Up @@ -210,6 +218,25 @@ namespace SnowTools {
*/
bool hasVariant() const;

/*! Write the alignment record of every fragment in m_frag_v to a BAM file
 * @param bw BamWalker opened with OpenWriteBam
 * @note Each fragment is also printed to std::cerr before it is written.
 */
void writeToBAM(BamWalker& bw) {
  for (auto frag = m_frag_v.begin(), last = m_frag_v.end(); frag != last; ++frag) {
    // NOTE(review): this stderr line looks like leftover debug output -- confirm it is still wanted
    std::cerr << "write to bam " << *frag << std::endl;
    frag->writeToBAM(bw);
  }
}

/*! Write every read held in m_bamreads (the sequencing reads as aligned
 * to the contig) to a BAM file
 * @param bw BamWalker opened with OpenWriteBam
 */
void writeAlignedReadsToBAM(BamWalker& bw) {
  for (auto read = m_bamreads.begin(), last = m_bamreads.end(); read != last; ++read) {
    bw.WriteAlignment(*read);
  }
}


bool hasLocal() const { for (auto& i : m_frag_v) if (i.local) return true; return false; }

/*! @function retrieves all of the breakpoints by combining indels with global multi-map break
Expand Down
1 change: 1 addition & 0 deletions src/SnowTools/BamStats.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class BamReadGroup {
size_t unmap;
size_t qcfail;
size_t duplicate;
size_t mate_unmap;

Histogram mapq;
Histogram nm;
Expand Down
4 changes: 1 addition & 3 deletions src/SnowTools/DiscordantCluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,7 @@ namespace SnowTools
void addMateReads(const BamReadVector& bav);

// return the mean mapping quality for this cluster
double getMeanMapq(bool mate) const;

double getMeanMapq() const;
double getMeanMapq(bool mate = false) const;

/** Return the discordant cluster as a string with just coordinates */
std::string toRegionString() const;
Expand Down
2 changes: 2 additions & 0 deletions src/SnowTools/Histogram.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ class Histogram {
*/
Histogram(const int32_t& start, const int32_t& end, const uint32_t& width);

std::string toFileString() const;

friend std::ostream& operator<<(std::ostream &out, const Histogram &h) {
for (auto& i : h.m_bins)
out << i << std::endl;
Expand Down

0 comments on commit 7b1afc3

Please sign in to comment.