From 7b1afc3c0c52e6c5a2fc6dbee0d7cf79aa1ebb6d Mon Sep 17 00:00:00 2001 From: Jeremiah Wala Date: Tue, 7 Jul 2015 12:12:38 -0400 Subject: [PATCH] Updated support for BamStats --- src/AlignedContig.cpp | 9 +++++++- src/BamStats.cpp | 38 ++++++++++++++++++++++++++++++- src/BamWalker.cpp | 8 ++++++- src/BreakPoint.cpp | 20 ++++++++-------- src/DiscordantCluster.cpp | 14 ------------ src/Histogram.cpp | 11 +++++++++ src/SnowTools/AlignedContig.h | 29 ++++++++++++++++++++++- src/SnowTools/BamStats.h | 1 + src/SnowTools/DiscordantCluster.h | 4 +--- src/SnowTools/Histogram.h | 2 ++ 10 files changed, 105 insertions(+), 31 deletions(-) diff --git a/src/AlignedContig.cpp b/src/AlignedContig.cpp index d5798062c..d02b4014e 100644 --- a/src/AlignedContig.cpp +++ b/src/AlignedContig.cpp @@ -236,11 +236,18 @@ void AlignedContig::setMultiMapBreakPairs() { bp.gr1 = SnowTools::GenomicRegion(it->m_align.ChrID(), it->gbreak2, it->gbreak2); bp.gr2 = SnowTools::GenomicRegion((it+1)->m_align.ChrID(), (it+1)->gbreak1, (it+1)->gbreak1); + + //debug + if (getContigName() == "c_19_15644356_15645003_48") + std::cerr << "Frag 1: " << (*it) << " Frag 2: " << (*(it+1)) << std::endl; + //bp.gr1.strand = it->align.IsReverseStrand() ? '-' : '+'; //bp.gr2.strand = (it+1)->align.IsReverseStrand() ? '+' : '-'; - bp.gr1.strand = !it->m_align.ReverseFlag() ? '+' : '-'; + + bp.gr1.strand = it->m_align.ReverseFlag() ? '-' : '+'; bp.gr2.strand = (it+1)->m_align.ReverseFlag() ? '+' : '-'; + bp.cpos1 = it->break2; // take the right-most breakpoint as the first bp.cpos2 = (it+1)->break1; // take the left-most of the next one diff --git a/src/BamStats.cpp b/src/BamStats.cpp index b27ab578c..52f0e3d6a 100644 --- a/src/BamStats.cpp +++ b/src/BamStats.cpp @@ -6,7 +6,7 @@ namespace SnowTools { BamReadGroup::BamReadGroup(const std::string& name) : reads(0), supp(0), unmap(0), qcfail(0), - duplicate(0), m_name(name) + duplicate(0), mate_unmap(0), m_name(name) { mapq = Histogram(0,100,1); @@ -18,8 +18,44 @@ BamReadGroup::BamReadGroup(const std::string& name) : reads(0), supp(0), unmap(0 } + std::ostream& operator<<(std::ostream& out, const BamStats& qc) { + out << "ReadGroup\tReadCount\tSupplementary\tUnmapped\tMateUnmapped\tQCFailed\tDuplicate\tMappingQuality\tNM\tInsertSize\tClippedBases\tMeanPhredScore\tReadLength" << std::endl; + for (auto& i : qc.m_group_map) + out << i.second << std::endl; + } + + std::ostream& operator<<(std::ostream& out, const BamReadGroup& qc) { + std::string sep = "\t"; + out << qc.m_name << sep << qc.reads << sep << + qc.supp << sep << + qc.unmap << sep << + qc.mate_unmap << sep << + qc.qcfail << sep << + qc.duplicate << sep << + qc.mapq.toFileString() << sep << + qc.nm.toFileString() << sep << + qc.isize.toFileString() << sep << + qc.clip.toFileString() << sep << + qc.phred.toFileString() << sep << + qc.len.toFileString(); + return out; + } + void BamReadGroup::addRead(BamRead &r) { + + ++reads; + if (r.SecondaryFlag()) + ++supp; + if (r.QCFailFlag()) + ++qcfail; + if (r.DuplicateFlag()) + ++duplicate; + if (!r.MappedFlag()) + ++unmap; + if (!r.MateMappedFlag()) + ++mate_unmap; + int mapqr = r.MapQuality(); if (mapqr >=0 && mapqr <= 100) mapq.addElem(mapqr); diff --git a/src/BamWalker.cpp b/src/BamWalker.cpp index 5cc67eeae..ea406e5a0 100644 --- a/src/BamWalker.cpp +++ b/src/BamWalker.cpp @@ -291,7 +291,10 @@ void BamWalker::WriteAlignment(BamRead &r) for (auto& i : m_tag_list) r.RemoveTag(i.c_str()); - sam_write1(fop, br.get(), r.raw()); + if (!fop) + std::cerr << "BamWalker ERROR in writeAlignment. Did you forget to open the Bam for writing (OpenWriteBam)? Skipping write" << std::endl; + else + sam_write1(fop, br.get(), r.raw()); } std::ostream& SnowTools::operator<<(std::ostream& out, const BamWalker& b) @@ -307,6 +310,9 @@ std::ostream& SnowTools::operator<<(std::ostream& out, const BamWalker& b) out << "CRAM" << std::endl; else if (b.fop->format.format == text_format) out << "SAM" << std::endl; + else if (b.fop == 0) + out << "NONE" << std::endl; + out << b.m_mr << std::endl; if (b.m_region.size()) { diff --git a/src/BreakPoint.cpp b/src/BreakPoint.cpp index e7e455000..b6f02a270 100644 --- a/src/BreakPoint.cpp +++ b/src/BreakPoint.cpp @@ -251,8 +251,8 @@ namespace SnowTools { } // TODO convert chr to string with treader - ss << gr1.chr+1 << sep << gr1.pos1 << sep << (gr1.strand ? '+' : '-') << sep - << gr2.chr+1 << sep << gr2.pos1 << sep << (gr2.strand ? '+' : '-') << sep + ss << gr1.chr+1 << sep << gr1.pos1 << sep << gr1.strand << sep + << gr2.chr+1 << sep << gr2.pos1 << sep << gr2.strand << sep << getSpan() << sep << mapq1 << sep << mapq2 << sep << nsplit << sep << tsplit << sep @@ -279,9 +279,9 @@ namespace SnowTools { num_align = 0; dc = tdc; - gr1.pos1 = (tdc.m_reg1.strand) ? tdc.m_reg1.pos2 : tdc.m_reg1.pos1; + gr1.pos1 = (tdc.m_reg1.strand == '+') ? tdc.m_reg1.pos2 : tdc.m_reg1.pos1; gr1.pos2 = gr1.pos1; - gr2.pos1 = (tdc.m_reg2.strand) ? tdc.m_reg2.pos2 : tdc.m_reg2.pos1; + gr2.pos1 = (tdc.m_reg2.strand == '+') ? tdc.m_reg2.pos2 : tdc.m_reg2.pos1; gr2.pos2 = gr2.pos1; gr1.chr = tdc.m_reg1.chr; gr2.chr = tdc.m_reg2.chr; @@ -454,10 +454,10 @@ namespace SnowTools { switch(++count) { case 1: gr1.chr = stoi(val) - 1; break; case 2: gr1.pos1 = stoi(val); gr1.pos2 = gr1.pos1; break; - case 3: gr1.strand = val.at(0)=='+'; break; + case 3: gr1.strand = val.at(0); break; case 4: gr2.chr = stoi(val) - 1; break; case 5: gr2.pos1 = stoi(val); gr2.pos2 = gr2.pos1; break; - case 6: gr2.strand = val.at(0)=='+'; break; + case 6: gr2.strand = val.at(0); break; //case 7: span = stoi(val); break; case 8: mapq1 = stoi(val); break; case 9: mapq2 = stoi(val); break; @@ -757,7 +757,6 @@ namespace SnowTools { GenomicRegion bp2 = gr2; bp1.pad(PAD); bp2.pad(PAD); - for (auto& d : dmap) { @@ -770,9 +769,10 @@ namespace SnowTools { bool pass = bp1reg1 && bp2reg2; - //debug - if (cname=="c_1_6524299_6527299_3") - std::cerr << " HERE " << d.first << " " << d.second << " pass " << pass << std::endl; + if (cname=="c_19_15644356_15645003_48") + std::cerr << " HERE " << d.first << " " << d.second << " pass " << pass << std::endl << + "BREAKPOINT " << (*this) << " d.second.m_reg1 " << d.second.m_reg1 << " d.second.m_reg2 " << d.second.m_reg2 << + " bp1 " << bp1 << " bp2 " << bp2 << std::endl; if (pass) { diff --git a/src/DiscordantCluster.cpp b/src/DiscordantCluster.cpp index 1007e736e..1d42a8fc2 100644 --- a/src/DiscordantCluster.cpp +++ b/src/DiscordantCluster.cpp @@ -180,20 +180,6 @@ namespace SnowTools { return mean; } - double DiscordantCluster::getMeanMapq() const - { - double mean = 0; - std::vector tmapq; - for (auto& i : mates) - tmapq.push_back(i.second.MapQuality()); - for (auto& i : reads) - tmapq.push_back(i.second.MapQuality()); - - if (tmapq.size() > 0) - mean = accumulate(tmapq.begin(), tmapq.end(), 0.0) / tmapq.size(); - return mean; - } - std::string DiscordantCluster::toRegionString() const { int pos1 = (m_reg1.strand == '+') ? m_reg1.pos2 : m_reg1.pos1; diff --git a/src/Histogram.cpp b/src/Histogram.cpp index c1720f7d9..7eed6cf98 100644 --- a/src/Histogram.cpp +++ b/src/Histogram.cpp @@ -48,9 +48,11 @@ v * 7. MISCELLANEOUS */ #include "SnowTools/Histogram.h" +#include "SnowTools/SnowUtils.h" #include #include #include +#include #define BINARY_SEARCH 1 @@ -107,6 +109,15 @@ void Histogram::addElem(const int32_t& elem) { ++m_bins[retrieveBinID(elem)]; } +std::string Histogram::toFileString() const { + std::stringstream ss; + for (auto& i : m_bins) + if (i.m_count) + ss << i.bounds.first << "_" << i.bounds.second << "_" << i.m_count << ","; + return(cutLastChar(ss.str())); // trim off last comma + +} + size_t Histogram::retrieveBinID(const int32_t& elem) const { if (elem < m_bins[0].bounds.first) diff --git a/src/SnowTools/AlignedContig.h b/src/SnowTools/AlignedContig.h index c3e891d14..d1469fef3 100644 --- a/src/SnowTools/AlignedContig.h +++ b/src/SnowTools/AlignedContig.h @@ -7,6 +7,7 @@ #include "SnowTools/BreakPoint.h" #include "SnowTools/DiscordantCluster.h" #include "SnowTools/BWAWrapper.h" +#include "SnowTools/BamWalker.h" namespace SnowTools { @@ -55,8 +56,15 @@ namespace SnowTools { */ bool checkLocal(const GenomicRegion& window); - const BPVec& getIndelBreaks() const { return m_indel_breaks; } + const BPVec& getIndelBreaks() const { return m_indel_breaks; } + /*! Write the alignment record to a BAM file + */ + void writeToBAM(BamWalker& bw) { + bw.WriteAlignment(m_align); + } + + private: BPVec m_indel_breaks; /**< indel variants on this alignment */ @@ -210,6 +218,25 @@ namespace SnowTools { */ bool hasVariant() const; + /*! Write all of the alignment records to a BAM file + * @param bw BamWalker opened with OpenWriteBam + */ + void writeToBAM(BamWalker& bw) { + for (auto& i : m_frag_v) { + std::cerr << "write to bam " << i << std::endl; + i.writeToBAM(bw); + } + } + + /*! Write all of the sequencing reads as aligned to contig to a BAM file + * @param bw BamWalker opened with OpenWriteBam + */ + void writeAlignedReadsToBAM(BamWalker& bw) { + for (auto& i : m_bamreads) + bw.WriteAlignment(i); + } + + bool hasLocal() const { for (auto& i : m_frag_v) if (i.local) return true; return false; } /*! @function retrieves all of the breakpoints by combining indels with global mutli-map break diff --git a/src/SnowTools/BamStats.h b/src/SnowTools/BamStats.h index 9452ff28b..637ce701d 100644 --- a/src/SnowTools/BamStats.h +++ b/src/SnowTools/BamStats.h @@ -43,6 +43,7 @@ class BamReadGroup { size_t unmap; size_t qcfail; size_t duplicate; + size_t mate_unmap; Histogram mapq; Histogram nm; diff --git a/src/SnowTools/DiscordantCluster.h b/src/SnowTools/DiscordantCluster.h index 83dad858f..ec8f2608b 100644 --- a/src/SnowTools/DiscordantCluster.h +++ b/src/SnowTools/DiscordantCluster.h @@ -44,9 +44,7 @@ namespace SnowTools void addMateReads(const BamReadVector& bav); // return the mean mapping quality for this cluster - double getMeanMapq(bool mate) const; - - double getMeanMapq() const; + double getMeanMapq(bool mate = false) const; /** Return the discordant cluster as a string with just coordinates */ std::string toRegionString() const; diff --git a/src/SnowTools/Histogram.h b/src/SnowTools/Histogram.h index f945e6439..a4faffcc1 100644 --- a/src/SnowTools/Histogram.h +++ b/src/SnowTools/Histogram.h @@ -104,6 +104,8 @@ class Histogram { */ Histogram(const int32_t& start, const int32_t& end, const uint32_t& width); + std::string toFileString() const; + friend std::ostream& operator<<(std::ostream &out, const Histogram &h) { for (auto& i : h.m_bins) out << i << std::endl;