From fdcdf4c4721c874b9c47425fead17d130f90440f Mon Sep 17 00:00:00 2001 From: walaj Date: Wed, 19 Apr 2017 14:18:33 -0400 Subject: [PATCH 1/2] added GetFloatTag --- SeqLib/BamRecord.h | 20 ++++++++++++++++++++ htslib | 2 +- src/BamRecord.cpp | 20 ++++++++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/SeqLib/BamRecord.h b/SeqLib/BamRecord.h index 45a6e625c..a8bd5be88 100644 --- a/SeqLib/BamRecord.h +++ b/SeqLib/BamRecord.h @@ -664,6 +664,13 @@ class BamRecord { */ bool GetZTag(const std::string& tag, std::string& s) const; + /** Get a string of either Z, f or i type. Useful if tag type not known at compile time. + * @param tag Name of the tag. eg "XP" + * @param s The string to be filled in with the tag information + * @return Returns true if the tag is present and is either Z or i, even if empty. Return false if no tag or not Z or i. + */ + bool GetTag(const std::string& tag, std::string& s) const; + /** Get a vector of type int from a Z tag delimited by "^" * Smart-tags allow one to store vectors of strings, ints or doubles in the alignment tags, and * do not require an additional data structure on top of bseq1_t. @@ -703,6 +710,19 @@ class BamRecord { return true; } + /** Get a float (f) tag + * @param tag Name of the tag. eg "AS" + * @param t Value to be filled in with the tag value. + * @return Return true if the tag exists. + */ + inline bool GetFloatTag(const std::string& tag, float& t) const { + uint8_t* p = bam_aux_get(b.get(),tag.c_str()); + if (!p) + return false; + t = bam_aux2f(p); + return true; + } + /** Add a string (Z) tag * @param tag Name of the tag. eg "XP" * @param val Value for the tag diff --git a/htslib b/htslib index 5163833ed..293a426d2 160000 --- a/htslib +++ b/htslib @@ -1 +1 @@ -Subproject commit 5163833ede355f8c2a6788780c55d7598279e767 +Subproject commit 293a426d2348229d3fede23d3b212c6c9bedaeb8 diff --git a/src/BamRecord.cpp b/src/BamRecord.cpp index 7ad8813ba..ae96628bf 100644 --- a/src/BamRecord.cpp +++ b/src/BamRecord.cpp @@ -444,6 +444,26 @@ namespace SeqLib { bam_aux_append(b.get(), tag.data(), 'Z', val.length()+1, (uint8_t*)val.c_str()); } + bool BamRecord::GetTag(const std::string& tag, std::string& s) const { + if (GetZTag(tag, s)) + return true; + int32_t t; + if (GetIntTag(tag, t)) { + std::stringstream ss; + ss << t; + s = ss.str(); + return true; + } + float f; + if (GetFloatTag(tag, f)) { + std::stringstream ss; + ss << f; + s = ss.str(); + return true; + } + return false; + } + bool BamRecord::GetZTag(const std::string& tag, std::string& s) const { uint8_t* p = bam_aux_get(b.get(),tag.c_str()); if (!p) From ad8a854b44b26260b49cbe2ae2ef714a16f61e49 Mon Sep 17 00:00:00 2001 From: walaj Date: Tue, 25 Apr 2017 13:45:02 -0400 Subject: [PATCH 2/2] BamReader now throws error if sam_read1 value < -1 --- SeqLib/BamReader.h | 3 ++- htslib | 2 +- src/BamReader.cpp | 35 +++++++++++++++++++++++++++-------- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/SeqLib/BamReader.h b/SeqLib/BamReader.h index d0f3aa03b..684c2e8da 100644 --- a/SeqLib/BamReader.h +++ b/SeqLib/BamReader.h @@ -42,7 +42,8 @@ namespace SeqLib { private: // do the read loading - bool load_read(BamRecord& r); + // the return value here is just passed along from sam_read1 + int32_t load_read(BamRecord& r); void reset() { empty = true; diff --git a/htslib b/htslib index 293a426d2..5163833ed 160000 --- a/htslib +++ b/htslib @@ -1 +1 @@ -Subproject commit 293a426d2348229d3fede23d3b212c6c9bedaeb8 +Subproject commit 5163833ede355f8c2a6788780c55d7598279e767 diff --git a/src/BamReader.cpp b/src/BamReader.cpp index 7c82748c9..60df94352 100644 --- a/src/BamReader.cpp +++ b/src/BamReader.cpp @@ -224,13 +224,24 @@ bool BamReader::GetNextRecord(BamRecord& r) { // shortcut if we have only a single bam if (m_bams.size() == 1) { + if (m_bams.begin()->second.fp.get() == NULL || m_bams.begin()->second.mark_for_closure) // cant read if not opened return false; - if (m_bams.begin()->second.load_read(r)) { // try to read + + // try and get the next read + int32_t status = m_bams.begin()->second.load_read(r); + if (status >= 0) return true; + if (status == -1) { + // didn't find anything, clear it + m_bams.begin()->second.mark_for_closure = true; + return false; } - // didn't find anything, clear it - m_bams.begin()->second.mark_for_closure = true; + + // run time error + std::stringstream ss; + ss << "sam_read1 return status: " << status << " file: " << m_bams.begin()->first; + throw std::runtime_error(ss.str()); return false; } @@ -253,10 +264,17 @@ bool BamReader::GetNextRecord(BamRecord& r) { continue; // load the next read - if (!tb->load_read(r)) { // if cant load, mark for closing + int32_t status = tb->load_read(r); + if (status == -1) { + // can't load, so mark for closing tb->empty = true; tb->mark_for_closure = true; // no more reads in this BAM continue; + } else if (status < 0) { // error sent back from sam_read1 + // run time error + std::stringstream ss; + ss << "sam_read1 return status: " << status << " file: " << bam->first; + throw std::runtime_error(ss.str()); } } @@ -302,7 +320,7 @@ std::string BamReader::PrintRegions() const { } - bool _Bam::load_read(BamRecord& r) { + int32_t _Bam::load_read(BamRecord& r) { // allocated the memory bam1_t* b = bam_init1(); @@ -310,6 +328,7 @@ std::string BamReader::PrintRegions() const { if (hts_itr.get() == NULL) { valid = sam_read1(fp.get(), m_hdr.get_(), b); + if (valid < 0) { #ifdef DEBUG_WALKER @@ -317,7 +336,7 @@ std::string BamReader::PrintRegions() const { #endif //goto endloop; bam_destroy1(b); - return false; + return valid; } } else { @@ -336,7 +355,7 @@ std::string BamReader::PrintRegions() const { ++m_region_idx; // increment to next region if (m_region_idx >= m_region->size()) { bam_destroy1(b); - return false; + return valid; } //goto endloop; @@ -351,7 +370,7 @@ std::string BamReader::PrintRegions() const { next_read.assign(b); // assign the shared_ptr for the bam1_t r = next_read; - return true; + return valid; } std::ostream& operator<<(std::ostream& out, const BamReader& b)