From ec4371ce9207a89a6d71c3e7256825f1fa83d6c6 Mon Sep 17 00:00:00 2001 From: Itolstoganov Date: Tue, 27 Jun 2023 14:14:12 +0200 Subject: [PATCH] Sparsify barcode edge coverage --- .../common/barcode_index/barcode_index.hpp | 124 ++++++++---------- .../barcode_index/barcode_info_extractor.hpp | 11 -- .../print_barcodes_command.hpp | 3 - 3 files changed, 55 insertions(+), 83 deletions(-) diff --git a/assembler/src/common/barcode_index/barcode_index.hpp b/assembler/src/common/barcode_index/barcode_index.hpp index 269730e4e0..cf6a557775 100644 --- a/assembler/src/common/barcode_index/barcode_index.hpp +++ b/assembler/src/common/barcode_index/barcode_index.hpp @@ -15,6 +15,7 @@ #include #include +#include using std::string; using std::istringstream; @@ -307,21 +308,17 @@ inline std::istream& operator >>(std::istream& os, SimpleBarcodeInfo& info) */ class FrameBarcodeInfo { public: - typedef boost::dynamic_bitset<> IsOnFrameT; - /** * * @param frames Number of bin in the edge * @return empty info */ - FrameBarcodeInfo(size_t frames = 0): count_(0), is_on_frame_(), leftmost_index_(frames), rightmost_index_(0) { - is_on_frame_.resize(frames, false); - } + FrameBarcodeInfo(size_t frames = 0): count_(0), covered_bins_(), leftmost_index_(frames), rightmost_index_(0) {} void Update(size_t count, size_t left_frame, size_t right_frame) { count_ += count; for (size_t i = left_frame; i <= right_frame; ++i) { - is_on_frame_.set(i); + covered_bins_.insert(i); } leftmost_index_ = std::min(left_frame, leftmost_index_); rightmost_index_ = std::max(right_frame, rightmost_index_); @@ -330,9 +327,11 @@ class FrameBarcodeInfo { void Update(const FrameBarcodeInfo& other) { TRACE(count_); TRACE(other.count_); - TRACE(is_on_frame_.size()); - TRACE(other.is_on_frame_.size()); - is_on_frame_ |= other.is_on_frame_; + TRACE(covered_bins_.size()); + TRACE(other.covered_bins_.size()); + for (const auto &frame: other.covered_bins_) { + covered_bins_.insert(frame); + } leftmost_index_ = std::min(leftmost_index_, other.leftmost_index_); rightmost_index_ = std::max(rightmost_index_, other.rightmost_index_); count_ += other.count_; @@ -359,32 +358,12 @@ class FrameBarcodeInfo { return rightmost_index_; } - const IsOnFrameT& GetBitSet() const { - return is_on_frame_; - } - - /** - * @param frame index of bin - * @return true if bin is barcoded, false otherwise - */ - bool GetFrame(size_t frame) const { - return is_on_frame_[frame]; - } - - /** - * - * @return number of frames - */ - size_t GetSize() const { - return is_on_frame_.size(); - } - /** * * @return number of barcoded bins */ size_t GetCovered() const { - return is_on_frame_.count(); + return covered_bins_.size(); } void SetCount(size_t count) { @@ -399,33 +378,28 @@ class FrameBarcodeInfo { rightmost_index_ = index; } - void SetBitSet(const IsOnFrameT &bitset) { - is_on_frame_ = bitset; - } - void BinRead(std::istream &str) { using io::binary::BinRead; auto count = BinRead(str); SetCount(count); - auto set_positions = BinRead>(str); - VERIFY_DEV(set_positions.back() < is_on_frame_.size()); - TRACE("Last position: " << set_positions.back()); - TRACE("Bitset size: " << is_on_frame_.size()); - for (const auto &pos: set_positions) { + auto num_positions = BinRead(str); + size_t min_pos = std::numeric_limits::max(); + size_t max_pos = 0; + for (size_t i = 0; i < num_positions; ++i) { + auto pos = BinRead(str); TRACE("Position: " << pos); - is_on_frame_.set(pos, true); - } - - SetLeftMost(is_on_frame_.find_first()); - size_t rightmost = 0; - for (size_t i = is_on_frame_.size() - 1; i > 0; --i) { - if (is_on_frame_.test(i)) { - rightmost = i; - break; + covered_bins_.insert(pos); + if (pos < min_pos) { + min_pos = pos; + } + if (pos > max_pos) { + max_pos = pos; } } - SetRightMost(rightmost); + + SetLeftMost(min_pos); + SetRightMost(max_pos); TRACE("Leftmost: " << GetLeftMost()); TRACE("Rightmost: " << GetRightMost()); } @@ -433,16 +407,11 @@ class FrameBarcodeInfo { void BinWrite(std::ostream &str) const { using io::binary::BinWrite; BinWrite(str, GetCount()); + BinWrite(str, GetCovered()); - std::vector set_positions; - size_t current_set_pos = GetBitSet().find_first(); - TRACE("Size: " << GetBitSet().size()); - while (current_set_pos != IsOnFrameT::npos) { - TRACE("Current set position: " << current_set_pos); - set_positions.push_back(current_set_pos); - current_set_pos = GetBitSet().find_next(current_set_pos); + for (const size_t &pos: covered_bins_) { + BinWrite(str, pos); } - BinWrite(str, set_positions); } @@ -455,9 +424,9 @@ class FrameBarcodeInfo { */ size_t count_; /** - * `is_on_frame[i]` is true iff ith bin is barcoded + * Bins covered by the barcode */ - boost::dynamic_bitset<> is_on_frame_; + std::unordered_set covered_bins_; /** * Leftmost barcoded bin */ @@ -472,23 +441,40 @@ class FrameBarcodeInfo { inline std::ostream& operator <<(std::ostream& os, const FrameBarcodeInfo& info) { - os << info.count_ << " " << info.is_on_frame_; + os << info.count_ << " " << info.covered_bins_.size(); + for (const auto &bin: info.covered_bins_) { + os << bin << " "; + } return os; } inline std::istream& operator >>(std::istream& is, FrameBarcodeInfo& info) { - is >> info.count_; - is >> info.is_on_frame_; - info.leftmost_index_ = info.is_on_frame_.find_first(); - size_t rightmost = 0; - for (size_t i = info.is_on_frame_.size() - 1; i > 0; --i) { - if (info.is_on_frame_.test(i)) { - rightmost = i; - break; + using io::binary::BinRead; + size_t count; + is >> count; + info.SetCount(count); + + size_t num_of_bins; + is >> num_of_bins; + size_t min_pos = std::numeric_limits::max(); + size_t max_pos = 0; + for (size_t i = 0; i < num_of_bins; ++i) { + size_t pos = 0; + is >> pos; + TRACE("Position: " << pos); + if (pos < min_pos) { + min_pos = pos; + } + if (pos > max_pos) { + max_pos = pos; } + info.covered_bins_.insert(pos); } - info.rightmost_index_ = rightmost; + info.SetLeftMost(min_pos); + info.SetRightMost(max_pos); + TRACE("Leftmost: " << info.GetLeftMost()); + TRACE("Rightmost: " << info.GetRightMost()); return is; } diff --git a/assembler/src/common/barcode_index/barcode_info_extractor.hpp b/assembler/src/common/barcode_index/barcode_info_extractor.hpp index 7568b61aeb..41f0d4254f 100644 --- a/assembler/src/common/barcode_index/barcode_info_extractor.hpp +++ b/assembler/src/common/barcode_index/barcode_info_extractor.hpp @@ -341,17 +341,6 @@ class FrameBarcodeIndexInfoExtractorTemplate : public BarcodeIndexInfoExtractor< return this->GetInfo(edge, barcode).GetRightMost(); } - - /** - * - * @param edge - * @param barcode - * @return bitset representing barcoded bins of the edge - */ - const boost::dynamic_bitset<>& GetBitSet(const EdgeId& edge, const BarcodeId& barcode) const { - return this->GetInfo(edge, barcode).GetBitSet(); - } - /** * @param edge * @return length of the bin diff --git a/assembler/src/projects/online_vis/statistics_commands/print_barcodes_command.hpp b/assembler/src/projects/online_vis/statistics_commands/print_barcodes_command.hpp index e1d5c1de5c..ba97f33297 100644 --- a/assembler/src/projects/online_vis/statistics_commands/print_barcodes_command.hpp +++ b/assembler/src/projects/online_vis/statistics_commands/print_barcodes_command.hpp @@ -107,10 +107,7 @@ namespace online_visualization { PrintElement(bitset, bitset_width, fout); } else { const size_t reads = barcode_extractor_.GetNumberOfReads(edge, barcode); - const auto bitset = barcode_extractor_.GetBitSet(edge, barcode); - const size_t bitset_width = bitset.size() + 5; PrintElement(reads, number_of_reads_width_, fout); - PrintElement(bitset, bitset_width, fout); } } fout << endl;