From 3145f9294ddc6a417a7e48c3e4f9001eea3eaee5 Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Fri, 28 Jul 2023 16:09:50 +0200 Subject: [PATCH] Try less memory usage (#516) * Try less memory usage * fixes * Update columnstore2.cpp * Add benchmark * Add benchmark * Add benchmark --- core/formats/columnstore2.cpp | 3 ++ core/formats/columnstore2.hpp | 37 +++++++++-------- microbench/CMakeLists.txt | 36 ++++++++++++++++ microbench/column_writer_benchmark.cpp | 57 ++++++++++++++++++++++++++ microbench/crc_benchmark.cpp | 22 ++++++++++ 5 files changed, 137 insertions(+), 18 deletions(-) create mode 100644 microbench/column_writer_benchmark.cpp diff --git a/core/formats/columnstore2.cpp b/core/formats/columnstore2.cpp index a4578ec65..a6f763345 100644 --- a/core/formats/columnstore2.cpp +++ b/core/formats/columnstore2.cpp @@ -1286,6 +1286,9 @@ void column::flush_block() { math::ceil64(docs_count, packed::BLOCK_SIZE_64); auto* begin = addr_table_.begin(); auto* end = begin + addr_table_size; + if (auto* it = addr_table_.current(); it != end) { + std::memset(it, 0, (end - it) * sizeof(*it)); + } bool all_equal = !data_.file.length(); if (!all_equal) { diff --git a/core/formats/columnstore2.hpp b/core/formats/columnstore2.hpp index d16b797fd..299d4b1ba 100644 --- a/core/formats/columnstore2.hpp +++ b/core/formats/columnstore2.hpp @@ -90,45 +90,46 @@ class column final : public irs::column_output { class address_table { public: + address_table(ManagedTypedAllocator<uint64_t> alloc) : alloc_{alloc} { + offsets_ = alloc_.allocate(kBlockSize); + offset_ = offsets_; + } + + ~address_table() { alloc_.deallocate(offsets_, kBlockSize); } + uint64_t back() const noexcept { - IRS_ASSERT(offset_ > offsets_); - return *(offset_ - 1); + IRS_ASSERT(offsets_ < offset_); + return offset_[-1]; } void push_back(uint64_t offset) noexcept { - IRS_ASSERT(offset_ >= offsets_); IRS_ASSERT(offset_ < offsets_ + kBlockSize); *offset_++ = offset; - IRS_ASSERT(offset 
>= offset_[-1]); } void pop_back() noexcept { - IRS_ASSERT(offset_ > offsets_); - *--offset_ = 0; + IRS_ASSERT(offsets_ < offset_); + --offset_; } - // returns number of items to be flushed uint32_t size() const noexcept { - IRS_ASSERT(offset_ >= offsets_); - return uint32_t(offset_ - offsets_); + return static_cast<uint32_t>(offset_ - offsets_); } bool empty() const noexcept { return offset_ == offsets_; } - bool full() const noexcept { return offset_ == std::end(offsets_); } + bool full() const noexcept { return offset_ == offsets_ + kBlockSize; } - void reset() noexcept { - std::memset(offsets_, 0, sizeof offsets_); - offset_ = std::begin(offsets_); - } + void reset() noexcept { offset_ = offsets_; } - uint64_t* begin() noexcept { return std::begin(offsets_); } + uint64_t* begin() noexcept { return offsets_; } uint64_t* current() noexcept { return offset_; } - uint64_t* end() noexcept { return std::end(offsets_); } + uint64_t* end() noexcept { return offsets_ + kBlockSize; } private: - uint64_t offsets_[kBlockSize]{}; - uint64_t* offset_{offsets_}; + ManagedTypedAllocator<uint64_t> alloc_; + uint64_t* offsets_{nullptr}; + uint64_t* offset_{nullptr}; }; void prepare(doc_id_t key); diff --git a/microbench/CMakeLists.txt b/microbench/CMakeLists.txt index 89d7ff6e9..08224f0dd 100644 --- a/microbench/CMakeLists.txt +++ b/microbench/CMakeLists.txt @@ -58,3 +58,39 @@ include_directories(iresearch-microbench $ $ ) + +add_executable(iresearch-memory-file-bench + memory_file_benchmark.cpp + ) + +set_ipo(iresearch-memory-file-bench) + +add_dependencies(iresearch-memory-file-bench + iresearch-static + ) + +target_include_directories(iresearch-memory-file-bench + PRIVATE ${PROJECT_BINARY_DIR}/core + ) + +target_link_libraries(iresearch-memory-file-bench + iresearch-static + ) + +add_executable(iresearch-column-writer + column_writer_benchmark.cpp + ) + +set_ipo(iresearch-column-writer) + +add_dependencies(iresearch-column-writer + iresearch-static + ) + 
+target_include_directories(iresearch-column-writer + PRIVATE ${PROJECT_BINARY_DIR}/core + ) + +target_link_libraries(iresearch-column-writer + iresearch-static + ) diff --git a/microbench/column_writer_benchmark.cpp b/microbench/column_writer_benchmark.cpp new file mode 100644 index 000000000..44d03787d --- /dev/null +++ b/microbench/column_writer_benchmark.cpp @@ -0,0 +1,57 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Valerii Mironov +//////////////////////////////////////////////////////////////////////////////// + +#include <random> +#include <thread> +#include <vector> + +#include "formats/columnstore2.hpp" + +static constexpr size_t kThreads = 1; +static constexpr size_t kColumns = 1000; +static constexpr size_t kColumnsIter = kColumns * 10; + +static void WriteFile(std::mt19937_64& rng) { + auto writer = irs::columnstore2::make_writer( + irs::columnstore2::Version::kMax, false, irs::IResourceManager::kNoop); + // const auto size = size_t{1} << kFileSizePower(rng); + for (size_t i = 0; i != kColumnsIter; ++i) { + if (i % kColumns == 0) { + writer->rollback(); + } + writer->push_column({}, {}); + } +} + +int main() { + std::vector<std::thread> threads; + threads.reserve(kThreads); + for (size_t i = 0; i != kThreads; ++i) { + threads.emplace_back([i] { + std::mt19937_64 rng(43 * i); + WriteFile(rng); + }); + } + for (auto& thread : threads) { + thread.join(); + } +} diff --git a/microbench/crc_benchmark.cpp b/microbench/crc_benchmark.cpp index 03a754442..c99c9ddd4 100644 --- a/microbench/crc_benchmark.cpp +++ b/microbench/crc_benchmark.cpp @@ -1,3 +1,25 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Valerii Mironov +//////////////////////////////////////////////////////////////////////////////// + #include #include