Skip to content
This repository has been archived by the owner on May 3, 2024. It is now read-only.

Commit

Permalink
Try less memory usage (#516)
Browse files Browse the repository at this point in the history
* Try less memory usage

* fixes

* Update columnstore2.cpp

* Add benchmark

* Add benchmark

* Add benchmark
  • Loading branch information
MBkkt committed Aug 14, 2023
1 parent b102ae8 commit 3145f92
Show file tree
Hide file tree
Showing 5 changed files with 137 additions and 18 deletions.
3 changes: 3 additions & 0 deletions core/formats/columnstore2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1286,6 +1286,9 @@ void column::flush_block() {
math::ceil64(docs_count, packed::BLOCK_SIZE_64);
auto* begin = addr_table_.begin();
auto* end = begin + addr_table_size;
if (auto* it = addr_table_.current(); it != end) {
std::memset(it, 0, (end - it) * sizeof(*it));
}

bool all_equal = !data_.file.length();
if (!all_equal) {
Expand Down
37 changes: 19 additions & 18 deletions core/formats/columnstore2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,45 +90,46 @@ class column final : public irs::column_output {

// Bounded table of uint64_t offsets with room for kBlockSize entries.
// `offsets_` is the start of the storage and `offset_` is the write cursor
// (one past the last stored entry).
//
// NOTE(review): this text comes from a diff view. Several members appear
// twice, presumably the removed line(s) directly followed by their
// replacement - confirm against the actual header before relying on it.
class address_table {
public:
// Copies the supplied allocator, obtains storage for kBlockSize entries from
// it and starts with an empty table (cursor at the first slot).
address_table(ManagedTypedAllocator<uint64_t> alloc) : alloc_{alloc} {
offsets_ = alloc_.allocate(kBlockSize);
offset_ = offsets_;
}

// Hands the kBlockSize-entry storage back to alloc_.
// NOTE(review): no copy/move special members are visible in this listing; if
// the implicitly generated ones are usable, two objects could end up
// deallocating the same buffer - confirm the type is never copied or moved.
~address_table() { alloc_.deallocate(offsets_, kBlockSize); }

// Returns the most recently stored offset. The table must not be empty
// (checked by the assertion).
uint64_t back() const noexcept {
IRS_ASSERT(offset_ > offsets_);
return *(offset_ - 1);
IRS_ASSERT(offsets_ < offset_);
return offset_[-1];
}

// Stores `offset` in the slot under the cursor and advances the cursor.
// The table must not be full (checked by the assertion).
void push_back(uint64_t offset) noexcept {
IRS_ASSERT(offset_ >= offsets_);
IRS_ASSERT(offset_ < offsets_ + kBlockSize);
*offset_++ = offset;
// NOTE(review): after the increment above, offset_[-1] is the slot that was
// just written with `offset`, so this comparison always holds. If a check
// against the previously stored entry was intended, it has to read the entry
// before it - confirm the intent.
IRS_ASSERT(offset >= offset_[-1]);
}

// Removes the last entry by moving the cursor back one slot. The table must
// not be empty (checked by the assertion). One of the two variants shown also
// writes 0 into the vacated slot; the other leaves the old value in place.
void pop_back() noexcept {
IRS_ASSERT(offset_ > offsets_);
*--offset_ = 0;
IRS_ASSERT(offsets_ < offset_);
--offset_;
}

// returns number of items to be flushed
// (distance between the cursor and the start of the storage, as uint32_t)
uint32_t size() const noexcept {
IRS_ASSERT(offset_ >= offsets_);
return uint32_t(offset_ - offsets_);
return static_cast<uint32_t>(offset_ - offsets_);
}

// True when nothing is stored (cursor is at the start of the storage).
bool empty() const noexcept { return offset_ == offsets_; }

// True when the cursor has reached the end of the kBlockSize-entry storage.
bool full() const noexcept { return offset_ == std::end(offsets_); }
bool full() const noexcept { return offset_ == offsets_ + kBlockSize; }

// Makes the table empty again. One variant shown zero-fills the storage
// before rewinding the cursor; the other only rewinds the cursor, so slots
// keep whatever values were stored earlier.
void reset() noexcept {
std::memset(offsets_, 0, sizeof offsets_);
offset_ = std::begin(offsets_);
}
void reset() noexcept { offset_ = offsets_; }

// Raw access to the storage: begin() is the first slot, current() is the
// write cursor and end() is one past the last of the kBlockSize slots.
uint64_t* begin() noexcept { return std::begin(offsets_); }
uint64_t* begin() noexcept { return offsets_; }
uint64_t* current() noexcept { return offset_; }
uint64_t* end() noexcept { return std::end(offsets_); }
uint64_t* end() noexcept { return offsets_ + kBlockSize; }

private:
// Two storage layouts are shown: an inline, zero-initialized array of
// kBlockSize entries with the cursor pointing at its start, and a pointer to
// storage obtained from alloc_ with both pointers initially null.
uint64_t offsets_[kBlockSize]{};
uint64_t* offset_{offsets_};
ManagedTypedAllocator<uint64_t> alloc_;
uint64_t* offsets_{nullptr};
uint64_t* offset_{nullptr};
};

void prepare(doc_id_t key);
Expand Down
36 changes: 36 additions & 0 deletions microbench/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,39 @@ include_directories(iresearch-microbench
$<TARGET_PROPERTY:iresearch-ofst,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:iresearch-utfcpp,INTERFACE_INCLUDE_DIRECTORIES>
)

# Benchmark executable built from memory_file_benchmark.cpp.
add_executable(iresearch-memory-file-bench memory_file_benchmark.cpp)

# set_ipo is a helper defined elsewhere in the project (presumably it turns on
# interprocedural optimization for the target - verify in its definition).
set_ipo(iresearch-memory-file-bench)

# Make sure iresearch-static is built before this target.
add_dependencies(iresearch-memory-file-bench iresearch-static)

# Headers located under the build tree's core directory.
target_include_directories(iresearch-memory-file-bench PRIVATE ${PROJECT_BINARY_DIR}/core)

# Link against the static iresearch library.
target_link_libraries(iresearch-memory-file-bench iresearch-static)

# Benchmark executable built from column_writer_benchmark.cpp.
add_executable(iresearch-column-writer column_writer_benchmark.cpp)

# set_ipo is a helper defined elsewhere in the project (presumably it turns on
# interprocedural optimization for the target - verify in its definition).
set_ipo(iresearch-column-writer)

# Make sure iresearch-static is built before this target.
add_dependencies(iresearch-column-writer iresearch-static)

# Headers located under the build tree's core directory.
target_include_directories(iresearch-column-writer PRIVATE ${PROJECT_BINARY_DIR}/core)

# Link against the static iresearch library.
target_link_libraries(iresearch-column-writer iresearch-static)
57 changes: 57 additions & 0 deletions microbench/column_writer_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2021 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Valerii Mironov
////////////////////////////////////////////////////////////////////////////////

#include <random>
#include <thread>
#include <vector>

#include "formats/columnstore2.hpp"

// Benchmark dimensions. Namespace-scope constexpr objects already have
// internal linkage, so no explicit `static` is needed.

// Number of worker threads started by main().
constexpr size_t kThreads{1};
// WriteFile() calls rollback() on its writer once per this many pushes.
constexpr size_t kColumns{1000};
// Total number of push_column() calls made by one WriteFile() invocation.
constexpr size_t kColumnsIter{10 * kColumns};

// Creates a columnstore2 writer and pushes kColumnsIter columns into it.
// Before every run of kColumns pushes (including the very first one) the
// writer's rollback() is called.
//
// `rng` is accepted for the caller's convenience but is not read by the
// current body.
static void WriteFile(std::mt19937_64& rng) {
  auto column_writer = irs::columnstore2::make_writer(
    irs::columnstore2::Version::kMax, false, irs::IResourceManager::kNoop);

  for (size_t pushed = 0; pushed < kColumnsIter; ++pushed) {
    // A new batch starts every kColumns pushes.
    const bool starts_batch = (pushed % kColumns) == 0;
    if (starts_batch) {
      column_writer->rollback();
    }
    column_writer->push_column({}, {});
  }
}

int main() {
std::vector<std::thread> threads;
threads.reserve(kThreads);
for (size_t i = 0; i != kThreads; ++i) {
threads.emplace_back([i] {
std::mt19937_64 rng(43 * i);
WriteFile(rng);
});
}
for (auto& thread : threads) {
thread.join();
}
}
22 changes: 22 additions & 0 deletions microbench/crc_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2021 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Valerii Mironov
////////////////////////////////////////////////////////////////////////////////

#include <benchmark/benchmark.h>

#include <string>
Expand Down

0 comments on commit 3145f92

Please sign in to comment.