Skip to content

Commit

Permalink
[umf] optimize bucket selection for allocation sizes
Browse files Browse the repository at this point in the history
  • Loading branch information
Damian Duy committed Nov 15, 2023
1 parent 5c0557e commit c8b6be2
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 14 deletions.
61 changes: 47 additions & 14 deletions source/common/umf_pools/disjoint_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <bitset>
#include <cassert>
#include <cctype>
#include <cmath>
#include <iomanip>
#include <limits>
#include <list>
Expand All @@ -25,6 +26,7 @@
// TODO: replace with logger?
#include <iostream>

#include "../unified_malloc_framework/src/utils/utils.h"
#include "disjoint_pool.hpp"

namespace usm {
Expand Down Expand Up @@ -283,6 +285,9 @@ class DisjointPool::AllocImpl {
// Configuration for this instance
DisjointPoolConfig params;

// Used in the algorithm for finding buckets
std::size_t MinBucketSizeExp;

// Coarse-grain allocation min alignment
size_t ProviderMinPageSize;

Expand All @@ -293,8 +298,12 @@ class DisjointPool::AllocImpl {
// Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff.
// Powers of 2 and the value halfway between the powers of 2.
auto Size1 = params.MinBucketSize;
// MinBucketSize cannot be larger than CutOff
Size1 = (std::min)(Size1, CutOff);
// Buckets smaller than the default minimum bucket size (8) aren't needed.
Size1 = std::max(Size1, MIN_BUCKET_DEFAULT_SIZE);
Size1 = (std::max)(Size1, MIN_BUCKET_DEFAULT_SIZE);
// Calculate the exponent for MinBucketSize used for finding buckets
MinBucketSizeExp = (size_t)log2(Size1);
auto Size2 = Size1 + Size1 / 2;
for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) {
Buckets.push_back(std::make_unique<Bucket>(Size1, *this));
Expand Down Expand Up @@ -331,6 +340,7 @@ class DisjointPool::AllocImpl {

private:
Bucket &findBucket(size_t Size);
std::size_t sizeToIdx(size_t Size);
};

static void *memoryProviderAlloc(umf_memory_provider_handle_t hProvider,
Expand Down Expand Up @@ -404,15 +414,15 @@ size_t Slab::FindFirstAvailableChunkIdx() const {
return It - Chunks.begin();
}

return std::numeric_limits<size_t>::max();
return (std::numeric_limits<size_t>::max)();
}

void *Slab::getChunk() {
// assert(NumAllocated != Chunks.size());

const size_t ChunkIdx = FindFirstAvailableChunkIdx();
// Free chunk must exist, otherwise we would have allocated another slab
assert(ChunkIdx != (std::numeric_limits<size_t>::max()));
assert(ChunkIdx != ((std::numeric_limits<size_t>::max)()));

void *const FreeChunk =
(static_cast<uint8_t *>(getPtr())) + ChunkIdx * getChunkSize();
Expand Down Expand Up @@ -680,7 +690,7 @@ umf_memory_provider_handle_t Bucket::getMemHandle() {

size_t Bucket::SlabMinSize() { return OwnAllocCtx.getParams().SlabMinSize; }

size_t Bucket::SlabAllocSize() { return std::max(getSize(), SlabMinSize()); }
size_t Bucket::SlabAllocSize() { return (std::max)(getSize(), SlabMinSize()); }

size_t Bucket::Capacity() {
// For buckets used in chunked mode, just one slab in pool is sufficient.
Expand Down Expand Up @@ -712,9 +722,9 @@ void Bucket::updateStats(int InUse, int InPool) {
return;
}
currSlabsInUse += InUse;
maxSlabsInUse = std::max(currSlabsInUse, maxSlabsInUse);
maxSlabsInUse = (std::max)(currSlabsInUse, maxSlabsInUse);
currSlabsInPool += InPool;
maxSlabsInPool = std::max(currSlabsInPool, maxSlabsInPool);
maxSlabsInPool = (std::max)(currSlabsInPool, maxSlabsInPool);
// Increment or decrement current pool sizes based on whether
// slab was added to or removed from pool.
OwnAllocCtx.getParams().CurPoolSize += InPool * SlabAllocSize();
Expand Down Expand Up @@ -818,16 +828,39 @@ void *DisjointPool::AllocImpl::allocate(size_t Size, size_t Alignment,
return nullptr;
}

Bucket &DisjointPool::AllocImpl::findBucket(size_t Size) {
// Maps an allocation size to an index into Buckets.
//
// Sizes below 2^MinBucketSizeExp map to index 0. Otherwise, with p being the
// position of the leftmost set bit of Size, the result is
// 2 * (p - MinBucketSizeExp), plus:
//   0 when Size is exactly 2^p,
//   1 when Size is at most 2^p + 2^(p-1),
//   2 for anything larger.
// Asserts that 0 < Size <= CutOff.
std::size_t DisjointPool::AllocImpl::sizeToIdx(size_t Size) {
assert(Size <= CutOff && "Unexpected size");
assert(Size > 0 && "Unexpected size");

// Smallest bucket size, reconstructed from its stored exponent.
size_t MinBucketSize = (size_t)1 << MinBucketSizeExp;
if (Size < MinBucketSize) {
return 0;
}

// Get the position of the leftmost set bit
size_t position = getLeftmostSetBitPos(Size);

// NOTE(review): this linear search looks like a leftover from the previous
// findBucket implementation shown by the commit view; its result is only
// read by the assert further down — confirm whether it should remain.
auto It = std::find_if(
Buckets.begin(), Buckets.end(),
[Size](const auto &BucketPtr) { return BucketPtr->getSize() >= Size; });
// Largest power of two that is not greater than Size.
size_t lower_bound = (size_t)1 << position;
// Two index slots per power-of-two step above the minimum exponent.
size_t diff = (position - MinBucketSizeExp) << 1;

assert((It != Buckets.end()) && "Bucket should always exist");
if (Size == lower_bound) {
return diff;
} else if (Size <= (lower_bound | (lower_bound >> 1))) {
// Size is at most 1.5 * lower_bound.
return diff + 1;
} else {
return diff + 2;
}
}

// Returns the bucket at the index computed by sizeToIdx(Size).
// In builds with asserts enabled it also checks that the chosen bucket's size
// is >= Size and, when the index is non-zero, that the preceding bucket's size
// is < Size.
Bucket &DisjointPool::AllocImpl::findBucket(size_t Size) {
auto calculatedIdx = sizeToIdx(Size);
// NOTE(review): sizeToIdx returns an unsigned std::size_t, so this check is
// always true; an upper-bound check against Buckets.size() would guard the
// indexing below — confirm.
assert(calculatedIdx >= 0);
assert((*(Buckets[calculatedIdx])).getSize() >= Size);
if (calculatedIdx > 0) {
assert((*(Buckets[calculatedIdx - 1])).getSize() < Size);
}

// NOTE(review): `It` is not declared in this function; the next line looks
// like a removed line from the previous implementation shown by the commit
// view — confirm.
return *(*It);
return *(Buckets[calculatedIdx]);
}

void DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) {
Expand Down Expand Up @@ -883,9 +916,9 @@ void DisjointPool::AllocImpl::printStats(bool &TitlePrinted,
HighPeakSlabsInUse = 0;
for (auto &B : Buckets) {
(*B).printStats(TitlePrinted, MTName);
HighPeakSlabsInUse = std::max((*B).maxSlabsInUse, HighPeakSlabsInUse);
HighPeakSlabsInUse = (std::max)((*B).maxSlabsInUse, HighPeakSlabsInUse);
if ((*B).allocCount) {
HighBucketSize = std::max((*B).SlabAllocSize(), HighBucketSize);
HighBucketSize = (std::max)((*B).SlabAllocSize(), HighBucketSize);
}
}
}
Expand Down
16 changes: 16 additions & 0 deletions source/common/unified_malloc_framework/src/utils/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@
#if defined(_WIN32)
#include <windows.h>
#else
#ifndef __cplusplus
#include <stdatomic.h>
#endif
#endif

#ifdef __cplusplus
extern "C" {
Expand Down Expand Up @@ -72,6 +74,20 @@ static inline void *Zalloc(size_t s) {
return m;
}

// Returns the zero-based index of the most significant set bit in num,
// e.g. for 01000011110 the result is 9.
// When num is 0 no bit is set; the final unsigned decrement wraps around and
// the result is the maximum size_t value.
static inline size_t getLeftmostSetBitPos(size_t num) {
    // From C++20 countl_zero could be used for that.
    size_t bitCount = 0;
    // Count how many right shifts it takes to clear every set bit.
    for (; num != 0; num >>= 1) {
        ++bitCount;
    }
    // Convert the bit count into a zero-based position.
    return bitCount - 1;
}

#define NOFUNCTION \
do { \
} while (0)
Expand Down
11 changes: 11 additions & 0 deletions test/unified_malloc_framework/memoryPool.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ struct umfPoolTest : umf_test::test,

umf::pool_unique_handle_t pool;
static constexpr int NTHREADS = 5;
static constexpr std::array<int, 7> nonAlignedAllocSizes = {5, 7, 23, 55,
80, 119, 247};
};

struct umfMultiPoolTest : umfPoolTest {
Expand Down Expand Up @@ -86,6 +88,15 @@ TEST_P(umfPoolTest, allocFree) {
umfPoolFree(pool.get(), ptr);
}

// For every size in nonAlignedAllocSizes: allocate from the pool, require a
// non-null result, zero the whole returned region, then free it.
TEST_P(umfPoolTest, allocFreeNonAlignedSizes) {
    for (const auto &size : nonAlignedAllocSizes) {
        auto *mem = umfPoolMalloc(pool.get(), size);
        ASSERT_NE(mem, nullptr);
        std::memset(mem, 0, size);
        umfPoolFree(pool.get(), mem);
    }
}

TEST_P(umfPoolTest, reallocFree) {
if (!umf_test::isReallocSupported(pool.get())) {
GTEST_SKIP();
Expand Down

0 comments on commit c8b6be2

Please sign in to comment.