From c8b6be228f90d626f020efcdbb443af30c5edbee Mon Sep 17 00:00:00 2001
From: Damian Duy
Date: Thu, 5 Oct 2023 13:51:27 +0200
Subject: [PATCH] [umf] optimize bucket selection for allocation sizes

---
 source/common/umf_pools/disjoint_pool.cpp    | 61 ++++++++++++++-----
 .../src/utils/utils.h                        | 16 +++++
 test/unified_malloc_framework/memoryPool.hpp | 11 ++++
 3 files changed, 74 insertions(+), 14 deletions(-)

diff --git a/source/common/umf_pools/disjoint_pool.cpp b/source/common/umf_pools/disjoint_pool.cpp
index 7259c977ed..004b91d15d 100644
--- a/source/common/umf_pools/disjoint_pool.cpp
+++ b/source/common/umf_pools/disjoint_pool.cpp
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include <cmath>
 #include
 #include
 #include
@@ -25,6 +26,7 @@
 // TODO: replace with logger?
 #include <iostream>
 
+#include "../unified_malloc_framework/src/utils/utils.h"
 #include "disjoint_pool.hpp"
 
 namespace usm {
@@ -283,6 +285,9 @@ class DisjointPool::AllocImpl {
     // Configuration for this instance
     DisjointPoolConfig params;
 
+    // Used in the algorithm for finding buckets
+    std::size_t MinBucketSizeExp;
+
     // Coarse-grain allocation min alignment
     size_t ProviderMinPageSize;
 
@@ -293,8 +298,12 @@ class DisjointPool::AllocImpl {
         // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff.
         // Powers of 2 and the value halfway between the powers of 2.
         auto Size1 = params.MinBucketSize;
+        // MinBucketSize cannot be larger than CutOff.
+        Size1 = (std::min)(Size1, CutOff);
         // Buckets sized smaller than the bucket default size (8) aren't needed.
-        Size1 = std::max(Size1, MIN_BUCKET_DEFAULT_SIZE);
+        Size1 = (std::max)(Size1, MIN_BUCKET_DEFAULT_SIZE);
+        // Calculate the exponent of MinBucketSize, used for finding buckets.
+        MinBucketSizeExp = (size_t)log2(Size1);
         auto Size2 = Size1 + Size1 / 2;
         for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) {
             Buckets.push_back(std::make_unique<Bucket>(Size1, *this));
@@ -331,6 +340,7 @@ class DisjointPool::AllocImpl {
 
   private:
     Bucket &findBucket(size_t Size);
+    std::size_t sizeToIdx(size_t Size);
 };
 
 static void *memoryProviderAlloc(umf_memory_provider_handle_t hProvider,
@@ -404,7 +414,7 @@ size_t Slab::FindFirstAvailableChunkIdx() const {
         return It - Chunks.begin();
     }
 
-    return std::numeric_limits<size_t>::max();
+    return (std::numeric_limits<size_t>::max)();
 }
 
 void *Slab::getChunk() {
@@ -412,7 +422,7 @@ void *Slab::getChunk() {
 
     const size_t ChunkIdx = FindFirstAvailableChunkIdx();
     // Free chunk must exist, otherwise we would have allocated another slab
-    assert(ChunkIdx != (std::numeric_limits<size_t>::max()));
+    assert(ChunkIdx != ((std::numeric_limits<size_t>::max)()));
 
     void *const FreeChunk =
         (static_cast<uint8_t *>(getPtr())) + ChunkIdx * getChunkSize();
@@ -680,7 +690,7 @@ umf_memory_provider_handle_t Bucket::getMemHandle() {
 
 size_t Bucket::SlabMinSize() { return OwnAllocCtx.getParams().SlabMinSize; }
 
-size_t Bucket::SlabAllocSize() { return std::max(getSize(), SlabMinSize()); }
+size_t Bucket::SlabAllocSize() { return (std::max)(getSize(), SlabMinSize()); }
 
 size_t Bucket::Capacity() {
     // For buckets used in chunked mode, just one slab in pool is sufficient.
@@ -712,9 +722,9 @@ void Bucket::updateStats(int InUse, int InPool) {
         return;
     }
     currSlabsInUse += InUse;
-    maxSlabsInUse = std::max(currSlabsInUse, maxSlabsInUse);
+    maxSlabsInUse = (std::max)(currSlabsInUse, maxSlabsInUse);
     currSlabsInPool += InPool;
-    maxSlabsInPool = std::max(currSlabsInPool, maxSlabsInPool);
+    maxSlabsInPool = (std::max)(currSlabsInPool, maxSlabsInPool);
     // Increment or decrement current pool sizes based on whether
     // slab was added to or removed from pool.
     OwnAllocCtx.getParams().CurPoolSize += InPool * SlabAllocSize();
@@ -818,16 +828,39 @@ void *DisjointPool::AllocImpl::allocate(size_t Size, size_t Alignment,
     return nullptr;
 }
 
-Bucket &DisjointPool::AllocImpl::findBucket(size_t Size) {
+std::size_t DisjointPool::AllocImpl::sizeToIdx(size_t Size) {
     assert(Size <= CutOff && "Unexpected size");
+    assert(Size > 0 && "Unexpected size");
+
+    size_t MinBucketSize = (size_t)1 << MinBucketSizeExp;
+    if (Size < MinBucketSize) {
+        return 0;
+    }
+
+    // Get the position of the leftmost set bit
+    size_t position = getLeftmostSetBitPos(Size);
 
-    auto It = std::find_if(
-        Buckets.begin(), Buckets.end(),
-        [Size](const auto &BucketPtr) { return BucketPtr->getSize() >= Size; });
+    size_t lower_bound = (size_t)1 << position;
+    size_t diff = (position - MinBucketSizeExp) << 1;
 
-    assert((It != Buckets.end()) && "Bucket should always exist");
+    if (Size == lower_bound) {
+        return diff;
+    } else if (Size <= (lower_bound | (lower_bound >> 1))) {
+        return diff + 1;
+    } else {
+        return diff + 2;
+    }
+}
+
+Bucket &DisjointPool::AllocImpl::findBucket(size_t Size) {
+    auto calculatedIdx = sizeToIdx(Size);
+    assert(calculatedIdx >= 0);
+    assert((*(Buckets[calculatedIdx])).getSize() >= Size);
+    if (calculatedIdx > 0) {
+        assert((*(Buckets[calculatedIdx - 1])).getSize() < Size);
+    }
 
-    return *(*It);
+    return *(Buckets[calculatedIdx]);
 }
 
 void DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) {
@@ -883,9 +916,9 @@ void DisjointPool::AllocImpl::printStats(bool &TitlePrinted,
     HighPeakSlabsInUse = 0;
     for (auto &B : Buckets) {
         (*B).printStats(TitlePrinted, MTName);
-        HighPeakSlabsInUse = std::max((*B).maxSlabsInUse, HighPeakSlabsInUse);
+        HighPeakSlabsInUse = (std::max)((*B).maxSlabsInUse, HighPeakSlabsInUse);
         if ((*B).allocCount) {
-            HighBucketSize = std::max((*B).SlabAllocSize(), HighBucketSize);
+            HighBucketSize = (std::max)((*B).SlabAllocSize(), HighBucketSize);
         }
     }
 }
diff --git a/source/common/unified_malloc_framework/src/utils/utils.h b/source/common/unified_malloc_framework/src/utils/utils.h
index 4dd779c57b..d3744af4da 100644
--- a/source/common/unified_malloc_framework/src/utils/utils.h
+++ b/source/common/unified_malloc_framework/src/utils/utils.h
@@ -14,8 +14,10 @@
 #if defined(_WIN32)
 #include <windows.h>
 #else
+#ifndef __cplusplus
 #include <stdatomic.h>
 #endif
+#endif
 
 #ifdef __cplusplus
 extern "C" {
@@ -72,6 +74,20 @@ static inline void *Zalloc(size_t s) {
     return m;
 }
 
+// Retrieves the position of the leftmost set bit.
+// The position of the bit is counted from 0,
+// e.g. for 01000011110 the position equals 9.
+static inline size_t getLeftmostSetBitPos(size_t num) {
+    // From C++20, countl_zero could be used for this.
+    size_t position = 0;
+    while (num > 0) {
+        num >>= 1;
+        position++;
+    }
+    position--;
+    return position;
+}
+
 #define NOFUNCTION                                                             \
     do {                                                                       \
     } while (0)
diff --git a/test/unified_malloc_framework/memoryPool.hpp b/test/unified_malloc_framework/memoryPool.hpp
index ab923932fb..dd8c897c7c 100644
--- a/test/unified_malloc_framework/memoryPool.hpp
+++ b/test/unified_malloc_framework/memoryPool.hpp
@@ -36,6 +36,8 @@ struct umfPoolTest : umf_test::test,
     umf::pool_unique_handle_t pool;
 
     static constexpr int NTHREADS = 5;
+    static constexpr std::array<int, 7> nonAlignedAllocSizes = {5,  7,  23, 55,
+                                                                80, 119, 247};
 };
 
 struct umfMultiPoolTest : umfPoolTest {
@@ -86,6 +88,15 @@ TEST_P(umfPoolTest, allocFree) {
     umfPoolFree(pool.get(), ptr);
 }
 
+TEST_P(umfPoolTest, allocFreeNonAlignedSizes) {
+    for (const auto &allocSize : nonAlignedAllocSizes) {
+        auto *ptr = umfPoolMalloc(pool.get(), allocSize);
+        ASSERT_NE(ptr, nullptr);
+        std::memset(ptr, 0, allocSize);
+        umfPoolFree(pool.get(), ptr);
+    }
+}
+
 TEST_P(umfPoolTest, reallocFree) {
     if (!umf_test::isReallocSupported(pool.get())) {
         GTEST_SKIP();
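
Reviewer's note (not part of the patch): below is a minimal standalone sketch of the bucket-index math this patch introduces. The MinBucketSize and CutOff constants are assumptions mirroring MIN_BUCKET_DEFAULT_SIZE and the pool's cutoff in disjoint_pool.cpp, and the verification harness around them is hypothetical. It checks that the O(1) sizeToIdx mapping lands on exactly the bucket the old linear std::find_if scan would have picked.

// Standalone sketch: verifies the O(1) size-to-index mapping against the
// linear bucket search it replaces. MinBucketSize and CutOff are assumed
// values mirroring the disjoint pool defaults.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <vector>

static constexpr size_t MinBucketSize = 8;        // MIN_BUCKET_DEFAULT_SIZE
static constexpr size_t CutOff = (size_t)1 << 31; // assumed pool cutoff

// Loop-based fallback matching the patch's getLeftmostSetBitPos();
// requires num > 0.
static size_t leftmostSetBitPos(size_t num) {
    size_t position = 0;
    while (num > 0) {
        num >>= 1;
        position++;
    }
    return position - 1;
}

// Buckets are 8, 12, 16, 24, 32, 48, ...: every power of two plus the value
// halfway to the next one, so each power of two contributes two indices.
static size_t sizeToIdx(size_t Size) {
    assert(Size > 0 && Size <= CutOff);
    if (Size < MinBucketSize) {
        return 0;
    }
    size_t MinBucketSizeExp = leftmostSetBitPos(MinBucketSize);
    size_t position = leftmostSetBitPos(Size);
    size_t lower_bound = (size_t)1 << position;       // largest power of two <= Size
    size_t diff = (position - MinBucketSizeExp) << 1; // index of that power of two
    if (Size == lower_bound) {
        return diff;     // exact power of two
    } else if (Size <= (lower_bound | (lower_bound >> 1))) {
        return diff + 1; // fits the 1.5x midpoint bucket
    } else {
        return diff + 2; // rounds up to the next power of two
    }
}

int main() {
    // Build the same bucket sizes the pool constructor generates.
    std::vector<size_t> buckets;
    for (size_t s1 = MinBucketSize, s2 = s1 + s1 / 2; s2 < CutOff;
         s1 *= 2, s2 *= 2) {
        buckets.push_back(s1);
        buckets.push_back(s2);
    }
    buckets.push_back(CutOff);

    // The computed index must match a linear lower-bound search.
    for (size_t size = 1; size <= 1 << 16; ++size) {
        auto it = std::lower_bound(buckets.begin(), buckets.end(), size);
        assert(buckets[sizeToIdx(size)] == *it);
    }
    printf("sizeToIdx agrees with linear search\n");
    return 0;
}

The payoff: findBucket now costs a handful of shifts and comparisons instead of scanning up to a few dozen buckets on every allocation.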