From 5ee6e162919f6e4cd1430037e4b9b1819f8e2428 Mon Sep 17 00:00:00 2001
From: Krzysztof Swiecicki
Date: Mon, 22 May 2023 10:19:12 +0200
Subject: [PATCH] Add a basic pool manager for memory providers

---
 source/common/ur_pool_manager.hpp | 374 +++++++++++++++++++++++++++++-
 1 file changed, 370 insertions(+), 4 deletions(-)

diff --git a/source/common/ur_pool_manager.hpp b/source/common/ur_pool_manager.hpp
index e40343acf2..415c7942f1 100644
--- a/source/common/ur_pool_manager.hpp
+++ b/source/common/ur_pool_manager.hpp
@@ -9,10 +9,14 @@
 #ifndef USM_POOL_MANAGER_HPP
 #define USM_POOL_MANAGER_HPP 1
 
+#include "logger/ur_logger.hpp"
+#include "uma_helpers.hpp"
 #include "ur_api.h"
-#include "ur_pool_manager.hpp"
 #include "ur_util.hpp"
 
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <list>
+#include <map>
+#include <optional>
+#include <ostream>
+
 #include <unordered_map>
 #include <vector>
 
@@ -27,7 +31,9 @@ struct pool_descriptor {
     ur_usm_type_t type;
     bool deviceReadOnly;
 
-    static bool equal(const pool_descriptor &lhs, const pool_descriptor &rhs);
+    bool operator==(const pool_descriptor &other) const;
+    friend std::ostream &operator<<(std::ostream &os,
+                                    const pool_descriptor &desc);
     static std::size_t hash(const pool_descriptor &desc);
     static std::pair<ur_result_t, std::vector<pool_descriptor>>
     create(ur_usm_pool_handle_t poolHandle, ur_context_handle_t hContext);
@@ -113,8 +119,9 @@ isSharedAllocationReadOnlyOnDevice(const pool_descriptor &desc) {
     return desc.type == UR_USM_TYPE_SHARED && desc.deviceReadOnly;
 }
 
-inline bool pool_descriptor::equal(const pool_descriptor &lhs,
-                                   const pool_descriptor &rhs) {
+inline bool pool_descriptor::operator==(const pool_descriptor &other) const {
+    const pool_descriptor &lhs = *this;
+    const pool_descriptor &rhs = other;
     ur_native_handle_t lhsNative, rhsNative;
 
     // We want to share a memory pool for sub-devices and sub-sub devices.
@@ -149,6 +156,14 @@ inline std::size_t pool_descriptor::hash(const pool_descriptor &desc) {
                        desc.poolHandle);
 }
 
+inline std::ostream &operator<<(std::ostream &os, const pool_descriptor &desc) {
+    os << "pool handle: " << desc.poolHandle
+       << " context handle: " << desc.hContext
+       << " device handle: " << desc.hDevice << " memory type: " << desc.type
+       << " is read only: " << desc.deviceReadOnly;
+    return os;
+}
+
 inline std::pair<ur_result_t, std::vector<pool_descriptor>>
 pool_descriptor::create(ur_usm_pool_handle_t poolHandle,
                         ur_context_handle_t hContext) {
@@ -191,6 +206,357 @@ pool_descriptor::create(ur_usm_pool_handle_t poolHandle,
     return {ret, descriptors};
 }
 
+struct pool_config {
+    // TODO: Config influenced by env variables
+    // dimensions[memType] = {allocMax(MB), capacity, poolSize(MB)}
+    std::map<ur_usm_type_t, std::vector<size_t>> dimensions = {
+        {UR_USM_TYPE_DEVICE, {1, 4, 256}},
+        {UR_USM_TYPE_HOST, {1, 4, 256}},
+        {UR_USM_TYPE_SHARED, {8, 4, 256}}};
+};
+
+// C++17 inline variable, so the header can be included in multiple
+// translation units without violating the ODR
+inline pool_config PoolConfig;
+
+struct proxy_pool {
+    struct block {
+        // Base address of this block
+        uintptr_t base = 0;
+        // Size of the block
+        size_t size = 0;
+        // Allocation (chunk) size served by this block
+        size_t chunkSize = 0;
+        // Total number of slots
+        uint32_t numSlots = 0;
+        // Number of slots in use
+        uint32_t numUsedSlots = 0;
+        // Cached available slot returned by the last dealloc() call
+        uint32_t freeSlot = UINT32_MAX;
+        // Markers for the currently used slots
+        std::vector<bool> usedSlots;
+
+        // Used for default initialization
+        block() = default;
+
+        block(void *base, size_t size, size_t chunkSize)
+            : base(reinterpret_cast<uintptr_t>(base)), size(size),
+              chunkSize(chunkSize) {
+            numSlots = size / chunkSize;
+            numUsedSlots = 0;
+            usedSlots.resize(numSlots, false);
+        }
+
+        // Check if the current block is fully used
+        bool isFull() { return numUsedSlots == numSlots; }
+
+        // Check if the given address belongs to the current block
+        bool contains(void *ptr) {
+            auto mem = reinterpret_cast<uintptr_t>(ptr);
+            return mem >= base && mem < base + size;
+        }
+
+        // Allocate a single chunk from the block
+        void *alloc() {
+            if (isFull()) {
+                return nullptr;
+            }
+
+            // Fast path: reuse the slot cached by the last dealloc() call
+            if (freeSlot != UINT32_MAX) {
+                uint32_t slot = freeSlot;
+                freeSlot = UINT32_MAX;
+                usedSlots[slot] = true;
+                numUsedSlots++;
+                return reinterpret_cast<void *>(base + slot * chunkSize);
+            }
+
+            // Slow path: scan for the first free slot
+            for (uint32_t i = 0; i < numSlots; i++) {
+                if (usedSlots[i]) {
+                    continue;
+                }
+                usedSlots[i] = true;
+                numUsedSlots++;
+                return reinterpret_cast<void *>(base + i * chunkSize);
+            }
+
+            // Should not reach here
+            assert(0 && "Inconsistent memory pool state");
+            return nullptr;
+        }
+
+        // Deallocate the chunk at the given address
+        void dealloc(void *ptr) {
+            if (!contains(ptr)) {
+                assert(0 && "Inconsistent memory pool state");
+                return;
+            }
+            uint32_t slot =
+                (reinterpret_cast<uintptr_t>(ptr) - base) / chunkSize;
+            usedSlots[slot] = false;
+            numUsedSlots--;
+            freeSlot = slot;
+        }
+    }; // block
+
+    uma_result_t initialize(uma_memory_provider_handle_t providers[],
+                            size_t numProviders,
+                            const pool_descriptor &desc) noexcept {
+        (void)numProviders;
+        provider = providers[0];
+
+        auto memType = desc.type;
+        const auto &dims = PoolConfig.dimensions.at(memType);
+        auto configAllocMax = dims[0];
+        auto configCapacity = dims[1];
+        auto configPoolSize = dims[2];
+
+        blockCapacity = configCapacity;
+        poolSizeMax = configPoolSize << 20; // MB to B
+        poolSize = 0;
+
+        // MB to B and round up to a power of 2
+        allocMax = allocMin << getBucketId(configAllocMax * (1 << 20));
+
+        // Make a bucket for each allocation size between allocMin and allocMax
+        auto minId = getBucketId(allocMin);
+        auto maxId = getBucketId(allocMax);
+        buckets.resize(maxId - minId + 1);
+
+        // Set bucket parameters
+        for (size_t i = 0; i < buckets.size(); i++) {
+            size_t chunkSize = allocMin << i;
+            size_t blockSize = chunkSize * blockCapacity;
+            if (blockSize <= allocUnit) {
+                // Allocation unit is already large enough
+                blockSize = allocUnit;
+            }
+
+            bucketParams.emplace_back(chunkSize, blockSize);
+        }
+
+        // Populate each bucket with one memory block
+        for (size_t i = 0; i < buckets.size(); i++) {
+            auto [ret, _] = populateBucket(i);
+            if (ret) {
+                return ret;
+            }
+        }
+
+        return UMA_RESULT_SUCCESS;
+    }
+
+    void finalize() {
+        for (auto &bucket : buckets) {
+            auto ret = clearBucket(bucket);
+            (void)ret;
+            assert(ret == UMA_RESULT_SUCCESS &&
+                   "Inconsistent memory pool state");
+        }
+    }
+
+    std::pair<uma_result_t, std::optional<block *>>
+    populateBucket(uint32_t bucketId) {
+        auto &bucket = buckets[bucketId];
+        auto chunkSize = bucketParams[bucketId].first;
+        auto blockSize = bucketParams[bucketId].second;
+
+        void *base;
+        // TODO: Initialize specific memory type (host, device, shared)
+        auto ret = umaMemoryProviderAlloc(provider, blockSize, 0, &base);
+        if (ret) {
+            return {ret, std::nullopt};
+        }
+
+        // Account for the new block so poolSizeMax can be enforced
+        poolSize += blockSize;
+
+        auto &block = bucket.emplace_back(base, blockSize, chunkSize);
+        return {UMA_RESULT_SUCCESS, &block};
+    }
+
+    uma_result_t clearBucket(std::list<block> &bucket) {
+        for (auto &block : bucket) {
+            auto blockBase = reinterpret_cast<void *>(block.base);
+            auto blockSize = block.size;
+            auto ret = umaMemoryProviderFree(provider, blockBase, blockSize);
+            if (ret) {
+                return ret;
+            }
+        }
+
+        return UMA_RESULT_SUCCESS;
+    }
+
+    // Get the bucket ID for the given allocation size
+    uint32_t getBucketId(size_t size) {
+        uint32_t count = 0;
+        for (size_t allocSz = allocMin; allocSz < size; count++) {
+            allocSz <<= 1;
+        }
+
+        return count;
+    }
+
+    void *malloc(size_t size) noexcept { return aligned_malloc(size, 0); }
+
+    void *calloc(size_t num, size_t size) noexcept {
+        auto mem = malloc(num * size);
+        if (!mem) {
+            return nullptr;
+        }
+        // TODO: Zero initialize memory in memory provider specific way
+        memset(mem, 0, num * size);
+        return mem;
+    }
+
+    void *realloc(void *ptr, size_t size) noexcept {
+        if (ptr && ptrToBlock.count(ptr) == 0) {
+            return nullptr;
+        }
+
+        auto mem = malloc(size);
+        if (!ptr || !mem) {
+            return mem;
+        }
+
+        // Copy no more than the new allocation can hold
+        // TODO: Use UMA data movement API
+        memmove(mem, ptr, std::min(ptrToBlock[ptr]->chunkSize, size));
+        free(ptr);
+
+        return mem;
+    }
+
+    void *aligned_malloc(size_t size, size_t alignment) noexcept {
+        // TODO: Use alignment
+        (void)alignment;
+        uint32_t bucketId = getBucketId(size);
+        if (bucketId >= buckets.size()) {
+            // Requested size exceeds allocMax
+            return nullptr;
+        }
+        auto &bucket = buckets[bucketId];
+
+        void *mem = nullptr;
+        for (auto &block : bucket) {
+            if (block.isFull()) {
+                continue;
+            }
+
+            mem = block.alloc();
+            ptrToBlock.emplace(mem, &block);
+            break;
+        }
+
+        auto isFull = (poolSize >= poolSizeMax);
+        if (mem == nullptr && !isFull) {
+            auto [ret, blockOpt] = populateBucket(bucketId);
+            if (ret || !blockOpt.has_value()) {
+                return nullptr;
+            }
+
+            mem = blockOpt.value()->alloc();
+            ptrToBlock.emplace(mem, blockOpt.value());
+        }
+
+        return mem;
+    }
+
+    size_t malloc_usable_size(void *ptr) noexcept {
+        if (ptrToBlock.count(ptr) == 0) {
+            return 0;
+        }
+
+        return ptrToBlock[ptr]->chunkSize;
+    }
+
+    void free(void *ptr) noexcept {
+        if (ptrToBlock.count(ptr) == 0) {
+            return;
+        }
+
+        ptrToBlock[ptr]->dealloc(ptr);
+        ptrToBlock.erase(ptr);
+        // TODO: Return free'd size?
+    }
+
+    enum uma_result_t get_last_result(const char **ppMessage) noexcept {
+        // TODO: Not supported
+        (void)ppMessage;
+        return UMA_RESULT_ERROR_NOT_SUPPORTED;
+    }
+
+    // Memory provider tied to the pool
+    uma_memory_provider_handle_t provider = 0;
+    // Minimum supported allocation size from the pool
+    size_t allocMin = 1 << 6; // 64B
+    // Maximum supported allocation size from the pool
+    size_t allocMax = 0;
+    // Allocation size used when the pool needs to allocate a new block
+    size_t allocUnit = 1 << 16; // 64KB
+    // Capacity of each block in the buckets, which decides the number of
+    // allocatable chunks per block. Each block in a bucket can serve
+    // at least blockCapacity chunks:
+    //   if chunkSize * blockCapacity <= allocUnit, blockSize = allocUnit;
+    //   otherwise, blockSize = chunkSize * blockCapacity.
+    // In short, it controls how much memory is over-allocated.
+    uint32_t blockCapacity = 0;
+    // Total memory allocated for this pool
+    size_t poolSize = 0;
+    // Maximum allowed pool size
+    size_t poolSizeMax = 0;
+    // Buckets with memory blocks; std::list keeps the block pointers
+    // stored in ptrToBlock stable as new blocks are added
+    std::vector<std::list<block>> buckets;
+    // Per-bucket {chunkSize, blockSize} parameters
+    std::vector<std::pair<size_t, size_t>> bucketParams;
+    // Map from allocated pointer to its owning block
+    std::unordered_map<void *, block *> ptrToBlock;
+}; // proxy_pool
+
+template <typename D> struct pool_manager {
+  private:
+    std::unordered_map<D, uma::pool_unique_handle_t> descToPoolMap;
+
+    std::optional<uma_memory_pool_handle_t> getPool(const D &desc) noexcept {
+        auto it = descToPoolMap.find(desc);
+        if (it == descToPoolMap.end()) {
+            logger::error("Pool descriptor doesn't match any existing pool: {}",
+                          desc);
+            return std::nullopt;
+        }
+
+        return it->second.get();
+    }
+
+  public:
+    static std::pair<uma_result_t, std::optional<pool_manager>>
+    create(std::vector<std::pair<D, uma_memory_provider_handle_t>>
+               descHandlePairs) {
+        pool_manager poolManager;
+        for (auto &[desc, memProvider] : descHandlePairs) {
+            auto [ret, hPool] =
+                uma::poolMakeUnique<proxy_pool>(&memProvider, 1, desc);
+            if (ret != UMA_RESULT_SUCCESS) {
+                logger::error("Failed to create a pool from memory provider: "
+                              "{}, for pool descriptor: {}",
+                              memProvider, desc);
+                return {ret, std::nullopt};
+            }
+
+            poolManager.descToPoolMap.emplace(desc, std::move(hPool));
+        }
+
+        return {UMA_RESULT_SUCCESS, std::move(poolManager)};
+    }
+
+    uma_result_t addPool(const D &desc,
+                         uma::pool_unique_handle_t hPool) noexcept {
+        if (descToPoolMap.count(desc) != 0) {
+            logger::error("Pool for pool descriptor: {}, already exists", desc);
+            return UMA_RESULT_ERROR_INVALID_ARGUMENT;
+        }
+
+        descToPoolMap.emplace(desc, std::move(hPool));
+
+        return UMA_RESULT_SUCCESS;
+    }
+
+    void *alloc(D desc, size_t size) noexcept {
+        auto poolHandleOpt = getPool(desc);
+        if (poolHandleOpt.has_value()) {
+            return umaPoolMalloc(poolHandleOpt.value(), size);
+        }
+
+        return nullptr;
+    }
+
+    void dealloc(D desc, void *ptr) noexcept {
+        auto poolHandleOpt = getPool(desc);
+        if (poolHandleOpt.has_value()) {
+            umaPoolFree(poolHandleOpt.value(), ptr);
+        }
+    }
+};
+
 } // namespace usm
 
 #endif /* USM_POOL_MANAGER_HPP */
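Usage sketch (reviewer note, not part of the commit): how the pieces are
meant to compose, assuming a ur_context_handle_t `hContext` and a
ur_usm_pool_handle_t `hPool` obtained from the usual UR entry points, and
a hypothetical helper `makeProviderFor()` that builds a UMA memory
provider for a descriptor (provider construction is outside this patch):

    // Sketch under the assumptions stated above; hPool, hContext and
    // makeProviderFor are placeholders, not part of this patch.
    auto [descRet, descriptors] =
        usm::pool_descriptor::create(hPool, hContext);
    if (descRet != UR_RESULT_SUCCESS) {
        /* handle error */
    }

    // Pair each descriptor with a memory provider; pool_manager::create
    // then builds one proxy_pool per descriptor and takes ownership.
    std::vector<std::pair<usm::pool_descriptor, uma_memory_provider_handle_t>>
        descHandlePairs;
    for (auto &desc : descriptors) {
        descHandlePairs.emplace_back(desc, makeProviderFor(desc));
    }

    auto [ret, managerOpt] =
        usm::pool_manager<usm::pool_descriptor>::create(descHandlePairs);
    if (ret == UMA_RESULT_SUCCESS && managerOpt.has_value()) {
        void *ptr = managerOpt->alloc(descriptors[0], 1024);
        managerOpt->dealloc(descriptors[0], ptr);
    }

Note that instantiating pool_manager<usm::pool_descriptor> additionally
requires a std::hash specialization (or a custom hasher) for
pool_descriptor; the patch provides the static pool_descriptor::hash
helper but does not wire it into the unordered_map.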