diff --git a/source/adapters/cuda/usm.cpp b/source/adapters/cuda/usm.cpp index a7dc80f563..9a18287690 100644 --- a/source/adapters/cuda/usm.cpp +++ b/source/adapters/cuda/usm.cpp @@ -13,14 +13,13 @@ #include "adapter.hpp" #include "common.hpp" #include "context.hpp" -#include "device.hpp" -#include "event.hpp" #include "platform.hpp" #include "queue.hpp" #include "ur_util.hpp" #include "usm.hpp" #include +#include /// USM: Implements USM Host allocations using CUDA Pinned Memory /// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#page-locked-host-memory @@ -36,12 +35,10 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, return USMHostAllocImpl(ppMem, hContext, nullptr, size, alignment); } - usm::pool_descriptor Desc = {hPool, hContext, nullptr, UR_USM_TYPE_HOST, - false}; - auto hPoolInternalOpt = hPool->PoolManager.getPool(Desc); + usm::pool_descriptor Desc(hPool, hContext, nullptr, UR_USM_TYPE_HOST, + pUSMDesc); + auto hPoolInternalOpt = hPool->getOrCreatePool(Desc); if (!hPoolInternalOpt.has_value()) { - // Internal error, every L0 context and usm pool should have Host, Device, - // Shared and SharedReadOnly UMF pools. return UR_RESULT_ERROR_UNKNOWN; } @@ -70,12 +67,10 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, alignment); } - usm::pool_descriptor Desc = {hPool, hContext, hDevice, UR_USM_TYPE_DEVICE, - false}; - auto hPoolInternalOpt = hPool->PoolManager.getPool(Desc); + usm::pool_descriptor Desc(hPool, hContext, hDevice, UR_USM_TYPE_DEVICE, + pUSMDesc); + auto hPoolInternalOpt = hPool->getOrCreatePool(Desc); if (!hPoolInternalOpt.has_value()) { - // Internal error, every L0 context and usm pool should have Host, Device, - // Shared and SharedReadOnly UMF pools. 
return UR_RESULT_ERROR_UNKNOWN; } @@ -104,12 +99,10 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, alignment); } - usm::pool_descriptor Desc = {hPool, hContext, hDevice, UR_USM_TYPE_SHARED, - false}; - auto hPoolInternalOpt = hPool->PoolManager.getPool(Desc); + usm::pool_descriptor Desc(hPool, hContext, hDevice, UR_USM_TYPE_SHARED, + pUSMDesc); + auto hPoolInternalOpt = hPool->getOrCreatePool(Desc); if (!hPoolInternalOpt.has_value()) { - // Internal error, every L0 context and usm pool should have Host, Device, - // Shared and SharedReadOnly UMF pools. return UR_RESULT_ERROR_UNKNOWN; } @@ -333,190 +326,37 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMReleaseExp(ur_context_handle_t Context, return UR_RESULT_SUCCESS; } -umf_result_t USMMemoryProvider::initialize(ur_context_handle_t Ctx, - ur_device_handle_t Dev) { - Context = Ctx; - Device = Dev; - // There isn't a way to query this in cuda, and there isn't much info on - // cuda's approach to alignment or transfer granularity between host and - // device. Within UMF this is only used to influence alignment, and since we - // discard that in our alloc implementations it seems we can safely ignore - // this as well, for now. 
- MinPageSize = 0; - - return UMF_RESULT_SUCCESS; -} - -enum umf_result_t USMMemoryProvider::alloc(size_t Size, size_t Align, - void **Ptr) { - auto Res = allocateImpl(Ptr, Size, Align); - if (Res != UR_RESULT_SUCCESS) { - getLastStatusRef() = Res; - return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; - } - - return UMF_RESULT_SUCCESS; -} - -enum umf_result_t USMMemoryProvider::free(void *Ptr, size_t Size) { - (void)Size; - - auto Res = USMFreeImpl(Context, Ptr); - if (Res != UR_RESULT_SUCCESS) { - getLastStatusRef() = Res; - return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; - } - - return UMF_RESULT_SUCCESS; -} - -void USMMemoryProvider::get_last_native_error(const char **ErrMsg, - int32_t *ErrCode) { - (void)ErrMsg; - *ErrCode = static_cast(getLastStatusRef()); +ur_result_t CudaUSMSharedMemoryProvider::allocateImpl(void **ResultPtr, + size_t Size, + size_t Alignment) { + return USMSharedAllocImpl(ResultPtr, Context, Device, nullptr, nullptr, Size, + static_cast(Alignment)); } -umf_result_t USMMemoryProvider::get_min_page_size(void *Ptr, size_t *PageSize) { - (void)Ptr; - *PageSize = MinPageSize; - - return UMF_RESULT_SUCCESS; +ur_result_t CudaUSMSharedMemoryProvider::freeImpl(void *Ptr, size_t) { + return USMFreeImpl(Context, Ptr); } -ur_result_t USMSharedMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) { - return USMSharedAllocImpl(ResultPtr, Context, Device, nullptr, nullptr, Size, - Alignment); -} - -ur_result_t USMDeviceMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) { +ur_result_t CudaUSMDeviceMemoryProvider::allocateImpl(void **ResultPtr, + size_t Size, + size_t Alignment) { return USMDeviceAllocImpl(ResultPtr, Context, Device, nullptr, Size, - Alignment); + static_cast(Alignment)); } -ur_result_t USMHostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) { - return USMHostAllocImpl(ResultPtr, Context, nullptr, Size, Alignment); +ur_result_t 
CudaUSMDeviceMemoryProvider::freeImpl(void *Ptr, size_t) { + return USMFreeImpl(Context, Ptr); } -// Template helper function for creating USM pools for given pool descriptor. -template -std::pair -createUMFPoolForDesc(usm::pool_descriptor &Desc, Args &&...args) { - umf_result_t UmfRet = UMF_RESULT_SUCCESS; - umf::provider_unique_handle_t MemProvider = nullptr; - - switch (Desc.type) { - case UR_USM_TYPE_HOST: { - std::tie(UmfRet, MemProvider) = - umf::memoryProviderMakeUnique(Desc.hContext, - Desc.hDevice); - break; - } - case UR_USM_TYPE_DEVICE: { - std::tie(UmfRet, MemProvider) = - umf::memoryProviderMakeUnique(Desc.hContext, - Desc.hDevice); - break; - } - case UR_USM_TYPE_SHARED: { - std::tie(UmfRet, MemProvider) = - umf::memoryProviderMakeUnique(Desc.hContext, - Desc.hDevice); - break; - } - default: - UmfRet = UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - if (UmfRet) - return std::pair{ - umf::umf2urResult(UmfRet), nullptr}; - - umf::pool_unique_handle_t Pool = nullptr; - std::tie(UmfRet, Pool) = - umf::poolMakeUnique({std::move(MemProvider)}, args...); - - return std::pair{ - umf::umf2urResult(UmfRet), std::move(Pool)}; -}; - -ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, - ur_usm_pool_desc_t *PoolDesc) - : Context(Context) { - const void *pNext = PoolDesc->pNext; - while (pNext != nullptr) { - const ur_base_desc_t *BaseDesc = static_cast(pNext); - switch (BaseDesc->stype) { - case UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC: { - const ur_usm_pool_limits_desc_t *Limits = - reinterpret_cast(BaseDesc); - for (auto &config : DisjointPoolConfigs.Configs) { - config.MaxPoolableSize = Limits->maxPoolableSize; - config.SlabMinSize = Limits->minDriverAllocSize; - } - break; - } - default: { - throw UsmAllocationException(UR_RESULT_ERROR_INVALID_ARGUMENT); - } - } - pNext = BaseDesc->pNext; - } - - ur_result_t Ret; - std::tie(Ret, PoolManager) = - usm::pool_manager::create(); - if (Ret) { - throw UsmAllocationException(Ret); - } - - auto Device = 
Context->DeviceID; - auto UrUSMPool = reinterpret_cast(this); - - // TODO: Replace this with appropriate usm::pool_descriptor 'create' static - // function. - usm::pool_descriptor Descs[] = { - {UrUSMPool, Context, nullptr, UR_USM_TYPE_HOST, false}, - {UrUSMPool, Context, Device, UR_USM_TYPE_DEVICE, false}, - {UrUSMPool, Context, Device, UR_USM_TYPE_SHARED, false}}; - - // Helper lambda function matching USM type to DisjointPoolMemType - auto descTypeToDisjointPoolType = - [](usm::pool_descriptor &Desc) -> usm::DisjointPoolMemType { - switch (Desc.type) { - case UR_USM_TYPE_HOST: - return usm::DisjointPoolMemType::Host; - case UR_USM_TYPE_DEVICE: - return usm::DisjointPoolMemType::Device; - case UR_USM_TYPE_SHARED: - return (Desc.deviceReadOnly) ? usm::DisjointPoolMemType::SharedReadOnly - : usm::DisjointPoolMemType::Shared; - default: - // Added to suppress 'not all control paths return a value' warning. - return usm::DisjointPoolMemType::All; - } - }; - - for (auto &Desc : Descs) { - umf::pool_unique_handle_t Pool = nullptr; - auto PoolType = descTypeToDisjointPoolType(Desc); - - std::tie(Ret, Pool) = createUMFPoolForDesc( - Desc, this->DisjointPoolConfigs.Configs[PoolType]); - if (Ret) { - throw UsmAllocationException(Ret); - } - - PoolManager.addPool(Desc, Pool); - } - - Context->addPool(this); +ur_result_t CudaUSMHostMemoryProvider::allocateImpl(void **ResultPtr, + size_t Size, + size_t Alignment) { + return USMHostAllocImpl(ResultPtr, Context, nullptr, Size, + static_cast(Alignment)); } -bool ur_usm_pool_handle_t_::hasUMFPool(umf_memory_pool_t *umf_pool) { - return PoolManager.hasPool(umf_pool); +ur_result_t CudaUSMHostMemoryProvider::freeImpl(void *Ptr, size_t) { + return USMFreeImpl(Context, Ptr); } UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( @@ -534,9 +374,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( try { *Pool = reinterpret_cast( new ur_usm_pool_handle_t_(Context, PoolDesc)); - } catch (const UsmAllocationException &Ex) { + } catch 
(const umf::UsmAllocationException &Ex) { return Ex.getError(); } + Context->addPool(*Pool); return UR_RESULT_SUCCESS; #else std::ignore = Context; diff --git a/source/adapters/cuda/usm.hpp b/source/adapters/cuda/usm.hpp index 541e3617d3..2ad2d7a458 100644 --- a/source/adapters/cuda/usm.hpp +++ b/source/adapters/cuda/usm.hpp @@ -8,111 +8,27 @@ // //===----------------------------------------------------------------------===// -#include "common.hpp" - #include #include +#include #include -usm::DisjointPoolAllConfigs InitializeDisjointPoolConfig(); - -struct ur_usm_pool_handle_t_ { - std::atomic_uint32_t RefCount = 1; - - ur_context_handle_t Context = nullptr; - - usm::DisjointPoolAllConfigs DisjointPoolConfigs = - usm::DisjointPoolAllConfigs(); - - usm::pool_manager PoolManager; - - ur_usm_pool_handle_t_(ur_context_handle_t Context, - ur_usm_pool_desc_t *PoolDesc); - - uint32_t incrementReferenceCount() noexcept { return ++RefCount; } - - uint32_t decrementReferenceCount() noexcept { return --RefCount; } - - uint32_t getReferenceCount() const noexcept { return RefCount; } - - bool hasUMFPool(umf_memory_pool_t *umf_pool); -}; - -// Exception type to pass allocation errors -class UsmAllocationException { - const ur_result_t Error; - -public: - UsmAllocationException(ur_result_t Err) : Error{Err} {} - ur_result_t getError() const { return Error; } -}; - -// Implements memory allocation via driver API for USM allocator interface. 
-class USMMemoryProvider { -private: - ur_result_t &getLastStatusRef() { - static thread_local ur_result_t LastStatus = UR_RESULT_SUCCESS; - return LastStatus; - } - -protected: - ur_context_handle_t Context; - ur_device_handle_t Device; - size_t MinPageSize; - - // Internal allocation routine which must be implemented for each allocation - // type - virtual ur_result_t allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) = 0; - -public: - umf_result_t initialize(ur_context_handle_t Ctx, ur_device_handle_t Dev); - umf_result_t alloc(size_t Size, size_t Align, void **Ptr); - umf_result_t free(void *Ptr, size_t Size); - void get_last_native_error(const char **ErrMsg, int32_t *ErrCode); - umf_result_t get_min_page_size(void *, size_t *); - umf_result_t get_recommended_page_size(size_t, size_t *) { - return UMF_RESULT_ERROR_NOT_SUPPORTED; - }; - umf_result_t purge_lazy(void *, size_t) { - return UMF_RESULT_ERROR_NOT_SUPPORTED; - }; - umf_result_t purge_force(void *, size_t) { - return UMF_RESULT_ERROR_NOT_SUPPORTED; - }; - virtual const char *get_name() = 0; - - virtual ~USMMemoryProvider() = default; -}; - -// Allocation routines for shared memory type -class USMSharedMemoryProvider final : public USMMemoryProvider { -public: - const char *get_name() override { return "USMSharedMemoryProvider"; } - -protected: +class CudaUSMSharedMemoryProvider : public umf::USMSharedMemoryProvider { ur_result_t allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) override; + size_t Alignment) override; + ur_result_t freeImpl(void *Ptr, size_t) override; }; -// Allocation routines for device memory type -class USMDeviceMemoryProvider final : public USMMemoryProvider { -public: - const char *get_name() override { return "USMSharedMemoryProvider"; } - -protected: +class CudaUSMHostMemoryProvider : public umf::USMHostMemoryProvider { ur_result_t allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) override; + size_t Alignment) override; + 
ur_result_t freeImpl(void *Ptr, size_t) override; }; -// Allocation routines for host memory type -class USMHostMemoryProvider final : public USMMemoryProvider { -public: - const char *get_name() override { return "USMSharedMemoryProvider"; } - -protected: +class CudaUSMDeviceMemoryProvider : public umf::USMDeviceMemoryProvider { ur_result_t allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) override; + size_t Alignment) override; + ur_result_t freeImpl(void *Ptr, size_t) override; }; ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t Context, @@ -129,3 +45,14 @@ ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t Context, ur_result_t USMHostAllocImpl(void **ResultPtr, ur_context_handle_t Context, ur_usm_host_mem_flags_t *Flags, size_t Size, uint32_t Alignment); + +struct ur_usm_pool_handle_t_ + : public usm::pool_handle_base { + ur_usm_pool_handle_t_(ur_context_handle_t Context, + ur_usm_pool_desc_t *PoolDesc) + : usm::pool_handle_base(Context, PoolDesc){}; +}; diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index f161fd411d..dc8bd4d494 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -14,7 +14,9 @@ #include #include "context.hpp" -#include "ur_level_zero.hpp" +#include "event.hpp" +#include "platform.hpp" +#include "usm.hpp" UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( uint32_t DeviceCount, ///< [in] the number of devices given in phDevices @@ -183,67 +185,28 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -// Template helper function for creating USM pools for given pool descriptor. 
-template -std::pair -createUMFPoolForDesc(usm::pool_descriptor &Desc, Args &&...args) { - umf_result_t UmfRet = UMF_RESULT_SUCCESS; - umf::provider_unique_handle_t MemProvider = nullptr; - - switch (Desc.type) { - case UR_USM_TYPE_HOST: { - std::tie(UmfRet, MemProvider) = - umf::memoryProviderMakeUnique(Desc.hContext, - Desc.hDevice); - break; - } - case UR_USM_TYPE_DEVICE: { - std::tie(UmfRet, MemProvider) = - umf::memoryProviderMakeUnique(Desc.hContext, - Desc.hDevice); - break; - } - case UR_USM_TYPE_SHARED: { - if (Desc.deviceReadOnly) { - std::tie(UmfRet, MemProvider) = - umf::memoryProviderMakeUnique( - Desc.hContext, Desc.hDevice); - } else { - std::tie(UmfRet, MemProvider) = - umf::memoryProviderMakeUnique(Desc.hContext, - Desc.hDevice); - } - break; - } - default: - UmfRet = UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - if (UmfRet) - return std::pair{ - umf::umf2urResult(UmfRet), nullptr}; - - umf::pool_unique_handle_t Pool = nullptr; - std::tie(UmfRet, Pool) = - umf::poolMakeUnique({std::move(MemProvider)}, args...); - - return std::pair{ - umf::umf2urResult(UmfRet), std::move(Pool)}; -}; - ur_result_t ur_context_handle_t_::initialize() { + // Create the immediate command list to be used for initializations. + // Created as synchronous so level-zero performs implicit synchronization and + // there is no need to query for completion in the plugin + // + // TODO: we use Device[0] here as the single immediate command-list + // for buffer creation and migration. Initialization is in + // in sync and is always performed to Devices[0] as well but + // D2D migartion, if no P2P, is broken since it should use + // immediate command-list for the specfic devices, and this single one. + // + ur_device_handle_t Device = SingleRootDevice ? SingleRootDevice : Devices[0]; - auto Context = reinterpret_cast(this); - ur_result_t Ret; - - // Initialize pool managers. 
- std::tie(Ret, PoolManager) = - usm::pool_manager::create(); - if (Ret) { - urPrint("urContextCreate: unexpected internal error\n"); - return Ret; + // We can initialize our default pool with a regular pool create + ur_usm_pool_desc_t PoolDesc = {UR_STRUCTURE_TYPE_USM_POOL_DESC, nullptr, 0}; + if (auto err = urUSMPoolCreate(this, &PoolDesc, &Pool); + err != UR_RESULT_SUCCESS) { + return err; } + // Initializing the proxy pool manager is a little more involved + ur_result_t Ret; std::tie(Ret, ProxyPoolManager) = usm::pool_manager::create(); if (Ret) { @@ -253,44 +216,19 @@ ur_result_t ur_context_handle_t_::initialize() { std::vector Descs; // Create pool descriptor for every device and subdevice. - std::tie(Ret, Descs) = usm::pool_descriptor::create(nullptr, Context); + std::tie(Ret, Descs) = usm::pool_descriptor::createDefaults(nullptr, this); if (Ret) { urPrint("urContextCreate: unexpected internal error\n"); return Ret; } - auto descTypeToDisjointPoolType = - [](usm::pool_descriptor &Desc) -> usm::DisjointPoolMemType { - switch (Desc.type) { - case UR_USM_TYPE_HOST: - return usm::DisjointPoolMemType::Host; - case UR_USM_TYPE_DEVICE: - return usm::DisjointPoolMemType::Device; - case UR_USM_TYPE_SHARED: - return (Desc.deviceReadOnly) ? usm::DisjointPoolMemType::SharedReadOnly - : usm::DisjointPoolMemType::Shared; - default: - // Added to suppress 'not all control paths return a value' warning. - return usm::DisjointPoolMemType::All; - } - }; - // Create USM pool for each pool descriptor and add it to pool manager. 
for (auto &Desc : Descs) { - umf::pool_unique_handle_t Pool = nullptr; - auto PoolType = descTypeToDisjointPoolType(Desc); - - std::tie(Ret, Pool) = createUMFPoolForDesc( - Desc, DisjointPoolConfigInstance.Configs[PoolType]); - if (Ret) { - urPrint("urContextCreate: unexpected internal error\n"); - return Ret; - } - - PoolManager.addPool(Desc, Pool); - umf::pool_unique_handle_t ProxyPool = nullptr; - std::tie(Ret, ProxyPool) = createUMFPoolForDesc(Desc); + std::tie(Ret, ProxyPool) = + usm::createUMFPoolForDesc(Desc); if (Ret) { urPrint("urContextCreate: unexpected internal error\n"); return Ret; @@ -299,18 +237,6 @@ ur_result_t ur_context_handle_t_::initialize() { ProxyPoolManager.addPool(Desc, ProxyPool); } - // Create the immediate command list to be used for initializations. - // Created as synchronous so level-zero performs implicit synchronization and - // there is no need to query for completion in the plugin - // - // TODO: we use Device[0] here as the single immediate command-list - // for buffer creation and migration. Initialization is in - // in sync and is always performed to Devices[0] as well but - // D2D migartion, if no P2P, is broken since it should use - // immediate command-list for the specfic devices, and this single one. - // - ur_device_handle_t Device = SingleRootDevice ? SingleRootDevice : Devices[0]; - // Prefer to use copy engine for initialization copies, // if available and allowed (main copy engine with index 0). ZeStruct ZeCommandQueueDesc; @@ -410,6 +336,10 @@ ur_result_t ur_context_handle_t_::finalize() { // urContextRelease. There could be some memory that may have not been // deallocated. For example, event and event pool caches would be still alive. 
+ if (auto Err = urUSMPoolRelease(Pool); Err != UR_RESULT_SUCCESS) { + return Err; + } + if (!DisableEventsCaching) { std::scoped_lock Lock(EventCacheMutex); for (auto &EventCache : EventCaches) { diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index c6f292e5b4..18b059df1b 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -95,20 +95,10 @@ struct ur_context_handle_t_ : _ur_object { ZeStruct>>> ZeCopyCommandListCache; - // Store USM pool for USM shared and device allocations. There is 1 memory - // pool per each pair of (context, device) per each memory type. - usm::pool_manager PoolManager; + // This default pool is used when a pool isn't provided by the application. + ur_usm_pool_handle_t Pool; usm::pool_manager ProxyPoolManager; - // Allocation-tracking proxy pools for direct allocations. No pooling used. - std::unordered_map - DeviceMemProxyPools; - std::unordered_map - SharedMemProxyPools; - std::unordered_map - SharedReadOnlyMemProxyPools; - umf::pool_unique_handle_t HostMemProxyPool; - // Map associating pools created with urUsmPoolCreate and internal pools std::list UsmPoolHandles{}; @@ -246,8 +236,3 @@ struct ur_context_handle_t_ : _ur_object { // mutex guarding the container with contexts because the context can be removed // from the list of tracked contexts. ur_result_t ContextReleaseHelper(ur_context_handle_t Context); - -// Template helper function for creating USM pools for given pool descriptor. 
-template -std::pair -createUMFPoolForDesc(usm::pool_descriptor &Desc, Args &&...args); diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index 1c654b1857..2ab8915018 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -8,17 +8,13 @@ // //===----------------------------------------------------------------------===// -#include -#include #include #include "common.hpp" #include "context.hpp" -#include "event.hpp" +#include "platform.hpp" #include "usm.hpp" -#include "ur_level_zero.hpp" - #include usm::DisjointPoolAllConfigs InitializeDisjointPoolConfig() { @@ -144,7 +140,7 @@ static ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t Context, ur_device_handle_t Device, ur_usm_device_mem_flags_t *Flags, - size_t Size, uint32_t Alignment) { + size_t Size, size_t Alignment) { std::ignore = Flags; // TODO: translate PI properties to Level Zero flags ZeStruct ZeDesc; @@ -185,12 +181,10 @@ static ur_result_t USMDeviceAllocImpl(void **ResultPtr, return UR_RESULT_SUCCESS; } -static ur_result_t USMSharedAllocImpl(void **ResultPtr, - ur_context_handle_t Context, - ur_device_handle_t Device, - ur_usm_host_mem_flags_t *, - ur_usm_device_mem_flags_t *, size_t Size, - uint32_t Alignment) { +static ur_result_t +USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t Context, + ur_device_handle_t Device, ur_usm_host_mem_flags_t *, + ur_usm_device_mem_flags_t *, size_t Size, size_t Alignment) { // TODO: translate PI properties to Level Zero flags ZeStruct ZeHostDesc; @@ -236,7 +230,7 @@ static ur_result_t USMSharedAllocImpl(void **ResultPtr, static ur_result_t USMHostAllocImpl(void **ResultPtr, ur_context_handle_t Context, ur_usm_host_mem_flags_t *Flags, size_t Size, - uint32_t Alignment) { + size_t Alignment) { std::ignore = Flags; // TODO: translate PI properties to Level Zero flags ZeStruct ZeHostDesc; @@ -309,8 +303,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( // find the 
allocator depending on context as we do for Shared and Device // allocations. std::optional hPoolInternalOpt = std::nullopt; - usm::pool_descriptor Desc = {nullptr, Context, nullptr, UR_USM_TYPE_HOST, - false}; + usm::pool_descriptor Desc(nullptr, Context, nullptr, UR_USM_TYPE_HOST, + USMDesc); if (!UseUSMAllocator || // L0 spec says that allocation fails if Alignment != 2^n, in order to // keep the same behavior for the allocator, just call L0 API directly and @@ -320,9 +314,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( } else if (Pool) { // Getting user-created pool requires 'poolHandle' field. Desc.poolHandle = Pool; - hPoolInternalOpt = Pool->PoolManager.getPool(Desc); + hPoolInternalOpt = Pool->getOrCreatePool(Desc); } else { - hPoolInternalOpt = Context->PoolManager.getPool(Desc); + hPoolInternalOpt = Context->Pool->getOrCreatePool(Desc); } if (!hPoolInternalOpt.has_value()) { @@ -392,8 +386,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( } std::optional hPoolInternalOpt = std::nullopt; - usm::pool_descriptor Desc = {nullptr, Context, Device, UR_USM_TYPE_DEVICE, - false}; + usm::pool_descriptor Desc(nullptr, Context, Device, UR_USM_TYPE_DEVICE, + USMDesc); if (!UseUSMAllocator || // L0 spec says that allocation fails if Alignment != 2^n, in order to // keep the same behavior for the allocator, just call L0 API directly and @@ -403,9 +397,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( } else if (Pool) { // Getting user-created pool requires 'poolHandle' field. Desc.poolHandle = Pool; - hPoolInternalOpt = Pool->PoolManager.getPool(Desc); + hPoolInternalOpt = Pool->getOrCreatePool(Desc); } else { - hPoolInternalOpt = Context->PoolManager.getPool(Desc); + hPoolInternalOpt = Context->Pool->getOrCreatePool(Desc); } if (!hPoolInternalOpt.has_value()) { @@ -446,31 +440,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( uint32_t Alignment = USMDesc ? 
USMDesc->align : 0; - ur_usm_host_mem_flags_t UsmHostFlags{}; - - // See if the memory is going to be read-only on the device. - bool DeviceReadOnly = false; - ur_usm_device_mem_flags_t UsmDeviceFlags{}; - - void *pNext = USMDesc ? const_cast(USMDesc->pNext) : nullptr; - while (pNext != nullptr) { - const ur_base_desc_t *BaseDesc = - reinterpret_cast(pNext); - if (BaseDesc->stype == UR_STRUCTURE_TYPE_USM_DEVICE_DESC) { - const ur_usm_device_desc_t *UsmDeviceDesc = - reinterpret_cast(pNext); - UsmDeviceFlags = UsmDeviceDesc->flags; - } - if (BaseDesc->stype == UR_STRUCTURE_TYPE_USM_HOST_DESC) { - const ur_usm_host_desc_t *UsmHostDesc = - reinterpret_cast(pNext); - UsmHostFlags = UsmHostDesc->flags; - std::ignore = UsmHostFlags; - } - pNext = const_cast(BaseDesc->pNext); - } - DeviceReadOnly = UsmDeviceFlags & UR_USM_DEVICE_MEM_FLAG_DEVICE_READ_ONLY; - // L0 supports alignment up to 64KB and silently ignores higher values. // We flag alignment > 64KB as an invalid value. if (Alignment > 65536) @@ -496,8 +465,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( } std::optional hPoolInternalOpt = std::nullopt; - usm::pool_descriptor Desc = {nullptr, Context, Device, UR_USM_TYPE_SHARED, - DeviceReadOnly}; + usm::pool_descriptor Desc(nullptr, Context, Device, UR_USM_TYPE_SHARED, + USMDesc); if (!UseUSMAllocator || // L0 spec says that allocation fails if Alignment != 2^n, in order to // keep the same behavior for the allocator, just call L0 API directly and @@ -507,9 +476,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( } else if (Pool) { // Getting user-created pool requires 'poolHandle' field. 
Desc.poolHandle = Pool; - hPoolInternalOpt = Pool->PoolManager.getPool(Desc); + hPoolInternalOpt = Pool->getOrCreatePool(Desc); } else { - hPoolInternalOpt = Context->PoolManager.getPool(Desc); + hPoolInternalOpt = Context->Pool->getOrCreatePool(Desc); } if (!hPoolInternalOpt.has_value()) { @@ -629,7 +598,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( return UR_RESULT_SUCCESS; } -static ur_result_t USMFreeImpl(ur_context_handle_t Context, void *Ptr) { +ur_result_t USMFreeImpl(ur_context_handle_t Context, void *Ptr) { auto ZeResult = ZE_CALL_NOCHECK(zeMemFree, (Context->ZeContext, Ptr)); // Handle When the driver is already released if (ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) { @@ -639,212 +608,82 @@ static ur_result_t USMFreeImpl(ur_context_handle_t Context, void *Ptr) { } } -static ur_result_t USMQueryPageSize(ur_context_handle_t Context, void *Ptr, - size_t *PageSize) { - ZeStruct AllocProperties = {}; - ZE2UR_CALL(zeMemGetAllocProperties, - (Context->ZeContext, Ptr, &AllocProperties, nullptr)); - *PageSize = AllocProperties.pageSize; - - return UR_RESULT_SUCCESS; -} - -umf_result_t L0MemoryProvider::initialize(ur_context_handle_t Ctx, - ur_device_handle_t Dev) { - Context = Ctx; - Device = Dev; - - return UMF_RESULT_SUCCESS; -} - -enum umf_result_t L0MemoryProvider::alloc(size_t Size, size_t Align, - void **Ptr) { - auto Res = allocateImpl(Ptr, Size, Align); - if (Res != UR_RESULT_SUCCESS) { - getLastStatusRef() = Res; - return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; - } - - return UMF_RESULT_SUCCESS; -} - -enum umf_result_t L0MemoryProvider::free(void *Ptr, size_t Size) { - (void)Size; - - auto Res = USMFreeImpl(Context, Ptr); - if (Res != UR_RESULT_SUCCESS) { - getLastStatusRef() = Res; - return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; - } - - return UMF_RESULT_SUCCESS; -} - -umf_result_t L0MemoryProvider::GetL0MinPageSize(void *Mem, size_t *PageSize) { - ur_result_t Res = UR_RESULT_SUCCESS; - void *Ptr = Mem; - - if (!Mem) { - Res = 
allocateImpl(&Ptr, 1, 0); - if (Res != UR_RESULT_SUCCESS) { - goto err_set_status; +ur_result_t GetL0MinPageSize(ur_context_handle_t Context, void *Ptr, + size_t *PageSize, + umf::USMMemoryProvider *Provider) { + // If we didn't get a pointer to check we need to use the memory provider to + // make a small allocation so we can run the query + void *CheckAlloc = Ptr; + if (!Ptr) { + if (auto AllocRes = Provider->alloc(1, 0, &CheckAlloc); + AllocRes != UMF_RESULT_SUCCESS) { + return umf::umf2urResult(AllocRes); } } - // Query L0 for the minimal page size. - Res = USMQueryPageSize(Context, Ptr, PageSize); - if (Res != UR_RESULT_SUCCESS) { - goto err_dealloc; + ZeStruct AllocProperties = {}; + auto Res = ze2urResult( + ZE_CALL_NOCHECK(zeMemGetAllocProperties, (Context->ZeContext, CheckAlloc, + &AllocProperties, nullptr))); + if (Res == UR_RESULT_SUCCESS) { + *PageSize = AllocProperties.pageSize; } - if (!Mem) { - Res = USMFreeImpl(Context, Ptr); - if (Res != UR_RESULT_SUCCESS) { - goto err_set_status; + if (!Ptr) { + if (auto FreeRes = Provider->free(CheckAlloc, 1); + FreeRes != UMF_RESULT_SUCCESS) { + return umf::umf2urResult(FreeRes); } } - return UMF_RESULT_SUCCESS; - -err_dealloc: - if (!Mem) { - USMFreeImpl(Context, Ptr); - } -err_set_status: - getLastStatusRef() = Res; - return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + return Res; } -umf_result_t L0MemoryProvider::get_min_page_size(void *Ptr, size_t *PageSize) { - std::ignore = Ptr; - - // Query L0 for min page size. Use provided 'Ptr'. - if (Ptr) { - return GetL0MinPageSize(Ptr, PageSize); - } - - // Return cached min page size. - if (MinPageSizeCached) { - *PageSize = MinPageSize; - return UMF_RESULT_SUCCESS; - } - - // Query L0 for min page size and cache it in 'MinPageSize'. 
- auto Ret = GetL0MinPageSize(nullptr, &MinPageSize); - if (Ret) { - return Ret; +umf_result_t L0SharedMemoryProvider::initialize(usm::pool_descriptor Desc) { + if (auto Res = USMSharedMemoryProvider::initialize(Desc); + Res != UMF_RESULT_SUCCESS) { + return Res; } - - *PageSize = MinPageSize; - MinPageSizeCached = true; - + UMF_PROVIDER_CHECK_UR_RESULT( + GetL0MinPageSize(Context, nullptr, &MinPageSize, this)) return UMF_RESULT_SUCCESS; } ur_result_t L0SharedMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) { + size_t Alignment) { return USMSharedAllocImpl(ResultPtr, Context, Device, nullptr, nullptr, Size, Alignment); } -ur_result_t L0SharedReadOnlyMemoryProvider::allocateImpl(void **ResultPtr, - size_t Size, - uint32_t Alignment) { - ur_usm_device_desc_t UsmDeviceDesc{}; - UsmDeviceDesc.flags = UR_USM_DEVICE_MEM_FLAG_DEVICE_READ_ONLY; - ur_usm_host_desc_t UsmHostDesc{}; - return USMSharedAllocImpl(ResultPtr, Context, Device, &UsmDeviceDesc.flags, - &UsmHostDesc.flags, Size, Alignment); +umf_result_t L0DeviceMemoryProvider::initialize(usm::pool_descriptor Desc) { + if (auto Res = USMDeviceMemoryProvider::initialize(Desc); + Res != UMF_RESULT_SUCCESS) { + return Res; + } + UMF_PROVIDER_CHECK_UR_RESULT( + GetL0MinPageSize(Context, nullptr, &MinPageSize, this)) + return UMF_RESULT_SUCCESS; } ur_result_t L0DeviceMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) { + size_t Alignment) { return USMDeviceAllocImpl(ResultPtr, Context, Device, nullptr, Size, Alignment); } -ur_result_t L0HostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) { - return USMHostAllocImpl(ResultPtr, Context, nullptr, Size, Alignment); -} - -ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, - ur_usm_pool_desc_t *PoolDesc) { - - this->Context = Context; - zeroInit = static_cast(PoolDesc->flags & - UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK); - - void *pNext = 
const_cast(PoolDesc->pNext); - while (pNext != nullptr) { - const ur_base_desc_t *BaseDesc = - reinterpret_cast(pNext); - switch (BaseDesc->stype) { - case UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC: { - const ur_usm_pool_limits_desc_t *Limits = - reinterpret_cast(BaseDesc); - for (auto &config : DisjointPoolConfigs.Configs) { - config.MaxPoolableSize = Limits->maxPoolableSize; - config.SlabMinSize = Limits->minDriverAllocSize; - } - break; - } - default: { - urPrint("urUSMPoolCreate: unexpected chained stype\n"); - throw UsmAllocationException(UR_RESULT_ERROR_INVALID_ARGUMENT); - } - } - pNext = const_cast(BaseDesc->pNext); +umf_result_t L0HostMemoryProvider::initialize(usm::pool_descriptor Desc) { + if (auto Res = USMHostMemoryProvider::initialize(Desc); + Res != UMF_RESULT_SUCCESS) { + return Res; } + UMF_PROVIDER_CHECK_UR_RESULT( + GetL0MinPageSize(Context, nullptr, &MinPageSize, this)) + return UMF_RESULT_SUCCESS; +} - ur_result_t Ret; - std::tie(Ret, PoolManager) = - usm::pool_manager::create(); - if (Ret) { - urPrint("urUSMPoolCreate: unexpected internal error\n"); - throw UsmAllocationException(Ret); - } - - std::vector Descs; - // Create pool descriptor for every device and subdevice. - std::tie(Ret, Descs) = usm::pool_descriptor::create( - reinterpret_cast(this), Context); - if (Ret) { - urPrint("urUSMPoolCreate: unexpected internal error\n"); - throw UsmAllocationException(Ret); - } - - auto descTypeToDisjointPoolType = - [](usm::pool_descriptor &Desc) -> usm::DisjointPoolMemType { - switch (Desc.type) { - case UR_USM_TYPE_HOST: - return usm::DisjointPoolMemType::Host; - case UR_USM_TYPE_DEVICE: - return usm::DisjointPoolMemType::Device; - case UR_USM_TYPE_SHARED: - return (Desc.deviceReadOnly) ? usm::DisjointPoolMemType::SharedReadOnly - : usm::DisjointPoolMemType::Shared; - default: - assert(0 && "Invalid pool descriptor type!"); - // Added to suppress 'not all control paths return a value' warning. 
- return usm::DisjointPoolMemType::All; - } - }; - - // Create USM pool for each pool descriptor and add it to pool manager. - for (auto &Desc : Descs) { - umf::pool_unique_handle_t Pool = nullptr; - auto PoolType = descTypeToDisjointPoolType(Desc); - - std::tie(Ret, Pool) = createUMFPoolForDesc( - Desc, DisjointPoolConfigInstance.Configs[PoolType]); - if (Ret) { - urPrint("urUSMPoolCreate: unexpected internal error\n"); - throw UsmAllocationException(Ret); - } - - PoolManager.addPool(Desc, Pool); - } +ur_result_t L0HostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, + size_t Alignment) { + return USMHostAllocImpl(ResultPtr, Context, nullptr, Size, Alignment); } UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( @@ -862,7 +701,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( std::shared_lock ContextLock(Context->Mutex); Context->UsmPoolHandles.insert(Context->UsmPoolHandles.cend(), *Pool); - } catch (const UsmAllocationException &Ex) { + } catch (const umf::UsmAllocationException &Ex) { return Ex.getError(); } return UR_RESULT_SUCCESS; @@ -871,18 +710,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( ur_result_t urUSMPoolRetain(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool ) { - Pool->RefCount.increment(); + Pool->incrementReferenceCount(); return UR_RESULT_SUCCESS; } ur_result_t urUSMPoolRelease(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool ) { - if (Pool->RefCount.decrementAndTest()) { - std::shared_lock ContextLock(Pool->Context->Mutex); - Pool->Context->UsmPoolHandles.remove(Pool); - delete Pool; + if (Pool->decrementReferenceCount() > 0) { + return UR_RESULT_SUCCESS; } + std::shared_lock ContextLock(Pool->Context->Mutex); + Pool->Context->UsmPoolHandles.remove(Pool); + delete Pool; return UR_RESULT_SUCCESS; } diff --git a/source/adapters/level_zero/usm.hpp b/source/adapters/level_zero/usm.hpp index 78a335a04d..9eee0d8c5c 100644 --- a/source/adapters/level_zero/usm.hpp +++ 
b/source/adapters/level_zero/usm.hpp @@ -9,125 +9,102 @@ //===----------------------------------------------------------------------===// #pragma once -#include "common.hpp" - #include +#include #include usm::DisjointPoolAllConfigs InitializeDisjointPoolConfig(); -struct ur_usm_pool_handle_t_ : _ur_object { - bool zeroInit; - - usm::DisjointPoolAllConfigs DisjointPoolConfigs = - InitializeDisjointPoolConfig(); +ur_result_t USMFreeImpl(ur_context_handle_t Context, void *Ptr); - usm::pool_manager PoolManager; +// Helper that queries the min page size from the given pointer. If no pointer +// is provided (i.e. Ptr == nullptr), Provider is used to create a small +// allocation for the purpose of running the query. +ur_result_t GetL0MinPageSize(ur_context_handle_t Context, void *Ptr, + size_t *PageSize, + umf::USMMemoryProvider *Provider); - ur_context_handle_t Context{}; +// Allocation routines for shared memory type +class L0SharedMemoryProvider final : public umf::USMSharedMemoryProvider { +public: + umf_result_t initialize(usm::pool_descriptor Desc) override; - ur_usm_pool_handle_t_(ur_context_handle_t Context, - ur_usm_pool_desc_t *PoolDesc); -}; + umf_result_t get_min_page_size(void *Ptr, size_t *PageSize) override { + if (!Ptr) { + *PageSize = MinPageSize; + return UMF_RESULT_SUCCESS; + } + UMF_PROVIDER_CHECK_UR_RESULT( + GetL0MinPageSize(Context, Ptr, PageSize, this)); + return UMF_RESULT_SUCCESS; + } -// Exception type to pass allocation errors -class UsmAllocationException { - const ur_result_t Error; +protected: + ur_result_t allocateImpl(void **ResultPtr, size_t Size, + size_t Alignment) override; -public: - UsmAllocationException(ur_result_t Err) : Error{Err} {} - ur_result_t getError() const { return Error; } + ur_result_t freeImpl(void *Ptr, size_t) override { + return USMFreeImpl(Context, Ptr); + } }; -// UMF memory provider interface for USM. 
-class USMMemoryProviderBase { -protected: - ur_context_handle_t Context; - ur_device_handle_t Device; +// Allocation routines for device memory type +class L0DeviceMemoryProvider final : public umf::USMDeviceMemoryProvider { +public: + umf_result_t initialize(usm::pool_descriptor Desc) override; - ur_result_t &getLastStatusRef() { - static thread_local ur_result_t LastStatus = UR_RESULT_SUCCESS; - return LastStatus; + umf_result_t get_min_page_size(void *Ptr, size_t *PageSize) override { + if (!Ptr) { + *PageSize = MinPageSize; + return UMF_RESULT_SUCCESS; + } + UMF_PROVIDER_CHECK_UR_RESULT( + GetL0MinPageSize(Context, Ptr, PageSize, this)); + return UMF_RESULT_SUCCESS; } - // Internal allocation routine which must be implemented for each allocation - // type - virtual ur_result_t allocateImpl(void **, size_t, uint32_t) = 0; +protected: + ur_result_t allocateImpl(void **ResultPtr, size_t Size, + size_t Alignment) override; -public: - virtual void get_last_native_error(const char **ErrMsg, int32_t *ErrCode) { - std::ignore = ErrMsg; - *ErrCode = static_cast(getLastStatusRef()); - }; - virtual umf_result_t initialize(ur_context_handle_t, ur_device_handle_t) { - return UMF_RESULT_ERROR_NOT_SUPPORTED; - }; - virtual umf_result_t alloc(size_t, size_t, void **) { - return UMF_RESULT_ERROR_NOT_SUPPORTED; - }; - virtual umf_result_t free(void *, size_t) { - return UMF_RESULT_ERROR_NOT_SUPPORTED; - }; - virtual umf_result_t get_min_page_size(void *, size_t *) { - return UMF_RESULT_ERROR_NOT_SUPPORTED; - }; - virtual umf_result_t get_recommended_page_size(size_t, size_t *) { - return UMF_RESULT_ERROR_NOT_SUPPORTED; - }; - virtual umf_result_t purge_lazy(void *, size_t) { - return UMF_RESULT_ERROR_NOT_SUPPORTED; - }; - virtual umf_result_t purge_force(void *, size_t) { - return UMF_RESULT_ERROR_NOT_SUPPORTED; - }; - virtual const char *get_name() { return ""; }; - virtual ~USMMemoryProviderBase() = default; + ur_result_t freeImpl(void *Ptr, size_t) override { + return 
USMFreeImpl(Context, Ptr); + } }; -// Implements USM memory provider interface for L0 RT USM memory allocations. -class L0MemoryProvider : public USMMemoryProviderBase { -private: - // Min page size query function for L0MemoryProvider. - umf_result_t GetL0MinPageSize(void *Mem, size_t *PageSize); - size_t MinPageSize = 0; - bool MinPageSizeCached = false; - +// Allocation routines for host memory type +class L0HostMemoryProvider final : public umf::USMHostMemoryProvider { public: - umf_result_t initialize(ur_context_handle_t Ctx, - ur_device_handle_t Dev) override; - umf_result_t alloc(size_t Size, size_t Align, void **Ptr) override; - umf_result_t free(void *Ptr, size_t Size) override; - umf_result_t get_min_page_size(void *, size_t *) override; - // TODO: Different name for each provider (Host/Shared/SharedRO/Device) - const char *get_name() override { return "L0"; }; -}; + umf_result_t initialize(usm::pool_descriptor Desc) override; -// Allocation routines for shared memory type -class L0SharedMemoryProvider final : public L0MemoryProvider { -protected: - ur_result_t allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) override; -}; + umf_result_t get_min_page_size(void *Ptr, size_t *PageSize) override { + if (!Ptr) { + *PageSize = MinPageSize; + return UMF_RESULT_SUCCESS; + } + UMF_PROVIDER_CHECK_UR_RESULT( + GetL0MinPageSize(Context, Ptr, PageSize, this)); + return UMF_RESULT_SUCCESS; + } -// Allocation routines for shared memory type that is only modified from host. 
-class L0SharedReadOnlyMemoryProvider final : public L0MemoryProvider { protected: ur_result_t allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) override; -}; + size_t Alignment) override; -// Allocation routines for device memory type -class L0DeviceMemoryProvider final : public L0MemoryProvider { -protected: - ur_result_t allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) override; + ur_result_t freeImpl(void *Ptr, size_t) override { + return USMFreeImpl(Context, Ptr); + } }; -// Allocation routines for host memory type -class L0HostMemoryProvider final : public L0MemoryProvider { -protected: - ur_result_t allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) override; +#undef PROVIDER_CHECK_UR_RESULT + +struct ur_usm_pool_handle_t_ + : public usm::pool_handle_base { + ur_usm_pool_handle_t_(ur_context_handle_t Context, + ur_usm_pool_desc_t *PoolDesc) + : usm::pool_handle_base(Context, PoolDesc){}; }; // Simple proxy for memory allocations. It is used for the UMF tracking diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt index 5c6fb231da..83debd8ded 100644 --- a/source/common/CMakeLists.txt +++ b/source/common/CMakeLists.txt @@ -8,7 +8,9 @@ add_subdirectory(umf_pools) add_ur_library(ur_common STATIC umf_helpers.hpp + umf_helpers.cpp ur_pool_manager.hpp + ur_pool_handle.hpp $<$:windows/ur_lib_loader.cpp> $<$:linux/ur_lib_loader.cpp> ) diff --git a/source/common/umf_helpers.cpp b/source/common/umf_helpers.cpp new file mode 100644 index 0000000000..5397e9778c --- /dev/null +++ b/source/common/umf_helpers.cpp @@ -0,0 +1,57 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include "umf_helpers.hpp" +#include "ur_pool_manager.hpp" + +namespace umf { +umf_result_t USMSharedMemoryProvider::initialize(usm::pool_descriptor Desc) { + Context = Desc.hContext; + Device = Desc.hDevice; + usmDesc.pNext = &hostDesc; + hostDesc.flags = Desc.hostFlags; + hostDesc.pNext = &deviceDesc; + deviceDesc.flags = Desc.deviceFlags; + + if (Desc.allocLocation.has_value()) { + allocLocation = Desc.allocLocation.value(); + deviceDesc.pNext = &allocLocation; + } + + return UMF_RESULT_SUCCESS; +} + +umf_result_t USMDeviceMemoryProvider::initialize(usm::pool_descriptor Desc) { + Context = Desc.hContext; + Device = Desc.hDevice; + usmDesc.pNext = &deviceDesc; + deviceDesc.flags = Desc.deviceFlags; + + if (Desc.allocLocation.has_value()) { + allocLocation = Desc.allocLocation.value(); + deviceDesc.pNext = &allocLocation; + } + + return UMF_RESULT_SUCCESS; +} + +umf_result_t USMHostMemoryProvider::initialize(usm::pool_descriptor Desc) { + Context = Desc.hContext; + usmDesc.pNext = &hostDesc; + hostDesc.flags = Desc.hostFlags; + + if (Desc.allocLocation.has_value()) { + allocLocation = Desc.allocLocation.value(); + hostDesc.pNext = &allocLocation; + } + + return UMF_RESULT_SUCCESS; +} +} // namespace umf diff --git a/source/common/umf_helpers.hpp b/source/common/umf_helpers.hpp index 4f2113f3d6..0d2083139d 100644 --- a/source/common/umf_helpers.hpp +++ b/source/common/umf_helpers.hpp @@ -20,10 +20,14 @@ #include #include #include -#include #include #include +namespace usm { +struct pool_descriptor; +class DisjointPoolConfig; +} // namespace usm + namespace umf { using pool_unique_handle_t = @@ -220,6 +224,116 @@ inline ur_result_t umf2urResult(umf_result_t umfResult) { }; } +// Exception type to pass allocation errors +class UsmAllocationException { + const ur_result_t Error; + + public: + UsmAllocationException(ur_result_t Err) : Error{Err} {} + ur_result_t getError() const 
{ return Error; } +}; + +#define UMF_PROVIDER_CHECK_UR_RESULT(Res) \ + if (Res != UR_RESULT_SUCCESS) { \ + getLastStatusRef() = Res; \ + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; \ + } + +// Implements memory allocation via driver API for USM allocator interface. +class USMMemoryProvider { + protected: + ur_result_t &getLastStatusRef() { + static thread_local ur_result_t LastStatus = UR_RESULT_SUCCESS; + return LastStatus; + } + + ur_context_handle_t Context; + ur_device_handle_t Device; + size_t MinPageSize = 0; + + virtual ur_result_t allocateImpl(void **, size_t, size_t) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + virtual ur_result_t freeImpl(void *, size_t) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + public: + virtual umf_result_t initialize(usm::pool_descriptor) = 0; + + umf_result_t alloc(size_t Size, size_t Align, void **Ptr) { + UMF_PROVIDER_CHECK_UR_RESULT(allocateImpl(Ptr, Size, Align)); + return UMF_RESULT_SUCCESS; + } + umf_result_t free(void *Ptr, size_t Size) { + UMF_PROVIDER_CHECK_UR_RESULT(freeImpl(Ptr, Size)); + return UMF_RESULT_SUCCESS; + } + void get_last_native_error(const char **, int32_t *ErrCode) { + *ErrCode = static_cast<int32_t>(getLastStatusRef()); + } + virtual umf_result_t get_min_page_size(void *, size_t *PageSize) { + *PageSize = MinPageSize; + return UMF_RESULT_SUCCESS; + } + + umf_result_t get_recommended_page_size(size_t, size_t *) { + return UMF_RESULT_ERROR_NOT_SUPPORTED; + }; + umf_result_t purge_lazy(void *, size_t) { + return UMF_RESULT_ERROR_NOT_SUPPORTED; + }; + umf_result_t purge_force(void *, size_t) { + return UMF_RESULT_ERROR_NOT_SUPPORTED; + }; + virtual const char *get_name() = 0; + + virtual ~USMMemoryProvider() = default; +}; + +// Allocation routines for shared memory type +class USMSharedMemoryProvider : public USMMemoryProvider { + public: + umf_result_t initialize(usm::pool_descriptor Desc) override; + const char *get_name() override { return "USMSharedMemoryProvider"; } + + protected: + 
ur_usm_host_desc_t hostDesc = {UR_STRUCTURE_TYPE_USM_HOST_DESC, nullptr, 0}; + ur_usm_device_desc_t deviceDesc = {UR_STRUCTURE_TYPE_USM_DEVICE_DESC, + nullptr, 0}; + ur_usm_alloc_location_desc_t allocLocation = { + UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC, nullptr, 0}; + ur_usm_desc_t usmDesc = {UR_STRUCTURE_TYPE_USM_DESC, nullptr, 0, 0}; +}; + +// Allocation routines for device memory type +class USMDeviceMemoryProvider : public USMMemoryProvider { + public: + umf_result_t initialize(usm::pool_descriptor Desc) override; + const char *get_name() override { return "USMDeviceMemoryProvider"; } + + protected: + ur_usm_device_desc_t deviceDesc = {UR_STRUCTURE_TYPE_USM_DEVICE_DESC, + nullptr, 0}; + ur_usm_alloc_location_desc_t allocLocation = { + UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC, nullptr, 0}; + ur_usm_desc_t usmDesc = {UR_STRUCTURE_TYPE_USM_DESC, nullptr, 0, 0}; +}; + +// Allocation routines for host memory type +class USMHostMemoryProvider : public USMMemoryProvider { + public: + umf_result_t initialize(usm::pool_descriptor Desc) override; + const char *get_name() override { return "USMHostMemoryProvider"; } + + protected: + ur_usm_host_desc_t hostDesc = {UR_STRUCTURE_TYPE_USM_HOST_DESC, nullptr, 0}; + ur_usm_alloc_location_desc_t allocLocation = { + UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC, nullptr, 0}; + ur_usm_desc_t usmDesc = {UR_STRUCTURE_TYPE_USM_DESC, nullptr, 0, 0}; +}; + } // namespace umf #endif /* UMF_HELPERS_H */ diff --git a/source/common/ur_pool_handle.hpp b/source/common/ur_pool_handle.hpp new file mode 100644 index 0000000000..24abd2330b --- /dev/null +++ b/source/common/ur_pool_handle.hpp @@ -0,0 +1,152 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include "disjoint_pool.hpp" +#include "ur_pool_manager.hpp" +#include + +namespace usm { + +template +std::pair +createUMFPoolForDesc(usm::pool_descriptor &Desc, Args &&...args) { + umf_result_t UmfRet = UMF_RESULT_SUCCESS; + umf::provider_unique_handle_t MemProvider = nullptr; + + switch (Desc.type) { + case UR_USM_TYPE_HOST: { + std::tie(UmfRet, MemProvider) = + umf::memoryProviderMakeUnique(Desc); + break; + } + case UR_USM_TYPE_DEVICE: { + std::tie(UmfRet, MemProvider) = + umf::memoryProviderMakeUnique(Desc); + break; + } + case UR_USM_TYPE_SHARED: { + std::tie(UmfRet, MemProvider) = + umf::memoryProviderMakeUnique(Desc); + break; + } + default: + UmfRet = UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (UmfRet) { + return std::pair{ + umf::umf2urResult(UmfRet), nullptr}; + } + + umf::pool_unique_handle_t Pool = nullptr; + std::tie(UmfRet, Pool) = + umf::poolMakeUnique({std::move(MemProvider)}, args...); + + return std::pair{ + umf::umf2urResult(UmfRet), std::move(Pool)}; +} + +template +struct pool_handle_base { + pool_handle_base(ur_context_handle_t Context, ur_usm_pool_desc_t *PoolDesc) + : Context(Context), PoolFlags(PoolDesc->flags) { + const void *pNext = PoolDesc->pNext; + while (pNext != nullptr) { + const ur_base_desc_t *BaseDesc = + static_cast(pNext); + switch (BaseDesc->stype) { + case UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC: { + const ur_usm_pool_limits_desc_t *Limits = + reinterpret_cast( + BaseDesc); + for (auto &config : DisjointPoolConfigs.Configs) { + config.MaxPoolableSize = Limits->maxPoolableSize; + config.SlabMinSize = Limits->minDriverAllocSize; + } + break; + } + default: { + throw umf::UsmAllocationException( + UR_RESULT_ERROR_INVALID_ARGUMENT); + } + } + pNext = BaseDesc->pNext; + } + + ur_result_t Ret; + std::tie(Ret, PoolManager) = + usm::pool_manager::create(); + if (Ret) { + throw umf::UsmAllocationException(Ret); + } + + auto UrUSMPool = 
reinterpret_cast(this); + + // We set up our pool handle with default pools for each of the three + // allocation types + std::vector Descs; + std::tie(Ret, Descs) = + usm::pool_descriptor::createDefaults(UrUSMPool, Context); + + for (auto &Desc : Descs) { + umf::pool_unique_handle_t Pool = nullptr; + auto PoolType = usm::urTypeToDisjointPoolType(Desc.type); + std::tie(Ret, Pool) = + createUMFPoolForDesc( + Desc, this->DisjointPoolConfigs.Configs[PoolType]); + if (Ret) { + throw umf::UsmAllocationException(Ret); + } + + PoolManager.addPool(Desc, Pool); + } + } + + std::atomic_uint32_t RefCount = 1; + ur_context_handle_t Context = nullptr; + usm::DisjointPoolAllConfigs DisjointPoolConfigs = + usm::DisjointPoolAllConfigs(); + usm::pool_manager PoolManager; + ur_usm_pool_flags_t PoolFlags; + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + uint32_t getReferenceCount() const noexcept { return RefCount; } + + bool hasUMFPool(umf_memory_pool_t *umf_pool) { + return PoolManager.hasPool(umf_pool); + } + + std::optional + getOrCreatePool(usm::pool_descriptor &desc) { + auto foundPool = PoolManager.getPool(desc); + if (foundPool.has_value()) { + return foundPool.value(); + } + + umf::pool_unique_handle_t newPool; + ur_result_t Ret = UR_RESULT_SUCCESS; + auto PoolType = usm::urTypeToDisjointPoolType(desc.type); + std::tie(Ret, newPool) = + createUMFPoolForDesc( + desc, this->DisjointPoolConfigs.Configs[PoolType]); + if (Ret) { + throw umf::UsmAllocationException(Ret); + } + + PoolManager.addPool(desc, newPool); + // addPool std::moves newPool so we can't just return that + return PoolManager.getPool(desc); + } +}; +} // namespace usm diff --git a/source/common/ur_pool_manager.hpp b/source/common/ur_pool_manager.hpp index 4accd55631..f1856304fe 100644 --- a/source/common/ur_pool_manager.hpp +++ b/source/common/ur_pool_manager.hpp @@ -8,12 +8,12 @@ * */ +#include 
"disjoint_pool_config_parser.hpp" #ifndef USM_POOL_MANAGER_HPP #define USM_POOL_MANAGER_HPP 1 #include "logger/ur_logger.hpp" #include "umf_helpers.hpp" -#include "umf_pools/disjoint_pool.hpp" #include "ur_api.h" #include "ur_util.hpp" @@ -28,18 +28,59 @@ namespace usm { /// @brief describes an internal USM pool instance. struct pool_descriptor { - ur_usm_pool_handle_t poolHandle; + pool_descriptor(ur_usm_pool_handle_t poolHandle, + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_usm_type_t type, const ur_usm_desc_t *pDesc) + : poolHandle(poolHandle), hContext(hContext), hDevice(hDevice), + type(type) { + const ur_base_desc_t *Next = + pDesc ? static_cast<const ur_base_desc_t *>(pDesc->pNext) : nullptr; + while (Next) { + switch (Next->stype) { + case UR_STRUCTURE_TYPE_USM_HOST_DESC: { + const auto *hostDesc = + reinterpret_cast<const ur_usm_host_desc_t *>(Next); + hostFlags |= hostDesc->flags; + break; + } + case UR_STRUCTURE_TYPE_USM_DEVICE_DESC: { + const auto *deviceDesc = + reinterpret_cast<const ur_usm_device_desc_t *>(Next); + deviceFlags |= deviceDesc->flags; + break; + } + case UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC: { + allocLocation = + *reinterpret_cast<const ur_usm_alloc_location_desc_t *>( + Next); + break; + } + default: + break; + } + Next = Next->pNext + ? static_cast<const ur_base_desc_t *>(Next->pNext) + : nullptr; + } + } + ur_usm_pool_handle_t poolHandle; ur_context_handle_t hContext; ur_device_handle_t hDevice; ur_usm_type_t type; - bool deviceReadOnly; + ur_usm_host_mem_flags_t hostFlags = 0; + ur_usm_device_mem_flags_t deviceFlags = 0; + std::optional<ur_usm_alloc_location_desc_t> allocLocation; bool operator==(const pool_descriptor &other) const; friend std::ostream &operator<<(std::ostream &os, const pool_descriptor &desc); + + // Returns default initialized pool descriptors for host, device and shared + // allocations. 
static std::pair> - create(ur_usm_pool_handle_t poolHandle, ur_context_handle_t hContext); + createDefaults(ur_usm_pool_handle_t poolHandle, + ur_context_handle_t hContext); }; static inline std::pair> @@ -129,11 +170,6 @@ urGetAllDevicesAndSubDevices(ur_context_handle_t hContext) { return {UR_RESULT_SUCCESS, devicesAndSubDevices}; } -static inline bool -isSharedAllocationReadOnlyOnDevice(const pool_descriptor &desc) { - return desc.type == UR_USM_TYPE_SHARED && desc.deviceReadOnly; -} - inline bool pool_descriptor::operator==(const pool_descriptor &other) const { const pool_descriptor &lhs = *this; const pool_descriptor &rhs = other; @@ -158,9 +194,20 @@ inline bool pool_descriptor::operator==(const pool_descriptor &other) const { } } + if (lhs.allocLocation.has_value()) { + if (!rhs.allocLocation.has_value()) { + return false; + } + + if (lhs.allocLocation.value().location != + rhs.allocLocation.value().location) { + return false; + } + } + return lhsNative == rhsNative && lhs.type == rhs.type && - (isSharedAllocationReadOnlyOnDevice(lhs) == - isSharedAllocationReadOnlyOnDevice(rhs)) && + lhs.hostFlags == rhs.hostFlags && + lhs.deviceFlags == rhs.deviceFlags && lhs.poolHandle == rhs.poolHandle; } @@ -168,53 +215,62 @@ inline std::ostream &operator<<(std::ostream &os, const pool_descriptor &desc) { os << "pool handle: " << desc.poolHandle << " context handle: " << desc.hContext << " device handle: " << desc.hDevice << " memory type: " << desc.type - << " is read only: " << desc.deviceReadOnly; + << " is read only: " + << (desc.deviceFlags & UR_USM_DEVICE_MEM_FLAG_DEVICE_READ_ONLY); return os; } inline std::pair> -pool_descriptor::create(ur_usm_pool_handle_t poolHandle, - ur_context_handle_t hContext) { +pool_descriptor::createDefaults(ur_usm_pool_handle_t poolHandle, + ur_context_handle_t hContext) { auto [ret, devices] = urGetAllDevicesAndSubDevices(hContext); if (ret != UR_RESULT_SUCCESS) { return {ret, {}}; } std::vector descriptors; - pool_descriptor &desc = 
descriptors.emplace_back(); - desc.poolHandle = poolHandle; - desc.hContext = hContext; - desc.type = UR_USM_TYPE_HOST; + descriptors.emplace_back(poolHandle, hContext, nullptr, UR_USM_TYPE_HOST, + nullptr); for (auto &device : devices) { { - pool_descriptor &desc = descriptors.emplace_back(); - desc.poolHandle = poolHandle; - desc.hContext = hContext; - desc.hDevice = device; - desc.type = UR_USM_TYPE_DEVICE; + descriptors.emplace_back(poolHandle, hContext, device, + UR_USM_TYPE_DEVICE, nullptr); } { - pool_descriptor &desc = descriptors.emplace_back(); - desc.poolHandle = poolHandle; - desc.hContext = hContext; - desc.type = UR_USM_TYPE_SHARED; - desc.hDevice = device; - desc.deviceReadOnly = false; + descriptors.emplace_back(poolHandle, hContext, device, + UR_USM_TYPE_SHARED, nullptr); } { - pool_descriptor &desc = descriptors.emplace_back(); - desc.poolHandle = poolHandle; - desc.hContext = hContext; - desc.type = UR_USM_TYPE_SHARED; - desc.hDevice = device; - desc.deviceReadOnly = true; + ur_usm_device_desc_t deviceDesc = { + UR_STRUCTURE_TYPE_USM_DEVICE_DESC, nullptr, + UR_USM_DEVICE_MEM_FLAG_DEVICE_READ_ONLY}; + ur_usm_desc_t desc = {UR_STRUCTURE_TYPE_USM_DESC, &deviceDesc, 0, + 0}; + + descriptors.emplace_back(poolHandle, hContext, device, + UR_USM_TYPE_SHARED, &desc); } } return {ret, descriptors}; } +inline usm::DisjointPoolMemType +urTypeToDisjointPoolType(const ur_usm_type_t &type) { + switch (type) { + case UR_USM_TYPE_HOST: + return usm::DisjointPoolMemType::Host; + case UR_USM_TYPE_DEVICE: + return usm::DisjointPoolMemType::Device; + case UR_USM_TYPE_SHARED: + return usm::DisjointPoolMemType::Shared; + default: + // Added to suppress 'not all control paths return a value' warning. 
+ return usm::DisjointPoolMemType::All; + } +} + template struct pool_manager { private: using desc_to_pool_map_t = std::unordered_map; @@ -253,7 +309,6 @@ template struct pool_manager { "Pool descriptor: {}, doesn't match any existing pool", desc); return std::nullopt; } - return it->second.get(); } @@ -278,9 +333,11 @@ template <> struct hash { } } - return combine_hashes(0, desc.type, native, - isSharedAllocationReadOnlyOnDevice(desc), - desc.poolHandle); + return combine_hashes( + 0, desc.type, native, desc.allocLocation.has_value(), + desc.allocLocation.has_value() ? desc.allocLocation.value().location + : 0, + desc.deviceFlags, desc.hostFlags, desc.poolHandle); } }; diff --git a/test/usm/usmPoolManager.cpp b/test/usm/usmPoolManager.cpp index 6d2eb33bfe..37b5ebb012 100644 --- a/test/usm/usmPoolManager.cpp +++ b/test/usm/usmPoolManager.cpp @@ -19,7 +19,7 @@ TEST_P(urUsmPoolDescriptorTest, poolIsPerContextTypeAndDevice) { auto poolHandle = this->GetParam(); auto [ret, pool_descriptors] = - usm::pool_descriptor::create(poolHandle, this->context); + usm::pool_descriptor::createDefaults(poolHandle, this->context); ASSERT_EQ(ret, UR_RESULT_SUCCESS); size_t hostPools = 0; @@ -56,7 +56,8 @@ INSTANTIATE_TEST_SUITE_P(urUsmPoolDescriptorTest, urUsmPoolDescriptorTest, struct urUsmPoolManagerTest : public uur::urContextTest { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(urContextTest::SetUp()); - auto [ret, descs] = usm::pool_descriptor::create(nullptr, context); + auto [ret, descs] = + usm::pool_descriptor::createDefaults(nullptr, context); ASSERT_EQ(ret, UR_RESULT_SUCCESS); poolDescriptors = std::move(descs); }