diff --git a/source/adapters/cuda/CMakeLists.txt b/source/adapters/cuda/CMakeLists.txt index cf97419035..c7cfef0f3b 100644 --- a/source/adapters/cuda/CMakeLists.txt +++ b/source/adapters/cuda/CMakeLists.txt @@ -41,8 +41,6 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp ${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.hpp ) set_target_properties(${TARGET_NAME} PROPERTIES @@ -83,7 +81,3 @@ target_link_libraries(${TARGET_NAME} PRIVATE Threads::Threads cudadrv ) - -target_include_directories(${TARGET_NAME} PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}/../../" -) diff --git a/source/adapters/cuda/adapter.cpp b/source/adapters/cuda/adapter.cpp index 5b897a8768..d4c30eb0b1 100644 --- a/source/adapters/cuda/adapter.cpp +++ b/source/adapters/cuda/adapter.cpp @@ -67,7 +67,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_ADAPTER_INFO_BACKEND: diff --git a/source/adapters/cuda/command_buffer.hpp b/source/adapters/cuda/command_buffer.hpp index 18264410c4..26e4c0e381 100644 --- a/source/adapters/cuda/command_buffer.hpp +++ b/source/adapters/cuda/command_buffer.hpp @@ -8,7 +8,6 @@ // //===----------------------------------------------------------------------===// -#include #include #include "context.hpp" diff --git a/source/adapters/cuda/common.hpp b/source/adapters/cuda/common.hpp index 67223c45bc..74471ea791 100644 --- a/source/adapters/cuda/common.hpp +++ b/source/adapters/cuda/common.hpp @@ -10,7 +10,7 @@ #pragma once #include -#include +#include ur_result_t mapErrorUR(CUresult Result); diff --git a/source/adapters/cuda/context.cpp b/source/adapters/cuda/context.cpp index 40ae0ce4ad..df9156e078 100644 --- a/source/adapters/cuda/context.cpp +++ b/source/adapters/cuda/context.cpp @@ -68,7 +68,7 @@ urContextCreate(uint32_t DeviceCount, const ur_device_handle_t *phDevices, UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo( ur_context_handle_t hContext, ur_context_info_t ContextInfoType, size_t propSize, void *pContextInfo, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pContextInfo, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pContextInfo, pPropSizeRet); switch (static_cast(ContextInfoType)) { case UR_CONTEXT_INFO_NUM_DEVICES: diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index acea59e1a1..456c2a92bc 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -42,7 +42,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, size_t propSize, void *pPropValue, size_t *pPropSizeRet) try { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); static constexpr uint32_t MaxWorkItemDimensions = 3u; diff --git a/source/adapters/cuda/device.hpp b/source/adapters/cuda/device.hpp index 08a1b5852a..86cb926590 100644 --- a/source/adapters/cuda/device.hpp +++ b/source/adapters/cuda/device.hpp @@ -9,7 +9,7 @@ //===----------------------------------------------------------------------===// #pragma once -#include +#include #include "common.hpp" diff --git a/source/adapters/cuda/event.cpp b/source/adapters/cuda/event.cpp index 804b35a9b7..0e8264cf23 100644 --- a/source/adapters/cuda/event.cpp +++ b/source/adapters/cuda/event.cpp @@ -162,7 +162,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, size_t propValueSize, void *pPropValue, size_t *pPropValueSizeRet) { - UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet); + ur::ReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet); switch (propName) { case UR_EVENT_INFO_COMMAND_QUEUE: @@ -187,7 +187,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( ur_event_handle_t hEvent, ur_profiling_info_t propName, size_t propValueSize, void *pPropValue, size_t *pPropValueSizeRet) { - UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet); + ur::ReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet); ur_queue_handle_t Queue = hEvent->getQueue(); if (Queue == nullptr || !(Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE)) { diff --git a/source/adapters/cuda/event.hpp b/source/adapters/cuda/event.hpp index 390fd7833a..f81b8fee0d 100644 --- a/source/adapters/cuda/event.hpp +++ b/source/adapters/cuda/event.hpp @@ -10,7 +10,7 @@ #pragma once #include -#include +#include #include "common.hpp" #include "queue.hpp" diff --git a/source/adapters/cuda/image.cpp b/source/adapters/cuda/image.cpp index 3168c008a3..ff9921c754 100644 --- a/source/adapters/cuda/image.cpp +++ b/source/adapters/cuda/image.cpp @@ -20,7 +20,6 @@ #include "memory.hpp" #include "queue.hpp" #include "sampler.hpp" -#include "ur/ur.hpp" #include "ur_api.h" ur_result_t urCalculateNumChannels(ur_image_channel_order_t order, diff --git a/source/adapters/cuda/kernel.cpp b/source/adapters/cuda/kernel.cpp index eaaa3ef368..5841ec9588 100644 --- a/source/adapters/cuda/kernel.cpp +++ b/source/adapters/cuda/kernel.cpp @@ -62,7 +62,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, ur_kernel_group_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: { @@ -205,7 +205,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, size_t propSize, void *pKernelInfo, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pKernelInfo, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pKernelInfo, pPropSizeRet); switch (propName) { case UR_KERNEL_INFO_FUNCTION_NAME: @@ -237,7 +237,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, ur_kernel_sub_group_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE: { // Sub-group size is equivalent to warp size diff --git a/source/adapters/cuda/memory.cpp b/source/adapters/cuda/memory.cpp index 824ab1f580..59c206840d 100644 --- a/source/adapters/cuda/memory.cpp +++ b/source/adapters/cuda/memory.cpp @@ -175,7 +175,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, size_t *pPropSizeRet) { UR_ASSERT(hMemory->isBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - UrReturnHelper ReturnValue(propSize, pMemInfo, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pMemInfo, pPropSizeRet); ScopedContext Active(hMemory->getContext()); diff --git a/source/adapters/cuda/physical_mem.hpp b/source/adapters/cuda/physical_mem.hpp index 0ce332e112..d608f7bab0 100644 --- a/source/adapters/cuda/physical_mem.hpp +++ b/source/adapters/cuda/physical_mem.hpp @@ -9,7 +9,7 @@ //===----------------------------------------------------------------------===// #pragma once -#include +#include #include diff --git a/source/adapters/cuda/platform.cpp b/source/adapters/cuda/platform.cpp index f37af1149b..17d8f5011c 100644 --- a/source/adapters/cuda/platform.cpp +++ b/source/adapters/cuda/platform.cpp @@ -22,7 +22,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetInfo( size_t Size, void *pPlatformInfo, size_t *pSizeRet) { UR_ASSERT(hPlatform, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UrReturnHelper ReturnValue(Size, pPlatformInfo, pSizeRet); + ur::ReturnHelper ReturnValue(Size, pPlatformInfo, pSizeRet); switch (PlatformInfoType) { case UR_PLATFORM_INFO_NAME: diff --git a/source/adapters/cuda/platform.hpp b/source/adapters/cuda/platform.hpp index 5da72057ab..f488ca4651 100644 --- a/source/adapters/cuda/platform.hpp +++ b/source/adapters/cuda/platform.hpp @@ -9,7 +9,9 @@ //===----------------------------------------------------------------------===// #pragma once -#include + +#include "ur_api.h" +#include #include struct ur_platform_handle_t_ { diff --git a/source/adapters/cuda/program.cpp b/source/adapters/cuda/program.cpp index 022fd258f7..d52ac6c0c6 100644 --- a/source/adapters/cuda/program.cpp +++ b/source/adapters/cuda/program.cpp @@ -339,7 +339,7 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, void *pPropValue, size_t *pPropSizeRet) { std::ignore = hDevice; - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_PROGRAM_BUILD_INFO_STATUS: { @@ -361,7 +361,7 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, size_t propSize, void *pProgramInfo, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pProgramInfo, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pProgramInfo, pPropSizeRet); switch (propName) { case UR_PROGRAM_INFO_REFERENCE_COUNT: diff --git a/source/adapters/cuda/queue.cpp b/source/adapters/cuda/queue.cpp index 120d665524..002389b2c3 100644 --- a/source/adapters/cuda/queue.cpp +++ b/source/adapters/cuda/queue.cpp @@ -291,7 +291,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, size_t propValueSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propValueSize, pPropValue, pPropSizeRet); switch (propName) { case UR_QUEUE_INFO_CONTEXT: diff --git a/source/adapters/cuda/queue.hpp b/source/adapters/cuda/queue.hpp index c79ca18a9b..ce067c3de7 100644 --- a/source/adapters/cuda/queue.hpp +++ b/source/adapters/cuda/queue.hpp @@ -9,10 +9,12 @@ //===----------------------------------------------------------------------===// #pragma once -#include +#include #include +#include #include +#include #include using ur_stream_guard_ = std::unique_lock; diff --git a/source/adapters/cuda/sampler.cpp b/source/adapters/cuda/sampler.cpp index 5ebccf516b..4e4f787529 100644 --- a/source/adapters/cuda/sampler.cpp +++ b/source/adapters/cuda/sampler.cpp @@ -49,7 +49,7 @@ urSamplerCreate(ur_context_handle_t hContext, const ur_sampler_desc_t *pDesc, UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, size_t propValueSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propValueSize, pPropValue, pPropSizeRet); switch (propName) { case UR_SAMPLER_INFO_REFERENCE_COUNT: diff --git a/source/adapters/cuda/sampler.hpp b/source/adapters/cuda/sampler.hpp index 74b5511249..68f63cf2c1 100644 --- a/source/adapters/cuda/sampler.hpp +++ b/source/adapters/cuda/sampler.hpp @@ -8,7 +8,9 @@ // //===----------------------------------------------------------------------===// -#include +#include + +#include /// Implementation of samplers for CUDA /// diff --git a/source/adapters/cuda/usm.cpp b/source/adapters/cuda/usm.cpp index 8929fb7fa1..8babe7e0da 100644 --- a/source/adapters/cuda/usm.cpp +++ b/source/adapters/cuda/usm.cpp @@ -196,7 +196,7 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, void *pPropValue, size_t *pPropValueSizeRet) { ur_result_t Result = UR_RESULT_SUCCESS; - UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet); + ur::ReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet); try { ScopedContext Active(hContext); @@ -486,7 +486,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolGetInfo( size_t *pPropSizeRet ///< [out][optional] size in bytes returned in pool ///< property value ) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_USM_POOL_INFO_REFERENCE_COUNT: { diff --git a/source/adapters/cuda/usm_p2p.cpp b/source/adapters/cuda/usm_p2p.cpp index b80aa80854..8a7000a84b 100644 --- a/source/adapters/cuda/usm_p2p.cpp +++ b/source/adapters/cuda/usm_p2p.cpp @@ -42,7 +42,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( ur_exp_peer_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); int value; CUdevice_P2PAttribute cu_attr; diff --git a/source/adapters/cuda/virtual_mem.cpp b/source/adapters/cuda/virtual_mem.cpp index 9c37dda4fb..6e86a7620a 100644 --- a/source/adapters/cuda/virtual_mem.cpp +++ b/source/adapters/cuda/virtual_mem.cpp @@ -20,7 +20,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_virtual_mem_granularity_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); ScopedContext Active(hContext); switch (propName) { @@ -108,7 +108,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo( ur_context_handle_t hContext, const void *pStart, [[maybe_unused]] size_t size, ur_virtual_mem_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); ScopedContext Active(hContext); switch (propName) { diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index 90162eb2de..7f251160a6 100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -83,8 +83,6 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp ${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.hpp ) if(NOT MSVC) @@ -184,7 +182,3 @@ elseif("${UR_HIP_PLATFORM}" STREQUAL "NVIDIA") else() message(FATAL_ERROR "Unspecified UR HIP platform please set UR_HIP_PLATFORM to 'AMD' or 'NVIDIA'") endif() - -target_include_directories(${TARGET_NAME} PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}/../../" -) diff --git a/source/adapters/hip/adapter.cpp b/source/adapters/hip/adapter.cpp index 4691d78913..a76421f6f8 100644 --- a/source/adapters/hip/adapter.cpp +++ b/source/adapters/hip/adapter.cpp @@ -56,7 +56,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_ADAPTER_INFO_BACKEND: diff --git a/source/adapters/hip/command_buffer.hpp b/source/adapters/hip/command_buffer.hpp index 96d0e8e34c..5f485db181 100644 --- a/source/adapters/hip/command_buffer.hpp +++ b/source/adapters/hip/command_buffer.hpp @@ -8,7 +8,7 @@ // //===----------------------------------------------------------------------===// -#include +#include /// Stub implementation of command-buffers for HIP diff --git a/source/adapters/hip/common.hpp b/source/adapters/hip/common.hpp index be332c280b..c55d240ce4 100644 --- a/source/adapters/hip/common.hpp +++ b/source/adapters/hip/common.hpp @@ -17,7 +17,8 @@ #endif #endif #include -#include +#include +#include // Before ROCm 6, hipify doesn't support cuArrayGetDescriptor, on AMD the // hipArray can just be indexed, but on NVidia it is an opaque type and needs to diff --git a/source/adapters/hip/context.cpp b/source/adapters/hip/context.cpp index 73ac777edb..6c5894c02c 100644 --- a/source/adapters/hip/context.cpp +++ b/source/adapters/hip/context.cpp @@ -72,7 +72,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (uint32_t{propName}) { case UR_CONTEXT_INFO_NUM_DEVICES: diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index e40470f9aa..5d49756019 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -25,7 +25,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); static constexpr uint32_t MaxWorkItemDimensions = 3u; diff --git a/source/adapters/hip/device.hpp b/source/adapters/hip/device.hpp index 181c5a7bdb..e1934f5768 100644 --- a/source/adapters/hip/device.hpp +++ b/source/adapters/hip/device.hpp @@ -11,7 +11,7 @@ #include "common.hpp" -#include +#include /// UR device mapping to a hipDevice_t. /// Includes an observer pointer to the platform, diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index c24287749e..43ae2cad85 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -185,7 +185,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( std::unique_ptr RetImplEvent{nullptr}; - ur_lock MemoryMigrationLock{hBuffer->MemoryMigrationMutex}; + ur::Lock MemoryMigrationLock{hBuffer->MemoryMigrationMutex}; auto Device = hQueue->getDevice(); hipStream_t HIPStream = hQueue->getNextTransferStream(); @@ -254,7 +254,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( std::vector DepEvents( phEventWaitList, phEventWaitList + numEventsInWaitList); - std::vector> MemMigrationLocks; + std::vector> MemMigrationLocks; // phEventWaitList only contains events that are handed to UR by the SYCL // runtime. However since UR handles memory dependencies within a context @@ -277,8 +277,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( [MemArg](auto &Lock) { return Lock.first == MemArg.Mem; }) == MemMigrationLocks.end()) - MemMigrationLocks.emplace_back( - std::pair{MemArg.Mem, ur_lock{MemArg.Mem->MemoryMigrationMutex}}); + MemMigrationLocks.emplace_back(std::pair{ + MemArg.Mem, ur::Lock{MemArg.Mem->MemoryMigrationMutex}}); } } } @@ -632,7 +632,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( std::unique_ptr RetImplEvent{nullptr}; ur_result_t Result = UR_RESULT_SUCCESS; - ur_lock MemoryMigrationLock(hBuffer->MemoryMigrationMutex); + ur::Lock MemoryMigrationLock(hBuffer->MemoryMigrationMutex); auto Device = hQueue->getDevice(); hipStream_t HIPStream = hQueue->getNextTransferStream(); @@ -1026,7 +1026,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { UR_ASSERT(hImage->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - ur_lock MemoryMigrationLock{hImage->MemoryMigrationMutex}; + ur::Lock MemoryMigrationLock{hImage->MemoryMigrationMutex}; auto Device = hQueue->getDevice(); hipStream_t HIPStream = hQueue->getNextTransferStream(); diff --git a/source/adapters/hip/event.cpp b/source/adapters/hip/event.cpp index 313212724a..5d56730ab7 100644 --- a/source/adapters/hip/event.cpp +++ b/source/adapters/hip/event.cpp @@ -216,7 +216,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, size_t *pPropValueSizeRet) { UR_ASSERT(!(pPropValue && propValueSize == 0), UR_RESULT_ERROR_INVALID_SIZE); - UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet); + ur::ReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet); switch (propName) { case UR_EVENT_INFO_COMMAND_QUEUE: return ReturnValue(hEvent->getQueue()); @@ -253,7 +253,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE; } - UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet); + ur::ReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet); switch (propName) { case UR_PROFILING_INFO_COMMAND_QUEUED: case UR_PROFILING_INFO_COMMAND_SUBMIT: diff --git a/source/adapters/hip/image.cpp b/source/adapters/hip/image.cpp index a8fcd6f465..2b69554661 100644 --- a/source/adapters/hip/image.cpp +++ b/source/adapters/hip/image.cpp @@ -8,7 +8,7 @@ // //===----------------------------------------------------------------------===// -#include "ur/ur.hpp" +#include "ur_api.h" UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp( [[maybe_unused]] ur_context_handle_t hContext, diff --git a/source/adapters/hip/kernel.cpp b/source/adapters/hip/kernel.cpp index 66b9fe4403..63a839346d 100644 --- a/source/adapters/hip/kernel.cpp +++ b/source/adapters/hip/kernel.cpp @@ -57,7 +57,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, ur_kernel_group_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: { @@ -190,7 +190,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, size_t propSize, void *pKernelInfo, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pKernelInfo, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pKernelInfo, pPropSizeRet); switch (propName) { case UR_KERNEL_INFO_FUNCTION_NAME: @@ -216,7 +216,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, ur_kernel_sub_group_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE: { // Sub-group size is equivalent to warp size diff --git a/source/adapters/hip/memory.cpp b/source/adapters/hip/memory.cpp index 7be8f3f9c1..0f45bbc140 100644 --- a/source/adapters/hip/memory.cpp +++ b/source/adapters/hip/memory.cpp @@ -221,7 +221,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, auto Device = hMemory->getContext()->getDevices()[0]; ScopedContext Active(Device); - UrReturnHelper ReturnValue(propSize, pMemInfo, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pMemInfo, pPropSizeRet); switch (MemInfoType) { case UR_MEM_INFO_SIZE: { @@ -366,7 +366,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, // FIXME: only getting infor for first image in ctx auto Device = hMemory->getContext()->getDevices()[0]; ScopedContext Active(Device); - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); try { HIP_ARRAY3D_DESCRIPTOR ArrayInfo; @@ -439,7 +439,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem, const ur_device_handle_t hDevice) { ScopedContext Active(hDevice); - ur_lock LockGuard(Mem->MemoryAllocationMutex); + ur::Lock LockGuard(Mem->MemoryAllocationMutex); if (Mem->isBuffer()) { auto &Buffer = std::get(Mem->Mem); diff --git a/source/adapters/hip/memory.hpp b/source/adapters/hip/memory.hpp index 7707794b3c..9d210ca3ab 100644 --- a/source/adapters/hip/memory.hpp +++ b/source/adapters/hip/memory.hpp @@ -397,8 +397,8 @@ struct ur_mem_handle_t_ { // Enumerates all possible types of accesses. enum access_mode_t { unknown, read_write, read_only, write_only }; - ur_mutex MemoryAllocationMutex; // A mutex for allocations - ur_mutex MemoryMigrationMutex; // A mutex for memory transfers + ur::Mutex MemoryAllocationMutex; // A mutex for allocations + ur::Mutex MemoryMigrationMutex; // A mutex for memory transfers /// A UR Memory object represents either plain memory allocations ("Buffers" /// in OpenCL) or typed allocations ("Images" in OpenCL). diff --git a/source/adapters/hip/platform.cpp b/source/adapters/hip/platform.cpp index 287f941c30..d41e4f3a2d 100644 --- a/source/adapters/hip/platform.cpp +++ b/source/adapters/hip/platform.cpp @@ -16,7 +16,7 @@ hipEvent_t ur_platform_handle_t_::EvBase{nullptr}; UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetInfo(ur_platform_handle_t, ur_platform_info_t propName, size_t propSize, void *pPropValue, size_t *pSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pSizeRet); switch (propName) { case UR_PLATFORM_INFO_NAME: diff --git a/source/adapters/hip/program.cpp b/source/adapters/hip/program.cpp index 81f1be1194..efc2b99fed 100644 --- a/source/adapters/hip/program.cpp +++ b/source/adapters/hip/program.cpp @@ -320,7 +320,7 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t, ur_program_build_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { // Ignore unused parameter - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_PROGRAM_BUILD_INFO_STATUS: { @@ -339,7 +339,7 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t, UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, size_t propSize, void *pProgramInfo, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pProgramInfo, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pProgramInfo, pPropSizeRet); switch (propName) { case UR_PROGRAM_INFO_REFERENCE_COUNT: diff --git a/source/adapters/hip/queue.cpp b/source/adapters/hip/queue.cpp index 6e6496fec1..1ab47c3c28 100644 --- a/source/adapters/hip/queue.cpp +++ b/source/adapters/hip/queue.cpp @@ -160,7 +160,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, size_t propValueSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propValueSize, pPropValue, pPropSizeRet); switch (propName) { case UR_QUEUE_INFO_CONTEXT: return ReturnValue(hQueue->Context); diff --git a/source/adapters/hip/sampler.cpp b/source/adapters/hip/sampler.cpp index 1ee1996164..79a2617822 100644 --- a/source/adapters/hip/sampler.cpp +++ b/source/adapters/hip/sampler.cpp @@ -34,7 +34,7 @@ ur_result_t urSamplerCreate(ur_context_handle_t hContext, ur_result_t urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, size_t propValueSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propValueSize, pPropValue, pPropSizeRet); switch (propName) { case UR_SAMPLER_INFO_REFERENCE_COUNT: diff --git a/source/adapters/hip/sampler.hpp b/source/adapters/hip/sampler.hpp index df2da74db7..d6e9adc925 100644 --- a/source/adapters/hip/sampler.hpp +++ b/source/adapters/hip/sampler.hpp @@ -8,7 +8,7 @@ // //===----------------------------------------------------------------------===// -#include +#include #include "context.hpp" diff --git a/source/adapters/hip/usm.cpp b/source/adapters/hip/usm.cpp index 9d084f7b4e..b7de7cec67 100644 --- a/source/adapters/hip/usm.cpp +++ b/source/adapters/hip/usm.cpp @@ -154,7 +154,7 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, ur_result_t Result = UR_RESULT_SUCCESS; hipPointerAttribute_t hipPointerAttributeType; - UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet); + ur::ReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet); try { switch (propName) { @@ -430,7 +430,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolGetInfo( size_t *pPropSizeRet ///< [out][optional] size in bytes returned in pool ///< property value ) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_USM_POOL_INFO_REFERENCE_COUNT: { diff --git a/source/adapters/hip/usm_p2p.cpp b/source/adapters/hip/usm_p2p.cpp index 65635dc910..37428f7a61 100644 --- a/source/adapters/hip/usm_p2p.cpp +++ b/source/adapters/hip/usm_p2p.cpp @@ -27,7 +27,7 @@ urUsmP2PDisablePeerAccessExp(ur_device_handle_t, ur_device_handle_t) { UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( ur_device_handle_t, ur_device_handle_t, ur_exp_peer_info_t, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); // Zero return value indicates that all of the queries currently return false. return ReturnValue(uint32_t{0}); } diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 250eaccab2..b1113976bc 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -119,7 +119,6 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp ) # TODO: fix level_zero adapter conversion warnings @@ -141,6 +140,5 @@ target_link_libraries(${TARGET_NAME} PRIVATE ) target_include_directories(${TARGET_NAME} PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}/../../" LevelZeroLoader-Headers ) diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index d43ae07cdb..eec9f9b80d 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -173,7 +173,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, size_t PropSize, void *PropValue, size_t *PropSizeRet) { - UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); + ur::ReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); switch (PropName) { case UR_ADAPTER_INFO_BACKEND: diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index bbe49cb705..3c563f0f4a 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -511,7 +511,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, ur_exp_command_buffer_sync_point_t *SyncPoint) { // Lock automatically releases when this goes out of scope. - std::scoped_lock Lock( + std::scoped_lock Lock( Kernel->Mutex, Kernel->Program->Mutex); if (GlobalWorkOffset != NULL) { @@ -598,8 +598,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( auto SrcBuffer = ur_cast(SrcMem); auto DstBuffer = ur_cast(DstMem); - std::shared_lock SrcLock(SrcBuffer->Mutex, std::defer_lock); - std::scoped_lock, ur_shared_mutex> LockAll( + std::shared_lock SrcLock(SrcBuffer->Mutex, std::defer_lock); + std::scoped_lock, ur::SharedMutex> LockAll( SrcLock, DstBuffer->Mutex); char *ZeHandleSrc; @@ -626,8 +626,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( auto SrcBuffer = ur_cast(SrcMem); auto DstBuffer = ur_cast(DstMem); - std::shared_lock SrcLock(SrcBuffer->Mutex, std::defer_lock); - std::scoped_lock, ur_shared_mutex> LockAll( + std::shared_lock SrcLock(SrcBuffer->Mutex, std::defer_lock); + std::scoped_lock, ur::SharedMutex> LockAll( SrcLock, DstBuffer->Mutex); char *ZeHandleSrc; @@ -649,7 +649,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, ur_exp_command_buffer_sync_point_t *SyncPoint) { - std::scoped_lock Lock(Buffer->Mutex); + std::scoped_lock Lock(Buffer->Mutex); char *ZeHandleDst = nullptr; UR_CALL(Buffer->getZeHandle(ZeHandleDst, ur_mem_handle_t_::write_only, @@ -670,7 +670,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, ur_exp_command_buffer_sync_point_t *SyncPoint) { - std::scoped_lock Lock(Buffer->Mutex); + std::scoped_lock Lock(Buffer->Mutex); char *ZeHandleDst = nullptr; UR_CALL(Buffer->getZeHandle(ZeHandleDst, ur_mem_handle_t_::write_only, @@ -687,7 +687,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( size_t Offset, size_t Size, void *Dst, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, ur_exp_command_buffer_sync_point_t *SyncPoint) { - std::scoped_lock SrcLock(Buffer->Mutex); + std::scoped_lock SrcLock(Buffer->Mutex); char *ZeHandleSrc = nullptr; UR_CALL(Buffer->getZeHandle(ZeHandleSrc, ur_mem_handle_t_::read_only, @@ -705,7 +705,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, ur_exp_command_buffer_sync_point_t *SyncPoint) { - std::scoped_lock SrcLock(Buffer->Mutex); + std::scoped_lock SrcLock(Buffer->Mutex); char *ZeHandleSrc; UR_CALL(Buffer->getZeHandle(ZeHandleSrc, ur_mem_handle_t_::read_only, @@ -824,7 +824,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, ur_exp_command_buffer_sync_point_t *SyncPoint) { - std::scoped_lock Lock(Buffer->Mutex); + std::scoped_lock Lock(Buffer->Mutex); char *ZeHandleDst = nullptr; _ur_buffer *UrBuffer = reinterpret_cast<_ur_buffer *>(Buffer); @@ -862,7 +862,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES; } - std::scoped_lock lock(Queue->Mutex); + std::scoped_lock lock(Queue->Mutex); // Use compute engine rather than copy engine const auto UseCopyEngine = false; auto &QGroup = Queue->getQueueGroup(UseCopyEngine); diff --git a/source/adapters/level_zero/command_buffer.hpp b/source/adapters/level_zero/command_buffer.hpp index a43e9e4c52..5da3729b43 100644 --- a/source/adapters/level_zero/command_buffer.hpp +++ b/source/adapters/level_zero/command_buffer.hpp @@ -9,7 +9,6 @@ //===----------------------------------------------------------------------===// #pragma once -#include #include #include #include diff --git a/source/adapters/level_zero/common.hpp b/source/adapters/level_zero/common.hpp index 5c363a5984..666af37111 100644 --- a/source/adapters/level_zero/common.hpp +++ b/source/adapters/level_zero/common.hpp @@ -18,8 +18,8 @@ #include #include -#include #include +#include #include #include @@ -261,11 +261,6 @@ bool setEnvVar(const char *name, const char *value); // Prints to stderr if UR_L0_DEBUG allows it void urPrint(const char *Format, ...); -// Helper for one-liner validation -#define UR_ASSERT(condition, error) \ - if (!(condition)) \ - return error; - // Returns the ze_structure_type_t to use in .stype of a structured descriptor. // Intentionally not defined; will give an error if no proper specialization template ze_structure_type_t getZeStructureType(); @@ -306,11 +301,6 @@ bool setEnvVar(const char *name, const char *value); // Prints to stderr if UR_L0_DEBUG allows it void urPrint(const char *Format, ...); -// Helper for one-liner validation -#define UR_ASSERT(condition, error) \ - if (!(condition)) \ - return error; - // Map Level Zero runtime error code to UR error code. ur_result_t ze2urResult(ze_result_t ZeResult); @@ -389,7 +379,7 @@ struct _ur_object { // access to Obj3 in a scope use the following approach: // std::shared_lock Obj3Lock(Obj3->Mutex, std::defer_lock); // std::scoped_lock LockAll(Obj1->Mutex, Obj2->Mutex, Obj3Lock); - ur_shared_mutex Mutex; + ur::SharedMutex Mutex; // Indicates if we own the native handle or it came from interop that // asked to not transfer the ownership to SYCL RT. diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index 2bd893b043..741b6d8289 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -39,7 +39,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( Context->initialize(); *RetContext = reinterpret_cast(Context); if (IndirectAccessTrackingEnabled) { - std::scoped_lock Lock(Platform->ContextsMutex); + std::scoped_lock Lock(Platform->ContextsMutex); Platform->Contexts.push_back(*RetContext); } } catch (const std::bad_alloc &) { @@ -63,7 +63,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextRelease( ur_context_handle_t Context ///< [in] handle of the context to release. ) { ur_platform_handle_t Plt = Context->getPlatform(); - std::unique_lock ContextsLock(Plt->ContextsMutex, + std::unique_lock ContextsLock(Plt->ContextsMutex, std::defer_lock); if (IndirectAccessTrackingEnabled) ContextsLock.lock(); @@ -96,8 +96,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo( size_t *PropSizeRet ///< [out][optional] pointer to the actual size in ///< bytes of data queried by ContextInfoType. ) { - std::shared_lock Lock(Context->Mutex); - UrReturnHelper ReturnValue(PropSize, ContextInfo, PropSizeRet); + std::shared_lock Lock(Context->Mutex); + ur::ReturnHelper ReturnValue(PropSize, ContextInfo, PropSizeRet); switch ( (uint32_t)ContextInfoType) { // cast to avoid warnings on EXT enum values case UR_CONTEXT_INFO_DEVICES: @@ -397,7 +397,7 @@ ur_result_t ur_context_handle_t_::finalize() { // deallocated. For example, event and event pool caches would be still alive. if (!DisableEventsCaching) { - std::scoped_lock Lock(EventCacheMutex); + std::scoped_lock Lock(EventCacheMutex); for (auto &EventCache : EventCaches) { for (auto &Event : EventCache) { auto ZeResult = ZE_CALL_NOCHECK(zeEventDestroy, (Event->ZeEvent)); @@ -410,7 +410,7 @@ ur_result_t ur_context_handle_t_::finalize() { } } { - std::scoped_lock Lock(ZeEventPoolCacheMutex); + std::scoped_lock Lock(ZeEventPoolCacheMutex); for (auto &ZePoolCache : ZeEventPoolCache) { for (auto &ZePool : ZePoolCache) { auto ZeResult = ZE_CALL_NOCHECK(zeEventPoolDestroy, (ZePool)); @@ -428,7 +428,7 @@ ur_result_t ur_context_handle_t_::finalize() { if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED) return ze2urResult(ZeResult); - std::scoped_lock Lock(ZeCommandListCacheMutex); + std::scoped_lock Lock(ZeCommandListCacheMutex); for (auto &List : ZeComputeCommandListCache) { for (auto &Item : List.second) { ze_command_list_handle_t ZeCommandList = Item.first; @@ -473,7 +473,7 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool( ze_event_pool_handle_t &Pool, size_t &Index, bool HostVisible, bool ProfilingEnabled) { // Lock while updating event pool machinery. - std::scoped_lock Lock(ZeEventPoolCacheMutex); + std::scoped_lock Lock(ZeEventPoolCacheMutex); std::list *ZePoolCache = getZeEventPoolCache(HostVisible, ProfilingEnabled); @@ -531,7 +531,7 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool( ur_event_handle_t ur_context_handle_t_::getEventFromContextCache(bool HostVisible, bool WithProfiling) { - std::scoped_lock Lock(EventCacheMutex); + std::scoped_lock Lock(EventCacheMutex); auto Cache = getEventCache(HostVisible, WithProfiling); if (Cache->empty()) return nullptr; @@ -545,7 +545,7 @@ ur_context_handle_t_::getEventFromContextCache(bool HostVisible, } void ur_context_handle_t_::addEventToContextCache(ur_event_handle_t Event) { - std::scoped_lock Lock(EventCacheMutex); + std::scoped_lock Lock(EventCacheMutex); auto Cache = getEventCache(Event->isHostVisible(), Event->isProfilingEnabled()); Cache->emplace_back(Event); @@ -553,8 +553,8 @@ void ur_context_handle_t_::addEventToContextCache(ur_event_handle_t Event) { ur_result_t ur_context_handle_t_::decrementUnreleasedEventsInPool(ur_event_handle_t Event) { - std::shared_lock EventLock(Event->Mutex, std::defer_lock); - std::scoped_lock> LockAll( + std::shared_lock EventLock(Event->Mutex, std::defer_lock); + std::scoped_lock> LockAll( ZeEventPoolCacheMutex, EventLock); if (!Event->ZeEventPool) { // This must be an interop event created on a users's pool. @@ -683,7 +683,7 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList( { // Make sure to acquire the lock before checking the size, or there // will be a race condition. - std::scoped_lock Lock(Queue->Context->ZeCommandListCacheMutex); + std::scoped_lock Lock(Queue->Context->ZeCommandListCacheMutex); // Under mutex since operator[] does insertion on the first usage for every // unique ZeDevice. auto &ZeCommandListCache = diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index 96935d470e..3c7ce32fba 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -17,8 +17,8 @@ #include #include -#include #include +#include #include #include @@ -64,11 +64,11 @@ struct ur_context_handle_t_ : _ur_object { // Mutex for the immediate command list. Per the Level Zero spec memory copy // operations submitted to an immediate command list are not allowed to be // called from simultaneous threads. - ur_mutex ImmediateCommandListMutex; + ur::Mutex ImmediateCommandListMutex; // Mutex Lock for the Command List Cache. This lock is used to control both // compute and copy command list caches. - ur_mutex ZeCommandListCacheMutex; + ur::Mutex ZeCommandListCacheMutex; // If context contains one device or sub-devices of the same device, we want // to save this device. @@ -156,10 +156,10 @@ struct ur_context_handle_t_ : _ur_object { // Mutex to control operations on event pool caches and the helper maps // holding the current pool usage counts. - ur_mutex ZeEventPoolCacheMutex; + ur::Mutex ZeEventPoolCacheMutex; // Mutex to control operations on event caches. - ur_mutex EventCacheMutex; + ur::Mutex EventCacheMutex; // Caches for events. std::vector> EventCaches{4}; diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index abdfd2e541..1af4b8a00e 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -39,7 +39,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet( // Filter available devices based on input DeviceType. std::vector MatchedDevices; - std::shared_lock Lock(Platform->URDevicesCacheMutex); + std::shared_lock Lock(Platform->URDevicesCacheMutex); for (auto &D : Platform->URDevicesCache) { // Only ever return root-devices from urDeviceGet, but the // devices cache also keeps sub-devices. @@ -119,7 +119,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( size_t *pSize ///< [out][optional] pointer to the actual size in bytes of ///< the queried infoType. ) { - UrReturnHelper ReturnValue(propSize, ParamValue, pSize); + ur::ReturnHelper ReturnValue(propSize, ParamValue, pSize); ze_device_handle_t ZeDevice = Device->ZeDevice; @@ -1329,7 +1329,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( // TODO: maybe we should populate cache of platforms if it wasn't already. // For now assert that is was populated. UR_ASSERT(URPlatformCachePopulated, UR_RESULT_ERROR_INVALID_VALUE); - const std::lock_guard Lock{*URPlatformsCacheMutex}; + const std::lock_guard Lock{*URPlatformsCacheMutex}; ur_device_handle_t Dev = nullptr; for (ur_platform_handle_t ThePlatform : *URPlatformsCache) { diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp index 3b91b70058..b50f571988 100644 --- a/source/adapters/level_zero/device.hpp +++ b/source/adapters/level_zero/device.hpp @@ -18,8 +18,8 @@ #include #include -#include #include +#include #include #include diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 2dc74ff5ac..e94012f775 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -60,7 +60,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( bool UseCopyEngine = false; // Lock automatically releases when this goes out of scope. - std::scoped_lock lock(Queue->Mutex); + std::scoped_lock lock(Queue->Mutex); _ur_ze_event_list_t TmpWaitList = {}; UR_CALL(TmpWaitList.createAndRetainUrZeEventList( @@ -100,7 +100,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( // TODO: find a way to do that without blocking the host. // Lock automatically releases when this goes out of scope. - std::scoped_lock lock(Queue->Mutex); + std::scoped_lock lock(Queue->Mutex); if (OutEvent) { UR_CALL(createEventAndAssociateQueue( @@ -119,7 +119,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( } if (!Queue->UsingImmCmdLists) { - std::unique_lock Lock(Queue->Mutex); + std::unique_lock Lock(Queue->Mutex); resetCommandLists(Queue); } @@ -148,7 +148,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( ) { // Lock automatically releases when this goes out of scope. - std::scoped_lock lock(Queue->Mutex); + std::scoped_lock lock(Queue->Mutex); // Helper function for appending a barrier to a command list. auto insertBarrierIntoCmdList = @@ -374,19 +374,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo( size_t *PropValueSizeRet ///< [out][optional] bytes returned in event property ) { - UrReturnHelper ReturnValue(PropValueSize, PropValue, PropValueSizeRet); + ur::ReturnHelper ReturnValue(PropValueSize, PropValue, PropValueSizeRet); switch (PropName) { case UR_EVENT_INFO_COMMAND_QUEUE: { - std::shared_lock EventLock(Event->Mutex); + std::shared_lock EventLock(Event->Mutex); return ReturnValue(ur_queue_handle_t{Event->UrQueue}); } case UR_EVENT_INFO_CONTEXT: { - std::shared_lock EventLock(Event->Mutex); + std::shared_lock EventLock(Event->Mutex); return ReturnValue(ur_context_handle_t{Event->Context}); } case UR_EVENT_INFO_COMMAND_TYPE: { - std::shared_lock EventLock(Event->Mutex); + std::shared_lock EventLock(Event->Mutex); return ReturnValue(ur_cast(Event->CommandType)); } case UR_EVENT_INFO_COMMAND_EXECUTION_STATUS: { @@ -398,7 +398,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo( auto UrQueue = Event->UrQueue; if (UrQueue) { // Lock automatically releases when this goes out of scope. - std::scoped_lock lock(UrQueue->Mutex); + std::scoped_lock lock(UrQueue->Mutex); const auto &OpenCommandList = UrQueue->eventOpenCommandList(Event); if (OpenCommandList != UrQueue->CommandListMap.end()) { UR_CALL(UrQueue->executeOpenCommandList( @@ -418,7 +418,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo( // Make sure that we query a host-visible event only. // If one wasn't yet created then don't create it here as well, and // just conservatively return that event is not yet completed. - std::shared_lock EventLock(Event->Mutex); + std::shared_lock EventLock(Event->Mutex); auto HostVisibleEvent = Event->HostVisibleEvent; if (Event->Completed) { Result = UR_EVENT_STATUS_COMPLETE; @@ -454,7 +454,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( size_t *PropValueSizeRet ///< [out][optional] pointer to the actual size in ///< bytes returned in propValue ) { - std::shared_lock EventLock(Event->Mutex); + std::shared_lock EventLock(Event->Mutex); if (Event->UrQueue && (Event->UrQueue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) == 0) { @@ -468,7 +468,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( const uint64_t TimestampMaxValue = ((1ULL << Device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); - UrReturnHelper ReturnValue(PropValueSize, PropValue, PropValueSizeRet); + ur::ReturnHelper ReturnValue(PropValueSize, PropValue, PropValueSizeRet); ze_kernel_timestamp_result_t tsResult; @@ -581,7 +581,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent( ze_event_handle_t &ZeHostVisibleEvent) { - std::scoped_lock Lock(UrQueue->Mutex, + std::scoped_lock Lock(UrQueue->Mutex, this->Mutex); if (!HostVisibleEvent) { @@ -646,7 +646,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( auto UrQueue = Event->UrQueue; if (UrQueue) { // Lock automatically releases when this goes out of scope. - std::scoped_lock lock(UrQueue->Mutex); + std::scoped_lock lock(UrQueue->Mutex); UR_CALL(UrQueue->executeAllOpenCommandLists()); } @@ -657,7 +657,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( ur_event_handle_t_ *Event = ur_cast(EventWaitList[I]); { - std::shared_lock EventLock(Event->Mutex); + std::shared_lock EventLock(Event->Mutex); if (!Event->hasExternalRefs()) die("urEventWait must not be called for an internal event"); @@ -695,7 +695,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( // We waited some events above, check queue for signaled command lists and // reset them. for (auto &Q : Queues) { - std::unique_lock Lock(Q->Mutex); + std::unique_lock Lock(Q->Mutex); resetCommandLists(Q); } @@ -726,7 +726,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( *NativeEvent ///< [out] a pointer to the native handle of the event. ) { { - std::shared_lock Lock(Event->Mutex); + std::shared_lock Lock(Event->Mutex); auto *ZeEvent = ur_cast(NativeEvent); *ZeEvent = Event->ZeEvent; } @@ -735,7 +735,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( // interop app is going to wait for it. auto Queue = Event->UrQueue; if (Queue) { - std::scoped_lock lock(Queue->Mutex); + std::scoped_lock lock(Queue->Mutex); const auto &OpenCommandList = Queue->eventOpenCommandList(Event); if (OpenCommandList != Queue->CommandListMap.end()) { UR_CALL( @@ -919,7 +919,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, std::list EventsToBeReleased; ur_queue_handle_t AssociatedQueue = nullptr; { - std::scoped_lock EventLock(Event->Mutex); + std::scoped_lock EventLock(Event->Mutex); if (SetEventCompleted) Event->Completed = true; // Exit early of event was already cleanedup. @@ -953,7 +953,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, // result, memory can be deallocated and context can be removed from // container in the platform. That's why we need to lock a mutex here. ur_platform_handle_t Plt = Kernel->Program->Context->getPlatform(); - std::scoped_lock ContextsLock(Plt->ContextsMutex); + std::scoped_lock ContextsLock(Plt->ContextsMutex); if (--Kernel->SubmissionsCount == 0) { // Kernel is not submitted for execution, release referenced memory @@ -977,7 +977,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, if (AssociatedQueue) { { // Lock automatically releases when this goes out of scope. - std::unique_lock QueueLock(AssociatedQueue->Mutex, + std::unique_lock QueueLock(AssociatedQueue->Mutex, std::defer_lock); if (!QueueLocked) QueueLock.lock(); @@ -1011,7 +1011,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, ur_kernel_handle_t DepEventKernel = nullptr; { - std::scoped_lock DepEventLock(DepEvent->Mutex); + std::scoped_lock DepEventLock(DepEvent->Mutex); DepEvent->WaitList.collectEventsForReleaseAndDestroyUrZeEventList( EventsToBeReleased); if (IndirectAccessTrackingEnabled) { @@ -1180,7 +1180,7 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( if (IncludeLastCommandEvent) { this->ZeEventList = new ze_event_handle_t[EventListLength + 1]; this->UrEventList = new ur_event_handle_t[EventListLength + 1]; - std::shared_lock Lock(CurQueue->LastCommandEvent->Mutex); + std::shared_lock Lock(CurQueue->LastCommandEvent->Mutex); this->ZeEventList[0] = CurQueue->LastCommandEvent->ZeEvent; this->UrEventList[0] = CurQueue->LastCommandEvent; this->UrEventList[0]->RefCount.increment(); @@ -1193,7 +1193,7 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( if (EventListLength > 0) { for (uint32_t I = 0; I < EventListLength; I++) { { - std::shared_lock Lock(EventList[I]->Mutex); + std::shared_lock Lock(EventList[I]->Mutex); if (EventList[I]->Completed) continue; @@ -1217,8 +1217,8 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( // TODO: rework this to avoid deadlock when another thread is // locking the same queues but in a different order. auto Lock = ((Queue == CurQueue) - ? std::unique_lock() - : std::unique_lock(Queue->Mutex)); + ? std::unique_lock() + : std::unique_lock(Queue->Mutex)); // If the event that is going to be waited is in an open batch // different from where this next command is going to be added, @@ -1261,7 +1261,7 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( } } - std::shared_lock Lock(EventList[I]->Mutex); + std::shared_lock Lock(EventList[I]->Mutex); this->ZeEventList[TmpListLength] = EventList[I]->ZeEvent; this->UrEventList[TmpListLength] = EventList[I]; this->UrEventList[TmpListLength]->RefCount.increment(); diff --git a/source/adapters/level_zero/event.hpp b/source/adapters/level_zero/event.hpp index d4e975012c..884d421bc4 100644 --- a/source/adapters/level_zero/event.hpp +++ b/source/adapters/level_zero/event.hpp @@ -19,8 +19,8 @@ #include #include -#include #include +#include #include #include diff --git a/source/adapters/level_zero/image.hpp b/source/adapters/level_zero/image.hpp index d579a24708..f5d9734236 100644 --- a/source/adapters/level_zero/image.hpp +++ b/source/adapters/level_zero/image.hpp @@ -9,7 +9,6 @@ //===----------------------------------------------------------------------===// #pragma once -#include #include #include #include diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index dfa8915197..21521ef8f4 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -42,7 +42,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( ///< this particular kernel execution instance. ) { // Lock automatically releases when this goes out of scope. - std::scoped_lock Lock( + std::scoped_lock Lock( Queue->Mutex, Kernel->Mutex, Kernel->Program->Mutex); if (GlobalWorkOffset != NULL) { if (!Queue->Device->Platform->ZeDriverGlobalOffsetExtensionFound) { @@ -215,7 +215,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( // If using immediate commandlists then gathering of indirect // references and appending to the queue (which means submission) // must be done together. - std::unique_lock ContextsLock( + std::unique_lock ContextsLock( Queue->Device->Platform->ContextsMutex, std::defer_lock); // We are going to submit kernels for execution. If indirect access flag is // set for a kernel then we need to make a snapshot of existing memory @@ -275,7 +275,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( *Event ///< [in,out][optional] return an event object that identifies ///< this particular kernel execution instance. ) { - std::scoped_lock lock(Queue->Mutex); + std::scoped_lock lock(Queue->Mutex); // Find global variable pointer size_t GlobalVarSize = 0; @@ -325,7 +325,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( ///< this particular kernel execution instance. ) { - std::scoped_lock lock(Queue->Mutex); + std::scoped_lock lock(Queue->Mutex); // Find global variable pointer size_t GlobalVarSize = 0; @@ -358,7 +358,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreate( ur_kernel_handle_t *RetKernel ///< [out] pointer to handle of kernel object created. ) { - std::shared_lock Guard(Program->Mutex); + std::shared_lock Guard(Program->Mutex); if (Program->State != ur_program_handle_t_::state::Exe) { return UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE; } @@ -408,7 +408,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( PArgValue = nullptr; } - std::scoped_lock Guard(Kernel->Mutex); + std::scoped_lock Guard(Kernel->Mutex); ZE2UR_CALL(zeKernelSetArgumentValue, (Kernel->ZeKernel, ArgIndex, ArgSize, PArgValue)); @@ -444,9 +444,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo( ///< bytes of data being queried by propName. ) { - UrReturnHelper ReturnValue(PropSize, KernelInfo, PropSizeRet); + ur::ReturnHelper ReturnValue(PropSize, KernelInfo, PropSizeRet); - std::shared_lock Guard(Kernel->Mutex); + std::shared_lock Guard(Kernel->Mutex); switch (ParamName) { case UR_KERNEL_INFO_CONTEXT: return ReturnValue(ur_context_handle_t{Kernel->Program->Context}); @@ -502,9 +502,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetGroupInfo( size_t *ParamValueSizeRet ///< [out][optional] pointer to the actual size in ///< bytes of data being queried by propName. ) { - UrReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); + ur::ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); - std::shared_lock Guard(Kernel->Mutex); + std::shared_lock Guard(Kernel->Mutex); switch (ParamName) { case UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: { // TODO: To revisit after level_zero/issues/262 is resolved @@ -562,9 +562,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSubGroupInfo( ) { std::ignore = Device; - UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); + ur::ReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); - std::shared_lock Guard(Kernel->Mutex); + std::shared_lock Guard(Kernel->Mutex); if (PropName == UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE) { ReturnValue(uint32_t{Kernel->ZeKernelProperties->maxSubgroupSize}); } else if (PropName == UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS) { @@ -639,7 +639,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( std::ignore = PropSize; std::ignore = Properties; - std::scoped_lock Guard(Kernel->Mutex); + std::scoped_lock Guard(Kernel->Mutex); if (PropName == UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS && *(static_cast(PropValue)) == true) { // The whole point for users really was to not need to know anything @@ -680,7 +680,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( ur_sampler_handle_t ArgValue ///< [in] handle of Sampler object. ) { std::ignore = Properties; - std::scoped_lock Guard(Kernel->Mutex); + std::scoped_lock Guard(Kernel->Mutex); ZE2UR_CALL(zeKernelSetArgumentValue, (Kernel->ZeKernel, ArgIndex, sizeof(void *), &ArgValue->ZeSampler)); @@ -696,7 +696,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( ) { std::ignore = Properties; - std::scoped_lock Guard(Kernel->Mutex); + std::scoped_lock Guard(Kernel->Mutex); // The ArgValue may be a NULL pointer in which case a NULL value is used for // the kernel argument declared as a pointer to global or constant memory. @@ -730,7 +730,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( ur_native_handle_t *NativeKernel ///< [out] a pointer to the native handle of the kernel. ) { - std::shared_lock Guard(Kernel->Mutex); + std::shared_lock Guard(Kernel->Mutex); *NativeKernel = reinterpret_cast(Kernel->ZeKernel); return UR_RESULT_SUCCESS; diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index fa3ef18e47..a0989865b5 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -257,7 +257,7 @@ static ur_result_t ZeHostMemAllocHelper(void **ResultPtr, ur_context_handle_t UrContext, size_t Size) { ur_platform_handle_t Plt = UrContext->getPlatform(); - std::unique_lock ContextsLock(Plt->ContextsMutex, + std::unique_lock ContextsLock(Plt->ContextsMutex, std::defer_lock); if (IndirectAccessTrackingEnabled) { // Lock the mutex which is guarding contexts container in the platform. @@ -485,8 +485,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( ) { ur_mem_handle_t_ *Src = ur_cast(hBuffer); - std::shared_lock SrcLock(Src->Mutex, std::defer_lock); - std::scoped_lock, ur_shared_mutex> LockAll( + std::shared_lock SrcLock(Src->Mutex, std::defer_lock); + std::scoped_lock, ur::SharedMutex> LockAll( SrcLock, Queue->Mutex); char *ZeHandleSrc = nullptr; @@ -521,7 +521,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( ) { ur_mem_handle_t_ *Buffer = ur_cast(hBuffer); - std::scoped_lock Lock(Queue->Mutex, + std::scoped_lock Lock(Queue->Mutex, Buffer->Mutex); char *ZeHandleDst = nullptr; @@ -566,8 +566,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( ) { ur_mem_handle_t_ *Buffer = ur_cast(hBuffer); - std::shared_lock SrcLock(Buffer->Mutex, std::defer_lock); - std::scoped_lock, ur_shared_mutex> LockAll( + std::shared_lock SrcLock(Buffer->Mutex, std::defer_lock); + std::scoped_lock, ur::SharedMutex> LockAll( SrcLock, Queue->Mutex); char *ZeHandleSrc; @@ -613,7 +613,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( ) { ur_mem_handle_t_ *Buffer = ur_cast(hBuffer); - std::scoped_lock Lock(Queue->Mutex, + std::scoped_lock Lock(Queue->Mutex, Buffer->Mutex); char *ZeHandleDst = nullptr; @@ -652,9 +652,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( UR_ASSERT(!SrcBuffer->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); UR_ASSERT(!DstBuffer->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - std::shared_lock SrcLock(SrcBuffer->Mutex, std::defer_lock); - std::scoped_lock, ur_shared_mutex, - ur_shared_mutex> + std::shared_lock SrcLock(SrcBuffer->Mutex, std::defer_lock); + std::scoped_lock, ur::SharedMutex, + ur::SharedMutex> LockAll(SrcLock, DstBuffer->Mutex, Queue->Mutex); // Copy engine is preferred only for host to device transfer. @@ -712,9 +712,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( UR_ASSERT(!SrcBuffer->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); UR_ASSERT(!DstBuffer->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - std::shared_lock SrcLock(SrcBuffer->Mutex, std::defer_lock); - std::scoped_lock, ur_shared_mutex, - ur_shared_mutex> + std::shared_lock SrcLock(SrcBuffer->Mutex, std::defer_lock); + std::scoped_lock, ur::SharedMutex, + ur::SharedMutex> LockAll(SrcLock, DstBuffer->Mutex, Queue->Mutex); // Copy engine is preferred only for host to device transfer. @@ -755,7 +755,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - std::scoped_lock Lock(Queue->Mutex, + std::scoped_lock Lock(Queue->Mutex, Buffer->Mutex); char *ZeHandleDst = nullptr; @@ -792,7 +792,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - std::scoped_lock Lock(Queue->Mutex, + std::scoped_lock Lock(Queue->Mutex, Image->Mutex); return enqueueMemImageCommandHelper( UR_COMMAND_MEM_IMAGE_READ, Queue, Image, Dst, BlockingRead, &Origin, @@ -824,7 +824,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - std::scoped_lock Lock(Queue->Mutex, + std::scoped_lock Lock(Queue->Mutex, Image->Mutex); return enqueueMemImageCommandHelper( UR_COMMAND_MEM_IMAGE_WRITE, Queue, Src, Image, BlockingWrite, nullptr, @@ -854,9 +854,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - std::shared_lock SrcLock(ImageSrc->Mutex, std::defer_lock); - std::scoped_lock, ur_shared_mutex, - ur_shared_mutex> + std::shared_lock SrcLock(ImageSrc->Mutex, std::defer_lock); + std::scoped_lock, ur::SharedMutex, + ur::SharedMutex> LockAll(SrcLock, ImageDst->Mutex, Queue->Mutex); // Copy engine is preferred only for host to device transfer. // Device to device transfers run faster on compute engines. @@ -905,7 +905,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( bool UseCopyEngine = false; { // Lock automatically releases when this goes out of scope. - std::scoped_lock lock(Queue->Mutex); + std::scoped_lock lock(Queue->Mutex); _ur_ze_event_list_t TmpWaitList; UR_CALL(TmpWaitList.createAndRetainUrZeEventList( @@ -957,7 +957,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( UR_CALL(urQueueFinish(Queue)); // Lock automatically releases when this goes out of scope. - std::scoped_lock Guard(Buffer->Mutex); + std::scoped_lock Guard(Buffer->Mutex); char *ZeHandleSrc; UR_CALL(Buffer->getZeHandle(ZeHandleSrc, AccessMode, Queue->Device)); @@ -987,7 +987,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( } // Lock automatically releases when this goes out of scope. - std::scoped_lock Lock(Queue->Mutex, + std::scoped_lock Lock(Queue->Mutex, Buffer->Mutex); if (Buffer->MapHostPtr) { @@ -1063,7 +1063,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent; { // Lock automatically releases when this goes out of scope. - std::scoped_lock lock(Queue->Mutex); + std::scoped_lock lock(Queue->Mutex); _ur_ze_event_list_t TmpWaitList; UR_CALL(TmpWaitList.createAndRetainUrZeEventList( @@ -1079,7 +1079,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( _ur_buffer::Mapping MapInfo = {}; { // Lock automatically releases when this goes out of scope. - std::scoped_lock Guard(Buffer->Mutex); + std::scoped_lock Guard(Buffer->Mutex); auto It = Buffer->Mappings.find(MappedPtr); if (It == Buffer->Mappings.end()) { urPrint("urEnqueueMemUnmap: unknown memory mapping\n"); @@ -1111,7 +1111,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( UR_CALL(Buffer->getZeHandle(ZeHandleDst, ur_mem_handle_t_::write_only, Queue->Device)); - std::scoped_lock Guard(Buffer->Mutex); + std::scoped_lock Guard(Buffer->Mutex); if (Buffer->MapHostPtr) memcpy(ZeHandleDst + MapInfo.Offset, MappedPtr, MapInfo.Size); @@ -1122,7 +1122,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( } // Lock automatically releases when this goes out of scope. - std::scoped_lock Lock(Queue->Mutex, + std::scoped_lock Lock(Queue->Mutex, Buffer->Mutex); ur_command_list_ptr_t CommandList{}; @@ -1201,7 +1201,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - std::scoped_lock lock(Queue->Mutex); + std::scoped_lock lock(Queue->Mutex); // Device to Device copies are found to execute slower on copy engine // (versus compute engine). @@ -1235,7 +1235,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( ) { std::ignore = Flags; // Lock automatically releases when this goes out of scope. - std::scoped_lock lock(Queue->Mutex); + std::scoped_lock lock(Queue->Mutex); bool UseCopyEngine = false; @@ -1294,7 +1294,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( ///< this particular command instance. ) { // Lock automatically releases when this goes out of scope. - std::scoped_lock lock(Queue->Mutex); + std::scoped_lock lock(Queue->Mutex); auto ZeAdvice = ur_cast(Advice); @@ -1434,7 +1434,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ur_rect_offset_t ZeroOffset{0, 0, 0}; ur_rect_region_t Region{Width, Height, 0}; - std::scoped_lock lock(Queue->Mutex); + std::scoped_lock lock(Queue->Mutex); // Device to Device copies are found to execute slower on copy engine // (versus compute engine). @@ -1606,7 +1606,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( "no read-only or write-only yet."); } - std::shared_lock Lock(Context->Mutex); + std::shared_lock Lock(Context->Mutex); ZeStruct ZeImageDesc; UR_CALL(ur2zeImageDesc(ImageFormat, ImageDesc, ZeImageDesc)); @@ -1636,7 +1636,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( // zeCommandListAppendImageCopyFromMemory must not be called from // simultaneous threads with the same command list handle, so we need // exclusive lock. - std::scoped_lock Lock(Context->ImmediateCommandListMutex); + std::scoped_lock Lock(Context->ImmediateCommandListMutex); ZE2UR_CALL(zeCommandListAppendImageCopyFromMemory, (Context->ZeCommandListInit, ZeImage, Host, nullptr, nullptr, 0, nullptr)); @@ -1660,7 +1660,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( *Properties, ///< [in][optional] pointer to native memory creation ///< properties. ur_mem_handle_t *Mem) { - std::shared_lock Lock(Context->Mutex); + std::shared_lock Lock(Context->Mutex); ze_image_handle_t ZeHImage = ur_cast(NativeMem); @@ -1770,7 +1770,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( // Initialize the buffer synchronously with immediate offload // zeCommandListAppendMemoryCopy must not be called from simultaneous // threads with the same command list handle, so we need exclusive lock. - std::scoped_lock Lock(Context->ImmediateCommandListMutex); + std::scoped_lock Lock(Context->ImmediateCommandListMutex); ZE2UR_CALL(zeCommandListAppendMemoryCopy, (Context->ZeCommandListInit, ZeHandleDst, Host, Size, nullptr, 0, nullptr)); @@ -1833,7 +1833,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( !(static_cast<_ur_buffer *>(Buffer))->isSubBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - std::shared_lock Guard(Buffer->Mutex); + std::shared_lock Guard(Buffer->Mutex); if (Flags != UR_MEM_FLAG_READ_WRITE) { die("urMemBufferPartition: Level-Zero implements only read-write buffer," @@ -1859,7 +1859,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetNativeHandle( ur_native_handle_t *NativeMem ///< [out] a pointer to the native handle of the mem. ) { - std::shared_lock Guard(Mem->Mutex); + std::shared_lock Guard(Mem->Mutex); char *ZeHandle = nullptr; UR_CALL(Mem->getZeHandle(ZeHandle, ur_mem_handle_t_::read_write)); *NativeMem = ur_cast(ZeHandle); @@ -1878,7 +1878,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( ) { bool OwnNativeHandle = Properties->isNativeHandleOwned; - std::shared_lock Lock(Context->Mutex); + std::shared_lock Lock(Context->Mutex); // Get base of the allocation void *Base = nullptr; @@ -1923,7 +1923,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( } ur_platform_handle_t Plt = Context->getPlatform(); - std::unique_lock ContextsLock(Plt->ContextsMutex, + std::unique_lock ContextsLock(Plt->ContextsMutex, std::defer_lock); // If we don't own the native handle then we can't control deallocation of // that memory so there is no point of keeping track of the memory @@ -1957,7 +1957,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( // zeCommandListAppendMemoryCopy must not be called from simultaneous // threads with the same command list handle, so we need exclusive lock. - std::scoped_lock Lock(Context->ImmediateCommandListMutex); + std::scoped_lock Lock(Context->ImmediateCommandListMutex); ZE2UR_CALL(zeCommandListAppendMemoryCopy, (Context->ZeCommandListInit, ZeHandleDst, Ptr, Size, nullptr, 0, nullptr)); @@ -1983,8 +1983,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo( UR_RESULT_ERROR_INVALID_VALUE); auto Buffer = reinterpret_cast<_ur_buffer *>(Memory); - std::shared_lock Lock(Buffer->Mutex); - UrReturnHelper ReturnValue(PropSize, MemInfo, PropSizeRet); + std::shared_lock Lock(Buffer->Mutex); + ur::ReturnHelper ReturnValue(PropSize, MemInfo, PropSizeRet); switch (MemInfoType) { case UR_MEM_INFO_CONTEXT: { @@ -2031,7 +2031,7 @@ static ur_result_t ZeDeviceMemAllocHelper(void **ResultPtr, ur_device_handle_t Device, size_t Size) { ur_platform_handle_t Plt = Device->Platform; - std::unique_lock ContextsLock(Plt->ContextsMutex, + std::unique_lock ContextsLock(Plt->ContextsMutex, std::defer_lock); if (IndirectAccessTrackingEnabled) { // Lock the mutex which is guarding contexts container in the platform. @@ -2235,7 +2235,7 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode, HostAllocation.ZeHandle = reinterpret_cast(ZeHandleHost); HostAllocation.Valid = false; } - std::scoped_lock Lock(UrContext->ImmediateCommandListMutex); + std::scoped_lock Lock(UrContext->ImmediateCommandListMutex); if (!HostAllocation.Valid) { ZE2UR_CALL(zeCommandListAppendMemoryCopy, (UrContext->ZeCommandListInit, HostAllocation.ZeHandle, @@ -2250,7 +2250,7 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode, HostAllocation.ZeHandle, Size, nullptr, 0, nullptr)); } else { // Perform P2P copy. - std::scoped_lock Lock(UrContext->ImmediateCommandListMutex); + std::scoped_lock Lock(UrContext->ImmediateCommandListMutex); ZE2UR_CALL(zeCommandListAppendMemoryCopy, (UrContext->ZeCommandListInit, ZeHandle, ZeHandleSrc, Size, nullptr, 0, nullptr)); @@ -2287,7 +2287,7 @@ ur_result_t _ur_buffer::free() { break; case allocation_t::free: { ur_platform_handle_t Plt = UrContext->getPlatform(); - std::scoped_lock Lock(IndirectAccessTrackingEnabled + std::scoped_lock Lock(IndirectAccessTrackingEnabled ? Plt->ContextsMutex : UrContext->Mutex); @@ -2416,7 +2416,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( ur_event_handle_t *Event ///< [out][optional] return an event object that ///< identifies this particular command instance. ) { - std::scoped_lock Lock(Queue->Mutex); + std::scoped_lock Lock(Queue->Mutex); return enqueueMemFillHelper( // TODO: do we need a new command type for USM memset? diff --git a/source/adapters/level_zero/memory.hpp b/source/adapters/level_zero/memory.hpp index 8efd5b136e..fc8f26c8ed 100644 --- a/source/adapters/level_zero/memory.hpp +++ b/source/adapters/level_zero/memory.hpp @@ -19,7 +19,6 @@ #include #include -#include #include #include #include diff --git a/source/adapters/level_zero/platform.cpp b/source/adapters/level_zero/platform.cpp index 335a920294..0edfde44ee 100644 --- a/source/adapters/level_zero/platform.cpp +++ b/source/adapters/level_zero/platform.cpp @@ -80,7 +80,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( // 2. performance; we can save time by immediately return from cache. // - const std::lock_guard Lock{*URPlatformsCacheMutex}; + const std::lock_guard Lock{*URPlatformsCacheMutex}; if (!URPlatformCachePopulated) { try { // Level Zero does not have concept of Platforms, but Level Zero driver is @@ -139,7 +139,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetInfo( size_t *SizeRet ///< [out][optional] pointer to the actual number of bytes ///< being queried by pPlatformInfo. ) { - UrReturnHelper ReturnValue(Size, ParamValue, SizeRet); + ur::ReturnHelper ReturnValue(Size, ParamValue, SizeRet); switch (ParamName) { case UR_PLATFORM_INFO_NAME: @@ -304,7 +304,7 @@ ur_platform_handle_t_::getDeviceFromNativeHandle(ze_device_handle_t ZeDevice) { // mapping from L0 device handle to PI device assumed in this function. Until // Level-Zero adds unique ze_device_handle_t for sub-sub-devices, here we // filter out PI sub-sub-devices. - std::shared_lock Lock(URDevicesCacheMutex); + std::shared_lock Lock(URDevicesCacheMutex); auto it = std::find_if(URDevicesCache.begin(), URDevicesCache.end(), [&](std::unique_ptr &D) { return D.get()->ZeDevice == ZeDevice && @@ -319,7 +319,7 @@ ur_platform_handle_t_::getDeviceFromNativeHandle(ze_device_handle_t ZeDevice) { // Check the device cache and load it if necessary. ur_result_t ur_platform_handle_t_::populateDeviceCacheIfNeeded() { - std::scoped_lock Lock(URDevicesCacheMutex); + std::scoped_lock Lock(URDevicesCacheMutex); if (DeviceCachePopulated) { return UR_RESULT_SUCCESS; diff --git a/source/adapters/level_zero/platform.hpp b/source/adapters/level_zero/platform.hpp index 86aa4ec745..77fe1377a8 100644 --- a/source/adapters/level_zero/platform.hpp +++ b/source/adapters/level_zero/platform.hpp @@ -14,7 +14,7 @@ struct ur_device_handle_t_; -struct ur_platform_handle_t_ : public _ur_platform { +struct ur_platform_handle_t_ { ur_platform_handle_t_(ze_driver_handle_t Driver) : ZeDriver{Driver}, ZeApiVersion{ZE_API_VERSION_CURRENT} {} // Performs initialization of a newly constructed PI platform. @@ -38,7 +38,7 @@ struct ur_platform_handle_t_ : public _ur_platform { // Cache UR devices for reuse std::vector> URDevicesCache; - ur_shared_mutex URDevicesCacheMutex; + ur::SharedMutex URDevicesCacheMutex; bool DeviceCachePopulated = false; // Check the device cache and load it if necessary. @@ -54,5 +54,5 @@ struct ur_platform_handle_t_ : public _ur_platform { // TODO: should be deleted when memory isolation in the context is implemented // in the driver. std::list Contexts; - ur_shared_mutex ContextsMutex; + ur::SharedMutex ContextsMutex; }; diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index f118a5b9dd..d03c1bfa9a 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -140,7 +140,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( return UR_RESULT_ERROR_INVALID_OPERATION; } - std::scoped_lock Guard(hProgram->Mutex); + std::scoped_lock Guard(hProgram->Mutex); // Ask Level Zero to build and load the native code onto the device. ZeStruct ZeModuleDesc; @@ -234,7 +234,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile( ///< null-terminated string. ) { std::ignore = Context; - std::scoped_lock Guard(Program->Mutex); + std::scoped_lock Guard(Program->Mutex); // It's only valid to compile a program created from IL (we don't support // programs created from source code). @@ -322,9 +322,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( // potential if there was some other code that holds more than one of these // locks simultaneously with "exclusive" access. However, there is no such // code like that, so this is also not a danger. - std::vector> Guards(count); + std::vector> Guards(count); for (uint32_t I = 0; I < count; I++) { - std::shared_lock Guard(phPrograms[I]->Mutex); + std::shared_lock Guard(phPrograms[I]->Mutex); Guards[I].swap(Guard); if (phPrograms[I]->State != ur_program_handle_t_::Object) { return UR_RESULT_ERROR_INVALID_OPERATION; @@ -512,7 +512,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( ) { std::ignore = Device; - std::shared_lock Guard(Program->Mutex); + std::shared_lock Guard(Program->Mutex); if (Program->State != ur_program_handle_t_::Exe) { return UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE; } @@ -571,7 +571,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo( size_t *PropSizeRet ///< [out][optional] pointer to the actual size in ///< bytes of data copied to propName. ) { - UrReturnHelper ReturnValue(PropSize, ProgramInfo, PropSizeRet); + ur::ReturnHelper ReturnValue(PropSize, ProgramInfo, PropSizeRet); switch (PropName) { case UR_PROGRAM_INFO_REFERENCE_COUNT: @@ -585,7 +585,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo( // TODO: return all devices this program exists for. return ReturnValue(Program->Context->Devices[0]); case UR_PROGRAM_INFO_BINARY_SIZES: { - std::shared_lock Guard(Program->Mutex); + std::shared_lock Guard(Program->Mutex); size_t SzBinary; if (Program->State == ur_program_handle_t_::IL || Program->State == ur_program_handle_t_::Native || @@ -609,7 +609,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo( if (!PBinary[0]) break; - std::shared_lock Guard(Program->Mutex); + std::shared_lock Guard(Program->Mutex); if (Program->State == ur_program_handle_t_::IL || Program->State == ur_program_handle_t_::Native || Program->State == ur_program_handle_t_::Object) { @@ -624,7 +624,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo( break; } case UR_PROGRAM_INFO_NUM_KERNELS: { - std::shared_lock Guard(Program->Mutex); + std::shared_lock Guard(Program->Mutex); uint32_t NumKernels; if (Program->State == ur_program_handle_t_::IL || Program->State == ur_program_handle_t_::Native || @@ -641,7 +641,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo( } case UR_PROGRAM_INFO_KERNEL_NAMES: try { - std::shared_lock Guard(Program->Mutex); + std::shared_lock Guard(Program->Mutex); std::string PINames{""}; if (Program->State == ur_program_handle_t_::IL || Program->State == ur_program_handle_t_::Native || @@ -690,8 +690,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetBuildInfo( ) { std::ignore = Device; - std::shared_lock Guard(Program->Mutex); - UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); + std::shared_lock Guard(Program->Mutex); + ur::ReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); if (PropName == UR_PROGRAM_BUILD_INFO_BINARY_TYPE) { ur_program_binary_type_t Type = UR_PROGRAM_BINARY_TYPE_NONE; if (Program->State == ur_program_handle_t_::Object) { @@ -769,7 +769,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( ) { auto ZeModule = ur_cast(NativeProgram); - std::shared_lock Guard(Program->Mutex); + std::shared_lock Guard(Program->Mutex); switch (Program->State) { case ur_program_handle_t_::Exe: { *ZeModule = Program->ZeModule; @@ -833,7 +833,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( *SpecConstants ///< [in][range(0, count)] array of specialization ///< constant value descriptions ) { - std::scoped_lock Guard(Program->Mutex); + std::scoped_lock Guard(Program->Mutex); // Remember the value of this specialization constant until the program is // built. Note that we only save the pointer to the buffer that contains the diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index f07e0df675..708f294668 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -40,7 +40,7 @@ ur_result_t CleanupEventsInImmCmdLists(ur_queue_handle_t UrQueue, std::vector EventListToCleanup; { - std::unique_lock QueueLock(UrQueue->Mutex, + std::unique_lock QueueLock(UrQueue->Mutex, std::defer_lock); if (!QueueLocked) QueueLock.lock(); @@ -151,8 +151,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo( ///< property value ) { - std::shared_lock Lock(Queue->Mutex); - UrReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); + std::shared_lock Lock(Queue->Mutex); + ur::ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); // TODO: consider support for queue properties and size switch ((uint32_t)ParamName) { // cast to avoid warnings on EXT enum values case UR_QUEUE_INFO_CONTEXT: @@ -292,7 +292,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( // for building to all the devices in the context. // { // Lock context for thread-safe update - std::scoped_lock Lock(Context->Mutex); + std::scoped_lock Lock(Context->Mutex); UR_ASSERT(Context->isValidDevice(Device), UR_RESULT_ERROR_INVALID_DEVICE); auto MakeFirst = Context->Devices.begin(); @@ -409,7 +409,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain( ur_queue_handle_t Queue ///< [in] handle of the queue object to get access ) { { - std::scoped_lock Lock(Queue->Mutex); + std::scoped_lock Lock(Queue->Mutex); Queue->RefCountExternal++; } Queue->RefCount.increment(); @@ -422,7 +422,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease( std::vector EventListToCleanup; { - std::scoped_lock Lock(Queue->Mutex); + std::scoped_lock Lock(Queue->Mutex); if ((--Queue->RefCountExternal) != 0) return UR_RESULT_SUCCESS; @@ -464,7 +464,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease( return ze2urResult(ZeResult); } if (Queue->UsingImmCmdLists && Queue->OwnZeCommandQueue) { - std::scoped_lock Lock( + std::scoped_lock Lock( Queue->Context->ZeCommandListCacheMutex); const ur_command_list_info_t &MapEntry = it->second; if (MapEntry.CanReuse) { @@ -494,7 +494,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease( // We don't need to synchronize the events since the queue // synchronized above already does that. { - std::scoped_lock EventLock(Event->Mutex); + std::scoped_lock EventLock(Event->Mutex); Event->Completed = true; } UR_CALL(CleanupCompletedEvent(Event)); @@ -513,7 +513,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetNativeHandle( *NativeQueue ///< [out] a pointer to the native handle of the queue. ) { // Lock automatically releases when this goes out of scope. - std::shared_lock lock(Queue->Mutex); + std::shared_lock lock(Queue->Mutex); int32_t NativeHandleDesc{}; @@ -652,11 +652,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish( ) { if (UrQueue->UsingImmCmdLists) { // Lock automatically releases when this goes out of scope. - std::scoped_lock Lock(UrQueue->Mutex); + std::scoped_lock Lock(UrQueue->Mutex); UrQueue->synchronize(); } else { - std::unique_lock Lock(UrQueue->Mutex); + std::unique_lock Lock(UrQueue->Mutex); std::vector ZeQueues; // execute any command list that may still be open. @@ -696,7 +696,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish( // We can only do so if nothing else was submitted to the queue // while we were synchronizing it. if (!HoldLock) { - std::scoped_lock Lock(UrQueue->Mutex); + std::scoped_lock Lock(UrQueue->Mutex); if (LastCommandEvent == UrQueue->LastCommandEvent) { UrQueue->LastCommandEvent = nullptr; } @@ -708,7 +708,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish( // available command lists. Events in the immediate command lists are cleaned // up in synchronize(). if (!UrQueue->UsingImmCmdLists) { - std::unique_lock Lock(UrQueue->Mutex); + std::unique_lock Lock(UrQueue->Mutex); resetCommandLists(UrQueue); } return UR_RESULT_SUCCESS; @@ -1115,7 +1115,7 @@ ur_queue_handle_t_::executeCommandList(ur_command_list_ptr_t CommandList, // unique_lock destructor at the end of the function will unlock the mutex // if it was locked (which happens only if IndirectAccessTrackingEnabled is // true). - std::unique_lock ContextsLock( + std::unique_lock ContextsLock( Device->Platform->ContextsMutex, std::defer_lock); if (IndirectAccessTrackingEnabled) { @@ -1160,7 +1160,7 @@ ur_queue_handle_t_::executeCommandList(ur_command_list_ptr_t CommandList, // Update each command's event in the command-list to "see" this // proxy event as a host-visible counterpart. for (auto &Event : CommandList->second.EventList) { - std::scoped_lock EventLock(Event->Mutex); + std::scoped_lock EventLock(Event->Mutex); // Internal event doesn't need host-visible proxy. if (!Event->hasExternalRefs()) continue; @@ -1644,7 +1644,7 @@ ur_result_t ur_queue_handle_t_::resetCommandList( // If events in the queue are discarded then we can't check their status. // Helper for checking of event completion auto EventCompleted = [](ur_event_handle_t Event) -> bool { - std::scoped_lock EventLock(Event->Mutex); + std::scoped_lock EventLock(Event->Mutex); ze_result_t ZeResult = Event->Completed ? ZE_RESULT_SUCCESS @@ -1692,7 +1692,7 @@ ur_result_t ur_queue_handle_t_::resetCommandList( // Standard commandlists move in and out of the cache as they are recycled. // Immediate commandlists are always available. if (CommandList->second.ZeFence != nullptr && MakeAvailable) { - std::scoped_lock Lock(this->Context->ZeCommandListCacheMutex); + std::scoped_lock Lock(this->Context->ZeCommandListCacheMutex); auto &ZeCommandListCache = UseCopyEngine ? this->Context->ZeCopyCommandListCache[this->Device->ZeDevice] @@ -1982,7 +1982,7 @@ ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() { ze_command_list_handle_t ZeCommandList = nullptr; { // Acquire lock to avoid race conditions. - std::scoped_lock Lock(Queue->Context->ZeCommandListCacheMutex); + std::scoped_lock Lock(Queue->Context->ZeCommandListCacheMutex); // Under mutex since operator[] does insertion on the first usage for every // unique ZeDevice. auto &ZeCommandListCache = diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index 88281925ce..46ba561ef3 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -18,8 +18,8 @@ #include #include -#include #include +#include #include #include diff --git a/source/adapters/level_zero/sampler.cpp b/source/adapters/level_zero/sampler.cpp index f32c59aeeb..012fdec8c1 100644 --- a/source/adapters/level_zero/sampler.cpp +++ b/source/adapters/level_zero/sampler.cpp @@ -19,7 +19,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreate( ur_sampler_handle_t *Sampler ///< [out] pointer to handle of sampler object created ) { - std::shared_lock Lock(Context->Mutex); + std::shared_lock Lock(Context->Mutex); // Have the "0" device in context to own the sampler. Rely on Level-Zero // drivers to perform migration as necessary for sharing it across multiple diff --git a/source/adapters/level_zero/ur_level_zero.hpp b/source/adapters/level_zero/ur_level_zero.hpp index dd7bbf67b3..1e3fe00959 100644 --- a/source/adapters/level_zero/ur_level_zero.hpp +++ b/source/adapters/level_zero/ur_level_zero.hpp @@ -18,8 +18,8 @@ #include #include -#include #include +#include #include #include diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index e4a00249a2..5e311ef025 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -318,9 +318,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( // indirect access. This lock also protects access to the context's data // structures. If indirect access tracking is not enabled then lock context // mutex to protect access to context's data structures. - std::shared_lock ContextLock(Context->Mutex, + std::shared_lock ContextLock(Context->Mutex, std::defer_lock); - std::unique_lock IndirectAccessTrackingLock( + std::unique_lock IndirectAccessTrackingLock( Plt->ContextsMutex, std::defer_lock); if (IndirectAccessTrackingEnabled) { IndirectAccessTrackingLock.lock(); @@ -392,9 +392,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( // indirect access. This lock also protects access to the context's data // structures. If indirect access tracking is not enabled then lock context // mutex to protect access to context's data structures. - std::shared_lock ContextLock(Context->Mutex, + std::shared_lock ContextLock(Context->Mutex, std::defer_lock); - std::unique_lock IndirectAccessTrackingLock( + std::unique_lock IndirectAccessTrackingLock( Plt->ContextsMutex, std::defer_lock); if (IndirectAccessTrackingEnabled) { IndirectAccessTrackingLock.lock(); @@ -496,7 +496,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( // indirect access. This lock also protects access to the context's data // structures. If indirect access tracking is not enabled then lock context // mutex to protect access to context's data structures. - std::scoped_lock Lock( + std::scoped_lock Lock( IndirectAccessTrackingEnabled ? Plt->ContextsMutex : Context->Mutex); if (IndirectAccessTrackingEnabled) { @@ -555,7 +555,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMFree( ) { ur_platform_handle_t Plt = Context->getPlatform(); - std::scoped_lock Lock( + std::scoped_lock Lock( IndirectAccessTrackingEnabled ? Plt->ContextsMutex : Context->Mutex); return USMFreeHelper(Context, Mem); @@ -579,7 +579,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( (Context->ZeContext, Ptr, &ZeMemoryAllocationProperties, &ZeDeviceHandle)); - UrReturnHelper ReturnValue(PropValueSize, PropValue, PropValueSizeRet); + ur::ReturnHelper ReturnValue(PropValueSize, PropValue, PropValueSizeRet); switch (PropName) { case UR_USM_ALLOC_INFO_TYPE: { ur_usm_type_t MemAllocaType; @@ -626,7 +626,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( return UR_RESULT_ERROR_INVALID_VALUE; } - std::shared_lock ContextLock(Context->Mutex); + std::shared_lock ContextLock(Context->Mutex); auto SearchMatchingPool = [](std::unordered_map @@ -888,7 +888,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( *Pool = reinterpret_cast( new ur_usm_pool_handle_t_(Context, PoolDesc)); - std::shared_lock ContextLock(Context->Mutex); + std::shared_lock ContextLock(Context->Mutex); Context->UsmPoolHandles.insert(Context->UsmPoolHandles.cend(), *Pool); } catch (const UsmAllocationException &Ex) { @@ -908,7 +908,7 @@ ur_result_t urUSMPoolRelease(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool ) { if (Pool->RefCount.decrementAndTest()) { - std::shared_lock ContextLock(Pool->Context->Mutex); + std::shared_lock ContextLock(Pool->Context->Mutex); Pool->Context->UsmPoolHandles.remove(Pool); delete Pool; } @@ -923,7 +923,7 @@ ur_result_t urUSMPoolGetInfo( ///< property size_t *PropSizeRet ///< [out] size in bytes returned in pool property value ) { - UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); + ur::ReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); switch (PropName) { case UR_USM_POOL_INFO_REFERENCE_COUNT: { @@ -943,7 +943,7 @@ ur_result_t urUSMPoolGetInfo( // performed. ur_result_t ZeMemFreeHelper(ur_context_handle_t Context, void *Ptr) { ur_platform_handle_t Plt = Context->getPlatform(); - std::unique_lock ContextsLock(Plt->ContextsMutex, + std::unique_lock ContextsLock(Plt->ContextsMutex, std::defer_lock); if (IndirectAccessTrackingEnabled) { ContextsLock.lock(); diff --git a/source/adapters/level_zero/usm_p2p.cpp b/source/adapters/level_zero/usm_p2p.cpp index dc59bbcc4b..e8cd4f40a1 100644 --- a/source/adapters/level_zero/usm_p2p.cpp +++ b/source/adapters/level_zero/usm_p2p.cpp @@ -39,7 +39,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( std::ignore = peerDevice; std::ignore = propName; - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); // Zero return value indicates that all of the queries currently return false. return ReturnValue(uint32_t{0}); } diff --git a/source/adapters/level_zero/virtual_mem.cpp b/source/adapters/level_zero/virtual_mem.cpp index de72502614..d685fa779f 100644 --- a/source/adapters/level_zero/virtual_mem.cpp +++ b/source/adapters/level_zero/virtual_mem.cpp @@ -18,7 +18,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_virtual_mem_granularity_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM: case UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED: { @@ -96,7 +96,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo( ur_context_handle_t hContext, const void *pStart, [[maybe_unused]] size_t size, ur_virtual_mem_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_VIRTUAL_MEM_INFO_ACCESS_MODE: { size_t QuerySize; diff --git a/source/adapters/native_cpu/CMakeLists.txt b/source/adapters/native_cpu/CMakeLists.txt index 8549a4d3c0..88e8e94619 100644 --- a/source/adapters/native_cpu/CMakeLists.txt +++ b/source/adapters/native_cpu/CMakeLists.txt @@ -38,8 +38,6 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp ${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.hpp ) set_target_properties(${TARGET_NAME} PROPERTIES diff --git a/source/adapters/native_cpu/adapter.cpp b/source/adapters/native_cpu/adapter.cpp index 622c3edc3d..7c70188ddc 100644 --- a/source/adapters/native_cpu/adapter.cpp +++ b/source/adapters/native_cpu/adapter.cpp @@ -49,7 +49,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_ADAPTER_INFO_BACKEND: diff --git a/source/adapters/native_cpu/common.hpp b/source/adapters/native_cpu/common.hpp index d792cbbbcf..ae7903dfd2 100644 --- a/source/adapters/native_cpu/common.hpp +++ b/source/adapters/native_cpu/common.hpp @@ -10,7 +10,10 @@ #pragma once -#include "ur/ur.hpp" +#include + +#include "ur_api.h" +#include "ur_util.hpp" constexpr size_t MaxMessageSize = 256; @@ -58,7 +61,7 @@ namespace ur { // Base class to store common data struct _ur_object { - ur_shared_mutex Mutex; + ur::SharedMutex Mutex; }; // Todo: replace this with a common helper once it is available diff --git a/source/adapters/native_cpu/context.cpp b/source/adapters/native_cpu/context.cpp index c485725828..c9e7aebc67 100644 --- a/source/adapters/native_cpu/context.cpp +++ b/source/adapters/native_cpu/context.cpp @@ -11,7 +11,6 @@ #include #include -#include "ur/ur.hpp" #include "ur_api.h" #include "common.hpp" @@ -45,7 +44,7 @@ urContextRelease(ur_context_handle_t hContext) { UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper returnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper returnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_CONTEXT_INFO_NUM_DEVICES: diff --git a/source/adapters/native_cpu/device.cpp b/source/adapters/native_cpu/device.cpp index 3432ce780e..4fff958d24 100644 --- a/source/adapters/native_cpu/device.cpp +++ b/source/adapters/native_cpu/device.cpp @@ -58,7 +58,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, void *pPropValue, size_t *pPropSizeRet) { UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (static_cast(propName)) { case UR_DEVICE_INFO_TYPE: diff --git a/source/adapters/native_cpu/device.hpp b/source/adapters/native_cpu/device.hpp index 60dacde5bc..8e01d57809 100644 --- a/source/adapters/native_cpu/device.hpp +++ b/source/adapters/native_cpu/device.hpp @@ -10,7 +10,7 @@ #pragma once -#include +#include struct ur_device_handle_t_ { ur_device_handle_t_(ur_platform_handle_t ArgPlt) : Platform(ArgPlt) {} diff --git a/source/adapters/native_cpu/image.cpp b/source/adapters/native_cpu/image.cpp index 01da9b7af8..86fdfd4ca8 100644 --- a/source/adapters/native_cpu/image.cpp +++ b/source/adapters/native_cpu/image.cpp @@ -8,7 +8,7 @@ // //===----------------------------------------------------------------------===// -#include "ur/ur.hpp" +#include "ur_api.h" UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp( [[maybe_unused]] ur_context_handle_t hContext, diff --git a/source/adapters/native_cpu/kernel.cpp b/source/adapters/native_cpu/kernel.cpp index 5a7a286adc..ee2dc6656d 100644 --- a/source/adapters/native_cpu/kernel.cpp +++ b/source/adapters/native_cpu/kernel.cpp @@ -75,9 +75,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, std::ignore = propName; std::ignore = pPropValue; - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); // todo: check if we need this - // std::shared_lock Guard(hKernel->Mutex); + // std::shared_lock Guard(hKernel->Mutex); switch (propName) { // case UR_KERNEL_INFO_CONTEXT: // return ReturnValue(ur_context_handle_t{ hKernel->Program->Context }); @@ -108,7 +108,7 @@ urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UrReturnHelper returnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper returnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: { @@ -152,7 +152,7 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, std::ignore = hKernel; std::ignore = hDevice; - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE: { // todo: set proper values diff --git a/source/adapters/native_cpu/memory.cpp b/source/adapters/native_cpu/memory.cpp index a190208ab7..0a8380ec88 100644 --- a/source/adapters/native_cpu/memory.cpp +++ b/source/adapters/native_cpu/memory.cpp @@ -90,7 +90,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( !(static_cast<_ur_buffer *>(hBuffer))->isSubBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - std::shared_lock Guard(hBuffer->Mutex); + std::shared_lock Guard(hBuffer->Mutex); if (flags != UR_MEM_FLAG_READ_WRITE) { die("urMemBufferPartition: NativeCPU implements only read-write buffer," diff --git a/source/adapters/native_cpu/platform.cpp b/source/adapters/native_cpu/platform.cpp index 61093f3eed..7dcaee26b4 100644 --- a/source/adapters/native_cpu/platform.cpp +++ b/source/adapters/native_cpu/platform.cpp @@ -11,7 +11,6 @@ #include "platform.hpp" #include "common.hpp" -#include "ur/ur.hpp" #include "ur_api.h" #include @@ -58,7 +57,7 @@ urPlatformGetInfo(ur_platform_handle_t hPlatform, ur_platform_info_t propName, if (hPlatform == nullptr) { return UR_RESULT_ERROR_INVALID_PLATFORM; } - UrReturnHelper ReturnValue(propSize, pParamValue, pSizeRet); + ur::ReturnHelper ReturnValue(propSize, pParamValue, pSizeRet); switch (propName) { case UR_PLATFORM_INFO_NAME: diff --git a/source/adapters/native_cpu/platform.hpp b/source/adapters/native_cpu/platform.hpp index 6791bba7aa..6fdce8dc5c 100644 --- a/source/adapters/native_cpu/platform.hpp +++ b/source/adapters/native_cpu/platform.hpp @@ -10,7 +10,7 @@ #pragma once -#include +#include #include "common.hpp" #include "device.hpp" diff --git a/source/adapters/native_cpu/program.cpp b/source/adapters/native_cpu/program.cpp index ccd96a3a24..dc97cf061b 100644 --- a/source/adapters/native_cpu/program.cpp +++ b/source/adapters/native_cpu/program.cpp @@ -135,7 +135,7 @@ urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UrReturnHelper returnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper returnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_PROGRAM_INFO_REFERENCE_COUNT: diff --git a/source/adapters/native_cpu/queue.cpp b/source/adapters/native_cpu/queue.cpp index 516e66db64..ad1f21b915 100644 --- a/source/adapters/native_cpu/queue.cpp +++ b/source/adapters/native_cpu/queue.cpp @@ -11,7 +11,6 @@ #include "queue.hpp" #include "common.hpp" -#include "ur/ur.hpp" #include "ur_api.h" UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, diff --git a/source/adapters/opencl/CMakeLists.txt b/source/adapters/opencl/CMakeLists.txt index c78e75e94f..66987cb024 100644 --- a/source/adapters/opencl/CMakeLists.txt +++ b/source/adapters/opencl/CMakeLists.txt @@ -39,8 +39,6 @@ add_ur_adapter(${TARGET_NAME} SHARED ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp ${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.hpp ) set_target_properties(${TARGET_NAME} PROPERTIES diff --git a/source/adapters/opencl/adapter.cpp b/source/adapters/opencl/adapter.cpp index 8ae1e77755..89c5af9528 100644 --- a/source/adapters/opencl/adapter.cpp +++ b/source/adapters/opencl/adapter.cpp @@ -62,7 +62,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_ADAPTER_INFO_BACKEND: diff --git a/source/adapters/opencl/command_buffer.hpp b/source/adapters/opencl/command_buffer.hpp index d80f29594b..bb40e740a4 100644 --- a/source/adapters/opencl/command_buffer.hpp +++ b/source/adapters/opencl/command_buffer.hpp @@ -9,7 +9,7 @@ //===----------------------------------------------------------------------===// #include -#include +#include struct ur_exp_command_buffer_handle_t_ { ur_queue_handle_t hInternalQueue; diff --git a/source/adapters/opencl/common.cpp b/source/adapters/opencl/common.cpp index 4fe8bed408..7875641303 100644 --- a/source/adapters/opencl/common.cpp +++ b/source/adapters/opencl/common.cpp @@ -9,6 +9,9 @@ //===----------------------------------------------------------------------===// #include "common.hpp" +#include +#include +#include namespace cl_adapter { diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index 0cb19694a6..ed7d93d02f 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -10,9 +10,13 @@ #include #include #include +#include #include +#include #include -#include +#include +#include +#include /** * Call an OpenCL API and, if the result is not CL_SUCCESS, automatically map diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index 3ada4a3d37..ac95254764 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -9,6 +9,7 @@ //===----------------------------------------------------------------------===// #include "context.hpp" +#include #include #include @@ -73,7 +74,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); const cl_int CLPropName = mapURContextInfoToCL(propName); switch (static_cast(propName)) { diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 27577eab39..fe3d0b5649 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -315,7 +315,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); const cl_device_info CLPropName = mapURDeviceInfoToCL(propName); diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 44157b826b..408f0ae7be 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -10,6 +10,7 @@ #include "common.hpp" #include +#include #include UR_APIEXPORT ur_result_t UR_APICALL diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index e93f0731c6..f2645dae53 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -366,7 +366,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); const cl_int CLPropName = mapURMemInfoToCL(propName); size_t CheckPropSize = 0; @@ -389,7 +389,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); const cl_int CLPropName = mapURMemImageInfoToCL(propName); size_t CheckPropSize = 0; diff --git a/source/adapters/opencl/platform.cpp b/source/adapters/opencl/platform.cpp index 7188a3e8f0..ee13bb8763 100644 --- a/source/adapters/opencl/platform.cpp +++ b/source/adapters/opencl/platform.cpp @@ -51,7 +51,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urPlatformGetInfo(ur_platform_handle_t hPlatform, ur_platform_info_t propName, size_t propSize, void *pPropValue, size_t *pSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pSizeRet); const cl_int CLPropName = mapURPlatformInfoToCL(propName); switch (static_cast(propName)) { diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index f628c8152b..87eaace831 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -277,7 +277,7 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, ur_program_build_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { if (propName == UR_PROGRAM_BUILD_INFO_BINARY_TYPE) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); cl_program_binary_type BinaryType; CL_RETURN_ON_FAILURE(clGetProgramBuildInfo( cl_adapter::cast(hProgram), diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index 4a39a91ef5..b27631d1ec 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -127,7 +127,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, // Unfortunately the size of cl_bitfield (unsigned long) doesn't line up with // our enums (forced to be sizeof(uint32_t)) so this needs special handling. if (propName == UR_QUEUE_INFO_FLAGS) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + ur::ReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); cl_command_queue_properties QueueProperties = 0; CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt index 5c6fb231da..cd6fdc22fc 100644 --- a/source/common/CMakeLists.txt +++ b/source/common/CMakeLists.txt @@ -9,9 +9,12 @@ add_subdirectory(umf_pools) add_ur_library(ur_common STATIC umf_helpers.hpp ur_pool_manager.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/ur_util.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ur_util.hpp $<$:windows/ur_lib_loader.cpp> $<$:linux/ur_lib_loader.cpp> ) + add_library(${PROJECT_NAME}::common ALIAS ur_common) target_include_directories(ur_common PUBLIC diff --git a/source/common/ur_util.cpp b/source/common/ur_util.cpp new file mode 100644 index 0000000000..031c2f77ce --- /dev/null +++ b/source/common/ur_util.cpp @@ -0,0 +1,62 @@ + +//===--------- ur_adapter_util.cpp - Unified Runtime ---------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ur_util.hpp" +#include + +// Controls tracing UR calls from within the UR itself. +bool PrintTrace = [] { + const auto PiRet = ur_getenv("SYCL_PI_TRACE"); + const char *Trace = PiRet ? PiRet->c_str() : nullptr; + const int TraceValue = Trace ? std::stoi(Trace) : 0; + if (TraceValue == -1 || TraceValue == 2) { // Means print all traces + return true; + } + return false; +}(); + +// Apparatus for maintaining immutable cache of platforms. +std::vector *URPlatformsCache = + new std::vector; +ur::SpinLock *URPlatformsCacheMutex = new ur::SpinLock; +bool URPlatformCachePopulated = false; + +const bool SingleThreadMode = [] { + const auto UrRet = ur_getenv("UR_L0_SINGLE_THREAD_MODE"); + const auto PiRet = ur_getenv("SYCL_PI_LEVEL_ZERO_SINGLE_THREAD_MODE"); + const bool RetVal = UrRet ? std::stoi(UrRet->c_str()) + : (PiRet ? std::stoi(PiRet->c_str()) : 0); + return RetVal; +}(); + +std::optional ur_getenv(const char *name) { +#if defined(_WIN32) + constexpr int buffer_size = 1024; + char buffer[buffer_size]; + auto rc = GetEnvironmentVariableA(name, buffer, buffer_size); + if (0 != rc && rc < buffer_size) { + return std::string(buffer); + } else if (rc >= buffer_size) { + std::stringstream ex_ss; + ex_ss << "Environment variable " << name << " value too long!" + << " Maximum length is " << buffer_size - 1 << " characters."; + throw std::invalid_argument(ex_ss.str()); + } + return std::nullopt; +#else + const char *tmp_env = getenv(name); + if (tmp_env != nullptr) { + return std::string(tmp_env); + } else { + return std::nullopt; + } +#endif +} diff --git a/source/common/ur_util.hpp b/source/common/ur_util.hpp index 17e81b2487..99ae002cdf 100644 --- a/source/common/ur_util.hpp +++ b/source/common/ur_util.hpp @@ -13,16 +13,27 @@ #include +#include +#include +#include +#include +#include #include #include +#include +#include #include +#include #include #include #include +#include #include #ifdef _WIN32 +#define NOMINMAX #include +#undef NOMINMAX inline int ur_getpid(void) { return static_cast(GetCurrentProcessId()); } #else @@ -93,29 +104,7 @@ inline std::string create_library_path(const char *name, const char *path) { #endif /////////////////////////////////////////////////////////////////////////////// -inline std::optional ur_getenv(const char *name) { -#if defined(_WIN32) - constexpr int buffer_size = 1024; - char buffer[buffer_size]; - auto rc = GetEnvironmentVariableA(name, buffer, buffer_size); - if (0 != rc && rc < buffer_size) { - return std::string(buffer); - } else if (rc >= buffer_size) { - std::stringstream ex_ss; - ex_ss << "Environment variable " << name << " value too long!" - << " Maximum length is " << buffer_size - 1 << " characters."; - throw std::invalid_argument(ex_ss.str()); - } - return std::nullopt; -#else - const char *tmp_env = getenv(name); - if (tmp_env != nullptr) { - return std::string(tmp_env); - } else { - return std::nullopt; - } -#endif -} +std::optional ur_getenv(const char *name); inline bool getenv_tobool(const char *name) { auto env = ur_getenv(name); @@ -296,7 +285,64 @@ inline ur_result_t exceptionToResult(std::exception_ptr eptr) { template inline constexpr bool ur_always_false_t = false; +// TODO: promote all of the below extensions to the Unified Runtime +// and get rid of these ZER_EXT constants. +const ur_device_info_t UR_EXT_DEVICE_INFO_OPENCL_C_VERSION = + (ur_device_info_t)0x103D; + +const ur_command_t UR_EXT_COMMAND_TYPE_USER = + (ur_command_t)((uint32_t)UR_COMMAND_FORCE_UINT32 - 1); + +/// Program metadata tags recognized by the UR adapters. For kernels the tag +/// must appear after the kernel name. +#define __SYCL_UR_PROGRAM_METADATA_TAG_REQD_WORK_GROUP_SIZE \ + "@reqd_work_group_size" +#define __SYCL_UR_PROGRAM_METADATA_GLOBAL_ID_MAPPING "@global_id_mapping" +#define __SYCL_UR_PROGRAM_METADATA_TAG_NEED_FINALIZATION "Requires finalization" + +// Terminates the process with a catastrophic error message. +[[noreturn]] inline void die(const char *Message) { + std::cerr << "die: " << Message << std::endl; + std::terminate(); +} + +// A single-threaded app has an opportunity to enable this mode to avoid +// overhead from mutex locking. Default value is 0 which means that single +// thread mode is disabled. +extern const bool SingleThreadMode; + +// The wrapper for immutable data. +// The data is initialized only once at first access (via ->) with the +// initialization function provided in Init. All subsequent access to +// the data just returns the already stored data. +// +template struct ZeCache : private T { + // The initialization function takes a reference to the data + // it is going to initialize, since it is private here in + // order to disallow access other than through "->". + // + using InitFunctionType = std::function; + InitFunctionType Compute{nullptr}; + std::once_flag Computed; + + ZeCache() : T{} {} + + // Access to the fields of the original T data structure. + T *operator->() { + std::call_once(Computed, Compute, static_cast(*this)); + return this; + } +}; + +// Helper for one-liner validation +#define UR_ASSERT(Condition, Error) \ + if (!(Condition)) \ + return Error; + +// The getInfo*/ReturnHelper facilities provide shortcut way of +// writing return bytes for the various getInfo APIs. namespace ur { + [[noreturn]] inline void unreachable() { #ifdef _MSC_VER __assume(0); @@ -304,6 +350,226 @@ namespace ur { __builtin_unreachable(); #endif } + +// Class which acts like shared_mutex if SingleThreadMode variable is not set. +// If SingleThreadMode variable is set then mutex operations are turned into +// nop. +class SharedMutex { + std::shared_mutex Mutex; + + public: + void lock() { + if (!SingleThreadMode) { + Mutex.lock(); + } + } + bool try_lock() { return SingleThreadMode ? true : Mutex.try_lock(); } + void unlock() { + if (!SingleThreadMode) { + Mutex.unlock(); + } + } + + void lock_shared() { + if (!SingleThreadMode) { + Mutex.lock_shared(); + } + } + bool try_lock_shared() { + return SingleThreadMode ? true : Mutex.try_lock_shared(); + } + void unlock_shared() { + if (!SingleThreadMode) { + Mutex.unlock_shared(); + } + } +}; + +// Class which acts like std::mutex if SingleThreadMode variable is not set. +// If SingleThreadMode variable is set then mutex operations are turned into +// nop. +class Mutex { + std::mutex Mutex; + friend class Lock; + + public: + void lock() { + if (!SingleThreadMode) { + Mutex.lock(); + } + } + bool try_lock() { return SingleThreadMode ? true : Mutex.try_lock(); } + void unlock() { + if (!SingleThreadMode) { + Mutex.unlock(); + } + } +}; + +class Lock { + std::unique_lock UniqueLock; + + public: + explicit Lock(Mutex &Mutex) { + if (!SingleThreadMode) { + UniqueLock = std::unique_lock(Mutex.Mutex); + } + } +}; + +/// SpinLock is a synchronization primitive, that uses atomic variable and +/// causes thread trying acquire lock wait in loop while repeatedly check if +/// the lock is available. +/// +/// One important feature of this implementation is that std::atomic can +/// be zero-initialized. This allows SpinLock to have trivial constructor and +/// destructor, which makes it possible to use it in global context (unlike +/// std::mutex, that doesn't provide such guarantees). +class SpinLock { + public: + void lock() { + while (MLock.test_and_set(std::memory_order_acquire)) { + std::this_thread::yield(); + } + } + void unlock() { MLock.clear(std::memory_order_release); } + + private: + std::atomic_flag MLock = ATOMIC_FLAG_INIT; +}; + +template +ur_result_t getInfoImpl(size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet, T Value, size_t ValueSize, + Assign &&AssignFunc) { + if (!ParamValue && !ParamValueSizeRet) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (ParamValue != nullptr) { + + if (ParamValueSize < ValueSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + AssignFunc(ParamValue, Value, ValueSize); + } + + if (ParamValueSizeRet != nullptr) { + *ParamValueSizeRet = ValueSize; + } + + return UR_RESULT_SUCCESS; +} + +template +ur_result_t getInfo(size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet, T Value) { + + auto assignment = [](void *ParamValue, T Value, size_t ValueSize) { + std::ignore = ValueSize; + *static_cast(ParamValue) = Value; + }; + + return getInfoImpl(ParamValueSize, ParamValue, ParamValueSizeRet, Value, + sizeof(T), assignment); +} + +template +ur_result_t getInfoArray(size_t ArrayLength, size_t ParamValueSize, + void *ParamValue, size_t *ParamValueSizeRet, + const T *value) { + return getInfoImpl(ParamValueSize, ParamValue, ParamValueSizeRet, value, + ArrayLength * sizeof(T), memcpy); +} + +template +ur_result_t getInfoArray(size_t ArrayLength, size_t ParamValueSize, + void *ParamValue, size_t *ParamValueSizeRet, + const T *value) { + if (ParamValue) { + memset(ParamValue, 0, ParamValueSize); + for (uint32_t I = 0; I < ArrayLength; I++) { + ((RetType *)ParamValue)[I] = (RetType)value[I]; + } + } + if (ParamValueSizeRet) { + *ParamValueSizeRet = ArrayLength * sizeof(RetType); + } + return UR_RESULT_SUCCESS; +} + +template <> +inline ur_result_t +getInfo(size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet, const char *Value) { + return getInfoArray(strlen(Value) + 1, ParamValueSize, ParamValue, + ParamValueSizeRet, Value); +} + +class ReturnHelper { + public: + ReturnHelper(size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) + : ParamValueSize(ParamValueSize), ParamValue(ParamValue), + ParamValueSizeRet(ParamValueSizeRet) {} + + // A version where in/out info size is represented by a single pointer + // to a value which is updated on return + ReturnHelper(size_t *ParamValueSize, void *ParamValue) + : ParamValueSize(*ParamValueSize), ParamValue(ParamValue), + ParamValueSizeRet(ParamValueSize) {} + + // Scalar return value + template ur_result_t operator()(const T &t) { + return ur::getInfo(ParamValueSize, ParamValue, ParamValueSizeRet, t); + } + + // Array return value + template ur_result_t operator()(const T *t, size_t s) { + return ur::getInfoArray(s, ParamValueSize, ParamValue, + ParamValueSizeRet, t); + } + + // Array return value where element type is differrent from T + template + ur_result_t operator()(const T *t, size_t s) { + return ur::getInfoArray(s, ParamValueSize, ParamValue, + ParamValueSizeRet, t); + } + + protected: + size_t ParamValueSize; + void *ParamValue; + size_t *ParamValueSizeRet; +}; + } // namespace ur +template To ur_cast(From Value) { + // TODO: see if more sanity checks are possible. + assert(sizeof(From) == sizeof(To)); + return (To)(Value); +} + +template <> uint32_t inline ur_cast(uint64_t Value) { + // Cast value and check that we don't lose any information. + uint32_t CastedValue = (uint32_t)(Value); + assert((uint64_t)CastedValue == Value); + return CastedValue; +} + +// Controls tracing UR calls from within the UR itself. +extern bool PrintTrace; + +// Apparatus for maintaining immutable cache of platforms. +// +// Note we only create a simple pointer variables such that C++ RT won't +// deallocate them automatically at the end of the main program. +// The heap memory allocated for these global variables reclaimed only at +// explicit tear-down. +extern std::vector *URPlatformsCache; +extern ur::SpinLock *URPlatformsCacheMutex; +extern bool URPlatformCachePopulated; + #endif /* UR_UTIL_H */ diff --git a/source/loader/CMakeLists.txt b/source/loader/CMakeLists.txt index b0c8bbcb86..37b58d60ca 100644 --- a/source/loader/CMakeLists.txt +++ b/source/loader/CMakeLists.txt @@ -104,7 +104,6 @@ endif() if(UR_ENABLE_SANITIZER) target_sources(ur_loader PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/../ur/ur.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_interceptor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_interceptor.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/common.hpp diff --git a/source/loader/layers/sanitizer/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan_interceptor.cpp index 394405c056..9dc71410e5 100644 --- a/source/loader/layers/sanitizer/asan_interceptor.cpp +++ b/source/loader/layers/sanitizer/asan_interceptor.cpp @@ -136,19 +136,19 @@ ur_result_t SanitizerInterceptor::allocateMemory( // For updating shadow memory if (DeviceInfo) { // device/shared USM - std::scoped_lock Guard(DeviceInfo->Mutex); + std::scoped_lock Guard(DeviceInfo->Mutex); DeviceInfo->AllocInfos.emplace_back(AllocInfo); } else { // host USM's AllocInfo needs to insert into all devices for (auto &pair : ContextInfo->DeviceMap) { auto DeviceInfo = pair.second; - std::scoped_lock Guard(DeviceInfo->Mutex); + std::scoped_lock Guard(DeviceInfo->Mutex); DeviceInfo->AllocInfos.emplace_back(AllocInfo); } } // For memory release { - std::scoped_lock Guard(ContextInfo->Mutex); + std::scoped_lock Guard(ContextInfo->Mutex); ContextInfo->AllocatedUSMMap[AllocBegin] = AllocInfo; } @@ -164,7 +164,7 @@ ur_result_t SanitizerInterceptor::releaseMemory(ur_context_handle_t Context, void *Ptr) { auto ContextInfo = getContextInfo(Context); - std::shared_lock Guard(ContextInfo->Mutex); + std::shared_lock Guard(ContextInfo->Mutex); auto Addr = reinterpret_cast(Ptr); // Find the last element is not greater than key @@ -203,7 +203,7 @@ bool SanitizerInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel, auto ContextInfo = getContextInfo(Context); auto QueueInfo = ContextInfo->getQueueInfo(Queue); - std::scoped_lock Guard(QueueInfo->Mutex); + std::scoped_lock Guard(QueueInfo->Mutex); Event = QueueInfo->LastEvent; QueueInfo->LastEvent = nullptr; @@ -467,12 +467,12 @@ ur_result_t SanitizerInterceptor::updateShadowMemory(ur_queue_handle_t Queue) { auto DeviceInfo = ContextInfo->getDeviceInfo(Device); auto QueueInfo = ContextInfo->getQueueInfo(Queue); - std::shared_lock HostGuard(HostInfo->Mutex, + std::shared_lock HostGuard(HostInfo->Mutex, std::defer_lock); - std::unique_lock DeviceGuard(DeviceInfo->Mutex, + std::unique_lock DeviceGuard(DeviceInfo->Mutex, std::defer_lock); - std::scoped_lock, - std::unique_lock, ur_mutex> + std::scoped_lock, + std::unique_lock, ur::Mutex> Guard(HostGuard, DeviceGuard, QueueInfo->Mutex); ur_event_handle_t LastEvent = QueueInfo->LastEvent; @@ -490,7 +490,7 @@ ur_result_t SanitizerInterceptor::updateShadowMemory(ur_queue_handle_t Queue) { ur_result_t SanitizerInterceptor::insertContext(ur_context_handle_t Context) { auto ContextInfo = std::make_shared(); - std::scoped_lock Guard(m_ContextMapMutex); + std::scoped_lock Guard(m_ContextMapMutex); assert(m_ContextMap.find(Context) == m_ContextMap.end()); m_ContextMap.emplace(Context, std::move(ContextInfo)); @@ -498,7 +498,7 @@ ur_result_t SanitizerInterceptor::insertContext(ur_context_handle_t Context) { } ur_result_t SanitizerInterceptor::eraseContext(ur_context_handle_t Context) { - std::scoped_lock Guard(m_ContextMapMutex); + std::scoped_lock Guard(m_ContextMapMutex); assert(m_ContextMap.find(Context) != m_ContextMap.end()); m_ContextMap.erase(Context); return UR_RESULT_SUCCESS; @@ -532,7 +532,7 @@ ur_result_t SanitizerInterceptor::insertDevice(ur_context_handle_t Context, UR_CALL(allocShadowMemory(Context, DeviceInfo)); auto ContextInfo = getContextInfo(Context); - std::scoped_lock Guard(ContextInfo->Mutex); + std::scoped_lock Guard(ContextInfo->Mutex); ContextInfo->DeviceMap.emplace(Device, std::move(DeviceInfo)); return UR_RESULT_SUCCESS; @@ -544,7 +544,7 @@ ur_result_t SanitizerInterceptor::insertQueue(ur_context_handle_t Context, QueueInfo->LastEvent = nullptr; auto ContextInfo = getContextInfo(Context); - std::scoped_lock Guard(ContextInfo->Mutex); + std::scoped_lock Guard(ContextInfo->Mutex); ContextInfo->QueueMap.emplace(Queue, std::move(QueueInfo)); return UR_RESULT_SUCCESS; @@ -553,7 +553,7 @@ ur_result_t SanitizerInterceptor::insertQueue(ur_context_handle_t Context, ur_result_t SanitizerInterceptor::eraseQueue(ur_context_handle_t Context, ur_queue_handle_t Queue) { auto ContextInfo = getContextInfo(Context); - std::scoped_lock Guard(ContextInfo->Mutex); + std::scoped_lock Guard(ContextInfo->Mutex); assert(ContextInfo->QueueMap.find(Queue) != ContextInfo->QueueMap.end()); ContextInfo->QueueMap.erase(Queue); return UR_RESULT_SUCCESS; @@ -569,7 +569,7 @@ void SanitizerInterceptor::prepareLaunch(ur_queue_handle_t Queue, auto DeviceInfo = ContextInfo->getDeviceInfo(Device); auto QueueInfo = ContextInfo->getQueueInfo(Queue); - std::scoped_lock Guard(QueueInfo->Mutex); + std::scoped_lock Guard(QueueInfo->Mutex); ur_event_handle_t LastEvent = QueueInfo->LastEvent; { diff --git a/source/loader/layers/sanitizer/asan_interceptor.hpp b/source/loader/layers/sanitizer/asan_interceptor.hpp index edad3f926e..ecdfa92393 100644 --- a/source/loader/layers/sanitizer/asan_interceptor.hpp +++ b/source/loader/layers/sanitizer/asan_interceptor.hpp @@ -13,6 +13,7 @@ #pragma once #include "common.hpp" +#include "ur_util.hpp" #include #include @@ -40,36 +41,36 @@ struct DeviceInfo { uptr ShadowOffsetEnd; // Lock InitPool & AllocInfos - ur_shared_mutex Mutex; + ur::SharedMutex Mutex; std::vector> AllocInfos; }; struct QueueInfo { - ur_mutex Mutex; + ur::Mutex Mutex; ur_event_handle_t LastEvent; }; struct ContextInfo { std::shared_ptr getDeviceInfo(ur_device_handle_t Device) { - std::shared_lock Guard(Mutex); + std::shared_lock Guard(Mutex); assert(DeviceMap.find(Device) != DeviceMap.end()); return DeviceMap[Device]; } std::shared_ptr getQueueInfo(ur_queue_handle_t Queue) { - std::shared_lock Guard(Mutex); + std::shared_lock Guard(Mutex); assert(QueueMap.find(Queue) != QueueMap.end()); return QueueMap[Queue]; } std::shared_ptr getUSMAllocInfo(uptr Address) { - std::shared_lock Guard(Mutex); + std::shared_lock Guard(Mutex); assert(AllocatedUSMMap.find(Address) != AllocatedUSMMap.end()); return AllocatedUSMMap[Address]; } - ur_shared_mutex Mutex; + ur::SharedMutex Mutex; std::unordered_map> DeviceMap; std::unordered_map> QueueMap; @@ -127,7 +128,7 @@ class SanitizerInterceptor { ur_event_handle_t *OutEvent); std::shared_ptr getContextInfo(ur_context_handle_t Context) { - std::shared_lock Guard(m_ContextMapMutex); + std::shared_lock Guard(m_ContextMapMutex); assert(m_ContextMap.find(Context) != m_ContextMap.end()); return m_ContextMap[Context]; } @@ -135,7 +136,7 @@ class SanitizerInterceptor { private: std::unordered_map> m_ContextMap; - ur_shared_mutex m_ContextMapMutex; + ur::SharedMutex m_ContextMapMutex; }; } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/common.hpp b/source/loader/layers/sanitizer/common.hpp index 8b80814b9c..52d9752505 100644 --- a/source/loader/layers/sanitizer/common.hpp +++ b/source/loader/layers/sanitizer/common.hpp @@ -12,7 +12,7 @@ #pragma once -#include "ur/ur.hpp" +#include "ur_api.h" #include "ur_ddi.h" #include diff --git a/source/ur/.clang-format b/source/ur/.clang-format deleted file mode 100644 index c8daebc205..0000000000 --- a/source/ur/.clang-format +++ /dev/null @@ -1,4 +0,0 @@ ---- -Language: Cpp -BasedOnStyle: LLVM -... diff --git a/source/ur/ur.cpp b/source/ur/ur.cpp deleted file mode 100644 index 4de87d53c2..0000000000 --- a/source/ur/ur.cpp +++ /dev/null @@ -1,30 +0,0 @@ - -//===--------- ur.cpp - Unified Runtime ----------------------------------===// -// -// Copyright (C) 2023 Intel Corporation -// -// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM -// Exceptions. See LICENSE.TXT -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "ur.hpp" -#include - -// Controls tracing UR calls from within the UR itself. -bool PrintTrace = [] { - const char *PiRet = std::getenv("SYCL_PI_TRACE"); - const char *Trace = PiRet ? PiRet : nullptr; - const int TraceValue = Trace ? std::stoi(Trace) : 0; - if (TraceValue == -1 || TraceValue == 2) { // Means print all traces - return true; - } - return false; -}(); - -// Apparatus for maintaining immutable cache of platforms. -std::vector *URPlatformsCache = - new std::vector; -SpinLock *URPlatformsCacheMutex = new SpinLock; -bool URPlatformCachePopulated = false; diff --git a/source/ur/ur.hpp b/source/ur/ur.hpp deleted file mode 100644 index 11d619ea04..0000000000 --- a/source/ur/ur.hpp +++ /dev/null @@ -1,312 +0,0 @@ -//===--------- ur.hpp - Unified Runtime ----------------------------------===// -// -// Copyright (C) 2023 Intel Corporation -// -// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM -// Exceptions. See LICENSE.TXT -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "ur_util.hpp" - -template To ur_cast(From Value) { - // TODO: see if more sanity checks are possible. - assert(sizeof(From) == sizeof(To)); - return (To)(Value); -} - -template <> uint32_t inline ur_cast(uint64_t Value) { - // Cast value and check that we don't lose any information. - uint32_t CastedValue = (uint32_t)(Value); - assert((uint64_t)CastedValue == Value); - return CastedValue; -} - -// TODO: promote all of the below extensions to the Unified Runtime -// and get rid of these ZER_EXT constants. -const ur_device_info_t UR_EXT_DEVICE_INFO_OPENCL_C_VERSION = - (ur_device_info_t)0x103D; - -const ur_command_t UR_EXT_COMMAND_TYPE_USER = - (ur_command_t)((uint32_t)UR_COMMAND_FORCE_UINT32 - 1); - -/// Program metadata tags recognized by the UR adapters. For kernels the tag -/// must appear after the kernel name. -#define __SYCL_UR_PROGRAM_METADATA_TAG_REQD_WORK_GROUP_SIZE \ - "@reqd_work_group_size" -#define __SYCL_UR_PROGRAM_METADATA_GLOBAL_ID_MAPPING "@global_id_mapping" -#define __SYCL_UR_PROGRAM_METADATA_TAG_NEED_FINALIZATION "Requires finalization" - -// Terminates the process with a catastrophic error message. -[[noreturn]] inline void die(const char *Message) { - std::cerr << "die: " << Message << std::endl; - std::terminate(); -} - -// A single-threaded app has an opportunity to enable this mode to avoid -// overhead from mutex locking. Default value is 0 which means that single -// thread mode is disabled. -static const bool SingleThreadMode = [] { - auto UrRet = ur_getenv("UR_L0_SINGLE_THREAD_MODE"); - auto PiRet = ur_getenv("SYCL_PI_LEVEL_ZERO_SINGLE_THREAD_MODE"); - const bool RetVal = - UrRet ? std::stoi(*UrRet) : (PiRet ? std::stoi(*PiRet) : 0); - return RetVal; -}(); - -// Class which acts like shared_mutex if SingleThreadMode variable is not set. -// If SingleThreadMode variable is set then mutex operations are turned into -// nop. -class ur_shared_mutex { - std::shared_mutex Mutex; - -public: - void lock() { - if (!SingleThreadMode) { - Mutex.lock(); - } - } - bool try_lock() { return SingleThreadMode ? true : Mutex.try_lock(); } - void unlock() { - if (!SingleThreadMode) { - Mutex.unlock(); - } - } - - void lock_shared() { - if (!SingleThreadMode) { - Mutex.lock_shared(); - } - } - bool try_lock_shared() { - return SingleThreadMode ? true : Mutex.try_lock_shared(); - } - void unlock_shared() { - if (!SingleThreadMode) { - Mutex.unlock_shared(); - } - } -}; - -// Class which acts like std::mutex if SingleThreadMode variable is not set. -// If SingleThreadMode variable is set then mutex operations are turned into -// nop. -class ur_mutex { - std::mutex Mutex; - friend class ur_lock; - -public: - void lock() { - if (!SingleThreadMode) { - Mutex.lock(); - } - } - bool try_lock() { return SingleThreadMode ? true : Mutex.try_lock(); } - void unlock() { - if (!SingleThreadMode) { - Mutex.unlock(); - } - } -}; - -class ur_lock { - std::unique_lock Lock; - -public: - explicit ur_lock(ur_mutex &Mutex) { - if (!SingleThreadMode) { - Lock = std::unique_lock(Mutex.Mutex); - } - } -}; - -/// SpinLock is a synchronization primitive, that uses atomic variable and -/// causes thread trying acquire lock wait in loop while repeatedly check if -/// the lock is available. -/// -/// One important feature of this implementation is that std::atomic can -/// be zero-initialized. This allows SpinLock to have trivial constructor and -/// destructor, which makes it possible to use it in global context (unlike -/// std::mutex, that doesn't provide such guarantees). -class SpinLock { -public: - void lock() { - while (MLock.test_and_set(std::memory_order_acquire)) { - std::this_thread::yield(); - } - } - void unlock() { MLock.clear(std::memory_order_release); } - -private: - std::atomic_flag MLock = ATOMIC_FLAG_INIT; -}; - -// The wrapper for immutable data. -// The data is initialized only once at first access (via ->) with the -// initialization function provided in Init. All subsequent access to -// the data just returns the already stored data. -// -template struct ZeCache : private T { - // The initialization function takes a reference to the data - // it is going to initialize, since it is private here in - // order to disallow access other than through "->". - // - using InitFunctionType = std::function; - InitFunctionType Compute{nullptr}; - std::once_flag Computed; - - ZeCache() : T{} {} - - // Access to the fields of the original T data structure. - T *operator->() { - std::call_once(Computed, Compute, static_cast(*this)); - return this; - } -}; - -// Helper for one-liner validation -#define UR_ASSERT(condition, error) \ - if (!(condition)) \ - return error; - -// TODO: populate with target agnostic handling of UR platforms -struct _ur_platform {}; - -// Controls tracing UR calls from within the UR itself. -extern bool PrintTrace; - -// Apparatus for maintaining immutable cache of platforms. -// -// Note we only create a simple pointer variables such that C++ RT won't -// deallocate them automatically at the end of the main program. -// The heap memory allocated for these global variables reclaimed only at -// explicit tear-down. -extern std::vector *URPlatformsCache; -extern SpinLock *URPlatformsCacheMutex; -extern bool URPlatformCachePopulated; - -// The getInfo*/ReturnHelper facilities provide shortcut way of -// writing return bytes for the various getInfo APIs. -namespace ur { -template -ur_result_t getInfoImpl(size_t param_value_size, void *param_value, - size_t *param_value_size_ret, T value, - size_t value_size, Assign &&assign_func) { - if (!param_value && !param_value_size_ret) { - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - } - - if (param_value != nullptr) { - - if (param_value_size < value_size) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - - assign_func(param_value, value, value_size); - } - - if (param_value_size_ret != nullptr) { - *param_value_size_ret = value_size; - } - - return UR_RESULT_SUCCESS; -} - -template -ur_result_t getInfo(size_t param_value_size, void *param_value, - size_t *param_value_size_ret, T value) { - - auto assignment = [](void *param_value, T value, size_t value_size) { - std::ignore = value_size; - *static_cast(param_value) = value; - }; - - return getInfoImpl(param_value_size, param_value, param_value_size_ret, value, - sizeof(T), assignment); -} - -template -ur_result_t getInfoArray(size_t array_length, size_t param_value_size, - void *param_value, size_t *param_value_size_ret, - const T *value) { - return getInfoImpl(param_value_size, param_value, param_value_size_ret, value, - array_length * sizeof(T), memcpy); -} - -template -ur_result_t getInfoArray(size_t array_length, size_t param_value_size, - void *param_value, size_t *param_value_size_ret, - const T *value) { - if (param_value) { - memset(param_value, 0, param_value_size); - for (uint32_t I = 0; I < array_length; I++) { - ((RetType *)param_value)[I] = (RetType)value[I]; - } - } - if (param_value_size_ret) { - *param_value_size_ret = array_length * sizeof(RetType); - } - return UR_RESULT_SUCCESS; -} - -template <> -inline ur_result_t -getInfo(size_t param_value_size, void *param_value, - size_t *param_value_size_ret, const char *value) { - return getInfoArray(strlen(value) + 1, param_value_size, param_value, - param_value_size_ret, value); -} -} // namespace ur - -class UrReturnHelper { -public: - UrReturnHelper(size_t param_value_size, void *param_value, - size_t *param_value_size_ret) - : param_value_size(param_value_size), param_value(param_value), - param_value_size_ret(param_value_size_ret) {} - - // A version where in/out info size is represented by a single pointer - // to a value which is updated on return - UrReturnHelper(size_t *param_value_size, void *param_value) - : param_value_size(*param_value_size), param_value(param_value), - param_value_size_ret(param_value_size) {} - - // Scalar return value - template ur_result_t operator()(const T &t) { - return ur::getInfo(param_value_size, param_value, param_value_size_ret, t); - } - - // Array return value - template ur_result_t operator()(const T *t, size_t s) { - return ur::getInfoArray(s, param_value_size, param_value, - param_value_size_ret, t); - } - - // Array return value where element type is differrent from T - template - ur_result_t operator()(const T *t, size_t s) { - return ur::getInfoArray(s, param_value_size, param_value, - param_value_size_ret, t); - } - -protected: - size_t param_value_size; - void *param_value; - size_t *param_value_size_ret; -};