From 526f7e6b150e22c56a4c4312e606e17cb60a3bdc Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Tue, 12 Dec 2023 14:08:27 +0000 Subject: [PATCH 01/32] Remove inline from helper --- source/adapters/hip/memory.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/source/adapters/hip/memory.cpp b/source/adapters/hip/memory.cpp index 68ded26263..6a220f53c4 100644 --- a/source/adapters/hip/memory.cpp +++ b/source/adapters/hip/memory.cpp @@ -458,9 +458,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { return UR_RESULT_SUCCESS; } -inline ur_result_t -allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem, - const ur_device_handle_t hDevice) { +ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem, + const ur_device_handle_t hDevice) { ScopedContext Active(hDevice); ur_lock LockGuard(Mem->MemoryAllocationMutex); From c5c129e8094eef0befa30d9281b32276263cf265 Mon Sep 17 00:00:00 2001 From: Luke Drummond Date: Thu, 21 Dec 2023 14:46:41 +0000 Subject: [PATCH 02/32] [hip] Fix HSA headers lookup ROCm installations prior to v6 don't respect the traditional installation layout and install the HSA headers to `$PREFIX/hsa/include/hsa` whereas in rocm6 it looks like they're putting it in the right place at `$PREFIX/include/hsa`. That cleanup from AMD is good news, but it means that our workarounds now break and we need to check both places. 
--- source/adapters/hip/CMakeLists.txt | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index 1ed9d52c2b..6db430dffd 100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -13,7 +13,8 @@ set(UR_HIP_ROCM_DIR "/opt/rocm" CACHE STRING "ROCm installation dir") set(UR_HIP_INCLUDE_DIR "${UR_HIP_ROCM_DIR}/include") -set(UR_HIP_HSA_INCLUDE_DIR "${UR_HIP_ROCM_DIR}/hsa/include") +set(UR_HIP_HSA_INCLUDE_DIRS + "${UR_HIP_ROCM_DIR}/hsa/include;${UR_HIP_ROCM_DIR}/include") # Set HIP lib dir set(UR_HIP_LIB_DIR "${UR_HIP_ROCM_DIR}/lib") @@ -31,9 +32,16 @@ if("${UR_HIP_PLATFORM}" STREQUAL "AMD") endif() # Check if HSA include path exists - if(NOT EXISTS "${UR_HIP_HSA_INCLUDE_DIR}") - message(FATAL_ERROR "Couldn't find the HSA include directory at '${UR_HIP_HSA_INCLUDE_DIR}'," - " please check ROCm installation.") + foreach(D IN LISTS UR_HIP_HSA_INCLUDE_DIRS) + if(EXISTS "${D}") + set(UR_HIP_HSA_INCLUDE_DIR "${D}") + break() + endif() + endforeach() + if(NOT UR_HIP_HSA_INCLUDE_DIR) + message(FATAL_ERROR "Couldn't find the HSA include directory in any of " + "these paths: '${UR_HIP_HSA_INCLUDE_DIRS}'. 
Please check ROCm " + "installation.") endif() endif() From 77b4c1a43080c44394d653d44f2264e88be30ca4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20=C5=BDu=C5=BEek?= Date: Wed, 20 Dec 2023 15:17:01 +0000 Subject: [PATCH 03/32] Werror fixes A bunch of fixes to get `-Werror` builds to pass, both on Linux and on Windows (`/WX`) * Disable verbose Windows warnings * `_CRT_SECURE_NO_WARNINGS` because of `std::getenv` * C4267 because of conversions from `size_t` to other integers * Define `WIN32_LEAN_AND_MEAN` and `NOMINMAX` on Windows * Gets rid of some errors and speeds up the build * Convert integer CUDA objects to std::uintptr_t before reinterpreting them to a pointer * Fixed "unused function" warning for `GetHipFormatPixelSize` * There was a lot of duplication, now a single function called `imageElementByteSize` * Mark some variables as potentially unused (only used in asserts) * Other minor fixes --- CMakeLists.txt | 3 -- cmake/helpers.cmake | 8 ++++- source/adapters/cuda/device.cpp | 3 +- source/adapters/cuda/image.cpp | 4 +-- source/adapters/cuda/program.cpp | 3 +- source/adapters/cuda/sampler.cpp | 2 +- source/adapters/hip/enqueue.cpp | 21 ++---------- source/adapters/hip/memory.cpp | 32 +++---------------- source/adapters/level_zero/CMakeLists.txt | 2 +- source/adapters/native_cpu/context.cpp | 8 ++--- source/common/ur_util.hpp | 2 -- .../cuda/urDeviceCreateWithNativeHandle.cpp | 4 +-- test/conformance/source/environment.cpp | 2 +- 13 files changed, 29 insertions(+), 65 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fbf9947688..fcdf90f173 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -116,9 +116,6 @@ if(UR_ENABLE_TRACING) ) if (MSVC) set(TARGET_XPTI $,xpti,xptid>) - - # disable warning C4267: The compiler detected a conversion from size_t to a smaller type. 
- target_compile_options(xptifw PRIVATE /wd4267) else() set(TARGET_XPTI xpti) endif() diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake index 35c4789432..74a634ed28 100644 --- a/cmake/helpers.cmake +++ b/cmake/helpers.cmake @@ -84,10 +84,16 @@ function(add_ur_target_compile_options name) /W3 /MD$<$:d> /GS + /DWIN32_LEAN_AND_MEAN + /DNOMINMAX ) if(UR_DEVELOPER_MODE) - target_compile_options(${name} PRIVATE /WX /GS) + # _CRT_SECURE_NO_WARNINGS used mainly because of getenv + # C4267: The compiler detected a conversion from size_t to a smaller type. + target_compile_options(${name} PRIVATE + /WX /GS /D_CRT_SECURE_NO_WARNINGS /wd4267 + ) endif() endif() endfunction() diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index 0723cfe4e7..acea59e1a1 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1101,7 +1101,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( ur_device_handle_t hDevice, ur_native_handle_t *phNativeHandle) { - *phNativeHandle = reinterpret_cast(hDevice->get()); + *phNativeHandle = reinterpret_cast( + static_cast(hDevice->get())); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/cuda/image.cpp b/source/adapters/cuda/image.cpp index 1f336dd2d7..3168c008a3 100644 --- a/source/adapters/cuda/image.cpp +++ b/source/adapters/cuda/image.cpp @@ -146,7 +146,7 @@ urToCudaImageChannelFormat(ur_image_channel_type_t image_channel_type, std::make_pair(image_channel_type, num_channels)); cuda_format = cuda_format_and_size.first; pixel_size_bytes = cuda_format_and_size.second; - } catch (std::out_of_range &e) { + } catch (const std::out_of_range &) { return UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED; } } @@ -276,7 +276,7 @@ ur_result_t urTextureCreate(ur_sampler_handle_t hSampler, ImageTexDesc.mipmapFilterMode = MipFilterMode; ImageTexDesc.maxMipmapLevelClamp = hSampler->MaxMipmapLevelClamp; 
ImageTexDesc.minMipmapLevelClamp = hSampler->MinMipmapLevelClamp; - ImageTexDesc.maxAnisotropy = hSampler->MaxAnisotropy; + ImageTexDesc.maxAnisotropy = static_cast(hSampler->MaxAnisotropy); // The address modes can interfere with other dimensionsenqueueEventsWait // e.g. 1D texture sampling can be interfered with when setting other diff --git a/source/adapters/cuda/program.cpp b/source/adapters/cuda/program.cpp index 9b7959eb85..022fd258f7 100644 --- a/source/adapters/cuda/program.cpp +++ b/source/adapters/cuda/program.cpp @@ -141,7 +141,8 @@ ur_result_t ur_program_handle_t_::buildProgram(const char *BuildOptions) { getMaxRegistersJitOptionValue(this->BuildOptions, MaxRegs); if (Valid) { Options.push_back(CU_JIT_MAX_REGISTERS); - OptionVals.push_back(reinterpret_cast(MaxRegs)); + OptionVals.push_back( + reinterpret_cast(static_cast(MaxRegs))); } } diff --git a/source/adapters/cuda/sampler.cpp b/source/adapters/cuda/sampler.cpp index ce4283edd3..5ebccf516b 100644 --- a/source/adapters/cuda/sampler.cpp +++ b/source/adapters/cuda/sampler.cpp @@ -18,7 +18,7 @@ urSamplerCreate(ur_context_handle_t hContext, const ur_sampler_desc_t *pDesc, new ur_sampler_handle_t_(hContext)}; if (pDesc->stype == UR_STRUCTURE_TYPE_SAMPLER_DESC) { - Sampler->Props |= pDesc->normalizedCoords; + Sampler->Props |= static_cast(pDesc->normalizedCoords); Sampler->Props |= pDesc->filterMode << 1; Sampler->Props |= pDesc->addressingMode << 2; } else { diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 5f7fffba35..c9c5af6453 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -15,26 +15,9 @@ #include "memory.hpp" #include "queue.hpp" -namespace { +extern size_t imageElementByteSize(hipArray_Format ArrayFormat); -static size_t imageElementByteSize(hipArray_Format ArrayFormat) { - switch (ArrayFormat) { - case HIP_AD_FORMAT_UNSIGNED_INT8: - case HIP_AD_FORMAT_SIGNED_INT8: - return 1; - case HIP_AD_FORMAT_UNSIGNED_INT16: - case 
HIP_AD_FORMAT_SIGNED_INT16: - case HIP_AD_FORMAT_HALF: - return 2; - case HIP_AD_FORMAT_UNSIGNED_INT32: - case HIP_AD_FORMAT_SIGNED_INT32: - case HIP_AD_FORMAT_FLOAT: - return 4; - default: - detail::ur::die("Invalid image format."); - } - return 0; -} +namespace { ur_result_t enqueueEventsWait(ur_queue_handle_t, hipStream_t Stream, uint32_t NumEventsInWaitList, diff --git a/source/adapters/hip/memory.cpp b/source/adapters/hip/memory.cpp index 68ded26263..aa3abbdaf4 100644 --- a/source/adapters/hip/memory.cpp +++ b/source/adapters/hip/memory.cpp @@ -13,10 +13,8 @@ #include #include -namespace { - -size_t GetHipFormatPixelSize(hipArray_Format Format) { - switch (Format) { +size_t imageElementByteSize(hipArray_Format ArrayFormat) { + switch (ArrayFormat) { case HIP_AD_FORMAT_UNSIGNED_INT8: case HIP_AD_FORMAT_SIGNED_INT8: return 1; @@ -31,10 +29,9 @@ size_t GetHipFormatPixelSize(hipArray_Format Format) { default: detail::ur::die("Invalid HIP format specifier"); } + return 0; } -} // namespace - /// Decreases the reference count of the Mem object. 
/// If this is zero, calls the relevant HIP Free function /// \return UR_RESULT_SUCCESS unless deallocation error @@ -245,7 +242,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, UR_CHECK_ERROR( hipArray3DGetDescriptor(&ArrayDescriptor, Mem.getArray(Device))); const auto PixelSizeBytes = - GetHipFormatPixelSize(ArrayDescriptor.Format) * + imageElementByteSize(ArrayDescriptor.Format) * ArrayDescriptor.NumChannels; const auto ImageSizeBytes = PixelSizeBytes * @@ -405,25 +402,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, } }; - const auto hipFormatToElementSize = - [](hipArray_Format HipFormat) -> size_t { - switch (HipFormat) { - case HIP_AD_FORMAT_UNSIGNED_INT8: - case HIP_AD_FORMAT_SIGNED_INT8: - return 1; - case HIP_AD_FORMAT_UNSIGNED_INT16: - case HIP_AD_FORMAT_SIGNED_INT16: - case HIP_AD_FORMAT_HALF: - return 2; - case HIP_AD_FORMAT_UNSIGNED_INT32: - case HIP_AD_FORMAT_SIGNED_INT32: - case HIP_AD_FORMAT_FLOAT: - return 4; - default: - detail::ur::die("Invalid Hip format specified."); - } - }; - switch (propName) { case UR_IMAGE_INFO_FORMAT: return ReturnValue(ur_image_format_t{UR_IMAGE_CHANNEL_ORDER_RGBA, @@ -435,7 +413,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, case UR_IMAGE_INFO_DEPTH: return ReturnValue(ArrayInfo.Depth); case UR_IMAGE_INFO_ELEMENT_SIZE: - return ReturnValue(hipFormatToElementSize(ArrayInfo.Format)); + return ReturnValue(imageElementByteSize(ArrayInfo.Format)); case UR_IMAGE_INFO_ROW_PITCH: case UR_IMAGE_INFO_SLICE_PITCH: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 7203d5a238..250eaccab2 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -124,7 +124,7 @@ add_ur_adapter(${TARGET_NAME} # TODO: fix level_zero adapter conversion warnings target_compile_options(${TARGET_NAME} 
PRIVATE - $<$:/wd4267 /wd4805 /wd4244 /D_CRT_SECURE_NO_WARNINGS> + $<$:/wd4805 /wd4244> ) set_target_properties(${TARGET_NAME} PROPERTIES diff --git a/source/adapters/native_cpu/context.cpp b/source/adapters/native_cpu/context.cpp index 962525d1fc..c485725828 100644 --- a/source/adapters/native_cpu/context.cpp +++ b/source/adapters/native_cpu/context.cpp @@ -17,10 +17,10 @@ #include "common.hpp" #include "context.hpp" -UR_APIEXPORT ur_result_t UR_APICALL -urContextCreate(uint32_t DeviceCount, const ur_device_handle_t *phDevices, - const ur_context_properties_t *pProperties, - ur_context_handle_t *phContext) { +UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( + [[maybe_unused]] uint32_t DeviceCount, const ur_device_handle_t *phDevices, + const ur_context_properties_t *pProperties, + ur_context_handle_t *phContext) { std::ignore = pProperties; assert(DeviceCount == 1); diff --git a/source/common/ur_util.hpp b/source/common/ur_util.hpp index 5a34aa6584..00fc29eddd 100644 --- a/source/common/ur_util.hpp +++ b/source/common/ur_util.hpp @@ -22,8 +22,6 @@ #include #ifdef _WIN32 -#define NOMINMAX - #include inline int ur_getpid(void) { return static_cast(GetCurrentProcessId()); } #else diff --git a/test/adapters/cuda/urDeviceCreateWithNativeHandle.cpp b/test/adapters/cuda/urDeviceCreateWithNativeHandle.cpp index 3b8ebc416b..dca7932606 100644 --- a/test/adapters/cuda/urDeviceCreateWithNativeHandle.cpp +++ b/test/adapters/cuda/urDeviceCreateWithNativeHandle.cpp @@ -15,8 +15,8 @@ TEST_F(urCudaDeviceCreateWithNativeHandle, Success) { CUdevice cudaDevice; ASSERT_SUCCESS_CUDA(cuDeviceGet(&cudaDevice, 0)); - ur_native_handle_t nativeCuda = - reinterpret_cast(cudaDevice); + ur_native_handle_t nativeCuda = reinterpret_cast( + static_cast(cudaDevice)); ur_device_handle_t urDevice; ASSERT_SUCCESS(urDeviceCreateWithNativeHandle(nativeCuda, platform, nullptr, &urDevice)); diff --git a/test/conformance/source/environment.cpp b/test/conformance/source/environment.cpp index 
6c917914ed..11a8b501a8 100644 --- a/test/conformance/source/environment.cpp +++ b/test/conformance/source/environment.cpp @@ -281,7 +281,7 @@ DevicesEnvironment::DevicesEnvironment(int argc, char **argv) error = "urDeviceGet() failed to get devices."; return; } - for (u_long i = 0; i < count; i++) { + for (unsigned i = 0; i < count; i++) { size_t size; if (urDeviceGetInfo(devices[i], UR_DEVICE_INFO_NAME, 0, nullptr, &size)) { From 67c3779cbb13ca68f339519ff15f46faa4d71fbd Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Tue, 12 Dec 2023 09:43:09 +0000 Subject: [PATCH 04/32] AMDGPU enable global variable read write --- source/adapters/hip/enqueue.cpp | 68 ++++++++++++++++++++++++++++++--- source/adapters/hip/program.cpp | 18 +++++++++ source/adapters/hip/program.hpp | 2 + 3 files changed, 82 insertions(+), 6 deletions(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 7875650b85..ff49e5506a 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -1545,15 +1545,71 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( - ur_queue_handle_t, ur_program_handle_t, const char *, bool, size_t, size_t, - const void *, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, + bool blockingWrite, size_t count, size_t offset, const void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + // Since HIP requires a the global variable to be referenced by name, we use + // metadata to find the correct name to access it by. 
+ auto DeviceGlobalNameIt = hProgram->GlobalIDMD.find(name); + if (DeviceGlobalNameIt == hProgram->GlobalIDMD.end()) + return UR_RESULT_ERROR_INVALID_VALUE; + std::string DeviceGlobalName = DeviceGlobalNameIt->second; + + ur_result_t Result = UR_RESULT_SUCCESS; + try { + hipDeviceptr_t DeviceGlobal = 0; + size_t DeviceGlobalSize = 0; + UR_CHECK_ERROR(hipModuleGetGlobal(&DeviceGlobal, &DeviceGlobalSize, + hProgram->get(), + DeviceGlobalName.c_str())); + + if (offset + count > DeviceGlobalSize) + return UR_RESULT_ERROR_INVALID_VALUE; + + return urEnqueueUSMMemcpy( + hQueue, blockingWrite, + reinterpret_cast(reinterpret_cast(DeviceGlobal) + + offset), + pSrc, count, numEventsInWaitList, phEventWaitList, phEvent); + } catch (ur_result_t Err) { + Result = Err; + } + return Result; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( - ur_queue_handle_t, ur_program_handle_t, const char *, bool, size_t, size_t, - void *, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, + bool blockingRead, size_t count, size_t offset, void *pDst, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + // Since HIP requires a the global variable to be referenced by name, we use + // metadata to find the correct name to access it by. 
+ auto DeviceGlobalNameIt = hProgram->GlobalIDMD.find(name); + if (DeviceGlobalNameIt == hProgram->GlobalIDMD.end()) + return UR_RESULT_ERROR_INVALID_VALUE; + std::string DeviceGlobalName = DeviceGlobalNameIt->second; + + ur_result_t Result = UR_RESULT_SUCCESS; + try { + hipDeviceptr_t DeviceGlobal = 0; + size_t DeviceGlobalSize = 0; + UR_CHECK_ERROR(hipModuleGetGlobal(&DeviceGlobal, &DeviceGlobalSize, + hProgram->get(), + DeviceGlobalName.c_str())); + + if (offset + count > DeviceGlobalSize) + return UR_RESULT_ERROR_INVALID_VALUE; + + return urEnqueueUSMMemcpy( + hQueue, blockingRead, pDst, + reinterpret_cast( + reinterpret_cast(DeviceGlobal) + offset), + count, numEventsInWaitList, phEventWaitList, phEvent); + } catch (ur_result_t Err) { + Result = Err; + } + return Result; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( diff --git a/source/adapters/hip/program.cpp b/source/adapters/hip/program.cpp index 9aa64151e0..81f1be1194 100644 --- a/source/adapters/hip/program.cpp +++ b/source/adapters/hip/program.cpp @@ -78,6 +78,15 @@ void getCoMgrBuildLog(const amd_comgr_data_set_t BuildDataSet, char *BuildLog, } // namespace #endif +std::pair +splitMetadataName(const std::string &metadataName) { + size_t splitPos = metadataName.rfind('@'); + if (splitPos == std::string::npos) + return std::make_pair(metadataName, std::string{}); + return std::make_pair(metadataName.substr(0, splitPos), + metadataName.substr(splitPos, metadataName.length())); +} + ur_result_t ur_program_handle_t_::setMetadata(const ur_program_metadata_t *Metadata, size_t Length) { @@ -85,10 +94,19 @@ ur_program_handle_t_::setMetadata(const ur_program_metadata_t *Metadata, const ur_program_metadata_t MetadataElement = Metadata[i]; std::string MetadataElementName{MetadataElement.pName}; + auto [Prefix, Tag] = splitMetadataName(MetadataElementName); + if (MetadataElementName == __SYCL_UR_PROGRAM_METADATA_TAG_NEED_FINALIZATION) { assert(MetadataElement.type == UR_PROGRAM_METADATA_TYPE_UINT32); 
IsRelocatable = MetadataElement.value.data32; + } else if (Tag == __SYCL_UR_PROGRAM_METADATA_GLOBAL_ID_MAPPING) { + const char *MetadataValPtr = + reinterpret_cast(MetadataElement.value.pData) + + sizeof(std::uint64_t); + const char *MetadataValPtrEnd = + MetadataValPtr + MetadataElement.size - sizeof(std::uint64_t); + GlobalIDMD[Prefix] = std::string{MetadataValPtr, MetadataValPtrEnd}; } } return UR_RESULT_SUCCESS; diff --git a/source/adapters/hip/program.hpp b/source/adapters/hip/program.hpp index 4b4e5ec878..dbdf9c55c6 100644 --- a/source/adapters/hip/program.hpp +++ b/source/adapters/hip/program.hpp @@ -29,6 +29,8 @@ struct ur_program_handle_t_ { // Metadata bool IsRelocatable = false; + std::unordered_map GlobalIDMD; + constexpr static size_t MAX_LOG_SIZE = 8192u; char ErrorLog[MAX_LOG_SIZE], InfoLog[MAX_LOG_SIZE]; From c9fba562feb5ea5acfbcc0dbbc5d8a15e9383a2b Mon Sep 17 00:00:00 2001 From: Hugh Delaney <46290137+hdelan@users.noreply.github.com> Date: Fri, 15 Dec 2023 10:07:36 +0000 Subject: [PATCH 05/32] Update source/adapters/hip/enqueue.cpp Co-authored-by: Jakub Chlanda --- source/adapters/hip/enqueue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index ff49e5506a..56dfd20948 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -1549,7 +1549,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( bool blockingWrite, size_t count, size_t offset, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - // Since HIP requires a the global variable to be referenced by name, we use + // Since HIP requires the global variable to be referenced by name, we use // metadata to find the correct name to access it by. 
auto DeviceGlobalNameIt = hProgram->GlobalIDMD.find(name); if (DeviceGlobalNameIt == hProgram->GlobalIDMD.end()) From de02e990ea8876db0d54903c6b3e3af8ce88ce6d Mon Sep 17 00:00:00 2001 From: Hugh Delaney <46290137+hdelan@users.noreply.github.com> Date: Fri, 15 Dec 2023 10:07:58 +0000 Subject: [PATCH 06/32] Update source/adapters/hip/enqueue.cpp Co-authored-by: Jakub Chlanda --- source/adapters/hip/enqueue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 56dfd20948..144191bd35 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -1583,7 +1583,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( bool blockingRead, size_t count, size_t offset, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - // Since HIP requires a the global variable to be referenced by name, we use + // Since HIP requires the global variable to be referenced by name, we use // metadata to find the correct name to access it by. 
auto DeviceGlobalNameIt = hProgram->GlobalIDMD.find(name); if (DeviceGlobalNameIt == hProgram->GlobalIDMD.end()) From 45d76b7817c9654a8ebbbd0a02744f7ceb753227 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Wed, 20 Dec 2023 11:42:37 +0000 Subject: [PATCH 07/32] Refactor read write funcs --- source/adapters/hip/enqueue.cpp | 73 +++++++++++++++------------------ 1 file changed, 34 insertions(+), 39 deletions(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 144191bd35..0e7e04fc45 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -1544,19 +1544,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( return Result; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( +namespace { + +enum class GlobalVariableCopy { Read, Write }; + +ur_result_t deviceGlobalCopyHelper( ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, - bool blockingWrite, size_t count, size_t offset, const void *pSrc, + bool blocking, size_t count, size_t offset, void *ptr, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - // Since HIP requires the global variable to be referenced by name, we use + ur_event_handle_t *phEvent, GlobalVariableCopy CopyType) { + // Since HIP requires a the global variable to be referenced by name, we use // metadata to find the correct name to access it by. 
auto DeviceGlobalNameIt = hProgram->GlobalIDMD.find(name); if (DeviceGlobalNameIt == hProgram->GlobalIDMD.end()) return UR_RESULT_ERROR_INVALID_VALUE; std::string DeviceGlobalName = DeviceGlobalNameIt->second; - ur_result_t Result = UR_RESULT_SUCCESS; try { hipDeviceptr_t DeviceGlobal = 0; size_t DeviceGlobalSize = 0; @@ -1567,15 +1570,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( if (offset + count > DeviceGlobalSize) return UR_RESULT_ERROR_INVALID_VALUE; - return urEnqueueUSMMemcpy( - hQueue, blockingWrite, - reinterpret_cast(reinterpret_cast(DeviceGlobal) + - offset), - pSrc, count, numEventsInWaitList, phEventWaitList, phEvent); + void *pSrc, *pDst; + if (CopyType == GlobalVariableCopy::Write) { + pSrc = ptr; + pDst = reinterpret_cast(DeviceGlobal) + offset; + } else { + pSrc = reinterpret_cast(DeviceGlobal) + offset; + pDst = ptr; + } + return urEnqueueUSMMemcpy(hQueue, blocking, pDst, pSrc, count, + numEventsInWaitList, phEventWaitList, phEvent); } catch (ur_result_t Err) { - Result = Err; + return Err; } - return Result; +} +} // namespace + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, + bool blockingWrite, size_t count, size_t offset, const void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return deviceGlobalCopyHelper(hQueue, hProgram, name, blockingWrite, count, + offset, const_cast(pSrc), + numEventsInWaitList, phEventWaitList, phEvent, + GlobalVariableCopy::Write); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( @@ -1583,33 +1602,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( bool blockingRead, size_t count, size_t offset, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - // Since HIP requires the global variable to be referenced by 
name, we use - // metadata to find the correct name to access it by. - auto DeviceGlobalNameIt = hProgram->GlobalIDMD.find(name); - if (DeviceGlobalNameIt == hProgram->GlobalIDMD.end()) - return UR_RESULT_ERROR_INVALID_VALUE; - std::string DeviceGlobalName = DeviceGlobalNameIt->second; - - ur_result_t Result = UR_RESULT_SUCCESS; - try { - hipDeviceptr_t DeviceGlobal = 0; - size_t DeviceGlobalSize = 0; - UR_CHECK_ERROR(hipModuleGetGlobal(&DeviceGlobal, &DeviceGlobalSize, - hProgram->get(), - DeviceGlobalName.c_str())); - - if (offset + count > DeviceGlobalSize) - return UR_RESULT_ERROR_INVALID_VALUE; - - return urEnqueueUSMMemcpy( - hQueue, blockingRead, pDst, - reinterpret_cast( - reinterpret_cast(DeviceGlobal) + offset), - count, numEventsInWaitList, phEventWaitList, phEvent); - } catch (ur_result_t Err) { - Result = Err; - } - return Result; + return deviceGlobalCopyHelper( + hQueue, hProgram, name, blockingRead, count, offset, pDst, + numEventsInWaitList, phEventWaitList, phEvent, GlobalVariableCopy::Read); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( From 2e156f790c2e8473abba198f91b544c1db0ca526 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Fri, 3 Nov 2023 10:25:52 +0000 Subject: [PATCH 08/32] [EXP][CMDBUF] Add extra event to get CommandBuffer start time Adds an extra event in the first command list associated to the CommandBuffer execution to obtain the start time of the graph execution. 
--- source/adapters/level_zero/command_buffer.cpp | 19 +++++++++++++++++++ source/adapters/level_zero/event.cpp | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 4b811ab033..af5fa73077 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -941,6 +941,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( ZE2UR_CALL(zeCommandListAppendBarrier, (SignalCommandList->first, RetEvent->ZeEvent, 1, &(CommandBuffer->SignalEvent->ZeEvent))); + + if ((Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE)) { + // We create an additional signal specific to the current execution of the + // CommandBuffer. This signal is needed for profiling the execution time + // of the CommandBuffer. It waits for the WaitEvent to be signaled + // which indicates the start of the CommandBuffer actual execution. + // This event is embedded into the Event return to the user to allow + // the profiling engine to retrieve it. 
+ ur_event_handle_t StartEvent{}; + UR_CALL(createEventAndAssociateQueue( + Queue, &StartEvent, UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP, + WaitCommandList, false)); + + ZE2UR_CALL(zeCommandListAppendBarrier, + (WaitCommandList->first, StartEvent->ZeEvent, 1, + &(CommandBuffer->WaitEvent->ZeEvent))); + + RetEvent->CommandData = StartEvent; + } } // Execution our command-lists asynchronously diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index d8af1e674d..8dfef4f099 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -13,6 +13,7 @@ #include #include +#include "command_buffer.hpp" #include "common.hpp" #include "event.hpp" #include "ur_level_zero.hpp" @@ -454,6 +455,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( ///< bytes returned in propValue ) { std::shared_lock EventLock(Event->Mutex); + + // A Command-buffer consists of three command-lists. + // The start time should therefore be taken from an event associated + // to the first command-list. + if ((Event->CommandType == UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP) && + (PropName == UR_PROFILING_INFO_COMMAND_START) && (Event->CommandData)) { + auto StartEvent = static_cast(Event->CommandData); + return urEventGetProfilingInfo(StartEvent, UR_PROFILING_INFO_COMMAND_END, + PropValueSize, PropValue, PropValueSizeRet); + } + if (Event->UrQueue && (Event->UrQueue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) == 0) { return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE; @@ -763,6 +775,13 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) { return Res; Event->CommandData = nullptr; } + if (Event->CommandType == UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP && + Event->CommandData) { + // Free the memory extra event allocated for profiling purposed. 
+ auto AssociateEvent = static_cast(Event->CommandData); + urEventRelease(AssociateEvent); + Event->CommandData = nullptr; + } if (Event->OwnNativeHandle) { if (DisableEventsCaching) { auto ZeResult = ZE_CALL_NOCHECK(zeEventDestroy, (Event->ZeEvent)); From e8b78408b0b2d6547b7aa07ed3599a54484d6a2e Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Thu, 9 Nov 2023 16:44:33 +0000 Subject: [PATCH 09/32] Copy command-buffer event timestamps into a dedicated USM memory region. Get the command-buffer start and end timestamps from this memory. Move events reset from the middle command list to the first to allow the copy of the profiling info in the last command list and relax command list order. --- source/adapters/level_zero/command_buffer.cpp | 92 ++++++++++++------- source/adapters/level_zero/command_buffer.hpp | 5 + source/adapters/level_zero/event.cpp | 80 +++++++++++++--- 3 files changed, 131 insertions(+), 46 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index af5fa73077..d63630c456 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -45,13 +45,13 @@ │ Prefix │ Commands added to UR command-buffer by UR user │ Suffix │ └──────────┴────────────────────────────────────────────────┴─────────┘ - ┌───────────────────┬──────────────────────────────┐ - Prefix │Reset signal event │ Barrier waiting on wait event│ - └───────────────────┴──────────────────────────────┘ + ┌───────────────────┬──────────────┐──────────────────────────────┐ + Prefix │Reset signal event │ Reset events │ Barrier waiting on wait event│ + └───────────────────┴──────────────┘──────────────────────────────┘ ┌─────────────────────────────────────────────┐──────────────┐ - Suffix │Barrier waiting on sync-point event, │ Reset events │ - │signalling the UR command-buffer signal event│ │ + Suffix │Barrier waiting on sync-point event, │ Query CMD │ + │signalling the 
UR command-buffer signal event│ Timestamps │ └─────────────────────────────────────────────┘──────────────┘ For a call to `urCommandBufferEnqueueExp` with an event_list `EL`, @@ -433,6 +433,10 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, ZeStruct ZeCommandListDesc; ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal; + // Dependencies between commands are explicitly enforced by sync points when + // enqueuing. Consequently, relax the command ordering in the command list + // can enable the backend to further optimize the workload + ZeCommandListDesc.flags = ZE_COMMAND_LIST_FLAG_RELAXED_ORDERING; ze_command_list_handle_t ZeCommandList; // TODO We could optimize this by pooling both Level Zero command-lists and UR @@ -499,13 +503,6 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) { ZE2UR_CALL(zeCommandListAppendEventReset, (CommandBuffer->ZeCommandList, CommandBuffer->WaitEvent->ZeEvent)); - // Reset the L0 events we use for command-buffer internal sync-points to the - // non-signalled state - for (auto Event : WaitEventList) { - ZE2UR_CALL(zeCommandListAppendEventReset, - (CommandBuffer->ZeCommandList, Event)); - } - // Close the command list and have it ready for dispatch. ZE2UR_CALL(zeCommandListClose, (CommandBuffer->ZeCommandList)); return UR_RESULT_SUCCESS; @@ -899,14 +896,28 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( // Create command-list to execute before `CommandListPtr` and will signal // when `EventWaitList` dependencies are complete. ur_command_list_ptr_t WaitCommandList{}; + UR_CALL(Queue->Context->getAvailableCommandList(Queue, WaitCommandList, false, + false)); + + // Create a list of events of all the events that compose the command buffer + // workload. + // This loop also resets the L0 events we use for command-buffer internal + // sync-points to the non-signalled state. + // This is required for multiple submissions. 
+ const size_t NumEvents = CommandBuffer->SyncPoints.size(); + std::vector WaitEventList{NumEvents}; + for (size_t i = 0; i < NumEvents; i++) { + auto ZeEvent = CommandBuffer->SyncPoints[i]->ZeEvent; + WaitEventList[i] = ZeEvent; + ZE2UR_CALL(zeCommandListAppendEventReset, + (WaitCommandList->first, ZeEvent)); + } + if (NumEventsInWaitList) { _ur_ze_event_list_t TmpWaitList; UR_CALL(TmpWaitList.createAndRetainUrZeEventList( NumEventsInWaitList, EventWaitList, Queue, UseCopyEngine)); - UR_CALL(Queue->Context->getAvailableCommandList(Queue, WaitCommandList, - false, false)) - // Update the WaitList of the Wait Event // Events are appended to the WaitList if the WaitList is not empty if (CommandBuffer->WaitEvent->WaitList.isEmpty()) @@ -919,9 +930,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( CommandBuffer->WaitEvent->WaitList.Length, CommandBuffer->WaitEvent->WaitList.ZeEventList)); } else { - UR_CALL(Queue->Context->getAvailableCommandList(Queue, WaitCommandList, - false, false)); - ZE2UR_CALL(zeCommandListAppendSignalEvent, (WaitCommandList->first, CommandBuffer->WaitEvent->ZeEvent)); } @@ -943,22 +951,38 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( &(CommandBuffer->SignalEvent->ZeEvent))); if ((Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE)) { - // We create an additional signal specific to the current execution of the - // CommandBuffer. This signal is needed for profiling the execution time - // of the CommandBuffer. It waits for the WaitEvent to be signaled - // which indicates the start of the CommandBuffer actual execution. - // This event is embedded into the Event return to the user to allow - // the profiling engine to retrieve it. 
- ur_event_handle_t StartEvent{}; - UR_CALL(createEventAndAssociateQueue( - Queue, &StartEvent, UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP, - WaitCommandList, false)); - - ZE2UR_CALL(zeCommandListAppendBarrier, - (WaitCommandList->first, StartEvent->ZeEvent, 1, - &(CommandBuffer->WaitEvent->ZeEvent))); - - RetEvent->CommandData = StartEvent; + // Multiple submissions of a command buffer implies that we need to save + // the event timestamps before resubmiting the command buffer. We + // therefore copy the these timestamps in a dedicated USM memory section + // before completing the command buffer execution, and then attach this + // memory to the event returned to users to allow to allow the profiling + // engine to recover these timestamps. + ur_usm_desc_t USMDesc{}; + ur_usm_device_desc_t UsmDeviceDesc{}; + UsmDeviceDesc.stype = UR_STRUCTURE_TYPE_USM_DEVICE_DESC; + ur_usm_host_desc_t UsmHostDesc{}; + UsmHostDesc.stype = UR_STRUCTURE_TYPE_USM_HOST_DESC; + UsmDeviceDesc.pNext = &UsmHostDesc; + USMDesc.pNext = &UsmDeviceDesc; + USMDesc.align = 4; // 4byte-aligned + + size_t Size = WaitEventList.size() * sizeof(ze_kernel_timestamp_result_t); + + struct command_buffer_profiling_t *Profiling = + new command_buffer_profiling_t(); + + Profiling->NumEvents = WaitEventList.size(); + + urUSMSharedAlloc(RetEvent->Context, CommandBuffer->Device, &USMDesc, + nullptr, Size, (void **)&Profiling->Timestamps); + + ZE2UR_CALL(zeCommandListAppendQueryKernelTimestamps, + (SignalCommandList->first, WaitEventList.size(), + WaitEventList.data(), Profiling->Timestamps, 0, + RetEvent->ZeEvent, 1, + &(CommandBuffer->SignalEvent->ZeEvent))); + + RetEvent->CommandData = static_cast(Profiling); } } diff --git a/source/adapters/level_zero/command_buffer.hpp b/source/adapters/level_zero/command_buffer.hpp index b18f1c3497..a43e9e4c52 100644 --- a/source/adapters/level_zero/command_buffer.hpp +++ b/source/adapters/level_zero/command_buffer.hpp @@ -19,6 +19,11 @@ #include "context.hpp" #include 
"queue.hpp" +struct command_buffer_profiling_t { + ur_exp_command_buffer_sync_point_t NumEvents; + ze_kernel_timestamp_result_t *Timestamps; +}; + struct ur_exp_command_buffer_handle_t_ : public _ur_object { ur_exp_command_buffer_handle_t_(ur_context_handle_t Context, ur_device_handle_t Device, diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 8dfef4f099..5e9397e4e7 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -456,16 +456,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( ) { std::shared_lock EventLock(Event->Mutex); - // A Command-buffer consists of three command-lists. - // The start time should therefore be taken from an event associated - // to the first command-list. - if ((Event->CommandType == UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP) && - (PropName == UR_PROFILING_INFO_COMMAND_START) && (Event->CommandData)) { - auto StartEvent = static_cast(Event->CommandData); - return urEventGetProfilingInfo(StartEvent, UR_PROFILING_INFO_COMMAND_END, - PropValueSize, PropValue, PropValueSizeRet); - } - if (Event->UrQueue && (Event->UrQueue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) == 0) { return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE; @@ -482,6 +472,70 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( ze_kernel_timestamp_result_t tsResult; + // A Command-buffer consists of three command-lists for which only a single + // event is returned to users. The actual profiling information related to the + // command-buffer should therefore be extrated from graph events themsleves. + // The timestamps of these events are saved in a memory region attached to + // event usning CommandData field. The timings must therefore be recovered + // from this memory. 
+ if (Event->CommandType == UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP) { + if (Event->CommandData) { + struct command_buffer_profiling_t *ProfilingsPtr; + switch (PropName) { + case UR_PROFILING_INFO_COMMAND_START: { + ProfilingsPtr = static_cast( + Event->CommandData); + // Sync-point order does not necessarily match to the order of + // execution. We therefore look for the first command executed. + uint64_t MinStart = ProfilingsPtr->Timestamps->global.kernelStart; + for (uint64_t i = 1; i < ProfilingsPtr->NumEvents; i++) { + uint64_t Timestamp = ProfilingsPtr->Timestamps[i].global.kernelStart; + if (Timestamp < MinStart) { + MinStart = Timestamp; + } + } + uint64_t ContextStartTime = + (MinStart & TimestampMaxValue) * ZeTimerResolution; + return ReturnValue(ContextStartTime); + } + case UR_PROFILING_INFO_COMMAND_END: { + ProfilingsPtr = static_cast( + Event->CommandData); + // Sync-point order does not necessarily match to the order of + // execution. We therefore look for the last command executed. + uint64_t MaxEnd = ProfilingsPtr->Timestamps->global.kernelEnd; + uint64_t LastStart = ProfilingsPtr->Timestamps->global.kernelStart; + for (uint64_t i = 1; i < ProfilingsPtr->NumEvents; i++) { + uint64_t Timestamp = ProfilingsPtr->Timestamps[i].global.kernelEnd; + if (Timestamp > MaxEnd) { + MaxEnd = Timestamp; + LastStart = ProfilingsPtr->Timestamps[i].global.kernelStart; + } + } + uint64_t ContextStartTime = (LastStart & TimestampMaxValue); + uint64_t ContextEndTime = (MaxEnd & TimestampMaxValue); + + // + // Handle a possible wrap-around (the underlying HW counter is < + // 64-bit). Note, it will not report correct time if there were multiple + // wrap arounds, and the longer term plan is to enlarge the capacity of + // the HW timestamps. 
+ // + if (ContextEndTime <= ContextStartTime) { + ContextEndTime += TimestampMaxValue; + } + ContextEndTime *= ZeTimerResolution; + return ReturnValue(ContextEndTime); + } + default: + urPrint("urEventGetProfilingInfo: not supported ParamName\n"); + return UR_RESULT_ERROR_INVALID_VALUE; + } + } else { + return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE; + } + } + switch (PropName) { case UR_PROFILING_INFO_COMMAND_START: { ZE2UR_CALL(zeEventQueryKernelTimestamp, (Event->ZeEvent, &tsResult)); @@ -778,8 +832,10 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) { if (Event->CommandType == UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP && Event->CommandData) { // Free the memory extra event allocated for profiling purposed. - auto AssociateEvent = static_cast(Event->CommandData); - urEventRelease(AssociateEvent); + struct command_buffer_profiling_t *ProfilingPtr = + static_cast(Event->CommandData); + urUSMFree(Event->Context, (void *)ProfilingPtr->Timestamps); + delete ProfilingPtr; Event->CommandData = nullptr; } if (Event->OwnNativeHandle) { From bd25d685cde0c23f66d1b4e8e499e3da7bdafddd Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Fri, 10 Nov 2023 17:29:42 +0000 Subject: [PATCH 10/32] Changes USMShared memory allocation for host only allocation --- source/adapters/level_zero/command_buffer.cpp | 33 +++++++------------ source/adapters/level_zero/event.cpp | 22 ++++++------- 2 files changed, 23 insertions(+), 32 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index d63630c456..7f3f514d9d 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -939,6 +939,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( // Create a command-list to signal RetEvent on completion ur_command_list_ptr_t SignalCommandList{}; if (Event) { + ur_event_handle_t SyncEvent = CommandBuffer->SignalEvent; 
UR_CALL(Queue->Context->getAvailableCommandList(Queue, SignalCommandList, false, false)); @@ -946,10 +947,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP, SignalCommandList, false)); - ZE2UR_CALL(zeCommandListAppendBarrier, - (SignalCommandList->first, RetEvent->ZeEvent, 1, - &(CommandBuffer->SignalEvent->ZeEvent))); - if ((Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE)) { // Multiple submissions of a command buffer implies that we need to save // the event timestamps before resubmiting the command buffer. We @@ -957,33 +954,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( // before completing the command buffer execution, and then attach this // memory to the event returned to users to allow to allow the profiling // engine to recover these timestamps. - ur_usm_desc_t USMDesc{}; - ur_usm_device_desc_t UsmDeviceDesc{}; - UsmDeviceDesc.stype = UR_STRUCTURE_TYPE_USM_DEVICE_DESC; - ur_usm_host_desc_t UsmHostDesc{}; - UsmHostDesc.stype = UR_STRUCTURE_TYPE_USM_HOST_DESC; - UsmDeviceDesc.pNext = &UsmHostDesc; - USMDesc.pNext = &UsmDeviceDesc; - USMDesc.align = 4; // 4byte-aligned - - size_t Size = WaitEventList.size() * sizeof(ze_kernel_timestamp_result_t); + UR_CALL(createEventAndAssociateQueue( + Queue, &SyncEvent, UR_COMMAND_USM_MEMCPY, SignalCommandList, false)); - struct command_buffer_profiling_t *Profiling = - new command_buffer_profiling_t(); + command_buffer_profiling_t *Profiling = new command_buffer_profiling_t(); Profiling->NumEvents = WaitEventList.size(); - - urUSMSharedAlloc(RetEvent->Context, CommandBuffer->Device, &USMDesc, - nullptr, Size, (void **)&Profiling->Timestamps); + Profiling->Timestamps = + new ze_kernel_timestamp_result_t[Profiling->NumEvents]; ZE2UR_CALL(zeCommandListAppendQueryKernelTimestamps, (SignalCommandList->first, WaitEventList.size(), - WaitEventList.data(), Profiling->Timestamps, 0, - RetEvent->ZeEvent, 1, + WaitEventList.data(), (void 
*)Profiling->Timestamps, 0, + SyncEvent->ZeEvent, 1, &(CommandBuffer->SignalEvent->ZeEvent))); RetEvent->CommandData = static_cast(Profiling); } + + ZE2UR_CALL(zeCommandListAppendBarrier, + (SignalCommandList->first, RetEvent->ZeEvent, 1, + &(SyncEvent->ZeEvent))); } // Execution our command-lists asynchronously diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 5e9397e4e7..2dc74ff5ac 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -480,14 +480,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( // from this memory. if (Event->CommandType == UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP) { if (Event->CommandData) { - struct command_buffer_profiling_t *ProfilingsPtr; + command_buffer_profiling_t *ProfilingsPtr; switch (PropName) { case UR_PROFILING_INFO_COMMAND_START: { - ProfilingsPtr = static_cast( - Event->CommandData); + ProfilingsPtr = + static_cast(Event->CommandData); // Sync-point order does not necessarily match to the order of // execution. We therefore look for the first command executed. - uint64_t MinStart = ProfilingsPtr->Timestamps->global.kernelStart; + uint64_t MinStart = ProfilingsPtr->Timestamps[0].global.kernelStart; for (uint64_t i = 1; i < ProfilingsPtr->NumEvents; i++) { uint64_t Timestamp = ProfilingsPtr->Timestamps[i].global.kernelStart; if (Timestamp < MinStart) { @@ -499,12 +499,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( return ReturnValue(ContextStartTime); } case UR_PROFILING_INFO_COMMAND_END: { - ProfilingsPtr = static_cast( - Event->CommandData); + ProfilingsPtr = + static_cast(Event->CommandData); // Sync-point order does not necessarily match to the order of // execution. We therefore look for the last command executed. 
- uint64_t MaxEnd = ProfilingsPtr->Timestamps->global.kernelEnd; - uint64_t LastStart = ProfilingsPtr->Timestamps->global.kernelStart; + uint64_t MaxEnd = ProfilingsPtr->Timestamps[0].global.kernelEnd; + uint64_t LastStart = ProfilingsPtr->Timestamps[0].global.kernelStart; for (uint64_t i = 1; i < ProfilingsPtr->NumEvents; i++) { uint64_t Timestamp = ProfilingsPtr->Timestamps[i].global.kernelEnd; if (Timestamp > MaxEnd) { @@ -832,9 +832,9 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) { if (Event->CommandType == UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP && Event->CommandData) { // Free the memory extra event allocated for profiling purposed. - struct command_buffer_profiling_t *ProfilingPtr = - static_cast(Event->CommandData); - urUSMFree(Event->Context, (void *)ProfilingPtr->Timestamps); + command_buffer_profiling_t *ProfilingPtr = + static_cast(Event->CommandData); + delete[] ProfilingPtr->Timestamps; delete ProfilingPtr; Event->CommandData = nullptr; } From 1db8fbf4f0c775719cdd2b42de58ddb978ea4e47 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Mon, 13 Nov 2023 12:02:23 +0000 Subject: [PATCH 11/32] Fixes event leak --- source/adapters/level_zero/command_buffer.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 7f3f514d9d..579be4447d 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -939,7 +939,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( // Create a command-list to signal RetEvent on completion ur_command_list_ptr_t SignalCommandList{}; if (Event) { - ur_event_handle_t SyncEvent = CommandBuffer->SignalEvent; UR_CALL(Queue->Context->getAvailableCommandList(Queue, SignalCommandList, false, false)); @@ -954,9 +953,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( // before completing the command buffer 
execution, and then attach this // memory to the event returned to users to allow to allow the profiling // engine to recover these timestamps. - UR_CALL(createEventAndAssociateQueue( - Queue, &SyncEvent, UR_COMMAND_USM_MEMCPY, SignalCommandList, false)); - command_buffer_profiling_t *Profiling = new command_buffer_profiling_t(); Profiling->NumEvents = WaitEventList.size(); @@ -966,15 +962,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( ZE2UR_CALL(zeCommandListAppendQueryKernelTimestamps, (SignalCommandList->first, WaitEventList.size(), WaitEventList.data(), (void *)Profiling->Timestamps, 0, - SyncEvent->ZeEvent, 1, + RetEvent->ZeEvent, 1, &(CommandBuffer->SignalEvent->ZeEvent))); RetEvent->CommandData = static_cast(Profiling); + } else { + ZE2UR_CALL(zeCommandListAppendBarrier, + (SignalCommandList->first, RetEvent->ZeEvent, 1, + &(CommandBuffer->SignalEvent->ZeEvent))); } - - ZE2UR_CALL(zeCommandListAppendBarrier, - (SignalCommandList->first, RetEvent->ZeEvent, 1, - &(SyncEvent->ZeEvent))); } // Execution our command-lists asynchronously From 35b6a5eab04d57040eb0cf88866c8ef475f27bc9 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Wed, 15 Nov 2023 12:12:29 +0000 Subject: [PATCH 12/32] Moves wait-event reset from main CL to suffix CL --- source/adapters/level_zero/command_buffer.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 579be4447d..bbe49cb705 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -498,11 +498,6 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) { (CommandBuffer->ZeCommandList, CommandBuffer->SignalEvent->ZeEvent, NumEvents, WaitEventList.data())); - // Reset the wait-event for the UR command-buffer that is signalled when its - // submission dependencies have been satisfied. 
- ZE2UR_CALL(zeCommandListAppendEventReset, - (CommandBuffer->ZeCommandList, CommandBuffer->WaitEvent->ZeEvent)); - // Close the command list and have it ready for dispatch. ZE2UR_CALL(zeCommandListClose, (CommandBuffer->ZeCommandList)); return UR_RESULT_SUCCESS; @@ -938,10 +933,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_event_handle_t RetEvent{}; // Create a command-list to signal RetEvent on completion ur_command_list_ptr_t SignalCommandList{}; - if (Event) { - UR_CALL(Queue->Context->getAvailableCommandList(Queue, SignalCommandList, - false, false)); + UR_CALL(Queue->Context->getAvailableCommandList(Queue, SignalCommandList, + false, false)); + // Reset the wait-event for the UR command-buffer that is signalled when its + // submission dependencies have been satisfied. + ZE2UR_CALL(zeCommandListAppendEventReset, + (SignalCommandList->first, CommandBuffer->WaitEvent->ZeEvent)); + if (Event) { UR_CALL(createEventAndAssociateQueue(Queue, &RetEvent, UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP, SignalCommandList, false)); @@ -968,8 +967,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( RetEvent->CommandData = static_cast(Profiling); } else { ZE2UR_CALL(zeCommandListAppendBarrier, - (SignalCommandList->first, RetEvent->ZeEvent, 1, - &(CommandBuffer->SignalEvent->ZeEvent))); + (SignalCommandList->first, RetEvent->ZeEvent, 1, + &(CommandBuffer->SignalEvent->ZeEvent))); } } From 929c9789c354d71ecf1b1fe0091767d075298ee4 Mon Sep 17 00:00:00 2001 From: Weronika Lewandowska Date: Tue, 9 Jan 2024 17:12:30 +0100 Subject: [PATCH 13/32] [Security] add permissions to workflows --- .github/workflows/bandit.yml | 3 +++ .github/workflows/cmake.yml | 3 +++ .github/workflows/codeql.yml | 3 +++ .github/workflows/coverage.yml | 3 +++ .github/workflows/coverity.yml | 2 ++ .github/workflows/e2e_nightly.yml | 3 +++ .github/workflows/nightly.yml | 3 +++ .github/workflows/prerelease.yml | 3 +++ 8 files changed, 23 insertions(+) diff --git 
a/.github/workflows/bandit.yml b/.github/workflows/bandit.yml index e356e0bdb4..bb04c5056c 100644 --- a/.github/workflows/bandit.yml +++ b/.github/workflows/bandit.yml @@ -7,6 +7,9 @@ concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true +permissions: + contents: read + jobs: linux: name: Bandit diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index e697dd6aaf..0976922723 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -6,6 +6,9 @@ concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true +permissions: + contents: read + jobs: ubuntu-build: name: Build - Ubuntu diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index bf312be396..4806c23d3c 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -6,6 +6,9 @@ concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true +permissions: + contents: read + jobs: analyze-ubuntu: name: Analyze on Ubuntu diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 731f7ea320..50f28151ac 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -2,6 +2,9 @@ name: Coverage on: [push, pull_request] +permissions: + contents: read + jobs: ubuntu-build: name: Build - Ubuntu diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index ab065ee77e..3b3853c604 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -23,6 +23,8 @@ env: COVERITY_SCAN_BRANCH_PATTERN: "main" TRAVIS_BRANCH: ${{ github.ref_name }} +permissions: + contents: read jobs: linux: diff --git a/.github/workflows/e2e_nightly.yml b/.github/workflows/e2e_nightly.yml index e3cda49245..c987b1f0ef 100644 --- a/.github/workflows/e2e_nightly.yml +++ b/.github/workflows/e2e_nightly.yml @@ -5,6 +5,9 @@ on: # Run every day at 23:00 UTC 
- cron: '0 23 * * *' +permissions: + contents: read + jobs: e2e-build-hw: name: Build SYCL, UR, run E2E diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 4a81c94e8f..bcff394049 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -5,6 +5,9 @@ on: # Run every day at 23:00 UTC - cron: '0 23 * * *' +permissions: + contents: read + jobs: long-fuzz-test: name: Run long fuzz tests diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index 882b06985a..80330b2b4d 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -6,6 +6,9 @@ on: # At 23:00 on Friday, GitHub actions schedule is in UTC time. - cron: 0 23 * * 5 +permissions: + contents: read + jobs: weekly-prerelease: runs-on: ubuntu-latest From d358b972dc99a77001084bd97d92e71ae48e6a87 Mon Sep 17 00:00:00 2001 From: "Sarnie, Nick" Date: Mon, 27 Nov 2023 09:26:20 -0800 Subject: [PATCH 14/32] [UR][L0] Make urPlatformGetBackendOption return -ze-opt-level=2 for -O2 Signed-off-by: Sarnie, Nick --- source/adapters/level_zero/platform.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/adapters/level_zero/platform.cpp b/source/adapters/level_zero/platform.cpp index b7680b1638..e67c84f3e4 100644 --- a/source/adapters/level_zero/platform.cpp +++ b/source/adapters/level_zero/platform.cpp @@ -433,8 +433,8 @@ ur_result_t ur_platform_handle_t_::populateDeviceCacheIfNeeded() { // Returns plugin specific backend option. // Current support is only for optimization options. // Return '-ze-opt-disable' for frontend_option = -O0. -// Return '-ze-opt-level=1' for frontend_option = -O1 or -O2. -// Return '-ze-opt-level=2' for frontend_option = -O3. +// Return '-ze-opt-level=1' for frontend_option = -O1. +// Return '-ze-opt-level=2' for frontend_option = -O2 or -O3. // Return '-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'' for // frontend_option=-ftarget-compile-fast. 
UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( @@ -457,11 +457,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( *PlatformOption = "-ze-opt-disable"; return UR_RESULT_SUCCESS; } - if (FrontendOption == "-O1"sv || FrontendOption == "-O2"sv) { + if (FrontendOption == "-O1"sv) { *PlatformOption = "-ze-opt-level=1"; return UR_RESULT_SUCCESS; } - if (FrontendOption == "-O3"sv) { + if (FrontendOption == "-O2"sv || FrontendOption == "-O3"sv) { *PlatformOption = "-ze-opt-level=2"; return UR_RESULT_SUCCESS; } From b3a1d52d830f5c7533ce3d861eada6035fad5d98 Mon Sep 17 00:00:00 2001 From: "Sarnie, Nick" Date: Tue, 28 Nov 2023 07:00:04 -0800 Subject: [PATCH 15/32] also map O1 to -ze-opt-level=2 Signed-off-by: Sarnie, Nick --- source/adapters/level_zero/platform.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/source/adapters/level_zero/platform.cpp b/source/adapters/level_zero/platform.cpp index e67c84f3e4..335a920294 100644 --- a/source/adapters/level_zero/platform.cpp +++ b/source/adapters/level_zero/platform.cpp @@ -433,8 +433,7 @@ ur_result_t ur_platform_handle_t_::populateDeviceCacheIfNeeded() { // Returns plugin specific backend option. // Current support is only for optimization options. // Return '-ze-opt-disable' for frontend_option = -O0. -// Return '-ze-opt-level=1' for frontend_option = -O1. -// Return '-ze-opt-level=2' for frontend_option = -O2 or -O3. +// Return '-ze-opt-level=2' for frontend_option = -O1, -O2 or -O3. // Return '-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'' for // frontend_option=-ftarget-compile-fast. 
UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( @@ -457,11 +456,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( *PlatformOption = "-ze-opt-disable"; return UR_RESULT_SUCCESS; } - if (FrontendOption == "-O1"sv) { - *PlatformOption = "-ze-opt-level=1"; - return UR_RESULT_SUCCESS; - } - if (FrontendOption == "-O2"sv || FrontendOption == "-O3"sv) { + if (FrontendOption == "-O1"sv || FrontendOption == "-O2"sv || + FrontendOption == "-O3"sv) { *PlatformOption = "-ze-opt-level=2"; return UR_RESULT_SUCCESS; } From da50cd775f7ba34c5340da07a45c1e5aa83c5099 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Wed, 10 Jan 2024 12:16:30 +0000 Subject: [PATCH 16/32] [CL] Fix Windows CMake, remove explicit Linux path Fixes #1171 by replacing an explicit Linux path used as the value for the internal `OpenCLICDLoaderLibrary` CMake variable and replacing it with the `OpenCL::OpenCL` target name. This is a target alias provided by the https://github.com/KhronosGroup/OpenCL-ICD-Loader. 
--- source/adapters/opencl/CMakeLists.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/source/adapters/opencl/CMakeLists.txt b/source/adapters/opencl/CMakeLists.txt index 96d85bf117..c78e75e94f 100644 --- a/source/adapters/opencl/CMakeLists.txt +++ b/source/adapters/opencl/CMakeLists.txt @@ -67,16 +67,14 @@ if(UR_OPENCL_ICD_LOADER_LIBRARY) set(OpenCLICDLoaderLibrary ${UR_OPENCL_ICD_LOADER_LIBRARY}) else() find_package(OpenCL 3.0) - if(OpenCL_FOUND) - set(OpenCLICDLoaderLibrary OpenCL::OpenCL) - else() + if(NOT OpenCL_FOUND) FetchContent_Declare(OpenCL-ICD-Loader GIT_REPOSITORY "https://github.com/KhronosGroup/OpenCL-ICD-Loader.git" GIT_TAG main ) FetchContent_MakeAvailable(OpenCL-ICD-Loader) - set(OpenCLICDLoaderLibrary ${PROJECT_BINARY_DIR}/lib/libOpenCL.so) endif() + set(OpenCLICDLoaderLibrary OpenCL::OpenCL) endif() message(STATUS "OpenCL Include Directory: ${OpenCLIncludeDirectory}") From 0a2b96a0a393f025d1a23736101968f6174dbd29 Mon Sep 17 00:00:00 2001 From: Jakub Chlanda Date: Fri, 8 Dec 2023 13:02:39 +0000 Subject: [PATCH 17/32] [HIP] Allow custom location of ROCm components Allow custom location of HIP/HSA include and HIP library directories using the following CMake variables: * `SYCL_BUILD_PI_HIP_INCLUDE_DIR`, * `SYCL_BUILD_PI_HIP_HSA_INCLUDE_DIR`, * `SYCL_BUILD_PI_HIP_LIB_DIR`. 
--- README.md | 4 ++++ source/adapters/hip/CMakeLists.txt | 14 +++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 1e9c47b755..fc6fa4822e 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,10 @@ List of options provided by CMake: | UR_ENABLE_COMGR | Enable comgr lib usage | AMD/NVIDIA | AMD | | UR_DPCXX | Path of the DPC++ compiler executable to build CTS device binaries | File path | `""` | | UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory to build CTS device binaries | Directory path | `""` | +| UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `/opt/rocm` | +| UR_HIP_INCLUDE_DIR | Path of the ROCm HIP include directory | Directory path | `${UR_HIP_ROCM_DIR}/include` | +| UR_HIP_HSA_INCLUDE_DIR | Path of the ROCm HSA include directory | Directory path | `${UR_HIP_ROCM_DIR}/hsa/include""` | +| UR_HIP_LIB_DIR | Path of the ROCm HIP library directory | Directory path | `${UR_HIP_ROCM_DIR}/lib""` | ### Additional make targets diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index 90a235ddeb..01b3fc8d9b 100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -10,14 +10,14 @@ set(UR_HIP_PLATFORM "AMD" CACHE STRING "UR HIP platform, AMD or NVIDIA") # Set default ROCm installation directory set(UR_HIP_ROCM_DIR "/opt/rocm" CACHE STRING "ROCm installation dir") - -set(UR_HIP_INCLUDE_DIR "${UR_HIP_ROCM_DIR}/include") - +# Allow custom location of HIP/HSA include and HIP library directories +set(UR_HIP_INCLUDE_DIR "${UR_HIP_ROCM_DIR}/include" CACHE FILEPATH + "Custom ROCm HIP include dir") set(UR_HIP_HSA_INCLUDE_DIRS - "${UR_HIP_ROCM_DIR}/hsa/include;${UR_HIP_ROCM_DIR}/include") - -# Set HIP lib dir -set(UR_HIP_LIB_DIR "${UR_HIP_ROCM_DIR}/lib") + "${UR_HIP_ROCM_DIR}/hsa/include;${UR_HIP_ROCM_DIR}/include" CACHE FILEPATH + "Custom ROCm HSA include dir") +set(UR_HIP_LIB_DIR "${UR_HIP_ROCM_DIR}/lib" CACHE 
FILEPATH + "Custom ROCm HIP library dir") # Check if HIP library path exists (AMD platform only) if("${UR_HIP_PLATFORM}" STREQUAL "AMD") From 0dd9a24ab91562294b88dda80694dc4f02cb7d5c Mon Sep 17 00:00:00 2001 From: Jakub Chlanda Date: Thu, 14 Dec 2023 08:07:22 -0500 Subject: [PATCH 18/32] Force the setting of custom vars --- source/adapters/hip/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index 01b3fc8d9b..21156ecf62 100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -17,7 +17,7 @@ set(UR_HIP_HSA_INCLUDE_DIRS "${UR_HIP_ROCM_DIR}/hsa/include;${UR_HIP_ROCM_DIR}/include" CACHE FILEPATH "Custom ROCm HSA include dir") set(UR_HIP_LIB_DIR "${UR_HIP_ROCM_DIR}/lib" CACHE FILEPATH - "Custom ROCm HIP library dir") + "Custom ROCm HIP library dir" FORCE) # Check if HIP library path exists (AMD platform only) if("${UR_HIP_PLATFORM}" STREQUAL "AMD") From 7cbc2eca2a995536b463696edb556debd57b806d Mon Sep 17 00:00:00 2001 From: Jakub Chlanda Date: Mon, 18 Dec 2023 10:10:27 +0100 Subject: [PATCH 19/32] Update source/adapters/hip/CMakeLists.txt Co-authored-by: Andrey Alekseenko --- source/adapters/hip/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index 21156ecf62..01b3fc8d9b 100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -17,7 +17,7 @@ set(UR_HIP_HSA_INCLUDE_DIRS "${UR_HIP_ROCM_DIR}/hsa/include;${UR_HIP_ROCM_DIR}/include" CACHE FILEPATH "Custom ROCm HSA include dir") set(UR_HIP_LIB_DIR "${UR_HIP_ROCM_DIR}/lib" CACHE FILEPATH - "Custom ROCm HIP library dir" FORCE) + "Custom ROCm HIP library dir") # Check if HIP library path exists (AMD platform only) if("${UR_HIP_PLATFORM}" STREQUAL "AMD") From c71a83d290fa575dffe6bf346bf6338996b703be Mon Sep 17 00:00:00 2001 From: Jakub Chlanda Date: Tue, 
19 Dec 2023 06:38:44 -0500 Subject: [PATCH 20/32] Use PATH --- source/adapters/hip/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index 01b3fc8d9b..56e849593f 100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -11,12 +11,12 @@ set(UR_HIP_PLATFORM "AMD" CACHE STRING "UR HIP platform, AMD or NVIDIA") # Set default ROCm installation directory set(UR_HIP_ROCM_DIR "/opt/rocm" CACHE STRING "ROCm installation dir") # Allow custom location of HIP/HSA include and HIP library directories -set(UR_HIP_INCLUDE_DIR "${UR_HIP_ROCM_DIR}/include" CACHE FILEPATH +set(UR_HIP_INCLUDE_DIR "${UR_HIP_ROCM_DIR}/include" CACHE PATH "Custom ROCm HIP include dir") set(UR_HIP_HSA_INCLUDE_DIRS "${UR_HIP_ROCM_DIR}/hsa/include;${UR_HIP_ROCM_DIR}/include" CACHE FILEPATH "Custom ROCm HSA include dir") -set(UR_HIP_LIB_DIR "${UR_HIP_ROCM_DIR}/lib" CACHE FILEPATH +set(UR_HIP_LIB_DIR "${UR_HIP_ROCM_DIR}/lib" CACHE PATH "Custom ROCm HIP library dir") # Check if HIP library path exists (AMD platform only) From a1e8fae2d2eb5a4bd0d57ca56e6c579aa4dd842a Mon Sep 17 00:00:00 2001 From: Jakub Chlanda Date: Tue, 9 Jan 2024 08:55:11 -0500 Subject: [PATCH 21/32] Check for the new (6.0.0) HSA include location --- source/adapters/hip/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index 56e849593f..90162eb2de 100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -31,7 +31,8 @@ if("${UR_HIP_PLATFORM}" STREQUAL "AMD") " please check ROCm installation.") endif() - # Check if HSA include path exists + # Check if HSA include path exists. In rocm-6.0.0 the layout of HSA + # directory has changed, check for the new location as well. 
foreach(D IN LISTS UR_HIP_HSA_INCLUDE_DIRS) if(EXISTS "${D}") set(UR_HIP_HSA_INCLUDE_DIR "${D}") From ee09bc0221551c9a46f10ae000d0040d9354688e Mon Sep 17 00:00:00 2001 From: Weronika Lewandowska Date: Wed, 10 Jan 2024 15:16:04 +0100 Subject: [PATCH 22/32] Add OpenCL to Coverity build --- .github/workflows/coverity.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index efde6f0605..9e46aae20a 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -53,6 +53,7 @@ jobs: -DCUDA_CUDA_LIBRARY=/usr/local/cuda/lib64/stubs/libcuda.so -DUR_BUILD_ADAPTER_NATIVE_CPU=ON -DUR_BUILD_ADAPTER_HIP=ON + -DUR_BUILD_ADAPTER_OPENCL=ON - name: Run Coverity run: | From 66d52ace2d8ca79c3274c805caa495964c123680 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Wed, 10 Jan 2024 14:16:12 +0000 Subject: [PATCH 23/32] Also update README with UR_HIP_HSA_INCLUDE_DIRS change --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fc6fa4822e..57536f237a 100644 --- a/README.md +++ b/README.md @@ -139,8 +139,8 @@ List of options provided by CMake: | UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory to build CTS device binaries | Directory path | `""` | | UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `/opt/rocm` | | UR_HIP_INCLUDE_DIR | Path of the ROCm HIP include directory | Directory path | `${UR_HIP_ROCM_DIR}/include` | -| UR_HIP_HSA_INCLUDE_DIR | Path of the ROCm HSA include directory | Directory path | `${UR_HIP_ROCM_DIR}/hsa/include""` | -| UR_HIP_LIB_DIR | Path of the ROCm HIP library directory | Directory path | `${UR_HIP_ROCM_DIR}/lib""` | +| UR_HIP_HSA_INCLUDE_DIRS | Path of the ROCm HSA include directory | Directory path | `${UR_HIP_ROCM_DIR}/hsa/include;${UR_HIP_ROCM_DIR}/include` | +| UR_HIP_LIB_DIR | Path of the ROCm HIP library directory | Directory path | 
`${UR_HIP_ROCM_DIR}/lib` | ### Additional make targets From bdf29183609b6d75b60666003eaae18b82c9726b Mon Sep 17 00:00:00 2001 From: Krzysztof Swiecicki Date: Thu, 2 Nov 2023 16:25:03 +0100 Subject: [PATCH 24/32] [UR] Add adapter leak-checking tests --- test/layers/validation/leaks.cpp | 13 ++++++++++ test/layers/validation/leaks.out.match | 36 ++++++++++++++++++++------ 2 files changed, 41 insertions(+), 8 deletions(-) diff --git a/test/layers/validation/leaks.cpp b/test/layers/validation/leaks.cpp index e32aeafc89..794e8a3ef0 100644 --- a/test/layers/validation/leaks.cpp +++ b/test/layers/validation/leaks.cpp @@ -11,6 +11,19 @@ TEST_F(urTest, testUrAdapterGetLeak) { ASSERT_NE(nullptr, adapter); } +TEST_F(urTest, testUrAdapterRetainLeak) { + ur_adapter_handle_t adapter = nullptr; + ASSERT_EQ(urAdapterGet(1, &adapter, nullptr), UR_RESULT_SUCCESS); + ASSERT_NE(nullptr, adapter); + ASSERT_EQ(urAdapterRetain(adapter), UR_RESULT_SUCCESS); +} + +TEST_F(urTest, testUrAdapterRetainNonexistent) { + ur_adapter_handle_t adapter = (ur_adapter_handle_t)0xBEEF; + ASSERT_EQ(urAdapterRetain(adapter), UR_RESULT_SUCCESS); + ASSERT_NE(nullptr, adapter); +} + TEST_F(valDeviceTest, testUrContextCreateLeak) { ur_context_handle_t context = nullptr; ASSERT_EQ(urContextCreate(1, &device, nullptr, &context), diff --git a/test/layers/validation/leaks.out.match b/test/layers/validation/leaks.out.match index 9fac722527..90f8713765 100644 --- a/test/layers/validation/leaks.out.match +++ b/test/layers/validation/leaks.out.match @@ -1,34 +1,53 @@ +.* +\[ RUN \] urTest.testUrAdapterGetLeak \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[ERROR\]: Retained 1 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) +.* +\[ RUN \] urTest.testUrAdapterRetainLeak +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 +\[ERROR\]: Retained 2 reference\(s\) 
to handle [0-9xa-fA-F]+ +\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: +.* +\[ RUN \] urTest.testUrAdapterRetainNonexistent +\[ERROR\]: Attempting to retain nonexistent handle [0-9xa-fA-F]+ +.* +\[ RUN \] valDeviceTest.testUrContextCreateLeak \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained 1 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) +.* +\[ RUN \] valDeviceTest.testUrContextRetainLeak \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained 2 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) +.* +\[ RUN \] valDeviceTest.testUrContextRetainNonexistent +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[ERROR\]: Attempting to retain nonexistent handle [0-9xa-fA-F]+ -(.*) +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 +.* +\[ RUN \] valDeviceTest.testUrContextCreateSuccess \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -(.*) +.* +\[ RUN \] valDeviceTest.testUrContextRetainSuccess \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed 
to 0 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -(.*) +.* +\[ RUN \] valDeviceTest.testUrContextReleaseLeak \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 @@ -37,11 +56,12 @@ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained -1 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) +.* +\[ RUN \] valDeviceTest.testUrContextReleaseNonexistent \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained -1 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) +.* From d6930c70bd7406ef406d8c7d609be2efbda70b3f Mon Sep 17 00:00:00 2001 From: Krzysztof Swiecicki Date: Mon, 6 Nov 2023 17:58:15 +0100 Subject: [PATCH 25/32] Add ignore tag to the match.py script --- cmake/match.py | 167 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 111 insertions(+), 56 deletions(-) diff --git a/cmake/match.py b/cmake/match.py index c07cfbc384..3f3f4faff3 100755 --- a/cmake/match.py +++ b/cmake/match.py @@ -5,77 +5,132 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# check if all lines match in a file -# lines in a match file can contain regex inside of double curly braces {{}} +# Check if all input file content matches match file content. +# Lines in a match file can contain regex inside of double curly braces {{}}. +# Regex patterns are limited to single line. 
+# +# List of available special tags: +# {{OPT}} - makes content in the same line as the tag optional +# {{IGNORE}} - ignores all content until the next successfully matched line or the end of the input +# Special tags are mutually exclusive and are expected to be located at the start of a line. +# +import os import sys import re +from enum import Enum ## @brief print the whole content of input and match files -def print_content(input_lines, match_lines): +def print_content(input_lines, match_lines, ignored_lines): print("--- Input Lines " + "-" * 64) print("".join(input_lines).strip()) print("--- Match Lines " + "-" * 64) print("".join(match_lines).strip()) + print("--- Ignored Lines " + "-" * 62) + print("".join(ignored_lines).strip()) print("-" * 80) -if len(sys.argv) != 3: - print("Usage: python match.py ") - sys.exit(1) - -input_file = sys.argv[1] -match_file = sys.argv[2] - -with open(input_file, 'r') as input, open(match_file, 'r') as match: - input_lines = input.readlines() - match_lines = match.readlines() - -if len(match_lines) < len(input_lines): - print(f"Match length < input length (input: {len(input_lines)}, match: {len(match_lines)})") - print_content(input_lines, match_lines) - sys.exit(1) - -input_idx = 0 -opt = "{{OPT}}" -for i, match_line in enumerate(match_lines): - if match_line.startswith(opt): - optional_line = True - match_line = match_line[len(opt):] - else: - optional_line = False - - # split into parts at {{ }} - match_parts = re.split(r'\{{(.*?)\}}', match_line.strip()) - pattern = "" - for j, part in enumerate(match_parts): - if j % 2 == 0: - pattern += re.escape(part) - else: - pattern += part +## @brief print the incorrect match line +def print_incorrect_match(match_line, present, expected): + print("Line " + str(match_line) + " does not match") + print("is: " + present) + print("expected: " + expected) - # empty input file or end of input file, from now on match file must be optional - if not input_lines: - if optional_line is True: 
- continue - else: - print("End of input file or empty file.") - print("expected: " + match_line.strip()) + +## @brief pattern matching script status values +class Status(Enum): + INPUT_END = 1 + MATCH_END = 2 + INPUT_AND_MATCH_END = 3 + PROCESSING = 4 + + +## @brief check matching script status +def check_status(input_lines, match_lines): + if not input_lines and not match_lines: + return Status.INPUT_AND_MATCH_END + elif not input_lines: + return Status.INPUT_END + elif not match_lines: + return Status.MATCH_END + return Status.PROCESSING + + +## @brief pattern matching tags. +## Tags are expected to be at the start of the line. +class Tag(Enum): + OPT = "{{OPT}}" # makes the line optional + IGNORE = "{{IGNORE}}" # ignores all input until next match or end of input file + + +## @brief main function for the match file processing script +def main(): + if len(sys.argv) != 3: + print("Usage: python match.py ") + sys.exit(1) + + input_file = sys.argv[1] + match_file = sys.argv[2] + + with open(input_file, 'r') as input, open(match_file, 'r') as match: + input_lines = input.readlines() + match_lines = match.readlines() + + ignored_lines = [] + + input_idx = 0 + match_idx = 0 + tags_in_effect = [] + while True: + # check file status + status = check_status(input_lines[input_idx:], match_lines[match_idx:]) + if (status == Status.INPUT_AND_MATCH_END) or (status == Status.MATCH_END and Tag.IGNORE in tags_in_effect): + # all lines matched or the last line in match file is an ignore tag + sys.exit(0) + elif status == Status.MATCH_END: + print_incorrect_match(match_idx + 1, input_lines[input_idx].strip(), ""); + print_content(input_lines, match_lines, ignored_lines) sys.exit(1) - input_line = input_lines[input_idx].strip() - if not re.fullmatch(pattern, input_line): - if optional_line is True: - continue + input_line = input_lines[input_idx].strip() if input_idx < len(input_lines) else "" + match_line = match_lines[match_idx] + + # check for tags + if 
match_line.startswith(Tag.OPT.value): + tags_in_effect.append(Tag.OPT) + match_line = match_line[len(Tag.OPT.value):] + elif match_line.startswith(Tag.IGNORE.value): + tags_in_effect.append(Tag.IGNORE) + match_idx += 1 + continue # line with ignore tag should be skipped + + # split into parts at {{ }} + match_parts = re.split(r'\{{(.*?)\}}', match_line.strip()) + pattern = "" + for j, part in enumerate(match_parts): + if j % 2 == 0: + pattern += re.escape(part) + else: + pattern += part + + # match or process tags + if re.fullmatch(pattern, input_line): + input_idx += 1 + match_idx += 1 + tags_in_effect = [] + elif Tag.OPT in tags_in_effect: + match_idx += 1 + tags_in_effect.remove(Tag.OPT) + elif Tag.IGNORE in tags_in_effect: + ignored_lines.append(input_line + os.linesep) + input_idx += 1 else: - print("Line " + str(i+1) + " does not match") - print("is: " + input_line) - print("expected: " + match_line.strip()) - print_content(input_lines, match_lines) + print_incorrect_match(match_idx + 1, input_line, match_line.strip()) + print_content(input_lines, match_lines, ignored_lines) sys.exit(1) - else: - if (input_idx == len(input_lines) - 1): - input_lines = [] - else: - input_idx += 1 + + +if __name__ == "__main__": + main() From 2e10197050f9e1963d2771aefd015738f81d50f8 Mon Sep 17 00:00:00 2001 From: Krzysztof Swiecicki Date: Tue, 7 Nov 2023 12:39:07 +0100 Subject: [PATCH 26/32] Refactor leak checking match tests --- test/layers/validation/CMakeLists.txt | 41 +++-- test/layers/validation/leaks.out.match | 134 ++++++++--------- test/layers/validation/leaks_mt.out.match | 175 +++++++++++----------- 3 files changed, 187 insertions(+), 163 deletions(-) diff --git a/test/layers/validation/CMakeLists.txt b/test/layers/validation/CMakeLists.txt index 85d639d196..944202e0d2 100644 --- a/test/layers/validation/CMakeLists.txt +++ b/test/layers/validation/CMakeLists.txt @@ -4,20 +4,20 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception set(UR_VALIDATION_TEST_DIR 
${CMAKE_CURRENT_SOURCE_DIR}) +set(VAL_TEST_PREFIX validation_test) -function(add_validation_test name) - set(TEST_TARGET_NAME validation_test-${name}) - add_ur_executable(${TEST_TARGET_NAME} +function(add_validation_test_executable name) + add_ur_executable(${VAL_TEST_PREFIX}-${name} ${ARGN}) - target_link_libraries(${TEST_TARGET_NAME} + target_link_libraries(${VAL_TEST_PREFIX}-${name} PRIVATE ${PROJECT_NAME}::loader ${PROJECT_NAME}::headers ${PROJECT_NAME}::testing GTest::gtest_main) - add_test(NAME ${name} - COMMAND ${TEST_TARGET_NAME} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) +endfunction() + +function(set_validation_test_properties name) set_tests_properties(${name} PROPERTIES LABELS "validation") set_property(TEST ${name} PROPERTY ENVIRONMENT "UR_ENABLE_LAYERS=UR_LAYER_FULL_VALIDATION" @@ -25,11 +25,28 @@ function(add_validation_test name) "UR_LOG_VALIDATION=level:debug\;flush:debug\;output:stdout") endfunction() -function(add_validation_match_test name match_file) - add_validation_test(${name} ${ARGN}) - file(READ ${match_file} MATCH_STRING) - set_tests_properties(${name} PROPERTIES - PASS_REGULAR_EXPRESSION "${MATCH_STRING}") +function(add_validation_test name) + add_validation_test_executable(${name} ${ARGN}) + + add_test(NAME ${name} + COMMAND ${VAL_TEST_PREFIX}-${name} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + + set_validation_test_properties(${name}) +endfunction() + +function(add_validation_match_test name) + add_validation_test_executable(${name} ${ARGN}) + + add_test(NAME ${name} + COMMAND ${CMAKE_COMMAND} + -D MODE=stdout + -D TEST_FILE=$ + -D MATCH_FILE=${CMAKE_CURRENT_SOURCE_DIR}/${name}.out.match + -P ${PROJECT_SOURCE_DIR}/cmake/match.cmake + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + + set_validation_test_properties(${name}) endfunction() add_validation_test(parameters parameters.cpp) diff --git a/test/layers/validation/leaks.out.match b/test/layers/validation/leaks.out.match index 90f8713765..2a36a22263 100644 --- 
a/test/layers/validation/leaks.out.match +++ b/test/layers/validation/leaks.out.match @@ -1,67 +1,67 @@ -.* -\[ RUN \] urTest.testUrAdapterGetLeak -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[ERROR\]: Retained 1 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -.* -\[ RUN \] urTest.testUrAdapterRetainLeak -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 -\[ERROR\]: Retained 2 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -.* -\[ RUN \] urTest.testUrAdapterRetainNonexistent -\[ERROR\]: Attempting to retain nonexistent handle [0-9xa-fA-F]+ -.* -\[ RUN \] valDeviceTest.testUrContextCreateLeak -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Retained 1 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -.* -\[ RUN \] valDeviceTest.testUrContextRetainLeak -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Retained 2 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -.* -\[ RUN \] valDeviceTest.testUrContextRetainNonexistent -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[ERROR\]: Attempting to retain nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -.* -\[ RUN \] valDeviceTest.testUrContextCreateSuccess -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference 
count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -.* -\[ RUN \] valDeviceTest.testUrContextRetainSuccess -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -.* -\[ RUN \] valDeviceTest.testUrContextReleaseLeak -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Retained -1 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -.* -\[ RUN \] valDeviceTest.testUrContextReleaseNonexistent -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Retained -1 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -.* +{{IGNORE}} +[ RUN ] urTest.testUrAdapterGetLeak +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[ERROR]: Retained 1 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} +[ RUN ] urTest.testUrAdapterRetainLeak +[DEBUG]: Reference count 
for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 2 +[ERROR]: Retained 2 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} +[ RUN ] urTest.testUrAdapterRetainNonexistent +[ERROR]: Attempting to retain nonexistent handle {{[0-9xa-fA-F]+}} +{{IGNORE}} +[ RUN ] valDeviceTest.testUrContextCreateLeak +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Retained 1 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} +[ RUN ] valDeviceTest.testUrContextRetainLeak +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 2 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Retained 2 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} +[ RUN ] valDeviceTest.testUrContextRetainNonexistent +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[ERROR]: Attempting to retain nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +{{IGNORE}} +[ RUN ] valDeviceTest.testUrContextCreateSuccess +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +{{IGNORE}} +[ RUN ] valDeviceTest.testUrContextRetainSuccess +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle 
{{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 2 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +{{IGNORE}} +[ RUN ] valDeviceTest.testUrContextReleaseLeak +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Attempting to release nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to -1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Retained -1 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} +[ RUN ] valDeviceTest.testUrContextReleaseNonexistent +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[ERROR]: Attempting to release nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to -1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Retained -1 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} diff --git a/test/layers/validation/leaks_mt.out.match b/test/layers/validation/leaks_mt.out.match index 86de1e1d76..f1bd32f8b5 100644 --- a/test/layers/validation/leaks_mt.out.match +++ b/test/layers/validation/leaks_mt.out.match @@ -1,84 +1,91 @@ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 3 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Retained 3 
reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 3 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 4 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 5 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 6 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 7 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 8 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 9 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Retained 9 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Retained -1 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -1 -\[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -2 -\[ERROR\]: Attempting to 
release nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -3 -\[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -4 -\[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -5 -\[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -6 -\[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -7 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Retained -7 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Retained 1 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ 
changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Retained 1 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) +{{IGNORE}} +[ RUN ] threadCountForValDeviceTest/valDeviceTestMultithreaded.testUrContextRetainLeakMt/0 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 2 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 3 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Retained 3 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} +[ RUN ] threadCountForValDeviceTest/valDeviceTestMultithreaded.testUrContextRetainLeakMt/1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 2 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 3 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 4 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} 
changed to 5 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 6 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 7 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 8 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 9 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Retained 9 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} +[ RUN ] threadCountForValDeviceTest/valDeviceTestMultithreaded.testUrContextReleaseLeakMt/0 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Attempting to release nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to -1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Retained -1 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} +[ RUN ] threadCountForValDeviceTest/valDeviceTestMultithreaded.testUrContextReleaseLeakMt/1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Attempting to release nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to -1 +[ERROR]: Attempting to release nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to -2 +[ERROR]: Attempting to release nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to -3 +[ERROR]: Attempting to release nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to -4 
+[ERROR]: Attempting to release nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to -5 +[ERROR]: Attempting to release nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to -6 +[ERROR]: Attempting to release nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to -7 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Retained -7 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} +[ RUN ] threadCountForValDeviceTest/valDeviceTestMultithreaded.testUrContextRetainReleaseLeakMt/0 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Retained 1 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} +[ RUN ] threadCountForValDeviceTest/valDeviceTestMultithreaded.testUrContextRetainReleaseLeakMt/1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} 
+[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Retained 1 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} From 2317492f56717edf84ffd432816c0b2e5cf06c80 Mon Sep 17 00:00:00 2001 From: Krzysztof Swiecicki Date: Wed, 13 Dec 2023 12:10:21 +0100 Subject: [PATCH 27/32] [L0][HIP][CUDA] Suppress aborted adapter match tests When tests are aborted or failed at the assertion, the match script receives "Aborted" or "Segmentation fault" as an input and compares it with the match file. Previous match script allowed those tests to pass despite not matching this input. 
--- test/conformance/enqueue/enqueue_adapter_cuda.match | 1 + test/conformance/enqueue/enqueue_adapter_hip.match | 2 +- test/conformance/event/event_adapter_level_zero.match | 2 +- test/conformance/kernel/kernel_adapter_hip.match | 2 +- test/conformance/program/program_adapter_hip.match | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/test/conformance/enqueue/enqueue_adapter_cuda.match b/test/conformance/enqueue/enqueue_adapter_cuda.match index f6f0d3e591..2392247314 100644 --- a/test/conformance/enqueue/enqueue_adapter_cuda.match +++ b/test/conformance/enqueue/enqueue_adapter_cuda.match @@ -56,3 +56,4 @@ {{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidEventWaitList/NVIDIA_CUDA_BACKEND___{{.*}}___pitch__1__width__1__height__1 {{OPT}}urEnqueueUSMPrefetchWithParamTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_MIGRATION_FLAG_DEFAULT {{OPT}}urEnqueueUSMPrefetchWithParamTest.CheckWaitEvent/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_MIGRATION_FLAG_DEFAULT +{{OPT}}{{Segmentation fault|Aborted}} diff --git a/test/conformance/enqueue/enqueue_adapter_hip.match b/test/conformance/enqueue/enqueue_adapter_hip.match index 9d48681c1a..fe890b62b5 100644 --- a/test/conformance/enqueue/enqueue_adapter_hip.match +++ b/test/conformance/enqueue/enqueue_adapter_hip.match @@ -1,4 +1,4 @@ -{{OPT}}Segmentation Fault +{{OPT}}{{Segmentation fault|Aborted}} {{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.Success/AMD_HIP_BACKEND___{{.*}}_ {{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidEventWaitInvalidEvent/AMD_HIP_BACKEND___{{.*}}_ {{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidEventWaitInvalidEvent/AMD_HIP_BACKEND___{{.*}}_ diff --git a/test/conformance/event/event_adapter_level_zero.match b/test/conformance/event/event_adapter_level_zero.match index a316044ab1..c29f67cbc6 100644 --- a/test/conformance/event/event_adapter_level_zero.match +++ b/test/conformance/event/event_adapter_level_zero.match @@ -1,4 +1,4 @@ 
{{OPT}}urEventGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_EVENT_INFO_COMMAND_TYPE {{OPT}}urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_QUEUED {{OPT}}urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_SUBMIT -{{OPT}} Segmentation fault +{{OPT}}{{Segmentation fault|Aborted}} diff --git a/test/conformance/kernel/kernel_adapter_hip.match b/test/conformance/kernel/kernel_adapter_hip.match index 96d579f088..97864c4e70 100644 --- a/test/conformance/kernel/kernel_adapter_hip.match +++ b/test/conformance/kernel/kernel_adapter_hip.match @@ -1,4 +1,4 @@ -{{OPT}}Segmentation Fault +{{OPT}}{{Segmentation fault|Aborted}} {{OPT}}urKernelGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_KERNEL_INFO_NUM_REGS {{OPT}}urKernelGetInfoTest.InvalidSizeSmall/AMD_HIP_BACKEND___{{.*}}___UR_KERNEL_INFO_FUNCTION_NAME {{OPT}}urKernelGetInfoTest.InvalidSizeSmall/AMD_HIP_BACKEND___{{.*}}___UR_KERNEL_INFO_NUM_ARGS diff --git a/test/conformance/program/program_adapter_hip.match b/test/conformance/program/program_adapter_hip.match index 67f98ec2f7..1f95931e09 100644 --- a/test/conformance/program/program_adapter_hip.match +++ b/test/conformance/program/program_adapter_hip.match @@ -1,4 +1,4 @@ -{{OPT}}Segmentation Fault +{{OPT}}{{Segmentation fault|Aborted}} {{OPT}}urProgramCreateWithNativeHandleTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}_ {{OPT}}urProgramCreateWithNativeHandleTest.InvalidNullPointerProgram/AMD_HIP_BACKEND___{{.*}}_ {{OPT}}urProgramGetBuildInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_BUILD_INFO_BINARY_TYPE From 8c632085a6bf6b37823734c1c2de31221755d34b Mon Sep 17 00:00:00 2001 From: Krzysztof Swiecicki Date: Mon, 11 Dec 2023 11:11:17 +0100 Subject: [PATCH 28/32] [OpenCL] Fix suffixes in adapter match tests --- .../enqueue/enqueue_adapter_opencl.match | 70 +++++++++---------- 
.../kernel/kernel_adapter_opencl.match | 10 +-- .../memory/memory_adapter_opencl.match | 2 +- test/conformance/usm/usm_adapter_opencl.match | 32 ++++----- 4 files changed, 57 insertions(+), 57 deletions(-) diff --git a/test/conformance/enqueue/enqueue_adapter_opencl.match b/test/conformance/enqueue/enqueue_adapter_opencl.match index 54a5ee3762..a034083c87 100644 --- a/test/conformance/enqueue/enqueue_adapter_opencl.match +++ b/test/conformance/enqueue/enqueue_adapter_opencl.match @@ -1,35 +1,35 @@ -{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueMemBufferCopyRectTest.InvalidSize/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueMemBufferReadRectTest.InvalidSize/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueMemBufferWriteRectTest.InvalidSize/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DNegativeTest.OutOfBounds/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMAdviseTest.InvalidSizeTooLarge/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}}_ 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidNullHandleQueue/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidNullPointer/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidSize/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidEventWaitList/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMPrefetchTest.InvalidSizeTooLarge/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueMemBufferCopyRectTest.InvalidSize/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueMemBufferReadRectTest.InvalidSize/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueMemBufferWriteRectTest.InvalidSize/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} 
+{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DNegativeTest.OutOfBounds/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMAdviseTest.InvalidSizeTooLarge/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidNullHandleQueue/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidNullPointer/Intel_R__OpenCL___{{.*}} 
+{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidSize/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidEventWaitList/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMPrefetchTest.InvalidSizeTooLarge/Intel_R__OpenCL___{{.*}} diff --git a/test/conformance/kernel/kernel_adapter_opencl.match b/test/conformance/kernel/kernel_adapter_opencl.match index 799225be19..9a71945c45 100644 --- a/test/conformance/kernel/kernel_adapter_opencl.match +++ b/test/conformance/kernel/kernel_adapter_opencl.match @@ -1,5 +1,5 @@ -urKernelSetArgValueTest.InvalidKernelArgumentSize/Intel_R__OpenCL___{{.*}}_ -urKernelSetSpecializationConstantsTest.Success/Intel_R__OpenCL___{{.*}}_ -urKernelSetSpecializationConstantsTest.InvalidNullHandleKernel/Intel_R__OpenCL___{{.*}}_ -urKernelSetSpecializationConstantsTest.InvalidNullPointerSpecConstants/Intel_R__OpenCL___{{.*}}_ -urKernelSetSpecializationConstantsTest.InvalidSizeCount/Intel_R__OpenCL___{{.*}}_ +urKernelSetArgValueTest.InvalidKernelArgumentSize/Intel_R__OpenCL___{{.*}} +urKernelSetSpecializationConstantsTest.Success/Intel_R__OpenCL___{{.*}} +urKernelSetSpecializationConstantsTest.InvalidNullHandleKernel/Intel_R__OpenCL___{{.*}} +urKernelSetSpecializationConstantsTest.InvalidNullPointerSpecConstants/Intel_R__OpenCL___{{.*}} +urKernelSetSpecializationConstantsTest.InvalidSizeCount/Intel_R__OpenCL___{{.*}} diff --git a/test/conformance/memory/memory_adapter_opencl.match b/test/conformance/memory/memory_adapter_opencl.match index c01e55d804..23dfbbae8c 100644 --- a/test/conformance/memory/memory_adapter_opencl.match +++ b/test/conformance/memory/memory_adapter_opencl.match @@ -1 +1 @@ -urMemImageCreateTest.InvalidImageDescStype/Intel_R__OpenCL___{{.*}}_ +urMemImageCreateTest.InvalidImageDescStype/Intel_R__OpenCL___{{.*}} diff --git a/test/conformance/usm/usm_adapter_opencl.match b/test/conformance/usm/usm_adapter_opencl.match index 16211ba8e7..3e729141b9 100644 --- a/test/conformance/usm/usm_adapter_opencl.match +++ 
b/test/conformance/usm/usm_adapter_opencl.match @@ -12,24 +12,24 @@ urUSMHostAllocTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}___UsePoolEn urUSMHostAllocTest.InvalidNullPtrMem/Intel_R__OpenCL___{{.*}}___UsePoolEnabled urUSMHostAllocTest.InvalidUSMSize/Intel_R__OpenCL___{{.*}}___UsePoolEnabled urUSMHostAllocTest.InvalidValueAlignPowerOfTwo/Intel_R__OpenCL___{{.*}}___UsePoolEnabled -urUSMPoolCreateTest.Success/Intel_R__OpenCL___{{.*}}_ -urUSMPoolCreateTest.SuccessWithFlag/Intel_R__OpenCL___{{.*}}_ -urUSMPoolCreateTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}_ -urUSMPoolCreateTest.InvalidNullPointerPoolDesc/Intel_R__OpenCL___{{.*}}_ -urUSMPoolCreateTest.InvalidNullPointerPool/Intel_R__OpenCL___{{.*}}_ -urUSMPoolCreateTest.InvalidEnumerationFlags/Intel_R__OpenCL___{{.*}}_ +urUSMPoolCreateTest.Success/Intel_R__OpenCL___{{.*}} +urUSMPoolCreateTest.SuccessWithFlag/Intel_R__OpenCL___{{.*}} +urUSMPoolCreateTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}} +urUSMPoolCreateTest.InvalidNullPointerPoolDesc/Intel_R__OpenCL___{{.*}} +urUSMPoolCreateTest.InvalidNullPointerPool/Intel_R__OpenCL___{{.*}} +urUSMPoolCreateTest.InvalidEnumerationFlags/Intel_R__OpenCL___{{.*}} urUSMPoolGetInfoTestWithInfoParam.Success/Intel_R__OpenCL___{{.*}}___UR_USM_POOL_INFO_CONTEXT urUSMPoolGetInfoTestWithInfoParam.Success/Intel_R__OpenCL___{{.*}}___UR_USM_POOL_INFO_REFERENCE_COUNT -urUSMPoolGetInfoTest.InvalidNullHandlePool/Intel_R__OpenCL___{{.*}}_ -urUSMPoolGetInfoTest.InvalidEnumerationProperty/Intel_R__OpenCL___{{.*}}_ -urUSMPoolGetInfoTest.InvalidSizeZero/Intel_R__OpenCL___{{.*}}_ -urUSMPoolGetInfoTest.InvalidSizeTooSmall/Intel_R__OpenCL___{{.*}}_ -urUSMPoolGetInfoTest.InvalidNullPointerPropValue/Intel_R__OpenCL___{{.*}}_ -urUSMPoolGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__OpenCL___{{.*}}_ -urUSMPoolDestroyTest.Success/Intel_R__OpenCL___{{.*}}_ -urUSMPoolDestroyTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}_ 
-urUSMPoolRetainTest.Success/Intel_R__OpenCL___{{.*}}_ -urUSMPoolRetainTest.InvalidNullHandlePool/Intel_R__OpenCL___{{.*}}_ +urUSMPoolGetInfoTest.InvalidNullHandlePool/Intel_R__OpenCL___{{.*}} +urUSMPoolGetInfoTest.InvalidEnumerationProperty/Intel_R__OpenCL___{{.*}} +urUSMPoolGetInfoTest.InvalidSizeZero/Intel_R__OpenCL___{{.*}} +urUSMPoolGetInfoTest.InvalidSizeTooSmall/Intel_R__OpenCL___{{.*}} +urUSMPoolGetInfoTest.InvalidNullPointerPropValue/Intel_R__OpenCL___{{.*}} +urUSMPoolGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__OpenCL___{{.*}} +urUSMPoolDestroyTest.Success/Intel_R__OpenCL___{{.*}} +urUSMPoolDestroyTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}} +urUSMPoolRetainTest.Success/Intel_R__OpenCL___{{.*}} +urUSMPoolRetainTest.InvalidNullHandlePool/Intel_R__OpenCL___{{.*}} urUSMSharedAllocTest.Success/Intel_R__OpenCL___{{.*}}___UsePoolEnabled urUSMSharedAllocTest.SuccessWithDescriptors/Intel_R__OpenCL___{{.*}}___UsePoolEnabled urUSMSharedAllocTest.SuccessWithMultipleAdvices/Intel_R__OpenCL___{{.*}}___UsePoolEnabled From e6e822ad3bfbc611e3f7921e305c07346cbe543b Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Thu, 11 Jan 2024 16:13:34 +0000 Subject: [PATCH 29/32] [HIP] Fix maybe uninitialized warnings --- source/adapters/hip/enqueue.cpp | 16 ++++++++-------- source/adapters/hip/kernel.cpp | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 68e3e665d2..a75d4ef9d1 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -1071,8 +1071,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( hipArray *Array = std::get(hImage->Mem).getArray(Device); - hipArray_Format Format; - size_t NumChannels; + hipArray_Format Format{}; + size_t NumChannels{}; UR_CHECK_ERROR(getArrayDesc(Array, Format, NumChannels)); int ElementByteSize = imageElementByteSize(Format); @@ -1132,8 +1132,8 @@ UR_APIEXPORT ur_result_t UR_APICALL 
urEnqueueMemImageWrite( hipArray *Array = std::get(hImage->Mem).getArray(hQueue->getDevice()); - hipArray_Format Format; - size_t NumChannels; + hipArray_Format Format{}; + size_t NumChannels{}; UR_CHECK_ERROR(getArrayDesc(Array, Format, NumChannels)); int ElementByteSize = imageElementByteSize(Format); @@ -1195,14 +1195,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( hipArray *SrcArray = std::get(hImageSrc->Mem).getArray(hQueue->getDevice()); - hipArray_Format SrcFormat; - size_t SrcNumChannels; + hipArray_Format SrcFormat{}; + size_t SrcNumChannels{}; UR_CHECK_ERROR(getArrayDesc(SrcArray, SrcFormat, SrcNumChannels)); hipArray *DstArray = std::get(hImageDst->Mem).getArray(hQueue->getDevice()); - hipArray_Format DstFormat; - size_t DstNumChannels; + hipArray_Format DstFormat{}; + size_t DstNumChannels{}; UR_CHECK_ERROR(getArrayDesc(DstArray, DstFormat, DstNumChannels)); UR_ASSERT(SrcFormat == DstFormat, diff --git a/source/adapters/hip/kernel.cpp b/source/adapters/hip/kernel.cpp index e3eb37dc88..66b9fe4403 100644 --- a/source/adapters/hip/kernel.cpp +++ b/source/adapters/hip/kernel.cpp @@ -280,7 +280,7 @@ urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, hKernel->Args.addMemObjArg(argIndex, hArgValue, Properties->memoryAccess); if (hArgValue->isImage()) { auto array = std::get(hArgValue->Mem).getArray(Device); - hipArray_Format Format; + hipArray_Format Format{}; size_t NumChannels; UR_CHECK_ERROR(getArrayDesc(array, Format, NumChannels)); if (Format != HIP_AD_FORMAT_UNSIGNED_INT32 && From d45f09ea303aaf9b4013298382445849b910319a Mon Sep 17 00:00:00 2001 From: Wenju He Date: Sat, 25 Nov 2023 12:30:36 +0800 Subject: [PATCH 30/32] [NFC] Add utility function ur_unreachable to simplify code --- source/adapters/cuda/usm.cpp | 6 +----- source/adapters/hip/usm.cpp | 7 +------ source/adapters/native_cpu/kernel.cpp | 7 ++----- source/common/ur_util.hpp | 8 ++++++++ tools/urtrace/collector.cpp | 2 +- 5 files changed, 13 insertions(+), 17 
deletions(-) diff --git a/source/adapters/cuda/usm.cpp b/source/adapters/cuda/usm.cpp index 54c9b68204..e844116312 100644 --- a/source/adapters/cuda/usm.cpp +++ b/source/adapters/cuda/usm.cpp @@ -227,11 +227,7 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, return ReturnValue(UR_USM_TYPE_HOST); } // should never get here -#ifdef _MSC_VER - __assume(0); -#else - __builtin_unreachable(); -#endif + ur_unreachable(); } case UR_USM_ALLOC_INFO_BASE_PTR: { #if CUDA_VERSION >= 10020 diff --git a/source/adapters/hip/usm.cpp b/source/adapters/hip/usm.cpp index abd8c2e97f..5d3d576d63 100644 --- a/source/adapters/hip/usm.cpp +++ b/source/adapters/hip/usm.cpp @@ -190,12 +190,7 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, return ReturnValue(UR_USM_TYPE_HOST); } // should never get here -#ifdef _MSC_VER - __assume(0); -#else - __builtin_unreachable(); -#endif - return ReturnValue(UR_USM_TYPE_UNKNOWN); + ur_unreachable(); } case UR_USM_ALLOC_INFO_DEVICE: { // get device index associated with this pointer diff --git a/source/adapters/native_cpu/kernel.cpp b/source/adapters/native_cpu/kernel.cpp index 2a7e765e41..a6fc670898 100644 --- a/source/adapters/native_cpu/kernel.cpp +++ b/source/adapters/native_cpu/kernel.cpp @@ -9,6 +9,7 @@ //===----------------------------------------------------------------------===// #include "ur_api.h" +#include "ur_util.hpp" #include "common.hpp" #include "kernel.hpp" @@ -172,11 +173,7 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, return ReturnValue(0); } case UR_KERNEL_SUB_GROUP_INFO_FORCE_UINT32: { -#ifdef _MSC_VER - __assume(0); -#else - __builtin_unreachable(); -#endif + ur_unreachable(); } } DIE_NO_IMPLEMENTATION; diff --git a/source/common/ur_util.hpp b/source/common/ur_util.hpp index 5a34aa6584..aa67dc764b 100644 --- a/source/common/ur_util.hpp +++ b/source/common/ur_util.hpp @@ -298,4 +298,12 @@ inline ur_result_t exceptionToResult(std::exception_ptr eptr) { 
template inline constexpr bool ur_always_false_t = false; +[[noreturn]] inline void ur_unreachable() { +#ifdef _MSC_VER + __assume(0); +#else + __builtin_unreachable(); +#endif +} + #endif /* UR_UTIL_H */ diff --git a/tools/urtrace/collector.cpp b/tools/urtrace/collector.cpp index a78cb82d08..e7872dfbca 100644 --- a/tools/urtrace/collector.cpp +++ b/tools/urtrace/collector.cpp @@ -277,7 +277,7 @@ std::unique_ptr create_writer() { case OUTPUT_JSON: return std::make_unique(); default: - assert(0); /* unreachable */ + ur_unreachable(); } return nullptr; } From 5cde53733d093c4bf301c72e0892e71653c1c624 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Mon, 27 Nov 2023 19:28:59 +0800 Subject: [PATCH 31/32] ur_unreachable -> ur::unreachable --- source/adapters/cuda/usm.cpp | 2 +- source/adapters/hip/usm.cpp | 2 +- source/adapters/native_cpu/kernel.cpp | 2 +- source/common/ur_util.hpp | 4 +++- tools/urtrace/collector.cpp | 2 +- 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/source/adapters/cuda/usm.cpp b/source/adapters/cuda/usm.cpp index e844116312..837b4de6c7 100644 --- a/source/adapters/cuda/usm.cpp +++ b/source/adapters/cuda/usm.cpp @@ -227,7 +227,7 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, return ReturnValue(UR_USM_TYPE_HOST); } // should never get here - ur_unreachable(); + ur::unreachable(); } case UR_USM_ALLOC_INFO_BASE_PTR: { #if CUDA_VERSION >= 10020 diff --git a/source/adapters/hip/usm.cpp b/source/adapters/hip/usm.cpp index 5d3d576d63..e660c1aee0 100644 --- a/source/adapters/hip/usm.cpp +++ b/source/adapters/hip/usm.cpp @@ -190,7 +190,7 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, return ReturnValue(UR_USM_TYPE_HOST); } // should never get here - ur_unreachable(); + ur::unreachable(); } case UR_USM_ALLOC_INFO_DEVICE: { // get device index associated with this pointer diff --git a/source/adapters/native_cpu/kernel.cpp b/source/adapters/native_cpu/kernel.cpp index a6fc670898..5a7a286adc 100644 
--- a/source/adapters/native_cpu/kernel.cpp +++ b/source/adapters/native_cpu/kernel.cpp @@ -173,7 +173,7 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, return ReturnValue(0); } case UR_KERNEL_SUB_GROUP_INFO_FORCE_UINT32: { - ur_unreachable(); + ur::unreachable(); } } DIE_NO_IMPLEMENTATION; diff --git a/source/common/ur_util.hpp b/source/common/ur_util.hpp index aa67dc764b..51688cbe32 100644 --- a/source/common/ur_util.hpp +++ b/source/common/ur_util.hpp @@ -298,12 +298,14 @@ inline ur_result_t exceptionToResult(std::exception_ptr eptr) { template inline constexpr bool ur_always_false_t = false; -[[noreturn]] inline void ur_unreachable() { +namespace ur { +[[noreturn]] inline void unreachable() { #ifdef _MSC_VER __assume(0); #else __builtin_unreachable(); #endif } +} // namespace ur #endif /* UR_UTIL_H */ diff --git a/tools/urtrace/collector.cpp b/tools/urtrace/collector.cpp index e7872dfbca..766e7c9dfe 100644 --- a/tools/urtrace/collector.cpp +++ b/tools/urtrace/collector.cpp @@ -277,7 +277,7 @@ std::unique_ptr create_writer() { case OUTPUT_JSON: return std::make_unique(); default: - ur_unreachable(); + ur::unreachable(); } return nullptr; } From 5accf50955ccd807f09024f15ea7fe7d0a008093 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Tue, 28 Nov 2023 08:13:08 +0800 Subject: [PATCH 32/32] include ur_util.hpp in cuda/usm.cpp hip/usm.cpp --- source/adapters/cuda/usm.cpp | 1 + source/adapters/hip/usm.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/source/adapters/cuda/usm.cpp b/source/adapters/cuda/usm.cpp index 837b4de6c7..8929fb7fa1 100644 --- a/source/adapters/cuda/usm.cpp +++ b/source/adapters/cuda/usm.cpp @@ -17,6 +17,7 @@ #include "event.hpp" #include "platform.hpp" #include "queue.hpp" +#include "ur_util.hpp" #include "usm.hpp" #include diff --git a/source/adapters/hip/usm.cpp b/source/adapters/hip/usm.cpp index e660c1aee0..9d084f7b4e 100644 --- a/source/adapters/hip/usm.cpp +++ b/source/adapters/hip/usm.cpp @@ -15,6 
+15,7 @@ #include "context.hpp" #include "device.hpp" #include "platform.hpp" +#include "ur_util.hpp" #include "usm.hpp" /// USM: Implements USM Host allocations using HIP Pinned Memory