From c55dc2af42b5c2dd03b0c54f90ab001d1b6b8285 Mon Sep 17 00:00:00 2001 From: konrad Date: Fri, 6 Oct 2023 12:58:14 +0100 Subject: [PATCH 1/4] Resolved conflicts --- source/adapters/cuda/device.cpp | 8 ++------ source/adapters/cuda/device.hpp | 6 +++--- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index 76984ca744..a4877236ae 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -501,12 +501,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue( static_cast(hDevice->getMaxChosenLocalMem())); } else { - int LocalMemSize = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &LocalMemSize, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, - hDevice->get())); - detail::ur::assertion(LocalMemSize >= 0); - return ReturnValue(static_cast(LocalMemSize)); + return ReturnValue( + static_cast(hDevice->getMaxCapacityLocalMem())); } } case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: { diff --git a/source/adapters/cuda/device.hpp b/source/adapters/cuda/device.hpp index 919f813e4e..5ad70672e9 100644 --- a/source/adapters/cuda/device.hpp +++ b/source/adapters/cuda/device.hpp @@ -45,6 +45,9 @@ struct ur_device_handle_t_ { UR_CHECK_ERROR(cuDeviceGetAttribute( &MaxRegsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, cuDevice)); + UR_CHECK_ERROR(cuDeviceGetAttribute( + &MaxCapacityLocalMem, + CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, cuDevice)); // Set local mem max size if env var is present static const char *LocalMemSizePtrUR = @@ -56,9 +59,6 @@ struct ur_device_handle_t_ { : (LocalMemSizePtrPI ? LocalMemSizePtrPI : nullptr); if (LocalMemSizePtr) { - cuDeviceGetAttribute( - &MaxCapacityLocalMem, - CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, cuDevice); MaxChosenLocalMem = std::atoi(LocalMemSizePtr); MaxLocalMemSizeChosen = true; } From c86b8412f02a8787a9e1fc84c9a187864c6b9606 Mon Sep 17 00:00:00 2001 From: konrad Date: Fri, 3 Nov 2023 11:20:31 +0000 Subject: [PATCH 2/4] Moved conflicted changes to setKernelParams --- source/adapters/cuda/enqueue.cpp | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/source/adapters/cuda/enqueue.cpp b/source/adapters/cuda/enqueue.cpp index 6e6515908e..117261ed11 100644 --- a/source/adapters/cuda/enqueue.cpp +++ b/source/adapters/cuda/enqueue.cpp @@ -284,12 +284,18 @@ setKernelParams(const ur_context_handle_t Context, CudaImplicitOffset); } - if (Context->getDevice()->maxLocalMemSizeChosen()) { + auto Device = Context->getDevice(); + if (LocalSize > static_cast(Device->getMaxCapacityLocalMem())) { + setErrorMessage("Too much local memory allocated for device", + UR_RESULT_ERROR_ADAPTER_SPECIFIC); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + } + + if (Device->maxLocalMemSizeChosen()) { // Set up local memory requirements for kernel. - auto Device = Context->getDevice(); if (Device->getMaxChosenLocalMem() < 0) { bool EnvVarHasURPrefix = - std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE") != nullptr; + (std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE") != nullptr); setErrorMessage(EnvVarHasURPrefix ? "Invalid value specified for " "UR_CUDA_MAX_LOCAL_MEM_SIZE" : "Invalid value specified for " @@ -297,14 +303,9 @@ setKernelParams(const ur_context_handle_t Context, UR_RESULT_ERROR_ADAPTER_SPECIFIC); return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } - if (LocalSize > static_cast(Device->getMaxCapacityLocalMem())) { - setErrorMessage("Too much local memory allocated for device", - UR_RESULT_ERROR_ADAPTER_SPECIFIC); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } if (LocalSize > static_cast(Device->getMaxChosenLocalMem())) { bool EnvVarHasURPrefix = - std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE") != nullptr; + (std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE") != nullptr); setErrorMessage( EnvVarHasURPrefix ? "Local memory for kernel exceeds the amount requested using " @@ -319,6 +320,10 @@ setKernelParams(const ur_context_handle_t Context, UR_CHECK_ERROR(cuFuncSetAttribute( CuFunc, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, Device->getMaxChosenLocalMem())); + + } else { + UR_CHECK_ERROR(cuFuncSetAttribute( + CuFunc, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, LocalSize)); } } catch (ur_result_t Err) { From 17cb185da3e344d515f79463b4e261d2cc09384c Mon Sep 17 00:00:00 2001 From: konrad Date: Fri, 3 Nov 2023 11:54:13 +0000 Subject: [PATCH 3/4] Removed unnecessary bracket change --- source/adapters/cuda/enqueue.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/adapters/cuda/enqueue.cpp b/source/adapters/cuda/enqueue.cpp index 117261ed11..40361339bd 100644 --- a/source/adapters/cuda/enqueue.cpp +++ b/source/adapters/cuda/enqueue.cpp @@ -295,7 +295,7 @@ setKernelParams(const ur_context_handle_t Context, // Set up local memory requirements for kernel. if (Device->getMaxChosenLocalMem() < 0) { bool EnvVarHasURPrefix = - (std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE") != nullptr); + std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE") != nullptr; setErrorMessage(EnvVarHasURPrefix ? "Invalid value specified for " "UR_CUDA_MAX_LOCAL_MEM_SIZE" : "Invalid value specified for " @@ -305,7 +305,7 @@ setKernelParams(const ur_context_handle_t Context, } if (LocalSize > static_cast(Device->getMaxChosenLocalMem())) { bool EnvVarHasURPrefix = - (std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE") != nullptr); + std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE") != nullptr; setErrorMessage( EnvVarHasURPrefix ? "Local memory for kernel exceeds the amount requested using " From d5a46915477f6d9eb9f0ac292fe6f94195850316 Mon Sep 17 00:00:00 2001 From: konrad Date: Fri, 3 Nov 2023 12:19:44 +0000 Subject: [PATCH 4/4] Changed the error message --- source/adapters/cuda/enqueue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/cuda/enqueue.cpp b/source/adapters/cuda/enqueue.cpp index 40361339bd..5761f24e0a 100644 --- a/source/adapters/cuda/enqueue.cpp +++ b/source/adapters/cuda/enqueue.cpp @@ -286,7 +286,7 @@ setKernelParams(const ur_context_handle_t Context, auto Device = Context->getDevice(); if (LocalSize > static_cast(Device->getMaxCapacityLocalMem())) { - setErrorMessage("Too much local memory allocated for device", + setErrorMessage("Excessive allocation of local memory on the device", UR_RESULT_ERROR_ADAPTER_SPECIFIC); return UR_RESULT_ERROR_ADAPTER_SPECIFIC; }