Skip to content

Commit

Permalink
Merge pull request #931 from konradkusiak97/konradkusiak/LocalMemSizeQuery
Browse files Browse the repository at this point in the history

[UR] [CUDA] Changed the output of querying localMemSize
  • Loading branch information
kbenzie authored Nov 6, 2023
2 parents a62423d + d5a4691 commit 0e24ab8
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 16 deletions.
8 changes: 2 additions & 6 deletions source/adapters/cuda/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -501,12 +501,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
return ReturnValue(
static_cast<uint64_t>(hDevice->getMaxChosenLocalMem()));
} else {
int LocalMemSize = 0;
UR_CHECK_ERROR(cuDeviceGetAttribute(
&LocalMemSize, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
hDevice->get()));
detail::ur::assertion(LocalMemSize >= 0);
return ReturnValue(static_cast<uint64_t>(LocalMemSize));
return ReturnValue(
static_cast<uint64_t>(hDevice->getMaxCapacityLocalMem()));
}
}
case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: {
Expand Down
6 changes: 3 additions & 3 deletions source/adapters/cuda/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ struct ur_device_handle_t_ {
UR_CHECK_ERROR(cuDeviceGetAttribute(
&MaxRegsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK,
cuDevice));
UR_CHECK_ERROR(cuDeviceGetAttribute(
&MaxCapacityLocalMem,
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, cuDevice));

// Set local mem max size if env var is present
static const char *LocalMemSizePtrUR =
Expand All @@ -56,9 +59,6 @@ struct ur_device_handle_t_ {
: (LocalMemSizePtrPI ? LocalMemSizePtrPI : nullptr);

if (LocalMemSizePtr) {
cuDeviceGetAttribute(
&MaxCapacityLocalMem,
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, cuDevice);
MaxChosenLocalMem = std::atoi(LocalMemSizePtr);
MaxLocalMemSizeChosen = true;
}
Expand Down
19 changes: 12 additions & 7 deletions source/adapters/cuda/enqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,9 +284,15 @@ setKernelParams(const ur_context_handle_t Context,
CudaImplicitOffset);
}

if (Context->getDevice()->maxLocalMemSizeChosen()) {
auto Device = Context->getDevice();
if (LocalSize > static_cast<uint32_t>(Device->getMaxCapacityLocalMem())) {
setErrorMessage("Excessive allocation of local memory on the device",
UR_RESULT_ERROR_ADAPTER_SPECIFIC);
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
}

if (Device->maxLocalMemSizeChosen()) {
// Set up local memory requirements for kernel.
auto Device = Context->getDevice();
if (Device->getMaxChosenLocalMem() < 0) {
bool EnvVarHasURPrefix =
std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE") != nullptr;
Expand All @@ -297,11 +303,6 @@ setKernelParams(const ur_context_handle_t Context,
UR_RESULT_ERROR_ADAPTER_SPECIFIC);
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
}
if (LocalSize > static_cast<uint32_t>(Device->getMaxCapacityLocalMem())) {
setErrorMessage("Too much local memory allocated for device",
UR_RESULT_ERROR_ADAPTER_SPECIFIC);
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
}
if (LocalSize > static_cast<uint32_t>(Device->getMaxChosenLocalMem())) {
bool EnvVarHasURPrefix =
std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE") != nullptr;
Expand All @@ -319,6 +320,10 @@ setKernelParams(const ur_context_handle_t Context,
UR_CHECK_ERROR(cuFuncSetAttribute(
CuFunc, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES,
Device->getMaxChosenLocalMem()));

} else {
UR_CHECK_ERROR(cuFuncSetAttribute(
CuFunc, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, LocalSize));
}

} catch (ur_result_t Err) {
Expand Down

0 comments on commit 0e24ab8

Please sign in to comment.