From 74f27e2a8f7beb698c90b2058756dfae7b50f679 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Mon, 23 Oct 2023 17:39:04 +0100 Subject: [PATCH 1/3] [CUDA] Change hint functions to return UR_SUCCESS - The UR spec was recently changed to make hint entryponts always return UR_RESULT_SUCCESS. This commit changes the CUDA adapter to be conformant with this change. - This commit also changes the type of PointerRangeSize which was causing a stack corruption. --- source/adapters/cuda/enqueue.cpp | 67 ++++++++++++++------------------ 1 file changed, 30 insertions(+), 37 deletions(-) diff --git a/source/adapters/cuda/enqueue.cpp b/source/adapters/cuda/enqueue.cpp index 5761f24e0a..6a7b777180 100644 --- a/source/adapters/cuda/enqueue.cpp +++ b/source/adapters/cuda/enqueue.cpp @@ -1355,36 +1355,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( ur_queue_handle_t hQueue, const void *pMem, size_t size, ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - unsigned int PointerRangeSize = 0; + std::ignore = flags; + + size_t PointerRangeSize = 0; UR_CHECK_ERROR(cuPointerGetAttribute( &PointerRangeSize, CU_POINTER_ATTRIBUTE_RANGE_SIZE, (CUdeviceptr)pMem)); UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE); ur_device_handle_t Device = hQueue->getContext()->getDevice(); + bool Supported = true; // Certain cuda devices and Windows do not have support for some Unified // Memory features. cuMemPrefetchAsync requires concurrent memory access - // for managed memory. Therfore, ignore prefetch hint if concurrent managed + // for managed memory. Therefore, ignore prefetch hint if concurrent managed // memory access is not available. if (!getAttribute(Device, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) { - setErrorMessage("Prefetch hint ignored as device does not support " - "concurrent managed access", - UR_RESULT_SUCCESS); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + Supported = false; } unsigned int IsManaged; UR_CHECK_ERROR(cuPointerGetAttribute( &IsManaged, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr)pMem)); if (!IsManaged) { - setErrorMessage("Prefetch hint ignored as prefetch only works with USM", - UR_RESULT_SUCCESS); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + Supported = false; } - // flags is currently unused so fail if set - if (flags != 0) - return UR_RESULT_ERROR_INVALID_VALUE; - ur_result_t Result = UR_RESULT_SUCCESS; std::unique_ptr EventPtr{nullptr}; @@ -1399,8 +1393,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( UR_COMMAND_MEM_BUFFER_COPY, hQueue, CuStream)); UR_CHECK_ERROR(EventPtr->start()); } - UR_CHECK_ERROR( - cuMemPrefetchAsync((CUdeviceptr)pMem, size, Device->get(), CuStream)); + if (Supported) { + UR_CHECK_ERROR( + cuMemPrefetchAsync((CUdeviceptr)pMem, size, Device->get(), CuStream)); + } if (phEvent) { UR_CHECK_ERROR(EventPtr->record()); *phEvent = EventPtr.release(); @@ -1415,11 +1411,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) { - unsigned int PointerRangeSize = 0; + size_t PointerRangeSize = 0; UR_CHECK_ERROR(cuPointerGetAttribute( &PointerRangeSize, CU_POINTER_ATTRIBUTE_RANGE_SIZE, (CUdeviceptr)pMem)); UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE); + bool Supported = true; // Certain cuda devices and Windows do not have support for some Unified // Memory features. Passing CU_MEM_ADVISE_SET/CLEAR_PREFERRED_LOCATION and // to cuMemAdvise on a GPU device requires the GPU device to report a non-zero @@ -1432,10 +1429,7 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, (advice & UR_USM_ADVICE_FLAG_DEFAULT)) { ur_device_handle_t Device = hQueue->getContext()->getDevice(); if (!getAttribute(Device, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) { - setErrorMessage("Mem advise ignored as device does not support " - "concurrent managed access", - UR_RESULT_SUCCESS); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + Supported = false; } // TODO: If ptr points to valid system-allocated pageable memory we should @@ -1447,10 +1441,7 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, UR_CHECK_ERROR(cuPointerGetAttribute( &IsManaged, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr)pMem)); if (!IsManaged) { - setErrorMessage( - "Memory advice ignored as memory advices only works with USM", - UR_RESULT_SUCCESS); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + Supported = false; } ur_result_t Result = UR_RESULT_SUCCESS; @@ -1466,19 +1457,21 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, UR_CHECK_ERROR(EventPtr->start()); } - if (advice & UR_USM_ADVICE_FLAG_DEFAULT) { - UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size, - CU_MEM_ADVISE_UNSET_READ_MOSTLY, - hQueue->getContext()->getDevice()->get())); - UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size, - CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, - hQueue->getContext()->getDevice()->get())); - UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size, - CU_MEM_ADVISE_UNSET_ACCESSED_BY, - hQueue->getContext()->getDevice()->get())); - } else { - Result = setCuMemAdvise((CUdeviceptr)pMem, size, advice, - hQueue->getContext()->getDevice()->get()); + if (Supported) { + if (advice & UR_USM_ADVICE_FLAG_DEFAULT) { + UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size, + CU_MEM_ADVISE_UNSET_READ_MOSTLY, + hQueue->getContext()->getDevice()->get())); + UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size, + CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, + hQueue->getContext()->getDevice()->get())); + UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size, + CU_MEM_ADVISE_UNSET_ACCESSED_BY, + hQueue->getContext()->getDevice()->get())); + } else { + Result = setCuMemAdvise((CUdeviceptr)pMem, size, advice, + hQueue->getContext()->getDevice()->get()); + } } if (phEvent) { From e1902fcd35f581e5f3f304601e7c0b47d860ccef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Thu, 9 Nov 2023 14:27:27 +0000 Subject: [PATCH 2/3] Address review comments --- source/adapters/cuda/enqueue.cpp | 62 ++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/source/adapters/cuda/enqueue.cpp b/source/adapters/cuda/enqueue.cpp index 6a7b777180..cd09afb80f 100644 --- a/source/adapters/cuda/enqueue.cpp +++ b/source/adapters/cuda/enqueue.cpp @@ -76,6 +76,7 @@ void getUSMHostOrDevicePtr(PtrT USMPtr, CUmemorytype *OutMemType, ur_result_t setCuMemAdvise(CUdeviceptr DevPtr, size_t Size, ur_usm_advice_flags_t URAdviceFlags, CUdevice Device) { + std::unordered_map URToCUMemAdviseDeviceFlagsMap = { {UR_USM_ADVICE_FLAG_SET_READ_MOSTLY, CU_MEM_ADVISE_SET_READ_MOSTLY}, @@ -121,7 +122,10 @@ ur_result_t setCuMemAdvise(CUdeviceptr DevPtr, size_t Size, for (auto &UnmappedFlag : UnmappedMemAdviceFlags) { if (URAdviceFlags & UnmappedFlag) { - throw UR_RESULT_ERROR_INVALID_ENUMERATION; + setErrorMessage("Memory advice ignored because the CUDA backend does not " + "support some of the specified flags", + UR_RESULT_SUCCESS); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } } @@ -1363,20 +1367,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE); ur_device_handle_t Device = hQueue->getContext()->getDevice(); - bool Supported = true; // Certain cuda devices and Windows do not have support for some Unified // Memory features. cuMemPrefetchAsync requires concurrent memory access // for managed memory. Therefore, ignore prefetch hint if concurrent managed // memory access is not available. if (!getAttribute(Device, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) { - Supported = false; + setErrorMessage("Prefetch hint ignored as device does not support " + "concurrent managed access", + UR_RESULT_SUCCESS); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } unsigned int IsManaged; UR_CHECK_ERROR(cuPointerGetAttribute( &IsManaged, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr)pMem)); if (!IsManaged) { - Supported = false; + setErrorMessage("Prefetch hint ignored as prefetch only works with USM", + UR_RESULT_SUCCESS); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } ur_result_t Result = UR_RESULT_SUCCESS; @@ -1393,10 +1401,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( UR_COMMAND_MEM_BUFFER_COPY, hQueue, CuStream)); UR_CHECK_ERROR(EventPtr->start()); } - if (Supported) { - UR_CHECK_ERROR( - cuMemPrefetchAsync((CUdeviceptr)pMem, size, Device->get(), CuStream)); - } + UR_CHECK_ERROR( + cuMemPrefetchAsync((CUdeviceptr)pMem, size, Device->get(), CuStream)); + if (phEvent) { UR_CHECK_ERROR(EventPtr->record()); *phEvent = EventPtr.release(); @@ -1416,7 +1423,6 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, &PointerRangeSize, CU_POINTER_ATTRIBUTE_RANGE_SIZE, (CUdeviceptr)pMem)); UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE); - bool Supported = true; // Certain cuda devices and Windows do not have support for some Unified // Memory features. Passing CU_MEM_ADVISE_SET/CLEAR_PREFERRED_LOCATION and // to cuMemAdvise on a GPU device requires the GPU device to report a non-zero @@ -1429,7 +1435,10 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, (advice & UR_USM_ADVICE_FLAG_DEFAULT)) { ur_device_handle_t Device = hQueue->getContext()->getDevice(); if (!getAttribute(Device, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) { - Supported = false; + setErrorMessage("Mem advise ignored as device does not support " + "concurrent managed access", + UR_RESULT_SUCCESS); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } // TODO: If ptr points to valid system-allocated pageable memory we should @@ -1441,7 +1450,10 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, UR_CHECK_ERROR(cuPointerGetAttribute( &IsManaged, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr)pMem)); if (!IsManaged) { - Supported = false; + setErrorMessage( + "Memory advice ignored as memory advices only works with USM", + UR_RESULT_SUCCESS); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } ur_result_t Result = UR_RESULT_SUCCESS; @@ -1457,21 +1469,19 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, UR_CHECK_ERROR(EventPtr->start()); } - if (Supported) { - if (advice & UR_USM_ADVICE_FLAG_DEFAULT) { - UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size, - CU_MEM_ADVISE_UNSET_READ_MOSTLY, - hQueue->getContext()->getDevice()->get())); - UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size, - CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, - hQueue->getContext()->getDevice()->get())); - UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size, - CU_MEM_ADVISE_UNSET_ACCESSED_BY, - hQueue->getContext()->getDevice()->get())); - } else { - Result = setCuMemAdvise((CUdeviceptr)pMem, size, advice, - hQueue->getContext()->getDevice()->get()); - } + if (advice & UR_USM_ADVICE_FLAG_DEFAULT) { + UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size, + CU_MEM_ADVISE_UNSET_READ_MOSTLY, + hQueue->getContext()->getDevice()->get())); + UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size, + CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, + hQueue->getContext()->getDevice()->get())); + UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size, + CU_MEM_ADVISE_UNSET_ACCESSED_BY, + hQueue->getContext()->getDevice()->get())); + } else { + Result = setCuMemAdvise((CUdeviceptr)pMem, size, advice, + hQueue->getContext()->getDevice()->get()); } if (phEvent) { From 5b2c2dd652836e849f34ad9cc71e29a740e84bb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Thu, 9 Nov 2023 14:30:26 +0000 Subject: [PATCH 3/3] Remove extra spaces --- source/adapters/cuda/enqueue.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/source/adapters/cuda/enqueue.cpp b/source/adapters/cuda/enqueue.cpp index cd09afb80f..c752c3fd14 100644 --- a/source/adapters/cuda/enqueue.cpp +++ b/source/adapters/cuda/enqueue.cpp @@ -76,7 +76,6 @@ void getUSMHostOrDevicePtr(PtrT USMPtr, CUmemorytype *OutMemType, ur_result_t setCuMemAdvise(CUdeviceptr DevPtr, size_t Size, ur_usm_advice_flags_t URAdviceFlags, CUdevice Device) { - std::unordered_map URToCUMemAdviseDeviceFlagsMap = { {UR_USM_ADVICE_FLAG_SET_READ_MOSTLY, CU_MEM_ADVISE_SET_READ_MOSTLY}, @@ -1403,7 +1402,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( } UR_CHECK_ERROR( cuMemPrefetchAsync((CUdeviceptr)pMem, size, Device->get(), CuStream)); - if (phEvent) { UR_CHECK_ERROR(EventPtr->record()); *phEvent = EventPtr.release();