From 74f27e2a8f7beb698c90b2058756dfae7b50f679 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A1bio=20Mestre?= <fabio.mestre@codeplay.com>
Date: Mon, 23 Oct 2023 17:39:04 +0100
Subject: [PATCH 1/3] [CUDA] Change hint functions to return UR_SUCCESS

- The UR spec was recently changed to make hint entryponts
always return UR_RESULT_SUCCESS. This commit changes the CUDA
adapter to be conformant with this change.
- This commit also changes the type of PointerRangeSize which
 was causing a stack corruption.
---
 source/adapters/cuda/enqueue.cpp | 67 ++++++++++++++------------------
 1 file changed, 30 insertions(+), 37 deletions(-)

diff --git a/source/adapters/cuda/enqueue.cpp b/source/adapters/cuda/enqueue.cpp
index 5761f24e0a..6a7b777180 100644
--- a/source/adapters/cuda/enqueue.cpp
+++ b/source/adapters/cuda/enqueue.cpp
@@ -1355,36 +1355,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch(
     ur_queue_handle_t hQueue, const void *pMem, size_t size,
     ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList,
     const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
-  unsigned int PointerRangeSize = 0;
+  std::ignore = flags;
+
+  size_t PointerRangeSize = 0;
   UR_CHECK_ERROR(cuPointerGetAttribute(
       &PointerRangeSize, CU_POINTER_ATTRIBUTE_RANGE_SIZE, (CUdeviceptr)pMem));
   UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE);
   ur_device_handle_t Device = hQueue->getContext()->getDevice();
 
+  bool Supported = true;
   // Certain cuda devices and Windows do not have support for some Unified
   // Memory features. cuMemPrefetchAsync requires concurrent memory access
-  // for managed memory. Therfore, ignore prefetch hint if concurrent managed
+  // for managed memory. Therefore, ignore prefetch hint if concurrent managed
   // memory access is not available.
   if (!getAttribute(Device, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) {
-    setErrorMessage("Prefetch hint ignored as device does not support "
-                    "concurrent managed access",
-                    UR_RESULT_SUCCESS);
-    return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
+    Supported = false;
   }
 
   unsigned int IsManaged;
   UR_CHECK_ERROR(cuPointerGetAttribute(
       &IsManaged, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr)pMem));
   if (!IsManaged) {
-    setErrorMessage("Prefetch hint ignored as prefetch only works with USM",
-                    UR_RESULT_SUCCESS);
-    return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
+    Supported = false;
   }
 
-  // flags is currently unused so fail if set
-  if (flags != 0)
-    return UR_RESULT_ERROR_INVALID_VALUE;
-
   ur_result_t Result = UR_RESULT_SUCCESS;
   std::unique_ptr<ur_event_handle_t_> EventPtr{nullptr};
 
@@ -1399,8 +1393,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch(
               UR_COMMAND_MEM_BUFFER_COPY, hQueue, CuStream));
       UR_CHECK_ERROR(EventPtr->start());
     }
-    UR_CHECK_ERROR(
-        cuMemPrefetchAsync((CUdeviceptr)pMem, size, Device->get(), CuStream));
+    if (Supported) {
+      UR_CHECK_ERROR(
+          cuMemPrefetchAsync((CUdeviceptr)pMem, size, Device->get(), CuStream));
+    }
     if (phEvent) {
       UR_CHECK_ERROR(EventPtr->record());
       *phEvent = EventPtr.release();
@@ -1415,11 +1411,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch(
 UR_APIEXPORT ur_result_t UR_APICALL
 urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size,
                    ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) {
-  unsigned int PointerRangeSize = 0;
+  size_t PointerRangeSize = 0;
   UR_CHECK_ERROR(cuPointerGetAttribute(
       &PointerRangeSize, CU_POINTER_ATTRIBUTE_RANGE_SIZE, (CUdeviceptr)pMem));
   UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE);
 
+  bool Supported = true;
   // Certain cuda devices and Windows do not have support for some Unified
   // Memory features. Passing CU_MEM_ADVISE_SET/CLEAR_PREFERRED_LOCATION and
   // to cuMemAdvise on a GPU device requires the GPU device to report a non-zero
@@ -1432,10 +1429,7 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size,
       (advice & UR_USM_ADVICE_FLAG_DEFAULT)) {
     ur_device_handle_t Device = hQueue->getContext()->getDevice();
     if (!getAttribute(Device, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) {
-      setErrorMessage("Mem advise ignored as device does not support "
-                      "concurrent managed access",
-                      UR_RESULT_SUCCESS);
-      return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
+      Supported = false;
     }
 
     // TODO: If ptr points to valid system-allocated pageable memory we should
@@ -1447,10 +1441,7 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size,
   UR_CHECK_ERROR(cuPointerGetAttribute(
       &IsManaged, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr)pMem));
   if (!IsManaged) {
-    setErrorMessage(
-        "Memory advice ignored as memory advices only works with USM",
-        UR_RESULT_SUCCESS);
-    return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
+    Supported = false;
   }
 
   ur_result_t Result = UR_RESULT_SUCCESS;
@@ -1466,19 +1457,21 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size,
       UR_CHECK_ERROR(EventPtr->start());
     }
 
-    if (advice & UR_USM_ADVICE_FLAG_DEFAULT) {
-      UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size,
-                                 CU_MEM_ADVISE_UNSET_READ_MOSTLY,
-                                 hQueue->getContext()->getDevice()->get()));
-      UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size,
-                                 CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION,
-                                 hQueue->getContext()->getDevice()->get()));
-      UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size,
-                                 CU_MEM_ADVISE_UNSET_ACCESSED_BY,
-                                 hQueue->getContext()->getDevice()->get()));
-    } else {
-      Result = setCuMemAdvise((CUdeviceptr)pMem, size, advice,
-                              hQueue->getContext()->getDevice()->get());
+    if (Supported) {
+      if (advice & UR_USM_ADVICE_FLAG_DEFAULT) {
+        UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size,
+                                   CU_MEM_ADVISE_UNSET_READ_MOSTLY,
+                                   hQueue->getContext()->getDevice()->get()));
+        UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size,
+                                   CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION,
+                                   hQueue->getContext()->getDevice()->get()));
+        UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size,
+                                   CU_MEM_ADVISE_UNSET_ACCESSED_BY,
+                                   hQueue->getContext()->getDevice()->get()));
+      } else {
+        Result = setCuMemAdvise((CUdeviceptr)pMem, size, advice,
+                                hQueue->getContext()->getDevice()->get());
+      }
     }
 
     if (phEvent) {

From e1902fcd35f581e5f3f304601e7c0b47d860ccef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A1bio=20Mestre?= <fabio.mestre@codeplay.com>
Date: Thu, 9 Nov 2023 14:27:27 +0000
Subject: [PATCH 2/3] Address review comments

---
 source/adapters/cuda/enqueue.cpp | 62 ++++++++++++++++++--------------
 1 file changed, 36 insertions(+), 26 deletions(-)

diff --git a/source/adapters/cuda/enqueue.cpp b/source/adapters/cuda/enqueue.cpp
index 6a7b777180..cd09afb80f 100644
--- a/source/adapters/cuda/enqueue.cpp
+++ b/source/adapters/cuda/enqueue.cpp
@@ -76,6 +76,7 @@ void getUSMHostOrDevicePtr(PtrT USMPtr, CUmemorytype *OutMemType,
 ur_result_t setCuMemAdvise(CUdeviceptr DevPtr, size_t Size,
                            ur_usm_advice_flags_t URAdviceFlags,
                            CUdevice Device) {
+
   std::unordered_map<ur_usm_advice_flags_t, CUmem_advise>
       URToCUMemAdviseDeviceFlagsMap = {
           {UR_USM_ADVICE_FLAG_SET_READ_MOSTLY, CU_MEM_ADVISE_SET_READ_MOSTLY},
@@ -121,7 +122,10 @@ ur_result_t setCuMemAdvise(CUdeviceptr DevPtr, size_t Size,
 
   for (auto &UnmappedFlag : UnmappedMemAdviceFlags) {
     if (URAdviceFlags & UnmappedFlag) {
-      throw UR_RESULT_ERROR_INVALID_ENUMERATION;
+      setErrorMessage("Memory advice ignored because the CUDA backend does not "
+                      "support some of the specified flags",
+                      UR_RESULT_SUCCESS);
+      return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
     }
   }
 
@@ -1363,20 +1367,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch(
   UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE);
   ur_device_handle_t Device = hQueue->getContext()->getDevice();
 
-  bool Supported = true;
   // Certain cuda devices and Windows do not have support for some Unified
   // Memory features. cuMemPrefetchAsync requires concurrent memory access
   // for managed memory. Therefore, ignore prefetch hint if concurrent managed
   // memory access is not available.
   if (!getAttribute(Device, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) {
-    Supported = false;
+    setErrorMessage("Prefetch hint ignored as device does not support "
+                    "concurrent managed access",
+                    UR_RESULT_SUCCESS);
+    return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
   }
 
   unsigned int IsManaged;
   UR_CHECK_ERROR(cuPointerGetAttribute(
       &IsManaged, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr)pMem));
   if (!IsManaged) {
-    Supported = false;
+    setErrorMessage("Prefetch hint ignored as prefetch only works with USM",
+                    UR_RESULT_SUCCESS);
+    return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
   }
 
   ur_result_t Result = UR_RESULT_SUCCESS;
@@ -1393,10 +1401,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch(
               UR_COMMAND_MEM_BUFFER_COPY, hQueue, CuStream));
       UR_CHECK_ERROR(EventPtr->start());
     }
-    if (Supported) {
-      UR_CHECK_ERROR(
-          cuMemPrefetchAsync((CUdeviceptr)pMem, size, Device->get(), CuStream));
-    }
+    UR_CHECK_ERROR(
+        cuMemPrefetchAsync((CUdeviceptr)pMem, size, Device->get(), CuStream));
+
     if (phEvent) {
       UR_CHECK_ERROR(EventPtr->record());
       *phEvent = EventPtr.release();
@@ -1416,7 +1423,6 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size,
       &PointerRangeSize, CU_POINTER_ATTRIBUTE_RANGE_SIZE, (CUdeviceptr)pMem));
   UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE);
 
-  bool Supported = true;
   // Certain cuda devices and Windows do not have support for some Unified
   // Memory features. Passing CU_MEM_ADVISE_SET/CLEAR_PREFERRED_LOCATION and
   // to cuMemAdvise on a GPU device requires the GPU device to report a non-zero
@@ -1429,7 +1435,10 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size,
       (advice & UR_USM_ADVICE_FLAG_DEFAULT)) {
     ur_device_handle_t Device = hQueue->getContext()->getDevice();
     if (!getAttribute(Device, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) {
-      Supported = false;
+      setErrorMessage("Mem advise ignored as device does not support "
+                      "concurrent managed access",
+                      UR_RESULT_SUCCESS);
+      return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
     }
 
     // TODO: If ptr points to valid system-allocated pageable memory we should
@@ -1441,7 +1450,10 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size,
   UR_CHECK_ERROR(cuPointerGetAttribute(
       &IsManaged, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr)pMem));
   if (!IsManaged) {
-    Supported = false;
+    setErrorMessage(
+        "Memory advice ignored as memory advices only works with USM",
+        UR_RESULT_SUCCESS);
+    return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
   }
 
   ur_result_t Result = UR_RESULT_SUCCESS;
@@ -1457,21 +1469,19 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size,
       UR_CHECK_ERROR(EventPtr->start());
     }
 
-    if (Supported) {
-      if (advice & UR_USM_ADVICE_FLAG_DEFAULT) {
-        UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size,
-                                   CU_MEM_ADVISE_UNSET_READ_MOSTLY,
-                                   hQueue->getContext()->getDevice()->get()));
-        UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size,
-                                   CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION,
-                                   hQueue->getContext()->getDevice()->get()));
-        UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size,
-                                   CU_MEM_ADVISE_UNSET_ACCESSED_BY,
-                                   hQueue->getContext()->getDevice()->get()));
-      } else {
-        Result = setCuMemAdvise((CUdeviceptr)pMem, size, advice,
-                                hQueue->getContext()->getDevice()->get());
-      }
+    if (advice & UR_USM_ADVICE_FLAG_DEFAULT) {
+      UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size,
+                                 CU_MEM_ADVISE_UNSET_READ_MOSTLY,
+                                 hQueue->getContext()->getDevice()->get()));
+      UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size,
+                                 CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION,
+                                 hQueue->getContext()->getDevice()->get()));
+      UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size,
+                                 CU_MEM_ADVISE_UNSET_ACCESSED_BY,
+                                 hQueue->getContext()->getDevice()->get()));
+    } else {
+      Result = setCuMemAdvise((CUdeviceptr)pMem, size, advice,
+                              hQueue->getContext()->getDevice()->get());
     }
 
     if (phEvent) {

From 5b2c2dd652836e849f34ad9cc71e29a740e84bb4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A1bio=20Mestre?= <fabio.mestre@codeplay.com>
Date: Thu, 9 Nov 2023 14:30:26 +0000
Subject: [PATCH 3/3] Remove extra spaces

---
 source/adapters/cuda/enqueue.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/source/adapters/cuda/enqueue.cpp b/source/adapters/cuda/enqueue.cpp
index cd09afb80f..c752c3fd14 100644
--- a/source/adapters/cuda/enqueue.cpp
+++ b/source/adapters/cuda/enqueue.cpp
@@ -76,7 +76,6 @@ void getUSMHostOrDevicePtr(PtrT USMPtr, CUmemorytype *OutMemType,
 ur_result_t setCuMemAdvise(CUdeviceptr DevPtr, size_t Size,
                            ur_usm_advice_flags_t URAdviceFlags,
                            CUdevice Device) {
-
   std::unordered_map<ur_usm_advice_flags_t, CUmem_advise>
       URToCUMemAdviseDeviceFlagsMap = {
           {UR_USM_ADVICE_FLAG_SET_READ_MOSTLY, CU_MEM_ADVISE_SET_READ_MOSTLY},
@@ -1403,7 +1402,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch(
     }
     UR_CHECK_ERROR(
         cuMemPrefetchAsync((CUdeviceptr)pMem, size, Device->get(), CuStream));
-
     if (phEvent) {
       UR_CHECK_ERROR(EventPtr->record());
       *phEvent = EventPtr.release();