From b58375a2e49ea90a3f92fb70aa262935f054ee51 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Mon, 9 Oct 2023 14:11:40 +0100 Subject: [PATCH 01/16] [EXP][CMDBUF] Add adapters code for Prefetch and Advise commands Adds adapters code support for prefetch and advise memory hints for level_zero backend. Adds entry points for prefetch and advise memory hints for CUDA backend. --- source/adapters/cuda/command_buffer.cpp | 32 ++++++++ source/adapters/cuda/ur_interface_loader.cpp | 2 + source/adapters/level_zero/command_buffer.cpp | 74 +++++++++++++++++++ .../level_zero/ur_interface_loader.cpp | 2 + 4 files changed, 110 insertions(+) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 24a5d9497c..49ab0b813e 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -525,6 +525,38 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( return Result; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t hCommandBuffer, const void * /* Mem */, + size_t /*Size*/, ur_usm_migration_flags_t /*Flags*/, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + (void)hCommandBuffer; + (void)numSyncPointsInWaitList; + (void)pSyncPointWaitList; + (void)pSyncPoint; + + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for CUDA adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t hCommandBuffer, const void * /* Mem */, + size_t /*Size*/, ur_usm_advice_flags_t /*Advice*/, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + (void)hCommandBuffer; + (void)numSyncPointsInWaitList; + 
(void)pSyncPointWaitList; + (void)pSyncPoint; + + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for CUDA adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, diff --git a/source/adapters/cuda/ur_interface_loader.cpp b/source/adapters/cuda/ur_interface_loader.cpp index e3258f379d..049e532dfe 100644 --- a/source/adapters/cuda/ur_interface_loader.cpp +++ b/source/adapters/cuda/ur_interface_loader.cpp @@ -289,6 +289,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( urCommandBufferAppendMemBufferWriteExp; pDdiTable->pfnAppendMemBufferWriteRectExp = urCommandBufferAppendMemBufferWriteRectExp; + pDdiTable->pfnAppendUSMPrefetchExp = urCommandBufferAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; return retVal; diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index e8f3b061f9..db4fc78938 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -683,6 +683,80 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( SyncPointWaitList, SyncPoint); } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t CommandBuffer, const void *Mem, size_t Size, + ur_usm_migration_flags_t Flags, uint32_t NumSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, + ur_exp_command_buffer_sync_point_t *SyncPoint) { + std::ignore = Flags; + + std::vector ZeEventList; + UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, + SyncPointWaitList, ZeEventList)); + + if (NumSyncPointsInWaitList) { + 
ZE2UR_CALL(zeCommandListAppendWaitOnEvents, + (CommandBuffer->ZeCommandList, NumSyncPointsInWaitList, + ZeEventList.data())); + } + + ur_event_handle_t LaunchEvent; + UR_CALL(EventCreate(CommandBuffer->Context, nullptr, true, &LaunchEvent)); + LaunchEvent->CommandType = UR_COMMAND_USM_PREFETCH; + + // Get sync point and register the event with it. + *SyncPoint = CommandBuffer->GetNextSyncPoint(); + CommandBuffer->RegisterSyncPoint(*SyncPoint, LaunchEvent); + + // TODO: figure out how to translate "flags" + ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, + (CommandBuffer->ZeCommandList, Mem, Size)); + + // TODO: Level Zero does not have a completion "event" with the prefetch API, + // so manually add command to signal our event. + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (CommandBuffer->ZeCommandList, LaunchEvent->ZeEvent)); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t CommandBuffer, const void *Mem, size_t Size, + ur_usm_advice_flags_t Advice, uint32_t NumSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, + ur_exp_command_buffer_sync_point_t *SyncPoint) { + auto ZeAdvice = ur_cast(Advice); + + std::vector ZeEventList; + UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, + SyncPointWaitList, ZeEventList)); + + if (NumSyncPointsInWaitList) { + ZE2UR_CALL(zeCommandListAppendWaitOnEvents, + (CommandBuffer->ZeCommandList, NumSyncPointsInWaitList, + ZeEventList.data())); + } + + ur_event_handle_t LaunchEvent; + UR_CALL(EventCreate(CommandBuffer->Context, nullptr, true, &LaunchEvent)); + LaunchEvent->CommandType = UR_COMMAND_USM_ADVISE; + + // Get sync point and register the event with it. 
+ *SyncPoint = CommandBuffer->GetNextSyncPoint(); + CommandBuffer->RegisterSyncPoint(*SyncPoint, LaunchEvent); + + ZE2UR_CALL(zeCommandListAppendMemAdvise, + (CommandBuffer->ZeCommandList, CommandBuffer->Device->ZeDevice, + Mem, Size, ZeAdvice)); + + // TODO: Level Zero does not have a completion "event" with the advise API, + // so manually add command to signal our event. + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (CommandBuffer->ZeCommandList, LaunchEvent->ZeEvent)); + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t Queue, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 09f4405744..5db5bfa6c9 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -336,6 +336,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( urCommandBufferAppendMemBufferWriteExp; pDdiTable->pfnAppendMemBufferWriteRectExp = urCommandBufferAppendMemBufferWriteRectExp; + pDdiTable->pfnAppendUSMPrefetchExp = urCommandBufferAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; return retVal; From da9b81fc4ca9903c02f961524d5f67469cd58454 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Thu, 28 Sep 2023 14:26:15 +0100 Subject: [PATCH 02/16] Merge pull request #885 from Bensuo/mfrancepillois/cmd-buffer-prefetch-memadvice [EXP][CMDBUF] Add Prefetch and Advise commands to cmd buffer experimental feature --- include/ur.py | 14 ++++++++++ include/ur_api.h | 28 +++++++++++++++++++ scripts/core/EXP-COMMAND-BUFFER.rst | 3 ++ source/adapters/null/ur_nullddi.cpp | 6 ++++ source/loader/layers/tracing/ur_trcddi.cpp | 8 ++++++ 
source/loader/layers/validation/ur_valddi.cpp | 8 ++++++ 6 files changed, 67 insertions(+) diff --git a/include/ur.py b/include/ur.py index 09b7955e07..45c2cdb350 100644 --- a/include/ur.py +++ b/include/ur.py @@ -3691,6 +3691,20 @@ class ur_usm_exp_dditable_t(Structure): else: _urCommandBufferAppendUSMAdviseExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) +############################################################################### +## @brief Function-pointer for urCommandBufferAppendUSMPrefetchExp +if __use_win_types: + _urCommandBufferAppendUSMPrefetchExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_migration_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) +else: + _urCommandBufferAppendUSMPrefetchExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_migration_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + +############################################################################### +## @brief Function-pointer for urCommandBufferAppendUSMAdviseExp +if __use_win_types: + _urCommandBufferAppendUSMAdviseExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) +else: + _urCommandBufferAppendUSMAdviseExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + ############################################################################### ## @brief Function-pointer for urCommandBufferEnqueueExp if 
__use_win_types: diff --git a/include/ur_api.h b/include/ur_api.h index 09f6d77a6b..1d8c6ca8f9 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -10667,6 +10667,34 @@ typedef struct ur_command_buffer_append_usm_advise_exp_params_t { ur_exp_command_buffer_sync_point_t **ppSyncPoint; } ur_command_buffer_append_usm_advise_exp_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferAppendUSMPrefetchExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_append_usm_prefetch_exp_params_t { + ur_exp_command_buffer_handle_t *phCommandBuffer; + const void **ppMemory; + size_t *psize; + ur_usm_migration_flags_t *pflags; + uint32_t *pnumSyncPointsInWaitList; + const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + ur_exp_command_buffer_sync_point_t **ppSyncPoint; +} ur_command_buffer_append_usm_prefetch_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferAppendUSMAdviseExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_append_usm_advise_exp_params_t { + ur_exp_command_buffer_handle_t *phCommandBuffer; + const void **ppMemory; + size_t *psize; + ur_usm_advice_flags_t *padvice; + uint32_t *pnumSyncPointsInWaitList; + const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + ur_exp_command_buffer_sync_point_t **ppSyncPoint; +} ur_command_buffer_append_usm_advise_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urCommandBufferEnqueueExp /// @details Each entry is a pointer to the parameter passed to the function; diff --git 
a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index a6a32a66a1..386bf48f37 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -227,6 +227,9 @@ Changelog | 1.3 | Add function definitions for Prefetch and Advise | | | commands | +-----------+-------------------------------------------------------+ +| 1.3 | Add function definitions for Prefetch and Advise | +| | commands | ++-----------+-------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index a4e91e3dc0..e8f7d48ae6 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -5457,6 +5457,12 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = driver::urCommandBufferAppendUSMAdviseExp; + pDdiTable->pfnAppendUSMPrefetchExp = + driver::urCommandBufferAppendUSMPrefetchExp; + + pDdiTable->pfnAppendUSMAdviseExp = + driver::urCommandBufferAppendUSMAdviseExp; + pDdiTable->pfnEnqueueExp = driver::urCommandBufferEnqueueExp; return result; diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index d33a3aaf51..792f34ec1f 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -6362,6 +6362,14 @@ __urdlllocal ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = ur_tracing_layer::urCommandBufferAppendUSMAdviseExp; + dditable.pfnAppendUSMPrefetchExp = pDdiTable->pfnAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMPrefetchExp = + ur_tracing_layer::urCommandBufferAppendUSMPrefetchExp; + + dditable.pfnAppendUSMAdviseExp = pDdiTable->pfnAppendUSMAdviseExp; + pDdiTable->pfnAppendUSMAdviseExp = + ur_tracing_layer::urCommandBufferAppendUSMAdviseExp; + 
dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_tracing_layer::urCommandBufferEnqueueExp; diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index ec0df692cf..13ec6eb47f 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -8123,6 +8123,14 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = ur_validation_layer::urCommandBufferAppendUSMAdviseExp; + dditable.pfnAppendUSMPrefetchExp = pDdiTable->pfnAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMPrefetchExp = + ur_validation_layer::urCommandBufferAppendUSMPrefetchExp; + + dditable.pfnAppendUSMAdviseExp = pDdiTable->pfnAppendUSMAdviseExp; + pDdiTable->pfnAppendUSMAdviseExp = + ur_validation_layer::urCommandBufferAppendUSMAdviseExp; + dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_validation_layer::urCommandBufferEnqueueExp; From 7a833cce436cdf940e5b5e65cd465a32f382d593 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Mon, 9 Oct 2023 17:21:57 +0100 Subject: [PATCH 03/16] Adds HIP adapters entry points for prefetch and mem advise --- source/adapters/hip/command_buffer.cpp | 18 ++++++++++++++++++ source/adapters/hip/ur_interface_loader.cpp | 2 ++ 2 files changed, 20 insertions(+) diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index d2cd156719..58ebc86928 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -122,6 +122,24 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t, const void *, size_t, + ur_usm_migration_flags_t, uint32_t, + const ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_sync_point_t *) { 
+ detail::ur::die("Experimental Command-buffer feature is not " + "implemented for HIP adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t, const void *, size_t, ur_usm_advice_flags_t, + ur_exp_command_buffer_sync_point_t *) { + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for HIP adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t, ur_queue_handle_t, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp index a02f80957e..94764caaec 100644 --- a/source/adapters/hip/ur_interface_loader.cpp +++ b/source/adapters/hip/ur_interface_loader.cpp @@ -286,6 +286,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( urCommandBufferAppendMemBufferWriteExp; pDdiTable->pfnAppendMemBufferWriteRectExp = urCommandBufferAppendMemBufferWriteRectExp; + pDdiTable->pfnAppendUSMPrefetchExp = urCommandBufferAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; return retVal; From 402494d7d0d9836877782cae07e5a8ddfc3a095c Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Tue, 10 Oct 2023 10:13:11 +0100 Subject: [PATCH 04/16] Adds explicit parsing for memory advise + updates comments --- source/adapters/level_zero/command_buffer.cpp | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index db4fc78938..1a5894a72e 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -712,7 +712,7 @@ UR_APIEXPORT ur_result_t UR_APICALL 
urCommandBufferAppendUSMPrefetchExp( ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, (CommandBuffer->ZeCommandList, Mem, Size)); - // TODO: Level Zero does not have a completion "event" with the prefetch API, + // Level Zero does not have a completion "event" with the prefetch API, // so manually add command to signal our event. ZE2UR_CALL(zeCommandListAppendSignalEvent, (CommandBuffer->ZeCommandList, LaunchEvent->ZeEvent)); @@ -725,7 +725,33 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_usm_advice_flags_t Advice, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, ur_exp_command_buffer_sync_point_t *SyncPoint) { - auto ZeAdvice = ur_cast(Advice); + std::unordered_map + URToCUMemAdviseDeviceFlagsMap = { + {UR_USM_ADVICE_FLAG_SET_READ_MOSTLY, + ZE_MEMORY_ADVICE_SET_READ_MOSTLY}, + {UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY, + ZE_MEMORY_ADVICE_CLEAR_READ_MOSTLY}, + {UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION, + ZR_MEM_ADVISE_SET_PREFERRED_LOCATION}, + {UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION, + ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION}, + {UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY, + ZE_MEMORY_ADVICE_SET_NON_ATOMIC_MOSTLY}, + {UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY, + ZE_MEMORY_ADVICE_CLEAR_NON_ATOMIC_MOSTLY}, + {UR_USM_ADVICE_FLAG_BIAS_CACHED, ZE_MEMORY_ADVICE_BIAS_CACHED}, + {UR_USM_ADVICE_FLAG_BIAS_UNCACHED, ZE_MEMORY_ADVICE_BIAS_UNCACHED}, + {UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST, + ZE_MEMORY_ADVICE_SET_SYSTEM_MEMORY_PREFERRED_LOCATION}, + {UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST, + ZE_MEMORY_ADVICE_CLEAR_SYSTEM_MEMORY_PREFERRED_LOCATION}}; + + ze_memory_advice_t ZeAdvice = 0; + for (auto &FlagPair : URToCUMemAdviseDeviceFlagsMap) { + if (Advice & FlagPair.first) { + ZeAdvice |= FlagPair.second; + } + } std::vector ZeEventList; UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, @@ -749,7 +775,7 @@ UR_APIEXPORT ur_result_t UR_APICALL 
urCommandBufferAppendUSMAdviseExp( (CommandBuffer->ZeCommandList, CommandBuffer->Device->ZeDevice, Mem, Size, ZeAdvice)); - // TODO: Level Zero does not have a completion "event" with the advise API, + // Level Zero does not have a completion "event" with the advise API, // so manually add command to signal our event. ZE2UR_CALL(zeCommandListAppendSignalEvent, (CommandBuffer->ZeCommandList, LaunchEvent->ZeEvent)); From 379fc46cc7344b44cf85f2e1475e496f6f1f1355 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Tue, 10 Oct 2023 12:22:57 +0100 Subject: [PATCH 05/16] Bugfix --- source/adapters/level_zero/command_buffer.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 1a5894a72e..b26a916644 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -732,7 +732,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( {UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY, ZE_MEMORY_ADVICE_CLEAR_READ_MOSTLY}, {UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION, - ZR_MEM_ADVISE_SET_PREFERRED_LOCATION}, + ZE_MEMORY_ADVISE_SET_PREFERRED_LOCATION}, {UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION, ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION}, {UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY, @@ -742,16 +742,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( {UR_USM_ADVICE_FLAG_BIAS_CACHED, ZE_MEMORY_ADVICE_BIAS_CACHED}, {UR_USM_ADVICE_FLAG_BIAS_UNCACHED, ZE_MEMORY_ADVICE_BIAS_UNCACHED}, {UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST, - ZE_MEMORY_ADVICE_SET_SYSTEM_MEMORY_PREFERRED_LOCATION}, + ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION}, {UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST, - ZE_MEMORY_ADVICE_CLEAR_SYSTEM_MEMORY_PREFERRED_LOCATION}}; + ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION}}; - ze_memory_advice_t ZeAdvice = 0; + uint32_t Value = 0; for (auto &FlagPair : 
URToCUMemAdviseDeviceFlagsMap) { if (Advice & FlagPair.first) { - ZeAdvice |= FlagPair.second; + Value |= static_cast(FlagPair.second); } } + ze_memory_advice_t ZeAdvice = static_cast(Value); std::vector ZeEventList; UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, From 0c9f62d455fb0d346819f816003eeb33d9f1f03b Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Tue, 10 Oct 2023 14:32:06 +0100 Subject: [PATCH 06/16] typos --- source/adapters/level_zero/command_buffer.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index b26a916644..7f861a50d6 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -732,7 +732,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( {UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY, ZE_MEMORY_ADVICE_CLEAR_READ_MOSTLY}, {UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION, - ZE_MEMORY_ADVISE_SET_PREFERRED_LOCATION}, + ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION}, {UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION, ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION}, {UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY, @@ -744,7 +744,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( {UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST, ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION}, {UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST, - ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION}}; + ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION}, + }; uint32_t Value = 0; for (auto &FlagPair : URToCUMemAdviseDeviceFlagsMap) { From f2df3fe2f5462887451d9e5ec8602e70eb17ee8d Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Wed, 11 Oct 2023 09:56:25 +0100 Subject: [PATCH 07/16] Changes map usage to if statements to handle advice flags --- source/adapters/level_zero/command_buffer.cpp | 51 +++++++++---------- 1 file changed, 24 insertions(+), 27 deletions(-) diff 
--git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 7f861a50d6..491fa99b61 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -725,34 +725,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_usm_advice_flags_t Advice, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, ur_exp_command_buffer_sync_point_t *SyncPoint) { - std::unordered_map - URToCUMemAdviseDeviceFlagsMap = { - {UR_USM_ADVICE_FLAG_SET_READ_MOSTLY, - ZE_MEMORY_ADVICE_SET_READ_MOSTLY}, - {UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY, - ZE_MEMORY_ADVICE_CLEAR_READ_MOSTLY}, - {UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION, - ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION}, - {UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION, - ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION}, - {UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY, - ZE_MEMORY_ADVICE_SET_NON_ATOMIC_MOSTLY}, - {UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY, - ZE_MEMORY_ADVICE_CLEAR_NON_ATOMIC_MOSTLY}, - {UR_USM_ADVICE_FLAG_BIAS_CACHED, ZE_MEMORY_ADVICE_BIAS_CACHED}, - {UR_USM_ADVICE_FLAG_BIAS_UNCACHED, ZE_MEMORY_ADVICE_BIAS_UNCACHED}, - {UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST, - ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION}, - {UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST, - ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION}, - }; - + // A memory chunk can be advised with multiple memory advices + // We therefore prefer if statements to switch cases to combine all potential + // flags uint32_t Value = 0; - for (auto &FlagPair : URToCUMemAdviseDeviceFlagsMap) { - if (Advice & FlagPair.first) { - Value |= static_cast(FlagPair.second); - } - } + if (Advice & UR_USM_ADVICE_FLAG_SET_READ_MOSTLY) + Value |= static_cast(ZE_MEMORY_ADVICE_SET_READ_MOSTLY); + if (Advice & UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY) + Value |= static_cast(ZE_MEMORY_ADVICE_CLEAR_READ_MOSTLY); + if (Advice & 
UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION) + Value |= static_cast(ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION); + if (Advice & UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION) + Value |= static_cast(ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION); + if (Advice & UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY) + Value |= static_cast(ZE_MEMORY_ADVICE_SET_NON_ATOMIC_MOSTLY); + if (Advice & UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY) + Value |= static_cast(ZE_MEMORY_ADVICE_CLEAR_NON_ATOMIC_MOSTLY); + if (Advice & UR_USM_ADVICE_FLAG_BIAS_CACHED) + Value |= static_cast(ZE_MEMORY_ADVICE_BIAS_CACHED); + if (Advice & UR_USM_ADVICE_FLAG_BIAS_UNCACHED) + Value |= static_cast(ZE_MEMORY_ADVICE_BIAS_UNCACHED); + if (Advice & UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST) + Value |= static_cast(ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION); + if (Advice & UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST) + Value |= static_cast(ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION); + ze_memory_advice_t ZeAdvice = static_cast(Value); std::vector ZeEventList; From f135e6c232cb41aadf4b3859fe3212616a5f657d Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Wed, 11 Oct 2023 10:34:02 +0100 Subject: [PATCH 08/16] Bugfix hip stub function signature --- source/adapters/hip/command_buffer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 58ebc86928..c85b3e9216 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -134,6 +134,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_handle_t, const void *, size_t, ur_usm_advice_flags_t, + uint32_t, const ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_sync_point_t *) { detail::ur::die("Experimental Command-buffer feature is not " "implemented for HIP adapter."); From 5cc3f880d296f433018e9f421d6c40384e28a8a8 Mon 
Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Thu, 12 Oct 2023 09:59:02 +0100 Subject: [PATCH 09/16] Improves comments --- source/adapters/level_zero/command_buffer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 491fa99b61..bb081f9b2d 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -708,7 +708,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( *SyncPoint = CommandBuffer->GetNextSyncPoint(); CommandBuffer->RegisterSyncPoint(*SyncPoint, LaunchEvent); - // TODO: figure out how to translate "flags" + // Add the prefetch command to the command buffer. + // Note that L0 does not handle migration flags. ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, (CommandBuffer->ZeCommandList, Mem, Size)); From 511637252d5b6e2c7a4fe40772ace944b70aaba8 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Mon, 4 Dec 2023 16:52:35 +0000 Subject: [PATCH 10/16] Updates generated files --- include/ur.py | 14 ---------- include/ur_api.h | 28 ------------------- source/adapters/null/ur_nullddi.cpp | 6 ---- source/loader/layers/tracing/ur_trcddi.cpp | 8 ------ source/loader/layers/validation/ur_valddi.cpp | 8 ------ 5 files changed, 64 deletions(-) diff --git a/include/ur.py b/include/ur.py index 45c2cdb350..09b7955e07 100644 --- a/include/ur.py +++ b/include/ur.py @@ -3691,20 +3691,6 @@ class ur_usm_exp_dditable_t(Structure): else: _urCommandBufferAppendUSMAdviseExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -############################################################################### -## @brief Function-pointer for urCommandBufferAppendUSMPrefetchExp -if __use_win_types: - 
_urCommandBufferAppendUSMPrefetchExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_migration_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendUSMPrefetchExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_migration_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendUSMAdviseExp -if __use_win_types: - _urCommandBufferAppendUSMAdviseExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendUSMAdviseExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - ############################################################################### ## @brief Function-pointer for urCommandBufferEnqueueExp if __use_win_types: diff --git a/include/ur_api.h b/include/ur_api.h index 1d8c6ca8f9..09f6d77a6b 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -10667,34 +10667,6 @@ typedef struct ur_command_buffer_append_usm_advise_exp_params_t { ur_exp_command_buffer_sync_point_t **ppSyncPoint; } ur_command_buffer_append_usm_advise_exp_params_t; -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urCommandBufferAppendUSMPrefetchExp -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct 
ur_command_buffer_append_usm_prefetch_exp_params_t { - ur_exp_command_buffer_handle_t *phCommandBuffer; - const void **ppMemory; - size_t *psize; - ur_usm_migration_flags_t *pflags; - uint32_t *pnumSyncPointsInWaitList; - const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; - ur_exp_command_buffer_sync_point_t **ppSyncPoint; -} ur_command_buffer_append_usm_prefetch_exp_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urCommandBufferAppendUSMAdviseExp -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_command_buffer_append_usm_advise_exp_params_t { - ur_exp_command_buffer_handle_t *phCommandBuffer; - const void **ppMemory; - size_t *psize; - ur_usm_advice_flags_t *padvice; - uint32_t *pnumSyncPointsInWaitList; - const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; - ur_exp_command_buffer_sync_point_t **ppSyncPoint; -} ur_command_buffer_append_usm_advise_exp_params_t; - /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urCommandBufferEnqueueExp /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index e8f7d48ae6..a4e91e3dc0 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -5457,12 +5457,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = driver::urCommandBufferAppendUSMAdviseExp; - pDdiTable->pfnAppendUSMPrefetchExp = - driver::urCommandBufferAppendUSMPrefetchExp; - - pDdiTable->pfnAppendUSMAdviseExp = - driver::urCommandBufferAppendUSMAdviseExp; - pDdiTable->pfnEnqueueExp = driver::urCommandBufferEnqueueExp; return result; diff --git 
a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 792f34ec1f..d33a3aaf51 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -6362,14 +6362,6 @@ __urdlllocal ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = ur_tracing_layer::urCommandBufferAppendUSMAdviseExp; - dditable.pfnAppendUSMPrefetchExp = pDdiTable->pfnAppendUSMPrefetchExp; - pDdiTable->pfnAppendUSMPrefetchExp = - ur_tracing_layer::urCommandBufferAppendUSMPrefetchExp; - - dditable.pfnAppendUSMAdviseExp = pDdiTable->pfnAppendUSMAdviseExp; - pDdiTable->pfnAppendUSMAdviseExp = - ur_tracing_layer::urCommandBufferAppendUSMAdviseExp; - dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_tracing_layer::urCommandBufferEnqueueExp; diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 13ec6eb47f..ec0df692cf 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -8123,14 +8123,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = ur_validation_layer::urCommandBufferAppendUSMAdviseExp; - dditable.pfnAppendUSMPrefetchExp = pDdiTable->pfnAppendUSMPrefetchExp; - pDdiTable->pfnAppendUSMPrefetchExp = - ur_validation_layer::urCommandBufferAppendUSMPrefetchExp; - - dditable.pfnAppendUSMAdviseExp = pDdiTable->pfnAppendUSMAdviseExp; - pDdiTable->pfnAppendUSMAdviseExp = - ur_validation_layer::urCommandBufferAppendUSMAdviseExp; - dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_validation_layer::urCommandBufferEnqueueExp; From b0aed5954774597a32b5b4a38297ed32051fada3 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Tue, 5 Dec 2023 10:23:27 +0000 Subject: [PATCH 11/16] Adds CUDA support --- source/adapters/cuda/command_buffer.cpp | 59 
++++++++++++++++++------- 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 49ab0b813e..f2b896eada 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -531,14 +531,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)numSyncPointsInWaitList; - (void)pSyncPointWaitList; - (void)pSyncPoint; - - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + // Prefetch cmd is not supported by Cuda Graph. + // We implement it as an empty node to enforce dependencies. + ur_result_t Result = UR_RESULT_SUCCESS; + CUgraphNode GraphNode; + + std::vector DepsList; + UR_CALL(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList, + pSyncPointWaitList, DepsList)); + + try { + // Add an empty node to preserve dependencies. + UR_CHECK_ERROR(cuGraphAddEmptyNode(&GraphNode, hCommandBuffer->CudaGraph, + DepsList.data(), DepsList.size())); + + // Get sync point and register the cuNode with it. 
+ *pSyncPoint = + hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + } catch (ur_result_t Err) { + Result = Err; + } + return Result; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( @@ -547,14 +560,28 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)numSyncPointsInWaitList; - (void)pSyncPointWaitList; - (void)pSyncPoint; - - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + // Mem-Advise cmd is not supported by Cuda Graph. + // We implement it as an empty node to enforce dependencies. + ur_result_t Result = UR_RESULT_SUCCESS; + CUgraphNode GraphNode; + + std::vector DepsList; + UR_CALL(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList, + pSyncPointWaitList, DepsList)); + + try { + // Add an empty node to preserve dependencies. + UR_CHECK_ERROR(cuGraphAddEmptyNode(&GraphNode, hCommandBuffer->CudaGraph, + DepsList.data(), DepsList.size())); + + // Get sync point and register the cuNode with it. 
+ *pSyncPoint = + hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + } catch (ur_result_t Err) { + Result = Err; + } + + return Result; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( From 303e26c7bed95bc50560e228050661cf57ac9f93 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Tue, 5 Dec 2023 11:57:14 +0000 Subject: [PATCH 12/16] Cuda support bugfix --- source/adapters/cuda/command_buffer.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index f2b896eada..695ff03ce2 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -538,7 +538,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( std::vector DepsList; UR_CALL(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList, - pSyncPointWaitList, DepsList)); + pSyncPointWaitList, DepsList), + Result); try { // Add an empty node to preserve dependencies. @@ -567,7 +568,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( std::vector DepsList; UR_CALL(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList, - pSyncPointWaitList, DepsList)); + pSyncPointWaitList, DepsList), + Result); try { // Add an empty node to preserve dependencies. 
From 1f256efc291ad9019535d812aa27fbb92e4f3803 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Wed, 6 Dec 2023 15:28:46 +0000 Subject: [PATCH 13/16] removes duplicate entry --- scripts/core/EXP-COMMAND-BUFFER.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index 386bf48f37..a6a32a66a1 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -227,9 +227,6 @@ Changelog | 1.3 | Add function definitions for Prefetch and Advise | | | commands | +-----------+-------------------------------------------------------+ -| 1.3 | Add function definitions for Prefetch and Advise | -| | commands | -+-----------+-------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- From ff8f41011f706efcea3541fe585ad02f1b9e5340 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Wed, 6 Dec 2023 17:16:04 +0000 Subject: [PATCH 14/16] Adds OpenCL stubs --- source/adapters/opencl/command_buffer.cpp | 34 +++++++++++++++++++ .../adapters/opencl/ur_interface_loader.cpp | 2 ++ 2 files changed, 36 insertions(+) diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 56b4d16b88..3bbe531fde 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -297,6 +297,40 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferFillExp( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t hCommandBuffer, const void *mem, size_t size, + ur_usm_migration_flags_t flags, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + (void)hCommandBuffer; + (void)mem; + (void)size; + (void)flags; + 
(void)numSyncPointsInWaitList; + (void)pSyncPointWaitList; + (void)pSyncPoint; + + // Not implemented + return PI_ERROR_INVALID_OPERATION; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t hCommandBuffer, const void *mem, size_t size, + ur_usm_migration_flags_t flags, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + (void)hCommandBuffer; + (void)mem; + (void)size; + (void)flags; + (void)numSyncPointsInWaitList; + (void)pSyncPointWaitList; + (void)pSyncPoint; + + // Not implemented + return PI_ERROR_INVALID_OPERATION; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index b42df19350..d8f34bc398 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -296,6 +296,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( urCommandBufferAppendMemBufferWriteExp; pDdiTable->pfnAppendMemBufferWriteRectExp = urCommandBufferAppendMemBufferWriteRectExp; + pDdiTable->pfnAppendUSMPrefetchExp = urCommandBufferAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; return retVal; From 1ed21073c8723a5d8f8333a1a3f0cf6c0db2654f Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Wed, 6 Dec 2023 17:35:48 +0000 Subject: [PATCH 15/16] Fixes opencl stubs --- source/adapters/opencl/command_buffer.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 
3bbe531fde..25d3311b79 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -311,24 +311,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( (void)pSyncPoint; // Not implemented - return PI_ERROR_INVALID_OPERATION; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_handle_t hCommandBuffer, const void *mem, size_t size, - ur_usm_migration_flags_t flags, uint32_t numSyncPointsInWaitList, + ur_usm_advice_flags_t advice, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint) { (void)hCommandBuffer; (void)mem; (void)size; - (void)flags; + (void)advice; (void)numSyncPointsInWaitList; (void)pSyncPointWaitList; (void)pSyncPoint; // Not implemented - return PI_ERROR_INVALID_OPERATION; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( From 01cd56d2523c1f545955f86b98b637b25ef08d03 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Fri, 15 Dec 2023 11:12:53 +0000 Subject: [PATCH 16/16] Adds warning message to CUDA + removes calls to `die()` in Hip --- source/adapters/cuda/command_buffer.cpp | 10 ++++++++++ source/adapters/hip/command_buffer.cpp | 4 ---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 695ff03ce2..4a3f3da60e 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -549,6 +549,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( // Get sync point and register the cuNode with it.
*pSyncPoint = hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + + setErrorMessage("Prefetch hint ignored and replaced with empty node as " + "prefetch is not supported by CUDA Graph backend", + UR_RESULT_SUCCESS); + Result = UR_RESULT_ERROR_ADAPTER_SPECIFIC; } catch (ur_result_t Err) { Result = Err; } @@ -579,6 +584,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( // Get sync point and register the cuNode with it. *pSyncPoint = hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + + setErrorMessage("Memory advice ignored and replaced with empty node as " + "memory advice is not supported by CUDA Graph backend", + UR_RESULT_SUCCESS); + Result = UR_RESULT_ERROR_ADAPTER_SPECIFIC; } catch (ur_result_t Err) { Result = Err; } diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index c85b3e9216..c7609b6110 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -127,8 +127,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( ur_usm_migration_flags_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_sync_point_t *) { - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -136,8 +134,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_handle_t, const void *, size_t, ur_usm_advice_flags_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_sync_point_t *) { - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; }