Skip to content

Commit

Permalink
[UR] Add default implementation for cooperative kernel functions
Browse files Browse the repository at this point in the history
Cooperative kernels can synchronize using device-scope barriers. These kernels
use `urKernelSuggestMaxCooperativeGroupCountExp` to ensure that all work groups
can run concurrently. When the maximum number of work groups is 1, these kernels
behave the same as regular kernels.

This PR adds a default implementation for
`urKernelSuggestMaxCooperativeGroupCountExp` that returns 1. Also, it adds a
default implementation for `urEnqueueCooperativeKernelLaunchExp` that calls
`urEnqueueKernelLaunch`.

Signed-off-by: Michael Aziz <michael.aziz@intel.com>
  • Loading branch information
0x12CC committed Jan 11, 2024
1 parent e1414e1 commit 48a9ef1
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 0 deletions.
10 changes: 10 additions & 0 deletions source/adapters/cuda/enqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
return Result;
}

/// Default implementation of the experimental cooperative kernel launch.
///
/// Forwards every argument unchanged to urEnqueueKernelLaunch and returns
/// whatever that call returns; no cooperative-specific handling is done here.
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
    ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
    const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
    const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
  // Delegate to the regular launch path and hand its status back as-is.
  const ur_result_t LaunchResult = urEnqueueKernelLaunch(
      hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
      pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent);
  return LaunchResult;
}

/// Set parameters for general 3D memory copy.
/// If the source and/or destination is on the device, SrcPtr and/or DstPtr
/// must be a pointer to a CUdeviceptr
Expand Down
7 changes: 7 additions & 0 deletions source/adapters/cuda/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

/// Default implementation of the experimental cooperative group-count query.
///
/// Always reports a maximum of 1 work group; \p hKernel is not inspected.
///
/// \param hKernel Kernel handle; unused by this default implementation.
/// \param pGroupCountRet Receives the suggested maximum group count (always 1).
/// \return UR_RESULT_ERROR_INVALID_NULL_POINTER if \p pGroupCountRet is null,
///         otherwise UR_RESULT_SUCCESS.
UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
    ur_kernel_handle_t hKernel, uint32_t *pGroupCountRet) {
  (void)hKernel;
  // Reject a null output pointer instead of dereferencing it.
  if (pGroupCountRet == nullptr) {
    return UR_RESULT_ERROR_INVALID_NULL_POINTER;
  }
  *pGroupCountRet = 1;
  return UR_RESULT_SUCCESS;
}

UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue(
ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize,
const ur_kernel_arg_value_properties_t *pProperties,
Expand Down
10 changes: 10 additions & 0 deletions source/adapters/hip/enqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
return Result;
}

/// Default implementation of the experimental cooperative kernel launch.
///
/// Forwards every argument unchanged to urEnqueueKernelLaunch and returns
/// whatever that call returns; no cooperative-specific handling is done here.
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
    ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
    const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
    const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
  // Delegate to the regular launch path and hand its status back as-is.
  const ur_result_t LaunchResult = urEnqueueKernelLaunch(
      hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
      pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent);
  return LaunchResult;
}

/// Enqueues a wait on the given queue for all events.
/// See \ref enqueueEventWait
///
Expand Down
7 changes: 7 additions & 0 deletions source/adapters/hip/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,13 @@ urKernelGetNativeHandle(ur_kernel_handle_t, ur_native_handle_t *) {
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

/// Default implementation of the experimental cooperative group-count query.
///
/// Always reports a maximum of 1 work group; \p hKernel is not inspected.
///
/// \param hKernel Kernel handle; unused by this default implementation.
/// \param pGroupCountRet Receives the suggested maximum group count (always 1).
/// \return UR_RESULT_ERROR_INVALID_NULL_POINTER if \p pGroupCountRet is null,
///         otherwise UR_RESULT_SUCCESS.
UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
    ur_kernel_handle_t hKernel, uint32_t *pGroupCountRet) {
  (void)hKernel;
  // Reject a null output pointer instead of dereferencing it.
  if (pGroupCountRet == nullptr) {
    return UR_RESULT_ERROR_INVALID_NULL_POINTER;
  }
  *pGroupCountRet = 1;
  return UR_RESULT_SUCCESS;
}

UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue(
ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize,
const ur_kernel_arg_value_properties_t *, const void *pArgValue) {
Expand Down
17 changes: 17 additions & 0 deletions source/adapters/level_zero/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
return UR_RESULT_SUCCESS;
}

/// Default implementation of the experimental cooperative kernel launch.
///
/// Forwards every argument unchanged to urEnqueueKernelLaunch and returns
/// whatever that call returns; no cooperative-specific handling is done here.
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
    ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
    const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
    const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
  // Delegate to the regular launch path and hand its status back as-is.
  const ur_result_t LaunchResult = urEnqueueKernelLaunch(
      hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
      pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent);
  return LaunchResult;
}

UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite(
ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to.
ur_program_handle_t Program, ///< [in] handle of the program containing the
Expand Down Expand Up @@ -736,6 +746,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle(
return UR_RESULT_SUCCESS;
}

/// Default implementation of the experimental cooperative group-count query.
///
/// Always reports a maximum of 1 work group; \p hKernel is not inspected.
///
/// \param hKernel Kernel handle; unused by this default implementation.
/// \param pGroupCountRet Receives the suggested maximum group count (always 1).
/// \return UR_RESULT_ERROR_INVALID_NULL_POINTER if \p pGroupCountRet is null,
///         otherwise UR_RESULT_SUCCESS.
UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
    ur_kernel_handle_t hKernel, uint32_t *pGroupCountRet) {
  (void)hKernel;
  // Reject a null output pointer instead of dereferencing it.
  if (pGroupCountRet == nullptr) {
    return UR_RESULT_ERROR_INVALID_NULL_POINTER;
  }
  *pGroupCountRet = 1;
  return UR_RESULT_SUCCESS;
}

UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle(
ur_native_handle_t NativeKernel, ///< [in] the native handle of the kernel.
ur_context_handle_t Context, ///< [in] handle of the context object
Expand Down
10 changes: 10 additions & 0 deletions source/adapters/opencl/enqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
return UR_RESULT_SUCCESS;
}

/// Default implementation of the experimental cooperative kernel launch.
///
/// Forwards every argument unchanged to urEnqueueKernelLaunch and returns
/// whatever that call returns; no cooperative-specific handling is done here.
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
    ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
    const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
    const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
  // Delegate to the regular launch path and hand its status back as-is.
  const ur_result_t LaunchResult = urEnqueueKernelLaunch(
      hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
      pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent);
  return LaunchResult;
}

UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait(
ur_queue_handle_t hQueue, uint32_t numEventsInWaitList,
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
Expand Down
7 changes: 7 additions & 0 deletions source/adapters/opencl/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle(
return UR_RESULT_SUCCESS;
}

/// Default implementation of the experimental cooperative group-count query.
///
/// Always reports a maximum of 1 work group; \p hKernel is not inspected.
///
/// \param hKernel Kernel handle; unused by this default implementation.
/// \param pGroupCountRet Receives the suggested maximum group count (always 1).
/// \return UR_RESULT_ERROR_INVALID_NULL_POINTER if \p pGroupCountRet is null,
///         otherwise UR_RESULT_SUCCESS.
UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
    ur_kernel_handle_t hKernel, uint32_t *pGroupCountRet) {
  (void)hKernel;
  // Reject a null output pointer instead of dereferencing it.
  if (pGroupCountRet == nullptr) {
    return UR_RESULT_ERROR_INVALID_NULL_POINTER;
  }
  *pGroupCountRet = 1;
  return UR_RESULT_SUCCESS;
}

UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle(
ur_native_handle_t hNativeKernel, ur_context_handle_t, ur_program_handle_t,
const ur_kernel_native_properties_t *pProperties,
Expand Down

0 comments on commit 48a9ef1

Please sign in to comment.