Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into cmdbuf-support-hip
Browse files Browse the repository at this point in the history
  • Loading branch information
EwanC committed Feb 23, 2024
2 parents c9d1431 + 588615e commit a0bebf1
Show file tree
Hide file tree
Showing 40 changed files with 1,436 additions and 72 deletions.
63 changes: 61 additions & 2 deletions include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ typedef enum ur_function_t {
UR_FUNCTION_COMMAND_BUFFER_RELEASE_COMMAND_EXP = 217, ///< Enumerator for ::urCommandBufferReleaseCommandExp
UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP = 218, ///< Enumerator for ::urCommandBufferGetInfoExp
UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP = 219, ///< Enumerator for ::urCommandBufferCommandGetInfoExp
UR_FUNCTION_DEVICE_GET_SELECTED = 220, ///< Enumerator for ::urDeviceGetSelected
/// @cond
UR_FUNCTION_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand Down Expand Up @@ -1387,6 +1388,46 @@ urDeviceGet(
///< pNumDevices will be updated with the total number of devices available.
);

///////////////////////////////////////////////////////////////////////////////
/// @brief Retrieves devices within a platform selected by ONEAPI_DEVICE_SELECTOR
///
/// @details
/// - Multiple calls to this function will return identical device handles,
/// in the same order.
/// - The number and order of handles returned from this function will be
/// affected by environment variables that filter or select which devices
/// are exposed through this API.
/// - A reference is taken for each returned device and must be released
/// with a subsequent call to ::urDeviceRelease.
/// - The application may call this function from simultaneous threads, the
/// implementation must be thread-safe.
///
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_UNINITIALIZED
/// - ::UR_RESULT_ERROR_DEVICE_LOST
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hPlatform`
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
/// + `::UR_DEVICE_TYPE_VPU < DeviceType`
/// - ::UR_RESULT_ERROR_INVALID_VALUE
UR_APIEXPORT ur_result_t UR_APICALL
urDeviceGetSelected(
ur_platform_handle_t hPlatform, ///< [in] handle of the platform instance
ur_device_type_t DeviceType, ///< [in] the type of the devices.
uint32_t NumEntries, ///< [in] the number of devices to be added to phDevices.
///< If phDevices is not NULL then NumEntries should be greater than zero;
///< otherwise ::UR_RESULT_ERROR_INVALID_VALUE
///< will be returned.
ur_device_handle_t *phDevices, ///< [out][optional][range(0, NumEntries)] array of device handles.
///< If NumEntries is less than the number of devices available, then only
///< that number of devices will be retrieved.
uint32_t *pNumDevices ///< [out][optional] pointer to the number of devices.
///< pNumDevices will be updated with the total number of selected devices
///< available for the given platform.
);

///////////////////////////////////////////////////////////////////////////////
/// @brief Supported device info
typedef enum ur_device_info_t {
Expand Down Expand Up @@ -8692,8 +8733,12 @@ urEnqueueCooperativeKernelLaunchExp(
/// - ::UR_RESULT_ERROR_INVALID_KERNEL
UR_APIEXPORT ur_result_t UR_APICALL
urKernelSuggestMaxCooperativeGroupCountExp(
ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object
uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups
ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object
size_t localWorkSize, ///< [in] number of local work-items that will form a work-group when the
///< kernel is launched
size_t dynamicSharedMemorySize, ///< [in] size of dynamic shared memory, for each work-group, in bytes,
///< that will be used when the kernel is launched
uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups
);

#if !defined(__GNUC__)
Expand Down Expand Up @@ -9641,6 +9686,8 @@ typedef struct ur_kernel_set_specialization_constants_params_t {
/// allowing the callback the ability to modify the parameter's value
typedef struct ur_kernel_suggest_max_cooperative_group_count_exp_params_t {
ur_kernel_handle_t *phKernel; ///< points at the hKernel argument
size_t *plocalWorkSize; ///< points at the localWorkSize argument
size_t *pdynamicSharedMemorySize; ///< points at the dynamicSharedMemorySize argument
uint32_t **ppGroupCountRet; ///< points at the pGroupCountRet argument (itself a pointer)
} ur_kernel_suggest_max_cooperative_group_count_exp_params_t;

Expand Down Expand Up @@ -11148,6 +11195,18 @@ typedef struct ur_device_get_params_t {
uint32_t **ppNumDevices;
} ur_device_get_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urDeviceGetSelected
/// @details Each entry is a pointer to the parameter passed to the function;
/// allowing the callback the ability to modify the parameter's value
typedef struct ur_device_get_selected_params_t {
ur_platform_handle_t *phPlatform; ///< points at the hPlatform argument
ur_device_type_t *pDeviceType; ///< points at the DeviceType argument
uint32_t *pNumEntries; ///< points at the NumEntries argument
ur_device_handle_t **pphDevices; ///< points at the phDevices argument (itself a pointer, may be NULL)
uint32_t **ppNumDevices; ///< points at the pNumDevices argument (itself a pointer, may be NULL)
} ur_device_get_selected_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urDeviceGetInfo
/// @details Each entry is a pointer to the parameter passed to the function;
Expand Down
2 changes: 2 additions & 0 deletions include/ur_ddi.h
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetKernelProcAddrTable_t)(
/// @brief Function-pointer for urKernelSuggestMaxCooperativeGroupCountExp
/// @details Parameter order mirrors the urKernelSuggestMaxCooperativeGroupCountExp
/// declaration: hKernel, localWorkSize, dynamicSharedMemorySize, pGroupCountRet.
typedef ur_result_t(UR_APICALL *ur_pfnKernelSuggestMaxCooperativeGroupCountExp_t)(
ur_kernel_handle_t,
size_t,
size_t,
uint32_t *);

///////////////////////////////////////////////////////////////////////////////
Expand Down
8 changes: 8 additions & 0 deletions include/ur_print.h
Original file line number Diff line number Diff line change
Expand Up @@ -2450,6 +2450,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintVirtualMemGetInfoParams(const struct
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintDeviceGetParams(const struct ur_device_get_params_t *params, char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_device_get_selected_params_t struct
/// @details
/// NOTE(review): presumably the textual form is written into `buffer`
/// (capacity `buff_size`) and the required length is reported through
/// `out_size` - confirm against the implementation.
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_INVALID_SIZE
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintDeviceGetSelectedParams(const struct ur_device_get_selected_params_t *params, char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_device_get_info_params_t struct
/// @returns
Expand Down
58 changes: 58 additions & 0 deletions include/ur_print.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -909,6 +909,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) {
case UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP:
os << "UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP";
break;
case UR_FUNCTION_DEVICE_GET_SELECTED:
os << "UR_FUNCTION_DEVICE_GET_SELECTED";
break;
default:
os << "unknown enumerator";
break;
Expand Down Expand Up @@ -11399,6 +11402,16 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
ur::details::printPtr(os,
*(params->phKernel));

os << ", ";
os << ".localWorkSize = ";

os << *(params->plocalWorkSize);

os << ", ";
os << ".dynamicSharedMemorySize = ";

os << *(params->pdynamicSharedMemorySize);

os << ", ";
os << ".pGroupCountRet = ";

Expand Down Expand Up @@ -16282,6 +16295,48 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
return os;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Print operator for the ur_device_get_selected_params_t type
/// @returns
/// std::ostream &
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_device_get_selected_params_t *params) {

os << ".hPlatform = ";

ur::details::printPtr(os,
*(params->phPlatform));

os << ", ";
os << ".DeviceType = ";

os << *(params->pDeviceType);

os << ", ";
os << ".NumEntries = ";

os << *(params->pNumEntries);

os << ", ";
os << ".phDevices = {";
for (size_t i = 0; *(params->pphDevices) != NULL && i < *params->pNumEntries; ++i) {
if (i != 0) {
os << ", ";
}

ur::details::printPtr(os,
(*(params->pphDevices))[i]);
}
os << "}";

os << ", ";
os << ".pNumDevices = ";

ur::details::printPtr(os,
*(params->ppNumDevices));

return os;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Print operator for the ur_device_get_info_params_t type
/// @returns
Expand Down Expand Up @@ -17080,6 +17135,9 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_
case UR_FUNCTION_DEVICE_GET: {
os << (const struct ur_device_get_params_t *)params;
} break;
case UR_FUNCTION_DEVICE_GET_SELECTED: {
os << (const struct ur_device_get_selected_params_t *)params;
} break;
case UR_FUNCTION_DEVICE_GET_INFO: {
os << (const struct ur_device_get_info_params_t *)params;
} break;
Expand Down
39 changes: 39 additions & 0 deletions scripts/core/device.yml
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,45 @@ returns:
- "`NumEntries > 0 && phDevices == NULL`"
- $X_RESULT_ERROR_INVALID_VALUE
--- #--------------------------------------------------------------------------
type: function
desc: "Retrieves devices within a platform selected by ONEAPI_DEVICE_SELECTOR"
class: $xDevice
loader_only: True
name: GetSelected
decl: static
ordinal: "0"
details:
- "Multiple calls to this function will return identical device handles, in the same order."
- "The number and order of handles returned from this function will be affected by environment variables that filter or select which devices are exposed through this API."
- "A reference is taken for each returned device and must be released with a subsequent call to $xDeviceRelease."
- "The application may call this function from simultaneous threads, the implementation must be thread-safe."
params:
- type: $x_platform_handle_t
name: hPlatform
desc: "[in] handle of the platform instance"
- type: "$x_device_type_t"
name: DeviceType
desc: |
[in] the type of the devices.
- type: "uint32_t"
name: NumEntries
desc: |
[in] the number of devices to be added to phDevices.
If phDevices is not NULL then NumEntries should be greater than zero, otherwise $X_RESULT_ERROR_INVALID_VALUE,
will be returned.
- type: "$x_device_handle_t*"
name: phDevices
desc: |
[out][optional][range(0, NumEntries)] array of handle of devices.
If NumEntries is less than the number of devices available, then only that number of devices will be retrieved.
- type: "uint32_t*"
name: pNumDevices
desc: |
[out][optional] pointer to the number of devices.
pNumDevices will be updated with the total number of selected devices available for the given platform.
returns:
- $X_RESULT_ERROR_INVALID_VALUE
--- #--------------------------------------------------------------------------
type: enum
desc: "Supported device info"
class: $xDevice
Expand Down
6 changes: 6 additions & 0 deletions scripts/core/exp-cooperative-kernels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ params:
- type: $x_kernel_handle_t
name: hKernel
desc: "[in] handle of the kernel object"
- type: size_t
name: localWorkSize
desc: "[in] number of local work-items that will form a work-group when the kernel is launched"
- type: size_t
name: dynamicSharedMemorySize
desc: "[in] size of dynamic shared memory, for each work-group, in bytes, that will be used when the kernel is launched"
- type: "uint32_t*"
name: "pGroupCountRet"
desc: "[out] pointer to maximum number of groups"
Expand Down
3 changes: 3 additions & 0 deletions scripts/core/registry.yml
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,9 @@ etors:
- name: COMMAND_BUFFER_COMMAND_GET_INFO_EXP
desc: Enumerator for $xCommandBufferCommandGetInfoExp
value: '219'
- name: DEVICE_GET_SELECTED
desc: Enumerator for $xDeviceGetSelected
value: '220'
---
type: enum
desc: Defines structure types
Expand Down
17 changes: 14 additions & 3 deletions source/adapters/cuda/enqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,13 +245,14 @@ setKernelParams(const ur_context_handle_t Context,
return UR_RESULT_SUCCESS;
};

size_t KernelLocalWorkGroupSize = 0;
size_t KernelLocalWorkGroupSize = 1;
for (size_t Dim = 0; Dim < WorkDim; Dim++) {
auto Err = IsValid(Dim);
if (Err != UR_RESULT_SUCCESS)
return Err;
// If no error then sum the total local work size per dim.
KernelLocalWorkGroupSize += LocalWorkSize[Dim];
// If no error then compute the total local work size as a product of
// all dims.
KernelLocalWorkGroupSize *= LocalWorkSize[Dim];
}

if (hasExceededMaxRegistersPerBlock(Device, Kernel,
Expand Down Expand Up @@ -493,6 +494,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
return Result;
}

/// Enqueues a cooperative kernel launch.
///
/// Every argument is forwarded unchanged to urEnqueueKernelLaunch and its
/// result is returned as-is.
/// NOTE(review): no cooperative-specific launch path is taken here - confirm
/// that delegating to the regular launch is the intended behaviour.
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
return urEnqueueKernelLaunch(hQueue, hKernel, workDim, pGlobalWorkOffset,
pGlobalWorkSize, pLocalWorkSize,
numEventsInWaitList, phEventWaitList, phEvent);
}

/// Set parameters for general 3D memory copy.
/// If the source and/or destination is on the device, SrcPtr and/or DstPtr
/// must be a pointer to a CUdeviceptr
Expand Down
10 changes: 10 additions & 0 deletions source/adapters/cuda/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

/// Suggests the maximum number of work-groups for a cooperative launch of
/// hKernel.
///
/// This implementation does not inspect the kernel, the local work size or the
/// dynamic shared memory size; it always reports a single group.
///
/// @param hKernel                  kernel handle (unused).
/// @param localWorkSize            work-items per work-group (unused).
/// @param dynamicSharedMemorySize  dynamic shared memory per group in bytes
///                                 (unused).
/// @param pGroupCountRet           [out] receives the suggested group count.
/// @returns UR_RESULT_ERROR_INVALID_NULL_POINTER when pGroupCountRet is null,
///          UR_RESULT_SUCCESS otherwise.
UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
    ur_kernel_handle_t hKernel, size_t localWorkSize,
    size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet) {
  (void)hKernel;
  (void)localWorkSize;
  (void)dynamicSharedMemorySize;
  // The out-parameter is mandatory; report a null pointer instead of
  // dereferencing it.
  if (pGroupCountRet == nullptr) {
    return UR_RESULT_ERROR_INVALID_NULL_POINTER;
  }
  *pGroupCountRet = 1;
  return UR_RESULT_SUCCESS;
}

UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue(
ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize,
const ur_kernel_arg_value_properties_t *pProperties,
Expand Down
6 changes: 4 additions & 2 deletions source/adapters/cuda/ur_interface_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
return result;
}

pDdiTable->pfnCooperativeKernelLaunchExp = nullptr;
pDdiTable->pfnCooperativeKernelLaunchExp =
urEnqueueCooperativeKernelLaunchExp;

return UR_RESULT_SUCCESS;
}
Expand All @@ -416,7 +417,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable(
return result;
}

pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = nullptr;
pDdiTable->pfnSuggestMaxCooperativeGroupCountExp =
urKernelSuggestMaxCooperativeGroupCountExp;

return UR_RESULT_SUCCESS;
}
Expand Down
10 changes: 10 additions & 0 deletions source/adapters/hip/enqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
return Result;
}

/// Enqueues a cooperative kernel launch.
///
/// Every argument is forwarded unchanged to urEnqueueKernelLaunch and its
/// result is returned as-is.
/// NOTE(review): no cooperative-specific launch path is taken here - confirm
/// that delegating to the regular launch is the intended behaviour.
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
return urEnqueueKernelLaunch(hQueue, hKernel, workDim, pGlobalWorkOffset,
pGlobalWorkSize, pLocalWorkSize,
numEventsInWaitList, phEventWaitList, phEvent);
}

/// Enqueues a wait on the given queue for all events.
/// See \ref enqueueEventWait
///
Expand Down
10 changes: 10 additions & 0 deletions source/adapters/hip/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,16 @@ urKernelGetNativeHandle(ur_kernel_handle_t, ur_native_handle_t *) {
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

/// Suggests the maximum number of work-groups for a cooperative launch of
/// hKernel.
///
/// This implementation does not inspect the kernel, the local work size or the
/// dynamic shared memory size; it always reports a single group.
///
/// @param hKernel                  kernel handle (unused).
/// @param localWorkSize            work-items per work-group (unused).
/// @param dynamicSharedMemorySize  dynamic shared memory per group in bytes
///                                 (unused).
/// @param pGroupCountRet           [out] receives the suggested group count.
/// @returns UR_RESULT_ERROR_INVALID_NULL_POINTER when pGroupCountRet is null,
///          UR_RESULT_SUCCESS otherwise.
UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
    ur_kernel_handle_t hKernel, size_t localWorkSize,
    size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet) {
  (void)hKernel;
  (void)localWorkSize;
  (void)dynamicSharedMemorySize;
  // The out-parameter is mandatory; report a null pointer instead of
  // dereferencing it.
  if (pGroupCountRet == nullptr) {
    return UR_RESULT_ERROR_INVALID_NULL_POINTER;
  }
  *pGroupCountRet = 1;
  return UR_RESULT_SUCCESS;
}

UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue(
ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize,
const ur_kernel_arg_value_properties_t *, const void *pArgValue) {
Expand Down
Loading

0 comments on commit a0bebf1

Please sign in to comment.