Skip to content

Commit

Permalink
Merge pull request #1974 from igchor/remove_duplicate_decl
Browse files Browse the repository at this point in the history
[L0] remove leftover, duplicated getSuggestedLocalWorkSize implementation
  • Loading branch information
igchor authored Aug 21, 2024
2 parents 2184e80 + b4195c2 commit af18099
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 84 deletions.
50 changes: 2 additions & 48 deletions source/adapters/level_zero/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize(
ze_kernel_handle_t ZeKernel{};
UR_CALL(getZeKernel(Legacy(hQueue)->Device->ZeDevice, hKernel, &ZeKernel));

UR_CALL(getSuggestedLocalWorkSize(Legacy(hQueue), ZeKernel, GlobalWorkSize3D,
LocalWorkSize));
UR_CALL(getSuggestedLocalWorkSize(Legacy(hQueue)->Device, ZeKernel,
GlobalWorkSize3D, LocalWorkSize));

std::copy(LocalWorkSize, LocalWorkSize + workDim, pSuggestedLocalWorkSize);
return UR_RESULT_SUCCESS;
Expand All @@ -54,52 +54,6 @@ ur_result_t getZeKernel(ze_device_handle_t hDevice, ur_kernel_handle_t hKernel,
return UR_RESULT_SUCCESS;
}

/**
 * Computes a suggested work-group (local) size for each of the three
 * dimensions of a kernel launch.
 *
 * If every global size fits into 32 bits the decision is delegated to
 * zeKernelSuggestGroupSize. Otherwise a size is computed per dimension: the
 * largest value that does not exceed the device's maximum group size for that
 * dimension and evenly divides the global size.
 *
 * @param[in] hQueue Queue whose device supplies the maximum group sizes
 * (hQueue->Device->ZeDeviceComputeProperties); only read in the fallback path.
 * @param[in] hZeKernel Level Zero kernel handle passed to
 * zeKernelSuggestGroupSize.
 * @param[in] GlobalWorkSize3D Global work size for each of the 3 dimensions.
 * @param[out] SuggestedLocalWorkSize3D Receives the suggested work-group size
 * for each of the 3 dimensions.
 * @return UR_RESULT_SUCCESS on success;
 * UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE if no usable size exists in the
 * fallback path. NOTE(review): ZE2UR_CALL is defined elsewhere and is expected
 * to return a translated error if the Level Zero call fails - confirm.
 */
ur_result_t getSuggestedLocalWorkSize(ur_queue_handle_legacy_t hQueue,
                                      ze_kernel_handle_t hZeKernel,
                                      size_t GlobalWorkSize3D[3],
                                      uint32_t SuggestedLocalWorkSize3D[3]) {
  uint32_t *WG = SuggestedLocalWorkSize3D;

  // zeKernelSuggestGroupSize currently only accepts 32-bit global sizes, so
  // it can be used only when all three 64-bit values fit into 32 bits.
  bool SuggestGroupSize = true;
  for (int I : {0, 1, 2}) {
    if (GlobalWorkSize3D[I] > UINT32_MAX) {
      SuggestGroupSize = false;
    }
  }

  if (SuggestGroupSize) {
    ZE2UR_CALL(zeKernelSuggestGroupSize,
               (hZeKernel, GlobalWorkSize3D[0], GlobalWorkSize3D[1],
                GlobalWorkSize3D[2], &WG[0], &WG[1], &WG[2]));
    return UR_RESULT_SUCCESS;
  }

  // Fallback: the device limits do not change between dimensions, so read
  // them once instead of on every loop iteration.
  const uint32_t MaxGroupSize[3] = {
      hQueue->Device->ZeDeviceComputeProperties->maxGroupSizeX,
      hQueue->Device->ZeDeviceComputeProperties->maxGroupSizeY,
      hQueue->Device->ZeDeviceComputeProperties->maxGroupSizeZ};

  for (int I : {0, 1, 2}) {
    // Try to find an I-dimension WG size that GlobalWorkSize3D[I] is fully
    // divisible by. Start with the max possible size in this dimension.
    // The result of min() never exceeds MaxGroupSize[I], so it fits uint32_t.
    uint32_t GroupSize = static_cast<uint32_t>(
        (std::min)(size_t(MaxGroupSize[I]), GlobalWorkSize3D[I]));

    // A zero global size (or a zero device limit) would make the modulo
    // below a division by zero, which is undefined behaviour.
    if (GroupSize == 0) {
      logger::error("getSuggestedLocalWorkSize: can't find a WG size "
                    "for a zero global work size or zero max group size");
      return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
    }

    while (GlobalWorkSize3D[I] % GroupSize) {
      --GroupSize;
    }

    // The resulting number of groups must itself fit into 32 bits.
    if (GlobalWorkSize3D[I] / GroupSize > UINT32_MAX) {
      logger::error("getSuggestedLocalWorkSize: can't find a WG size "
                    "suitable for global work size > UINT32_MAX");
      return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
    }
    WG[I] = GroupSize;
  }

  logger::debug(
      "getSuggestedLocalWorkSize: using computed WG size = {{{}, {}, {}}}",
      WG[0], WG[1], WG[2]);

  return UR_RESULT_SUCCESS;
}

ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch(
ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object
uint32_t WorkDim, ///< [in] number of dimensions, from 1 to 3, to specify
Expand Down
36 changes: 0 additions & 36 deletions source/adapters/level_zero/kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,41 +108,5 @@ struct ur_kernel_handle_t_ : _ur_object {
ZeCache<std::string> ZeKernelName;
};

/**
 * Computes a suggested work group size for each of the three dimensions.
 * Uses zeKernelSuggestGroupSize when all global sizes fit into 32 bits;
 * otherwise derives a size from the maximum group sizes of the queue's device.
 * @param[in] hQueue Queue whose device provides the maximum group sizes.
 * @param[in] hZeKernel The Level Zero kernel handle.
 * @param[in] GlobalWorkSize3D The global work size for each dimension.
 * @param[out] SuggestedLocalWorkSize3D The suggested work group size for each
 * dimension.
 * @return UR_RESULT_SUCCESS or an error code on failure.
 */
ur_result_t getSuggestedLocalWorkSize(ur_queue_handle_legacy_t hQueue,
ze_kernel_handle_t hZeKernel,
size_t GlobalWorkSize3D[3],
uint32_t SuggestedLocalWorkSize3D[3]);
/**
 * NOTE(review): the definition is not fully visible here; presumably looks up
 * the Level Zero kernel handle that corresponds to hKernel on the given
 * device - confirm against the implementation in kernel.cpp.
 * @param[in] hDevice The Level Zero device handle.
 * @param[in] hKernel The UR kernel handle.
 * @param[out] phZeKernel Receives the Level Zero kernel handle.
 * @return UR_RESULT_SUCCESS or an error code on failure.
 */
ur_result_t getZeKernel(ze_device_handle_t hDevice, ur_kernel_handle_t hKernel,
ze_kernel_handle_t *phZeKernel);

/**
* Calculates a work group size for the kernel based on the GlobalWorkSize or
* the LocalWorkSize if provided.
* @param[in][optional] Kernel The Kernel. Used when LocalWorkSize is not
* provided.
* @param[in][optional] Device The device associated with the kernel. Used when
* LocalWorkSize is not provided.
* @param[out] ZeThreadGroupDimensions Number of work groups in each dimension.
* @param[out] WG The work group size for each dimension.
* @param[in] WorkDim The number of dimensions in the kernel.
* @param[in] GlobalWorkSize The global work size.
* @param[in][optional] LocalWorkSize The local work size.
* @return UR_RESULT_SUCCESS or an error code on failure.
*/
ur_result_t calculateKernelWorkDimensions(
ur_kernel_handle_t Kernel, ur_device_handle_t Device,
ze_group_count_t &ZeThreadGroupDimensions, uint32_t (&WG)[3],
uint32_t WorkDim, const size_t *GlobalWorkSize,
const size_t *LocalWorkSize);

/**
* Sets the global offset for a kernel command that will be appended to the
* command buffer.
* @param[in] Context Context associated with the queue.
* @param[in] Kernel The handle to the kernel that will be appended.
* @param[in] GlobalWorkOffset The global offset value.
* @return UR_RESULT_SUCCESS or an error code on failure
*/
ur_result_t setKernelGlobalOffset(ur_context_handle_t Context,
ur_kernel_handle_t Kernel,
const size_t *GlobalWorkOffset);

0 comments on commit af18099

Please sign in to comment.