diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index c68556e92c..3469620b71 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -31,8 +31,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize( ze_kernel_handle_t ZeKernel{}; UR_CALL(getZeKernel(Legacy(hQueue)->Device->ZeDevice, hKernel, &ZeKernel)); - UR_CALL(getSuggestedLocalWorkSize(Legacy(hQueue), ZeKernel, GlobalWorkSize3D, - LocalWorkSize)); + UR_CALL(getSuggestedLocalWorkSize(Legacy(hQueue)->Device, ZeKernel, + GlobalWorkSize3D, LocalWorkSize)); std::copy(LocalWorkSize, LocalWorkSize + workDim, pSuggestedLocalWorkSize); return UR_RESULT_SUCCESS; @@ -54,52 +54,6 @@ ur_result_t getZeKernel(ze_device_handle_t hDevice, ur_kernel_handle_t hKernel, return UR_RESULT_SUCCESS; } -ur_result_t getSuggestedLocalWorkSize(ur_queue_handle_legacy_t hQueue, - ze_kernel_handle_t hZeKernel, - size_t GlobalWorkSize3D[3], - uint32_t SuggestedLocalWorkSize3D[3]) { - uint32_t *WG = SuggestedLocalWorkSize3D; - - // We can't call to zeKernelSuggestGroupSize if 64-bit GlobalWorkSize - // values do not fit to 32-bit that the API only supports currently. - bool SuggestGroupSize = true; - for (int I : {0, 1, 2}) { - if (GlobalWorkSize3D[I] > UINT32_MAX) { - SuggestGroupSize = false; - } - } - if (SuggestGroupSize) { - ZE2UR_CALL(zeKernelSuggestGroupSize, - (hZeKernel, GlobalWorkSize3D[0], GlobalWorkSize3D[1], - GlobalWorkSize3D[2], &WG[0], &WG[1], &WG[2])); - } else { - for (int I : {0, 1, 2}) { - // Try to find a I-dimension WG size that the GlobalWorkSize[I] is - // fully divisable with. Start with the max possible size in - // each dimension. - uint32_t GroupSize[] = { - hQueue->Device->ZeDeviceComputeProperties->maxGroupSizeX, - hQueue->Device->ZeDeviceComputeProperties->maxGroupSizeY, - hQueue->Device->ZeDeviceComputeProperties->maxGroupSizeZ}; - GroupSize[I] = (std::min)(size_t(GroupSize[I]), GlobalWorkSize3D[I]); - while (GlobalWorkSize3D[I] % GroupSize[I]) { - --GroupSize[I]; - } - if (GlobalWorkSize3D[I] / GroupSize[I] > UINT32_MAX) { - logger::error("getSuggestedLocalWorkSize: can't find a WG size " - "suitable for global work size > UINT32_MAX"); - return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; - } - WG[I] = GroupSize[I]; - } - logger::debug( - "getSuggestedLocalWorkSize: using computed WG size = {{{}, {}, {}}}", - WG[0], WG[1], WG[2]); - } - - return UR_RESULT_SUCCESS; -} - ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t WorkDim, ///< [in] number of dimensions, from 1 to 3, to specify diff --git a/source/adapters/level_zero/kernel.hpp b/source/adapters/level_zero/kernel.hpp index b708973f60..9444ff0084 100644 --- a/source/adapters/level_zero/kernel.hpp +++ b/source/adapters/level_zero/kernel.hpp @@ -108,41 +108,5 @@ struct ur_kernel_handle_t_ : _ur_object { ZeCache ZeKernelName; }; -ur_result_t getSuggestedLocalWorkSize(ur_queue_handle_legacy_t hQueue, - ze_kernel_handle_t hZeKernel, - size_t GlobalWorkSize3D[3], - uint32_t SuggestedLocalWorkSize3D[3]); ur_result_t getZeKernel(ze_device_handle_t hDevice, ur_kernel_handle_t hKernel, ze_kernel_handle_t *phZeKernel); - -/** - * Calculates a work group size for the kernel based on the GlobalWorkSize or - * the LocalWorkSize if provided. - * @param[in][optional] Kernel The Kernel. Used when LocalWorkSize is not - * provided. - * @param[in][optional] Device The device associated with the kernel. Used when - * LocalWorkSize is not provided. - * @param[out] ZeThreadGroupDimensions Number of work groups in each dimension. - * @param[out] WG The work group size for each dimension. - * @param[in] WorkDim The number of dimensions in the kernel. - * @param[in] GlobalWorkSize The global work size. - * @param[in][optional] LocalWorkSize The local work size. - * @return UR_RESULT_SUCCESS or an error code on failure. - */ -ur_result_t calculateKernelWorkDimensions( - ur_kernel_handle_t Kernel, ur_device_handle_t Device, - ze_group_count_t &ZeThreadGroupDimensions, uint32_t (&WG)[3], - uint32_t WorkDim, const size_t *GlobalWorkSize, - const size_t *LocalWorkSize); - -/** - * Sets the global offset for a kernel command that will be appended to the - * command buffer. - * @param[in] Context Context associated with the queue. - * @param[in] Kernel The handle to the kernel that will be appended. - * @param[in] GlobalWorkOffset The global offset value. - * @return UR_RESULT_SUCCESS or an error code on failure - */ -ur_result_t setKernelGlobalOffset(ur_context_handle_t Context, - ur_kernel_handle_t Kernel, - const size_t *GlobalWorkOffset);