Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OPENCL] Refactor ext functions caching #1325

Draft
wants to merge 19 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions source/adapters/opencl/adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@ urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters,
uint32_t *pNumAdapters) {
if (NumEntries > 0 && phAdapters) {
std::lock_guard<std::mutex> Lock{adapter.Mutex};
if (adapter.RefCount++ == 0) {
cl_ext::ExtFuncPtrCache = std::make_unique<cl_ext::ExtFuncPtrCacheT>();
}
adapter.RefCount++;

*phAdapters = &adapter;
}
Expand All @@ -43,9 +41,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) {

UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) {
std::lock_guard<std::mutex> Lock{adapter.Mutex};
if (--adapter.RefCount == 0) {
cl_ext::ExtFuncPtrCache.reset();
}
--adapter.RefCount;
return UR_RESULT_SUCCESS;
}

Expand Down
206 changes: 99 additions & 107 deletions source/adapters/opencl/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@

#include "command_buffer.hpp"
#include "common.hpp"
#include "context.hpp"
#include "event.hpp"
#include "kernel.hpp"
#include "memory.hpp"
#include "platform.hpp"
#include "queue.hpp"

UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
Expand All @@ -19,26 +25,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
ur_queue_handle_t Queue = nullptr;
UR_RETURN_ON_FAILURE(urQueueCreate(hContext, hDevice, nullptr, &Queue));

cl_context CLContext = cl_adapter::cast<cl_context>(hContext);
cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr;
cl_int Res =
cl_ext::getExtFuncFromContext<decltype(clCreateCommandBufferKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clCreateCommandBufferKHRCache,
cl_ext::CreateCommandBufferName, &clCreateCommandBufferKHR);
ur_platform_handle_t Platform = hDevice->Platform;
cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR =
Platform->ExtFuncPtr->clCreateCommandBufferKHRCache;
UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clCreateCommandBufferKHR,
cl_ext::CreateCommandBufferName,
"cl_khr_command_buffer"));

if (!clCreateCommandBufferKHR || Res != CL_SUCCESS)
return UR_RESULT_ERROR_INVALID_OPERATION;

auto CLCommandBuffer = clCreateCommandBufferKHR(
1, cl_adapter::cast<cl_command_queue *>(&Queue), nullptr, &Res);
cl_int Res = 0;
cl_command_queue CLQueue = Queue->get();
auto CLCommandBuffer = clCreateCommandBufferKHR(1, &CLQueue, nullptr, &Res);
CL_RETURN_ON_FAILURE_AND_SET_NULL(Res, phCommandBuffer);

try {
auto URCommandBuffer = std::make_unique<ur_exp_command_buffer_handle_t_>(
Queue, hContext, CLCommandBuffer);
*phCommandBuffer = URCommandBuffer.release();
} catch (...) {
} catch (std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}

CL_RETURN_ON_FAILURE(Res);
Expand All @@ -49,14 +55,12 @@ UR_APIEXPORT ur_result_t UR_APICALL
urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
UR_RETURN_ON_FAILURE(urQueueRetain(hCommandBuffer->hInternalQueue));

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_ext::clRetainCommandBufferKHR_fn clRetainCommandBuffer = nullptr;
cl_int Res = cl_ext::getExtFuncFromContext<decltype(clRetainCommandBuffer)>(
CLContext, cl_ext::ExtFuncPtrCache->clRetainCommandBufferKHRCache,
cl_ext::RetainCommandBufferName, &clRetainCommandBuffer);

if (!clRetainCommandBuffer || Res != CL_SUCCESS)
return UR_RESULT_ERROR_INVALID_OPERATION;
ur_platform_handle_t Platform = hCommandBuffer->getPlatform();
cl_ext::clRetainCommandBufferKHR_fn clRetainCommandBuffer =
Platform->ExtFuncPtr->clRetainCommandBufferKHRCache;
UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clRetainCommandBuffer,
cl_ext::RetainCommandBufferName,
"cl_khr_command_buffer"));

CL_RETURN_ON_FAILURE(clRetainCommandBuffer(hCommandBuffer->CLCommandBuffer));
return UR_RESULT_SUCCESS;
Expand All @@ -66,15 +70,12 @@ UR_APIEXPORT ur_result_t UR_APICALL
urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
UR_RETURN_ON_FAILURE(urQueueRelease(hCommandBuffer->hInternalQueue));

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr;
cl_int Res =
cl_ext::getExtFuncFromContext<decltype(clReleaseCommandBufferKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clReleaseCommandBufferKHRCache,
cl_ext::ReleaseCommandBufferName, &clReleaseCommandBufferKHR);

if (!clReleaseCommandBufferKHR || Res != CL_SUCCESS)
return UR_RESULT_ERROR_INVALID_OPERATION;
ur_platform_handle_t Platform = hCommandBuffer->getPlatform();
cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR =
Platform->ExtFuncPtr->clReleaseCommandBufferKHRCache;
UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clReleaseCommandBufferKHR,
cl_ext::ReleaseCommandBufferName,
"cl_khr_command_buffer"));

CL_RETURN_ON_FAILURE(
clReleaseCommandBufferKHR(hCommandBuffer->CLCommandBuffer));
Expand All @@ -83,15 +84,12 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) {

UR_APIEXPORT ur_result_t UR_APICALL
urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_ext::clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = nullptr;
cl_int Res =
cl_ext::getExtFuncFromContext<decltype(clFinalizeCommandBufferKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clFinalizeCommandBufferKHRCache,
cl_ext::FinalizeCommandBufferName, &clFinalizeCommandBufferKHR);

if (!clFinalizeCommandBufferKHR || Res != CL_SUCCESS)
return UR_RESULT_ERROR_INVALID_OPERATION;
ur_platform_handle_t Platform = hCommandBuffer->getPlatform();
cl_ext::clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR =
Platform->ExtFuncPtr->clFinalizeCommandBufferKHRCache;
UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clFinalizeCommandBufferKHR,
cl_ext::FinalizeCommandBufferName,
"cl_khr_command_buffer"));

CL_RETURN_ON_FAILURE(
clFinalizeCommandBufferKHR(hCommandBuffer->CLCommandBuffer));
Expand All @@ -107,21 +105,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
ur_exp_command_buffer_sync_point_t *pSyncPoint,
ur_exp_command_buffer_command_handle_t *) {

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr;
cl_int Res =
cl_ext::getExtFuncFromContext<decltype(clCommandNDRangeKernelKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clCommandNDRangeKernelKHRCache,
cl_ext::CommandNRRangeKernelName, &clCommandNDRangeKernelKHR);

if (!clCommandNDRangeKernelKHR || Res != CL_SUCCESS)
return UR_RESULT_ERROR_INVALID_OPERATION;
ur_platform_handle_t Platform = hCommandBuffer->getPlatform();
cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR =
Platform->ExtFuncPtr->clCommandNDRangeKernelKHRCache;
UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clCommandNDRangeKernelKHR,
cl_ext::CommandNRRangeKernelName,
"cl_khr_command_buffer"));

CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR(
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
cl_adapter::cast<cl_kernel>(hKernel), workDim, pGlobalWorkOffset,
pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList,
pSyncPointWaitList, pSyncPoint, nullptr));
hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hKernel->get(),
workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize,
numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr));

return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -155,18 +149,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint) {

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr;
cl_int Res = cl_ext::getExtFuncFromContext<decltype(clCommandCopyBufferKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferKHRCache,
cl_ext::CommandCopyBufferName, &clCommandCopyBufferKHR);

if (!clCommandCopyBufferKHR || Res != CL_SUCCESS)
return UR_RESULT_ERROR_INVALID_OPERATION;
ur_platform_handle_t Platform = hCommandBuffer->getPlatform();
cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR =
Platform->ExtFuncPtr->clCommandCopyBufferKHRCache;
UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clCommandCopyBufferKHR,
cl_ext::CommandCopyBufferName,
"cl_khr_command_buffer"));

CL_RETURN_ON_FAILURE(clCommandCopyBufferKHR(
hCommandBuffer->CLCommandBuffer, nullptr,
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
hCommandBuffer->CLCommandBuffer, nullptr, hSrcMem->get(), hDstMem->get(),
srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList,
pSyncPoint, nullptr));

Expand All @@ -191,19 +182,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
size_t OpenCLDstRect[3]{dstOrigin.x, dstOrigin.y, dstOrigin.z};
size_t OpenCLRegion[3]{region.width, region.height, region.depth};

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_ext::clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = nullptr;
cl_int Res =
cl_ext::getExtFuncFromContext<decltype(clCommandCopyBufferRectKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferRectKHRCache,
cl_ext::CommandCopyBufferRectName, &clCommandCopyBufferRectKHR);

if (!clCommandCopyBufferRectKHR || Res != CL_SUCCESS)
return UR_RESULT_ERROR_INVALID_OPERATION;
ur_platform_handle_t Platform = hCommandBuffer->getPlatform();
cl_ext::clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR =
Platform->ExtFuncPtr->clCommandCopyBufferRectKHRCache;
UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clCommandCopyBufferRectKHR,
cl_ext::CommandCopyBufferRectName,
"cl_khr_command_buffer"));

CL_RETURN_ON_FAILURE(clCommandCopyBufferRectKHR(
hCommandBuffer->CLCommandBuffer, nullptr,
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
hCommandBuffer->CLCommandBuffer, nullptr, hSrcMem->get(), hDstMem->get(),
OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, srcSlicePitch,
dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList,
pSyncPoint, nullptr));
Expand Down Expand Up @@ -281,19 +268,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint) {

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr;
cl_int Res = cl_ext::getExtFuncFromContext<decltype(clCommandFillBufferKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clCommandFillBufferKHRCache,
cl_ext::CommandFillBufferName, &clCommandFillBufferKHR);

if (!clCommandFillBufferKHR || Res != CL_SUCCESS)
return UR_RESULT_ERROR_INVALID_OPERATION;
ur_platform_handle_t Platform = hCommandBuffer->getPlatform();
cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR =
Platform->ExtFuncPtr->clCommandFillBufferKHRCache;
UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clCommandFillBufferKHR,
cl_ext::CommandFillBufferName,
"cl_khr_command_buffer"));

CL_RETURN_ON_FAILURE(clCommandFillBufferKHR(
hCommandBuffer->CLCommandBuffer, nullptr,
cl_adapter::cast<cl_mem>(hBuffer), pPattern, patternSize, offset, size,
numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr));
hCommandBuffer->CLCommandBuffer, nullptr, hBuffer->get(), pPattern,
patternSize, offset, size, numSyncPointsInWaitList, pSyncPointWaitList,
pSyncPoint, nullptr));

return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -337,24 +322,34 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
ur_event_handle_t *phEvent) {

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_ext::clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = nullptr;
cl_int Res =
cl_ext::getExtFuncFromContext<decltype(clEnqueueCommandBufferKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clEnqueueCommandBufferKHRCache,
cl_ext::EnqueueCommandBufferName, &clEnqueueCommandBufferKHR);

if (!clEnqueueCommandBufferKHR || Res != CL_SUCCESS)
return UR_RESULT_ERROR_INVALID_OPERATION;
ur_platform_handle_t Platform = hCommandBuffer->getPlatform();
cl_ext::clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR =
Platform->ExtFuncPtr->clEnqueueCommandBufferKHRCache;
UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clEnqueueCommandBufferKHR,
cl_ext::EnqueueCommandBufferName,
"cl_khr_command_buffer"));

const uint32_t NumberOfQueues = 1;

cl_event Event;
std::vector<cl_event> CLWaitEvents(numEventsInWaitList);
for (uint32_t i = 0; i < numEventsInWaitList; i++) {
CLWaitEvents[i] = phEventWaitList[i]->get();
}
cl_command_queue CLQueue = hQueue->get();
CL_RETURN_ON_FAILURE(clEnqueueCommandBufferKHR(
NumberOfQueues, cl_adapter::cast<cl_command_queue *>(&hQueue),
hCommandBuffer->CLCommandBuffer, numEventsInWaitList,
cl_adapter::cast<const cl_event *>(phEventWaitList),
cl_adapter::cast<cl_event *>(phEvent)));

NumberOfQueues, &CLQueue, hCommandBuffer->CLCommandBuffer,
numEventsInWaitList, CLWaitEvents.data(), &Event));
if (phEvent) {
try {
auto UREvent =
std::make_unique<ur_event_handle_t_>(Event, hQueue->Context, hQueue);
*phEvent = UREvent.release();
} catch (std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}
}
return UR_RESULT_SUCCESS;
}

Expand All @@ -380,15 +375,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp(
ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue,
size_t *pPropSizeRet) {

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_ext::clGetCommandBufferInfoKHR_fn clGetCommandBufferInfoKHR = nullptr;
cl_int Res =
cl_ext::getExtFuncFromContext<decltype(clGetCommandBufferInfoKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clGetCommandBufferInfoKHRCache,
cl_ext::GetCommandBufferInfoName, &clGetCommandBufferInfoKHR);

if (!clGetCommandBufferInfoKHR || Res != CL_SUCCESS)
return UR_RESULT_ERROR_INVALID_OPERATION;
ur_platform_handle_t Platform = hCommandBuffer->getPlatform();
cl_ext::clGetCommandBufferInfoKHR_fn clGetCommandBufferInfoKHR =
Platform->ExtFuncPtr->clGetCommandBufferInfoKHRCache;
UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clGetCommandBufferInfoKHR,
cl_ext::GetCommandBufferInfoName,
"cl_khr_command_buffer"));

if (propName != UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT) {
return UR_RESULT_ERROR_INVALID_ENUMERATION;
Expand Down
4 changes: 4 additions & 0 deletions source/adapters/opencl/command_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include <CL/cl_ext.h>
#include <ur/ur.hpp>

#include "context.hpp"

struct ur_exp_command_buffer_handle_t_ {
ur_queue_handle_t hInternalQueue;
ur_context_handle_t hContext;
Expand All @@ -21,4 +23,6 @@ struct ur_exp_command_buffer_handle_t_ {
cl_command_buffer_khr CLCommandBuffer)
: hInternalQueue(hQueue), hContext(hContext),
CLCommandBuffer(CLCommandBuffer) {}

ur_platform_handle_t getPlatform() { return hContext->Devices[0]->Platform; }
};
Loading
Loading