Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OPENCL] Add UR handles to OPENCL adapter #1176

Open
wants to merge 19 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 47 additions & 31 deletions source/adapters/opencl/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@

#include "command_buffer.hpp"
#include "common.hpp"
#include "context.hpp"
#include "event.hpp"
#include "kernel.hpp"
#include "memory.hpp"
#include "queue.hpp"

UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
Expand All @@ -19,7 +24,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
ur_queue_handle_t Queue = nullptr;
UR_RETURN_ON_FAILURE(urQueueCreate(hContext, hDevice, nullptr, &Queue));

cl_context CLContext = cl_adapter::cast<cl_context>(hContext);
cl_context CLContext = hContext->get();
cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr;
cl_int Res =
cl_ext::getExtFuncFromContext<decltype(clCreateCommandBufferKHR)>(
Expand All @@ -28,17 +33,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(

if (!clCreateCommandBufferKHR || Res != CL_SUCCESS)
return UR_RESULT_ERROR_INVALID_OPERATION;

auto CLCommandBuffer = clCreateCommandBufferKHR(
1, cl_adapter::cast<cl_command_queue *>(&Queue), nullptr, &Res);
cl_command_queue CLQueue = Queue->get();
auto CLCommandBuffer = clCreateCommandBufferKHR(1, &CLQueue, nullptr, &Res);
CL_RETURN_ON_FAILURE_AND_SET_NULL(Res, phCommandBuffer);

try {
auto URCommandBuffer = std::make_unique<ur_exp_command_buffer_handle_t_>(
Queue, hContext, CLCommandBuffer);
*phCommandBuffer = URCommandBuffer.release();
} catch (...) {
} catch (std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}

CL_RETURN_ON_FAILURE(Res);
Expand All @@ -49,7 +55,7 @@ UR_APIEXPORT ur_result_t UR_APICALL
urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
UR_RETURN_ON_FAILURE(urQueueRetain(hCommandBuffer->hInternalQueue));

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->get();
cl_ext::clRetainCommandBufferKHR_fn clRetainCommandBuffer = nullptr;
cl_int Res = cl_ext::getExtFuncFromContext<decltype(clRetainCommandBuffer)>(
CLContext, cl_ext::ExtFuncPtrCache->clRetainCommandBufferKHRCache,
Expand All @@ -66,7 +72,7 @@ UR_APIEXPORT ur_result_t UR_APICALL
urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
UR_RETURN_ON_FAILURE(urQueueRelease(hCommandBuffer->hInternalQueue));

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->get();
cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr;
cl_int Res =
cl_ext::getExtFuncFromContext<decltype(clReleaseCommandBufferKHR)>(
Expand All @@ -83,7 +89,7 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) {

UR_APIEXPORT ur_result_t UR_APICALL
urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->get();
cl_ext::clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = nullptr;
cl_int Res =
cl_ext::getExtFuncFromContext<decltype(clFinalizeCommandBufferKHR)>(
Expand All @@ -107,7 +113,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
ur_exp_command_buffer_sync_point_t *pSyncPoint,
ur_exp_command_buffer_command_handle_t *) {

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->get();
cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr;
cl_int Res =
cl_ext::getExtFuncFromContext<decltype(clCommandNDRangeKernelKHR)>(
Expand All @@ -118,10 +124,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
return UR_RESULT_ERROR_INVALID_OPERATION;

CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR(
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
cl_adapter::cast<cl_kernel>(hKernel), workDim, pGlobalWorkOffset,
pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList,
pSyncPointWaitList, pSyncPoint, nullptr));
hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hKernel->get(),
workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize,
numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr));

return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -155,7 +160,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint) {

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->get();
cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr;
cl_int Res = cl_ext::getExtFuncFromContext<decltype(clCommandCopyBufferKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferKHRCache,
Expand All @@ -165,8 +170,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
return UR_RESULT_ERROR_INVALID_OPERATION;

CL_RETURN_ON_FAILURE(clCommandCopyBufferKHR(
hCommandBuffer->CLCommandBuffer, nullptr,
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
hCommandBuffer->CLCommandBuffer, nullptr, hSrcMem->get(), hDstMem->get(),
srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList,
pSyncPoint, nullptr));

Expand All @@ -191,7 +195,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
size_t OpenCLDstRect[3]{dstOrigin.x, dstOrigin.y, dstOrigin.z};
size_t OpenCLRegion[3]{region.width, region.height, region.depth};

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->get();
cl_ext::clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = nullptr;
cl_int Res =
cl_ext::getExtFuncFromContext<decltype(clCommandCopyBufferRectKHR)>(
Expand All @@ -202,8 +206,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
return UR_RESULT_ERROR_INVALID_OPERATION;

CL_RETURN_ON_FAILURE(clCommandCopyBufferRectKHR(
hCommandBuffer->CLCommandBuffer, nullptr,
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
hCommandBuffer->CLCommandBuffer, nullptr, hSrcMem->get(), hDstMem->get(),
OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, srcSlicePitch,
dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList,
pSyncPoint, nullptr));
Expand Down Expand Up @@ -281,7 +284,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint) {

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->get();
cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr;
cl_int Res = cl_ext::getExtFuncFromContext<decltype(clCommandFillBufferKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clCommandFillBufferKHRCache,
Expand All @@ -291,9 +294,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
return UR_RESULT_ERROR_INVALID_OPERATION;

CL_RETURN_ON_FAILURE(clCommandFillBufferKHR(
hCommandBuffer->CLCommandBuffer, nullptr,
cl_adapter::cast<cl_mem>(hBuffer), pPattern, patternSize, offset, size,
numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr));
hCommandBuffer->CLCommandBuffer, nullptr, hBuffer->get(), pPattern,
patternSize, offset, size, numSyncPointsInWaitList, pSyncPointWaitList,
pSyncPoint, nullptr));

return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -337,7 +340,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
ur_event_handle_t *phEvent) {

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->get();
cl_ext::clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = nullptr;
cl_int Res =
cl_ext::getExtFuncFromContext<decltype(clEnqueueCommandBufferKHR)>(
Expand All @@ -348,13 +351,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
return UR_RESULT_ERROR_INVALID_OPERATION;

const uint32_t NumberOfQueues = 1;

cl_event Event;
std::vector<cl_event> CLWaitEvents(numEventsInWaitList);
for (uint32_t i = 0; i < numEventsInWaitList; i++) {
CLWaitEvents[i] = phEventWaitList[i]->get();
}
cl_command_queue CLQueue = hQueue->get();
CL_RETURN_ON_FAILURE(clEnqueueCommandBufferKHR(
NumberOfQueues, cl_adapter::cast<cl_command_queue *>(&hQueue),
hCommandBuffer->CLCommandBuffer, numEventsInWaitList,
cl_adapter::cast<const cl_event *>(phEventWaitList),
cl_adapter::cast<cl_event *>(phEvent)));

NumberOfQueues, &CLQueue, hCommandBuffer->CLCommandBuffer,
numEventsInWaitList, CLWaitEvents.data(), &Event));
if (phEvent) {
try {
auto UREvent =
std::make_unique<ur_event_handle_t_>(Event, hQueue->Context, hQueue);
*phEvent = UREvent.release();
} catch (std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}
}
return UR_RESULT_SUCCESS;
}

Expand All @@ -380,7 +396,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp(
ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue,
size_t *pPropSizeRet) {

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->get();
cl_ext::clGetCommandBufferInfoKHR_fn clGetCommandBufferInfoKHR = nullptr;
cl_int Res =
cl_ext::getExtFuncFromContext<decltype(clGetCommandBufferInfoKHR)>(
Expand Down
116 changes: 44 additions & 72 deletions source/adapters/opencl/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,67 +14,38 @@
#include <set>
#include <unordered_map>

/// Collects the OpenCL device ids attached to a UR context.
///
/// \param hContext      UR context handle; its native cl_context is obtained
///                      through get(), as the other entry points in this file
///                      do.
/// \param DevicesInCtx  [out] Reassigned to a freshly allocated vector sized
///                      to the device count reported by the context and
///                      filled by the CL_CONTEXT_DEVICES query.
/// \return UR_RESULT_ERROR_INVALID_CONTEXT when the context reports no
///         devices, UR_RESULT_SUCCESS when both queries succeed; a failing
///         clGetContextInfo call exits early through CL_RETURN_ON_FAILURE.
ur_result_t cl_adapter::getDevicesFromContext(
    ur_context_handle_t hContext,
    std::unique_ptr<std::vector<cl_device_id>> &DevicesInCtx) {

  // Unwrap the native handle once instead of casting the UR handle itself:
  // the UR handle is treated as a wrapper object throughout this file, so a
  // direct cast to cl_context would hand OpenCL the wrong pointer.
  cl_context CLContext = hContext->get();

  // Initialised so a misbehaving query can never leave an indeterminate count.
  cl_uint DeviceCount = 0;
  CL_RETURN_ON_FAILURE(clGetContextInfo(CLContext, CL_CONTEXT_NUM_DEVICES,
                                        sizeof(cl_uint), &DeviceCount,
                                        nullptr));

  if (DeviceCount < 1) {
    return UR_RESULT_ERROR_INVALID_CONTEXT;
  }

  DevicesInCtx = std::make_unique<std::vector<cl_device_id>>(DeviceCount);

  CL_RETURN_ON_FAILURE(clGetContextInfo(
      CLContext, CL_CONTEXT_DEVICES, DeviceCount * sizeof(cl_device_id),
      DevicesInCtx->data(), nullptr));

  return UR_RESULT_SUCCESS;
}

/// Creates a UR context wrapping a new OpenCL context for the given devices.
///
/// \param DeviceCount  Number of entries in phDevices.
/// \param phDevices    UR device handles; each is unwrapped with get() to
///                     build the cl_device_id list passed to clCreateContext.
/// \param phContext    [out] Receives the newly allocated UR context object.
/// \return UR_RESULT_SUCCESS on success, UR_RESULT_ERROR_OUT_OF_RESOURCES when
///         an allocation throws std::bad_alloc, UR_RESULT_ERROR_UNKNOWN for
///         any other exception; a failing clCreateContext exits early through
///         CL_RETURN_ON_FAILURE.
UR_APIEXPORT ur_result_t UR_APICALL urContextCreate(
    uint32_t DeviceCount, const ur_device_handle_t *phDevices,
    const ur_context_properties_t *, ur_context_handle_t *phContext) {

  // Declared outside the try block so the handlers can release it if the
  // wrapper object could not be constructed.
  cl_context Ctx = nullptr;

  try {
    // The vector allocation lives inside the try block: it can throw
    // std::bad_alloc, which must not escape this C entry point.
    std::vector<cl_device_id> CLDevices(DeviceCount);
    for (uint32_t i = 0; i < DeviceCount; i++) {
      CLDevices[i] = phDevices[i]->get();
    }

    cl_int Ret = CL_SUCCESS;
    Ctx = clCreateContext(nullptr, cl_adapter::cast<cl_uint>(DeviceCount),
                          CLDevices.data(), nullptr, nullptr, &Ret);
    CL_RETURN_ON_FAILURE(Ret);

    auto URContext =
        std::make_unique<ur_context_handle_t_>(Ctx, DeviceCount, phDevices);
    *phContext = URContext.release();
  } catch (std::bad_alloc &) {
    // No wrapper owns Ctx when construction throws, so drop the reference
    // here rather than leaking the OpenCL context.
    if (Ctx) {
      clReleaseContext(Ctx);
    }
    return UR_RESULT_ERROR_OUT_OF_RESOURCES;
  } catch (...) {
    if (Ctx) {
      clReleaseContext(Ctx);
    }
    return UR_RESULT_ERROR_UNKNOWN;
  }

  return UR_RESULT_SUCCESS;
}

/// Translates a UR context info enumerator into the OpenCL context info
/// enumerator of the same meaning.
///
/// \param URPropName  UR property being queried.
/// \return CL_CONTEXT_NUM_DEVICES, CL_CONTEXT_DEVICES or
///         CL_CONTEXT_REFERENCE_COUNT for the three handled properties, and
///         -1 for every other value.
static cl_int mapURContextInfoToCL(ur_context_info_t URPropName) {

  switch (URPropName) {
  case UR_CONTEXT_INFO_NUM_DEVICES:
    return CL_CONTEXT_NUM_DEVICES;
  case UR_CONTEXT_INFO_DEVICES:
    return CL_CONTEXT_DEVICES;
  case UR_CONTEXT_INFO_REFERENCE_COUNT:
    return CL_CONTEXT_REFERENCE_COUNT;
  default:
    return -1;
  }
}

UR_APIEXPORT ur_result_t UR_APICALL
urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName,
size_t propSize, void *pPropValue, size_t *pPropSizeRet) {

UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
const cl_int CLPropName = mapURContextInfoToCL(propName);

switch (static_cast<uint32_t>(propName)) {
/* 2D USM memops are not supported. */
Expand All @@ -90,21 +61,14 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName,
* queries of each device separately and building the intersection set. */
return UR_RESULT_ERROR_INVALID_ARGUMENT;
}
case UR_CONTEXT_INFO_NUM_DEVICES:
case UR_CONTEXT_INFO_DEVICES:
case UR_CONTEXT_INFO_NUM_DEVICES: {
return ReturnValue(hContext->DeviceCount);
}
case UR_CONTEXT_INFO_DEVICES: {
return ReturnValue(&hContext->Devices[0], hContext->DeviceCount);
}
case UR_CONTEXT_INFO_REFERENCE_COUNT: {
size_t CheckPropSize = 0;
auto ClResult =
clGetContextInfo(cl_adapter::cast<cl_context>(hContext), CLPropName,
propSize, pPropValue, &CheckPropSize);
if (pPropValue && CheckPropSize != propSize) {
return UR_RESULT_ERROR_INVALID_SIZE;
}
CL_RETURN_ON_FAILURE(ClResult);
if (pPropSizeRet) {
*pPropSizeRet = CheckPropSize;
}
return UR_RESULT_SUCCESS;
return ReturnValue(hContext->getReferenceCount());
}
default:
return UR_RESULT_ERROR_INVALID_ENUMERATION;
Expand All @@ -113,34 +77,42 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName,

/// Drops one reference from a UR context.
///
/// \param hContext  UR context handle to release.
/// \return UR_RESULT_SUCCESS unless clReleaseContext fails, in which case
///         CL_RETURN_ON_FAILURE exits early.
UR_APIEXPORT ur_result_t UR_APICALL
urContextRelease(ur_context_handle_t hContext) {
  if (hContext->decrementReferenceCount() == 0) {
    // Last UR reference: destroy the wrapper object.
    // NOTE(review): presumably the wrapper's destructor releases the final
    // native reference -- confirm in context.hpp.
    delete hContext;
  } else {
    // urContextRetain takes one native reference per UR reference, so give
    // the matching one back here.
    CL_RETURN_ON_FAILURE(clReleaseContext(hContext->get()));
  }
  return UR_RESULT_SUCCESS;
}

/// Adds one reference to a UR context.
///
/// \param hContext  UR context handle to retain.
/// \return UR_RESULT_SUCCESS unless clRetainContext fails, in which case
///         CL_RETURN_ON_FAILURE exits early and the UR-level count is left
///         untouched.
UR_APIEXPORT ur_result_t UR_APICALL
urContextRetain(ur_context_handle_t hContext) {
  // Retain the native context first so the UR-level count is only bumped
  // when the OpenCL runtime accepted the extra reference.
  CL_RETURN_ON_FAILURE(clRetainContext(hContext->get()));
  hContext->incrementReferenceCount();
  return UR_RESULT_SUCCESS;
}

/// Exposes the native OpenCL context behind a UR context handle.
///
/// \param hContext         UR context handle to unwrap.
/// \param phNativeContext  [out] Receives the cl_context reinterpreted as a
///                         ur_native_handle_t.
/// \return Always UR_RESULT_SUCCESS.
UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle(
    ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) {
  // Hand out the wrapped cl_context, not the address of the UR wrapper
  // object itself.
  *phNativeContext = reinterpret_cast<ur_native_handle_t>(hContext->get());
  return UR_RESULT_SUCCESS;
}

/// Wraps an existing OpenCL context in a UR context handle.
///
/// \param hNativeContext  Native handle, reinterpreted as a cl_context.
/// \param numDevices      Number of entries in phDevices; forwarded to
///                        ur_context_handle_t_::makeWithNative.
/// \param phDevices       UR device handles forwarded to makeWithNative.
/// \param pProperties     Optional; when null or when isNativeHandleOwned is
///                        false, an extra native reference is taken so the
///                        caller keeps its own.
/// \param phContext       [out] Receives the UR context produced by
///                        makeWithNative.
/// \return UR_RESULT_SUCCESS on success; failures from makeWithNative or
///         clRetainContext exit early through the *_RETURN_ON_FAILURE macros.
UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle(
    ur_native_handle_t hNativeContext, uint32_t numDevices,
    const ur_device_handle_t *phDevices,
    const ur_context_native_properties_t *pProperties,
    ur_context_handle_t *phContext) {

  cl_context NativeHandle = reinterpret_cast<cl_context>(hNativeContext);
  UR_RETURN_ON_FAILURE(ur_context_handle_t_::makeWithNative(
      NativeHandle, numDevices, phDevices, *phContext));

  if (!pProperties || !pProperties->isNativeHandleOwned) {
    // Retain only the native context. Going through urContextRetain would
    // also raise the UR-level reference count of the wrapper just created.
    // NOTE(review): if this retain fails, the wrapper stored in *phContext
    // is not cleaned up here -- confirm the intended ownership on this path.
    CL_RETURN_ON_FAILURE(clRetainContext(NativeHandle));
  }

  return UR_RESULT_SUCCESS;
}

Expand Down Expand Up @@ -190,8 +162,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter(
auto *C = static_cast<ContextCallback *>(pUserData);
C->execute();
};
CL_RETURN_ON_FAILURE(clSetContextDestructorCallback(
cl_adapter::cast<cl_context>(hContext), ClCallback, Callback));
CL_RETURN_ON_FAILURE(
clSetContextDestructorCallback(hContext->get(), ClCallback, Callback));

return UR_RESULT_SUCCESS;
}
Loading