From b8133878c9ab418a196109f069c6aad4d8a598b1 Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Tue, 19 Dec 2023 10:39:19 +0000 Subject: [PATCH 1/4] Testing adding handles to opencl --- sycl/cmake/modules/FetchUnifiedRuntime.cmake | 15 +++-- sycl/source/detail/buffer_impl.cpp | 14 ++-- sycl/source/event.cpp | 15 ++--- sycl/source/queue.cpp | 26 ++++++-- .../DeprecatedFeatures/set_arg_interop.cpp | 35 +++++++++- sycl/unittests/handler/CMakeLists.txt | 1 - .../handler/SetArgForLocalAccessor.cpp | 54 --------------- sycl/unittests/thread_safety/CMakeLists.txt | 1 - .../thread_safety/InteropKernelEnqueue.cpp | 66 ------------------- 9 files changed, 77 insertions(+), 150 deletions(-) delete mode 100644 sycl/unittests/handler/SetArgForLocalAccessor.cpp delete mode 100644 sycl/unittests/thread_safety/InteropKernelEnqueue.cpp diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index 8f59101cbeb32..beab49096c01a 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -116,12 +116,15 @@ if(SYCL_UR_USE_FETCH_CONTENT) CACHE PATH "Path to external '${name}' adapter source dir" FORCE) endfunction() - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit af7e275b509b41f54a66743ebf748dfb51668abf - # Author: Maosu Zhao - # Date: Thu Oct 17 16:31:21 2024 +0800 - # [DeviceSanitizer] Refactor the code to manage shadow memory (#2127) - set(UNIFIED_RUNTIME_TAG af7e275b509b41f54a66743ebf748dfb51668abf) + set(UNIFIED_RUNTIME_REPO "https://github.com/omarahmed1111/unified-runtime.git") + # commit df6da35d6e67f2383db28dd49ab08c5c0ef541d2 + # Merge: 67590533 55bd5636 + # Author: aarongreig + # Date: Mon Oct 7 12:28:07 2024 +0100 + # Merge pull request #2038 from GeorgeWeb/georgi/unsupported-max-coop-wgsize + # [UR][hip][opencl] Mark urKernelSuggestMaxCooperativeGroupCountExp as unsupported + # instead of returning misleading default value + set(UNIFIED_RUNTIME_TAG 50121a6f41b9ba4632cc9a9068e34a7c7b31f9e0) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") # Due to the use of dependentloadflag and no installer for UMF and hwloc we need diff --git a/sycl/source/detail/buffer_impl.cpp b/sycl/source/detail/buffer_impl.cpp index 777091f6be572..1f889776f0451 100644 --- a/sycl/source/detail/buffer_impl.cpp +++ b/sycl/source/detail/buffer_impl.cpp @@ -49,15 +49,11 @@ void buffer_impl::destructorNotification(void *UserObj) { void buffer_impl::addInteropObject( std::vector &Handles) const { if (MOpenCLInterop) { - if (std::find(Handles.begin(), Handles.end(), - ur::cast(MInteropMemObject)) == - Handles.end()) { - const AdapterPtr &Adapter = getAdapter(); - Adapter->call( - ur::cast(MInteropMemObject)); - ur_native_handle_t NativeHandle = 0; - Adapter->call(MInteropMemObject, nullptr, - &NativeHandle); + const AdapterPtr &Adapter = getAdapter(); + ur_native_handle_t NativeHandle = 0; + Adapter->call(MInteropMemObject, nullptr, + &NativeHandle); + if (std::find(Handles.begin(), Handles.end(), NativeHandle) == Handles.end()) { Handles.push_back(NativeHandle); } } diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index df68777ca6df4..473320fde1d23 100644 --- a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -24,14 +24,13 @@ inline namespace _V1 { event::event() : impl(std::make_shared(std::nullopt)) {} -event::event(cl_event ClEvent, const context &SyclContext) - : impl(std::make_shared( - detail::ur::cast(ClEvent), SyclContext)) { - // This is a special interop constructor for OpenCL, so the event must be - // retained. - // TODO(pi2ur): Don't just cast from cl_event above - impl->getAdapter()->call( - detail::ur::cast(ClEvent)); +event::event(cl_event ClEvent, const context &SyclContext) { + ur_event_handle_t hEvent = nullptr; + impl->getAdapter()->call( + detail::ur::cast(ClEvent), + detail::getSyclObjImpl(SyclContext)->getHandleRef(), nullptr, &hEvent); + + impl = std::make_shared(hEvent, SyclContext); } bool event::operator==(const event &rhs) const { return rhs.impl == impl; } diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 43abe91b20014..10867ea609f5e 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -63,10 +63,28 @@ queue::queue(const context &SyclContext, const device &SyclDevice, queue::queue(cl_command_queue clQueue, const context &SyclContext, const async_handler &AsyncHandler) { const property_list PropList{}; - impl = std::make_shared( - // TODO(pi2ur): Don't cast straight from cl_command_queue - reinterpret_cast(clQueue), - detail::getSyclObjImpl(SyclContext), AsyncHandler, PropList); + ur_queue_handle_t hQueue; + auto Context = detail::getSyclObjImpl(SyclContext); + auto Adapter = sycl::detail::ur::getAdapter(); + + // cl_device_id CLDevice; + // size_t Ret = clGetCommandQueueInfo(clQueue, CL_QUEUE_DEVICE, sizeof(CLDevice), + // &CLDevice, nullptr); + // if (Ret) { + // throw runtime_error("Failed to retrieve device associated with the queue", + // PI_ERROR_INVALID_QUEUE); + // } + // sycl::detail::pi::PiDevice Device; + // Plugin->call( + // detail::pi::cast(CLDevice), nullptr, &Device); + + ur_queue_native_properties_t Properties[] = {UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr, 0}; + Adapter->call( + detail::ur::cast(clQueue), Context->getHandleRef(), + nullptr, Properties, &hQueue); + + impl = std::make_shared(hQueue, Context, AsyncHandler, + PropList); } cl_command_queue queue::get() const { return impl->get(); } diff --git a/sycl/test-e2e/DeprecatedFeatures/set_arg_interop.cpp b/sycl/test-e2e/DeprecatedFeatures/set_arg_interop.cpp index 7d995bf9e5c1e..ee46cb27fa2f0 100644 --- a/sycl/test-e2e/DeprecatedFeatures/set_arg_interop.cpp +++ b/sycl/test-e2e/DeprecatedFeatures/set_arg_interop.cpp @@ -10,6 +10,7 @@ #include #include +#include using namespace sycl; @@ -19,7 +20,7 @@ int main() { cl_context ClContext = Context.get(); - const size_t CountSources = 3; + const size_t CountSources = 4; const char *Sources[CountSources] = { "kernel void foo1(global float* Array, global int* Value) { *Array = " "42; *Value = 1; }\n", @@ -27,6 +28,7 @@ int main() { "Array[id] = id; }\n", "kernel void foo3(global float* Array, local float* LocalArray) { " "(void)LocalArray; (void)Array; }\n", + "kernel void foo4(global int* Value) {}\n", }; cl_int Err; @@ -46,12 +48,16 @@ int main() { cl_kernel ThirdCLKernel = clCreateKernel(ClProgram, "foo3", &Err); assert(Err == CL_SUCCESS); + cl_kernel FourthCLKernel = clCreateKernel(ClProgram, "foo4", &Err); + assert(Err == CL_SUCCESS); + const size_t Count = 100; float Array[Count]; kernel FirstKernel(FirstCLKernel, Context); kernel SecondKernel(SecondCLKernel, Context); kernel ThirdKernel(ThirdCLKernel, Context); + kernel FourthKernel(FourthCLKernel, Context); int Value; { buffer FirstBuffer(Array, range<1>(1)); @@ -114,10 +120,37 @@ int main() { } Queue.wait_and_throw(); + // Enqueuing an interop kernel while avoid calls to piKernelSetArg from + // different threads on the same kernel. + { + constexpr std::size_t NArgs = 16; + constexpr std::size_t ThreadCount = 4; + constexpr std::size_t LaunchCount = 8; + auto TestLambda = [&](int ThreadId) { + Queue + .submit([&](sycl::handler &CGH) { + for (std::size_t I = 0; I < NArgs; ++I) + CGH.set_arg(I, &ThreadId); + }) + .wait(); + }; + + std::vector threadPool; + threadPool.reserve(ThreadCount); + for (size_t tid = 0; tid < ThreadCount; ++tid) { + threadPool.push_back(std::thread(TestLambda, tid)); + } + + for (auto ¤tThread : threadPool) { + currentThread.join(); + } + } + clReleaseContext(ClContext); clReleaseKernel(FirstCLKernel); clReleaseKernel(SecondCLKernel); clReleaseKernel(ThirdCLKernel); + clReleaseKernel(FourthCLKernel); clReleaseProgram(ClProgram); return 0; } diff --git a/sycl/unittests/handler/CMakeLists.txt b/sycl/unittests/handler/CMakeLists.txt index eb7fc559ab73c..3f90404ab35b9 100644 --- a/sycl/unittests/handler/CMakeLists.txt +++ b/sycl/unittests/handler/CMakeLists.txt @@ -1,4 +1,3 @@ add_sycl_unittest(HandlerTests OBJECT - SetArgForLocalAccessor.cpp require.cpp ) diff --git a/sycl/unittests/handler/SetArgForLocalAccessor.cpp b/sycl/unittests/handler/SetArgForLocalAccessor.cpp deleted file mode 100644 index 7a9079872ce36..0000000000000 --- a/sycl/unittests/handler/SetArgForLocalAccessor.cpp +++ /dev/null @@ -1,54 +0,0 @@ -//==------- SetArgForLocalAccessor.cpp --- Handler unit tests --------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "ur_mock_helpers.hpp" -#include -#include -#include - -#include - -// This test checks that we pass the correct buffer size value when setting -// local_accessor as an argument through handler::set_arg to a kernel created -// using OpenCL interoperability methods. - -namespace { - -size_t LocalBufferArgSize = 0; - -ur_result_t redefined_urKernelSetArgLocal(void *pParams) { - auto params = *static_cast(pParams); - LocalBufferArgSize = *params.pargSize; - - return UR_RESULT_SUCCESS; -} - -TEST(HandlerSetArg, LocalAccessor) { - sycl::unittest::UrMock<> Mock; - redefineMockForKernelInterop(Mock); - mock::getCallbacks().set_replace_callback("urKernelSetArgLocal", - &redefined_urKernelSetArgLocal); - - constexpr size_t Size = 128; - sycl::queue Q; - - ur_native_handle_t handle = mock::createDummyHandle(); - auto KernelCL = reinterpret_cast::template input_type>(&handle); - auto Kernel = - sycl::make_kernel(KernelCL, Q.get_context()); - - Q.submit([&](sycl::handler &CGH) { - sycl::local_accessor Acc(Size, CGH); - CGH.set_arg(0, Acc); - CGH.single_task(Kernel); - }).wait(); - - ASSERT_EQ(LocalBufferArgSize, Size * sizeof(float)); -} -} // namespace diff --git a/sycl/unittests/thread_safety/CMakeLists.txt b/sycl/unittests/thread_safety/CMakeLists.txt index 8b725af8b4dd4..78dc6f2190178 100644 --- a/sycl/unittests/thread_safety/CMakeLists.txt +++ b/sycl/unittests/thread_safety/CMakeLists.txt @@ -1,4 +1,3 @@ add_sycl_unittest(ThreadSafetyTests OBJECT HostAccessorDeadLock.cpp - InteropKernelEnqueue.cpp ) diff --git a/sycl/unittests/thread_safety/InteropKernelEnqueue.cpp b/sycl/unittests/thread_safety/InteropKernelEnqueue.cpp deleted file mode 100644 index ca54cf0d908d6..0000000000000 --- a/sycl/unittests/thread_safety/InteropKernelEnqueue.cpp +++ /dev/null @@ -1,66 +0,0 @@ -//==-------- InteropKernelEnqueue.cpp --- Thread safety unit tests ---------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include -#include -#include -#include - -#include "ThreadUtils.h" -#include "ur_mock_helpers.hpp" - -namespace { -using namespace sycl; - -constexpr std::size_t NArgs = 16; -constexpr std::size_t ThreadCount = 4; -constexpr std::size_t LaunchCount = 8; - -uint32_t LastArgSet = -1; -std::size_t LastThread = -1; -ur_result_t redefined_urKernelSetArgValue(void *pParams) { - auto params = *static_cast(pParams); - EXPECT_EQ((LastArgSet + 1) % NArgs, *params.pargIndex); - LastArgSet = *params.pargIndex; - std::size_t ArgValue = *static_cast(*params.ppArgValue); - if (*params.pargIndex == 0) - LastThread = ArgValue; - else - EXPECT_EQ(LastThread, ArgValue); - return UR_RESULT_SUCCESS; -} - -TEST(KernelEnqueue, InteropKernel) { - unittest::UrMock<> Mock; - redefineMockForKernelInterop(Mock); - mock::getCallbacks().set_replace_callback("urKernelSetArgValue", - &redefined_urKernelSetArgValue); - - platform Plt = sycl::platform(); - queue Q; - - ur_native_handle_t Handle = mock::createDummyHandle(); - auto KernelCL = reinterpret_cast::template input_type>(&Handle); - auto Kernel = - sycl::make_kernel(KernelCL, Q.get_context()); - - auto TestLambda = [&](std::size_t ThreadId) { - Q.submit([&](sycl::handler &CGH) { - for (std::size_t I = 0; I < NArgs; ++I) - CGH.set_arg(I, ThreadId); - CGH.single_task(Kernel); - }).wait(); - }; - - for (std::size_t I = 0; I < LaunchCount; ++I) { - ThreadPool Pool(ThreadCount, TestLambda); - } -} -} // namespace From db11a388256189a01ce53851c5665c21fe1dec48 Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Wed, 16 Oct 2024 15:44:22 +0100 Subject: [PATCH 2/4] Change interop api to not own the native handle --- sycl/cmake/modules/FetchUnifiedRuntime.cmake | 2 +- sycl/include/sycl/backend.hpp | 7 ++++--- sycl/source/backend.cpp | 16 ++++------------ sycl/source/detail/buffer_impl.cpp | 4 ---- sycl/source/detail/context_impl.cpp | 6 ------ sycl/source/detail/device_image_impl.hpp | 3 --- sycl/source/detail/device_impl.cpp | 3 --- sycl/source/detail/event_impl.cpp | 2 -- sycl/source/detail/kernel_impl.cpp | 8 -------- sycl/source/detail/kernel_impl.hpp | 4 ---- sycl/source/detail/queue_impl.cpp | 2 -- sycl/source/detail/queue_impl.hpp | 1 - sycl/source/detail/sycl_mem_obj_t.cpp | 6 ------ sycl/source/device.cpp | 1 - sycl/source/kernel.cpp | 17 +++++++++-------- sycl/source/queue.cpp | 11 ----------- .../interop-opencl-make-kernel-bundle.cpp | 9 --------- sycl/unittests/SYCL2020/GetNativeOpenCL.cpp | 6 +++--- sycl/unittests/queue/InteropRetain.cpp | 13 ++++++------- 19 files changed, 27 insertions(+), 94 deletions(-) diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index beab49096c01a..93f69fe1a4845 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -124,7 +124,7 @@ if(SYCL_UR_USE_FETCH_CONTENT) # Merge pull request #2038 from GeorgeWeb/georgi/unsupported-max-coop-wgsize # [UR][hip][opencl] Mark urKernelSuggestMaxCooperativeGroupCountExp as unsupported # instead of returning misleading default value - set(UNIFIED_RUNTIME_TAG 50121a6f41b9ba4632cc9a9068e34a7c7b31f9e0) + set(UNIFIED_RUNTIME_TAG 7252088d76a910357dd3e00cb57e7a67fd3299bd) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") # Due to the use of dependentloadflag and no installer for UMF and hwloc we need diff --git a/sycl/include/sycl/backend.hpp b/sycl/include/sycl/backend.hpp index 24750732b08ff..d8d7c86e4cffb 100644 --- a/sycl/include/sycl/backend.hpp +++ b/sycl/include/sycl/backend.hpp @@ -339,7 +339,7 @@ make_context( const async_handler &Handler = {}) { return detail::make_context( detail::ur::cast(BackendObject), Handler, Backend, - false /* KeepOwnership */); + true /* KeepOwnership */); } template @@ -348,8 +348,9 @@ std::enable_if_t::MakeQueue == true, make_queue(const typename backend_traits::template input_type &BackendObject, const context &TargetContext, const async_handler Handler = {}) { - auto KeepOwnership = - Backend == backend::ext_oneapi_cuda || Backend == backend::ext_oneapi_hip; + auto KeepOwnership = Backend == backend::ext_oneapi_cuda || + Backend == backend::ext_oneapi_hip || + Backend == backend::opencl; return detail::make_queue(detail::ur::cast(BackendObject), false, TargetContext, nullptr, KeepOwnership, {}, Handler, Backend); diff --git a/sycl/source/backend.cpp b/sycl/source/backend.cpp index 2c876a570e3c6..70cea2ce4249b 100644 --- a/sycl/source/backend.cpp +++ b/sycl/source/backend.cpp @@ -161,7 +161,7 @@ __SYCL_EXPORT queue make_queue(ur_native_handle_t NativeHandle, __SYCL_EXPORT event make_event(ur_native_handle_t NativeHandle, const context &Context, backend Backend) { - return make_event(NativeHandle, Context, false, Backend); + return make_event(NativeHandle, Context, true /* KeepOwnership */, Backend); } __SYCL_EXPORT event make_event(ur_native_handle_t NativeHandle, @@ -179,9 +179,6 @@ __SYCL_EXPORT event make_event(ur_native_handle_t NativeHandle, NativeHandle, ContextImpl->getHandleRef(), &Properties, &UrEvent); event Event = detail::createSyclObjFromImpl( std::make_shared(UrEvent, Context)); - - if (Backend == backend::opencl) - Adapter->call(UrEvent); return Event; } @@ -204,9 +201,6 @@ make_kernel_bundle(ur_native_handle_t NativeHandle, sycl::make_error_code(sycl::errc::invalid), "urProgramCreateWithNativeHandle resulted in a null program handle."); - if (ContextImpl->getBackend() == backend::opencl) - Adapter->call(UrProgram); - std::vector ProgramDevices; uint32_t NumDevices = 0; @@ -310,7 +304,8 @@ std::shared_ptr make_kernel_bundle(ur_native_handle_t NativeHandle, const context &TargetContext, bundle_state State, backend Backend) { - return make_kernel_bundle(NativeHandle, TargetContext, false, State, Backend); + return make_kernel_bundle(NativeHandle, TargetContext, + true /* KeepOwnership*/, State, Backend); } kernel make_kernel(const context &TargetContext, @@ -351,9 +346,6 @@ kernel make_kernel(const context &TargetContext, NativeHandle, ContextImpl->getHandleRef(), UrProgram, &Properties, &UrKernel); - if (Backend == backend::opencl) - Adapter->call(UrKernel); - // Construct the SYCL queue from UR queue. return detail::createSyclObjFromImpl( std::make_shared(UrKernel, ContextImpl, KernelBundleImpl)); @@ -364,7 +356,7 @@ kernel make_kernel(ur_native_handle_t NativeHandle, return make_kernel( TargetContext, get_empty_interop_kernel_bundle(TargetContext), - NativeHandle, false, Backend); + NativeHandle, true /* KeepOwnership */, Backend); } } // namespace detail diff --git a/sycl/source/detail/buffer_impl.cpp b/sycl/source/detail/buffer_impl.cpp index 1f889776f0451..e6b17265b9f8b 100644 --- a/sycl/source/detail/buffer_impl.cpp +++ b/sycl/source/detail/buffer_impl.cpp @@ -82,10 +82,6 @@ buffer_impl::getNativeVector(backend BackendName) const { auto Adapter = Platform->getAdapter(); - if (Platform->getBackend() == backend::opencl) { - Adapter->call(NativeMem); - } - ur_native_handle_t Handle = 0; // When doing buffer interop we don't know what device the memory should be // resident on, so pass nullptr for Device param. Buffer interop may not be diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index ce5f9f735dee6..0b7db9c4a6a4c 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -109,15 +109,11 @@ context_impl::context_impl(ur_context_handle_t UrContext, // // TODO: Move this backend-specific retain of the context to SYCL-2020 style // make_context interop, when that is created. - if (getBackend() == sycl::backend::opencl) { - getAdapter()->call(MContext); - } MKernelProgramCache.setContextPtr(this); } cl_context context_impl::get() const { // TODO catch an exception and put it to list of asynchronous exceptions - getAdapter()->call(MContext); ur_native_handle_t nativeHandle = 0; getAdapter()->call(MContext, &nativeHandle); @@ -296,8 +292,6 @@ context_impl::findMatchingDeviceImpl(ur_device_handle_t &DeviceUR) const { ur_native_handle_t context_impl::getNative() const { const auto &Adapter = getAdapter(); - if (getBackend() == backend::opencl) - Adapter->call(getHandleRef()); ur_native_handle_t Handle; Adapter->call(getHandleRef(), &Handle); return Handle; diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index ac58b7b80f467..d58c6fc202c84 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -299,9 +299,6 @@ class device_image_impl { assert(MProgram); const auto &ContextImplPtr = detail::getSyclObjImpl(MContext); const AdapterPtr &Adapter = ContextImplPtr->getAdapter(); - - if (ContextImplPtr->getBackend() == backend::opencl) - Adapter->call(MProgram); ur_native_handle_t NativeProgram = 0; Adapter->call(MProgram, &NativeProgram); diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index e0508b57e912b..ee37681ccce48 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -98,7 +98,6 @@ bool device_impl::is_affinity_supported( cl_device_id device_impl::get() const { // TODO catch an exception and put it to list of asynchronous exceptions - getAdapter()->call(MDevice); return ur::cast(getNative()); } @@ -339,8 +338,6 @@ std::vector device_impl::create_sub_devices() const { ur_native_handle_t device_impl::getNative() const { auto Adapter = getAdapter(); - if (getBackend() == backend::opencl) - Adapter->call(getHandleRef()); ur_native_handle_t Handle; Adapter->call(getHandleRef(), &Handle); return Handle; diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index f6e5edfc92e74..5ed49ebfd3802 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -494,8 +494,6 @@ ur_native_handle_t event_impl::getNative() { this->setHandle(UREvent); Handle = UREvent; } - if (MContext->getBackend() == backend::opencl) - Adapter->call(Handle); ur_native_handle_t OutHandle; Adapter->call(Handle, &OutHandle); return OutHandle; diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 986e78aa21530..6df569160de0e 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -24,14 +24,6 @@ kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr Context, Context)), MCreatedFromSource(true), MKernelBundleImpl(std::move(KernelBundleImpl)), MIsInterop(true), MKernelArgMaskPtr{ArgMask} { - ur_context_handle_t UrContext = nullptr; - // Using the adapter from the passed ContextImpl - getAdapter()->call( - MKernel, UR_KERNEL_INFO_CONTEXT, sizeof(UrContext), &UrContext, nullptr); - if (Context->getHandleRef() != UrContext) - throw sycl::exception( - make_error_code(errc::invalid), - "Input context must be the same as the context of cl_kernel"); // Enable USM indirect access for interoperability kernels. // Some UR Adapters (like OpenCL) require this call to enable USM diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index 1b71eb3e659ad..56c67bbcffbbe 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -74,7 +74,6 @@ class kernel_impl { /// /// \return a valid cl_kernel instance cl_kernel get() const { - getAdapter()->call(MKernel); ur_native_handle_t nativeHandle = 0; getAdapter()->call(MKernel, &nativeHandle); @@ -152,9 +151,6 @@ class kernel_impl { ur_native_handle_t getNative() const { const AdapterPtr &Adapter = MContext->getAdapter(); - if (MContext->getBackend() == backend::opencl) - Adapter->call(MKernel); - ur_native_handle_t NativeKernel = 0; Adapter->call(MKernel, &NativeKernel); diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 428f06ea0aaa4..38a2f16bac1d2 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -698,8 +698,6 @@ void queue_impl::destructorNotification() { ur_native_handle_t queue_impl::getNative(int32_t &NativeHandleDesc) const { const AdapterPtr &Adapter = getAdapter(); - if (getContextImplPtr()->getBackend() == backend::opencl) - Adapter->call(MQueues[0]); ur_native_handle_t Handle{}; ur_queue_native_desc_t UrNativeDesc{UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC, nullptr, nullptr}; diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 42e769bbe2025..b7511f274e977 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -265,7 +265,6 @@ class queue_impl { /// \return an OpenCL interoperability queue handle. cl_command_queue get() { - getAdapter()->call(MQueues[0]); ur_native_handle_t nativeHandle = 0; getAdapter()->call(MQueues[0], nullptr, &nativeHandle); diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 25e092232ae7f..fdbcb474afcb0 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -56,9 +56,6 @@ SYCLMemObjT::SYCLMemObjT(ur_native_handle_t MemObject, throw sycl::exception( make_error_code(errc::invalid), "Input context must be the same as the context of cl_mem"); - - if (MInteropContext->getBackend() == backend::opencl) - Adapter->call(MInteropMemObject); } ur_mem_type_t getImageType(int Dimensions) { @@ -111,9 +108,6 @@ SYCLMemObjT::SYCLMemObjT(ur_native_handle_t MemObject, throw sycl::exception( make_error_code(errc::invalid), "Input context must be the same as the context of cl_mem"); - - if (MInteropContext->getBackend() == backend::opencl) - Adapter->call(MInteropMemObject); } void SYCLMemObjT::releaseMem(ContextImplPtr Context, void *MemAllocation) { diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 0fac067c9d81b..400a81d3fcc49 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -43,7 +43,6 @@ device::device(cl_device_id DeviceId) { auto Platform = detail::platform_impl::getPlatformFromUrDevice(Device, Adapter); impl = Platform->getOrMakeDeviceImpl(Device, Platform); - Adapter->call(impl->getHandleRef()); } device::device(const device_selector &deviceSelector) { diff --git a/sycl/source/kernel.cpp b/sycl/source/kernel.cpp index 634f22b09bafb..01ec2ef0caaba 100644 --- a/sycl/source/kernel.cpp +++ b/sycl/source/kernel.cpp @@ -22,16 +22,17 @@ kernel::kernel(cl_kernel ClKernel, const context &SyclContext) { ur_kernel_handle_t hKernel = nullptr; ur_native_handle_t nativeHandle = reinterpret_cast(ClKernel); - Adapter->call( - nativeHandle, detail::getSyclObjImpl(SyclContext)->getHandleRef(), - nullptr, nullptr, &hKernel); + ur_result_t Res = + Adapter->call_nocheck( + nativeHandle, detail::getSyclObjImpl(SyclContext)->getHandleRef(), + nullptr, nullptr, &hKernel); + if (Res == UR_RESULT_ERROR_INVALID_CONTEXT) { + throw sycl::exception( + make_error_code(errc::invalid), + "Input context must be the same as the context of cl_kernel"); + } impl = std::make_shared( hKernel, detail::getSyclObjImpl(SyclContext), nullptr, nullptr); - // This is a special interop constructor for OpenCL, so the kernel must be - // retained. - if (get_backend() == backend::opencl) { - impl->getAdapter()->call(hKernel); - } } cl_kernel kernel::get() const { return impl->get(); } diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 10867ea609f5e..f65b03a2acb80 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -67,17 +67,6 @@ queue::queue(cl_command_queue clQueue, const context &SyclContext, auto Context = detail::getSyclObjImpl(SyclContext); auto Adapter = sycl::detail::ur::getAdapter(); - // cl_device_id CLDevice; - // size_t Ret = clGetCommandQueueInfo(clQueue, CL_QUEUE_DEVICE, sizeof(CLDevice), - // &CLDevice, nullptr); - // if (Ret) { - // throw runtime_error("Failed to retrieve device associated with the queue", - // PI_ERROR_INVALID_QUEUE); - // } - // sycl::detail::pi::PiDevice Device; - // Plugin->call( - // detail::pi::cast(CLDevice), nullptr, &Device); - ur_queue_native_properties_t Properties[] = {UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr, 0}; Adapter->call( detail::ur::cast(clQueue), Context->getHandleRef(), diff --git a/sycl/test-e2e/Plugin/interop-opencl-make-kernel-bundle.cpp b/sycl/test-e2e/Plugin/interop-opencl-make-kernel-bundle.cpp index 582314335f6f3..afa6e59bd5367 100644 --- a/sycl/test-e2e/Plugin/interop-opencl-make-kernel-bundle.cpp +++ b/sycl/test-e2e/Plugin/interop-opencl-make-kernel-bundle.cpp @@ -33,15 +33,10 @@ int main() { clCreateProgramWithSource(native_context, 1, &source, nullptr, nullptr); std::cerr << "Build native program." << std::endl; clBuildProgram(p, 0, nullptr, nullptr, nullptr, nullptr); - std::cerr << "Release native context." << std::endl; - clReleaseContext(native_context); std::cerr << "Make kernel bundle." << std::endl; auto bundle = make_kernel_bundle( p, q.get_context()); - std::cerr << "Release native program." << std::endl; - // cl_program must have been retained by the above call. - clReleaseProgram(p); std::cerr << "Get native program." << std::endl; std::vector device_image = @@ -49,13 +44,9 @@ int main() { assert(device_image.size() == 1); std::cerr << "Create native kernel." << std::endl; cl_kernel k = clCreateKernel(device_image.front(), "do_nothing", nullptr); - // get_native must have retained cl_program as well. - clReleaseProgram(device_image.front()); std::cerr << "Make kernel." << std::endl; make_kernel(k, q.get_context()); - std::cerr << "Release native kernel." << std::endl; - clReleaseKernel(k); return 0; } diff --git a/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp b/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp index ba2ae917808d3..002aeaa8ba51e 100644 --- a/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp +++ b/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp @@ -121,8 +121,8 @@ TEST(GetNative, GetNativeHandle) { get_native(Event); get_native(Buffer); - // Depending on global caches state, urDeviceRetain is called either once or - // twice, so there'll be 6 or 7 calls. - ASSERT_EQ(TestCounter, 6 + DeviceRetainCounter - 1) + // Interop object shouldn't be owned by sycl. So, get_native shouldn't retain + // native handles. + ASSERT_EQ(TestCounter, 2 + DeviceRetainCounter - 1) << "Not all the retain methods were called"; } diff --git a/sycl/unittests/queue/InteropRetain.cpp b/sycl/unittests/queue/InteropRetain.cpp index c29d3b9e93c5a..23bf3d86627bc 100644 --- a/sycl/unittests/queue/InteropRetain.cpp +++ b/sycl/unittests/queue/InteropRetain.cpp @@ -23,25 +23,24 @@ ur_result_t redefinedQueueRetain(void *) { return UR_RESULT_SUCCESS; } -TEST(PiInteropTest, CheckRetain) { +TEST(UrInteropTest, CheckRetain) { sycl::unittest::UrMock<> Mock; sycl::platform Plt = sycl::platform(); context Ctx{Plt.get_devices()[0]}; - // The queue construction should not call to urQueueRetain. Instead - // urQueueCreate should return the "retained" queue. + // The queue construction should not call to urQueueRetain. mock::getCallbacks().set_before_callback("urQueueRetain", &redefinedQueueRetain); queue Q{Ctx, default_selector()}; EXPECT_TRUE(QueueRetainCalled == 0); cl_command_queue OCLQ = get_native(Q); - EXPECT_TRUE(QueueRetainCalled == 1); + EXPECT_TRUE(QueueRetainCalled == 0); - // The make_queue should not call to urQueueRetain. The - // urQueueCreateWithNativeHandle should do the "retain" if needed. + // The make_queue should not call to urQueueRetain. + // Interop object shouldn't be owned by default in sycl. queue Q1 = make_queue(OCLQ, Ctx); - EXPECT_TRUE(QueueRetainCalled == 1); + EXPECT_TRUE(QueueRetainCalled == 0); } } // namespace From 5c296e339d65cd6811258370cc155890be4a3550 Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Mon, 21 Oct 2024 09:19:07 +0100 Subject: [PATCH 3/4] Fix make_kernel e2e test to release native handles --- sycl/cmake/modules/FetchUnifiedRuntime.cmake | 2 +- sycl/include/sycl/backend.hpp | 2 +- sycl/test-e2e/Plugin/interop-opencl-make-kernel-bundle.cpp | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index 93f69fe1a4845..383e6915eb83c 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -124,7 +124,7 @@ if(SYCL_UR_USE_FETCH_CONTENT) # Merge pull request #2038 from GeorgeWeb/georgi/unsupported-max-coop-wgsize # [UR][hip][opencl] Mark urKernelSuggestMaxCooperativeGroupCountExp as unsupported # instead of returning misleading default value - set(UNIFIED_RUNTIME_TAG 7252088d76a910357dd3e00cb57e7a67fd3299bd) + set(UNIFIED_RUNTIME_TAG 411993dadaef65a10b3ee8140134e80b116a25b8) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") # Due to the use of dependentloadflag and no installer for UMF and hwloc we need diff --git a/sycl/include/sycl/backend.hpp b/sycl/include/sycl/backend.hpp index d8d7c86e4cffb..b7d3eaeedf22b 100644 --- a/sycl/include/sycl/backend.hpp +++ b/sycl/include/sycl/backend.hpp @@ -425,7 +425,7 @@ make_kernel_bundle(const typename backend_traits::template input_type< std::shared_ptr KBImpl = detail::make_kernel_bundle( detail::ur::cast(BackendObject), TargetContext, - false, State, Backend); + true /* KeepOwnership */, State, Backend); return detail::createSyclObjFromImpl>(KBImpl); } } // namespace _V1 diff --git a/sycl/test-e2e/Plugin/interop-opencl-make-kernel-bundle.cpp b/sycl/test-e2e/Plugin/interop-opencl-make-kernel-bundle.cpp index afa6e59bd5367..419586f9f4a6f 100644 --- a/sycl/test-e2e/Plugin/interop-opencl-make-kernel-bundle.cpp +++ b/sycl/test-e2e/Plugin/interop-opencl-make-kernel-bundle.cpp @@ -48,5 +48,8 @@ int main() { std::cerr << "Make kernel." << std::endl; make_kernel(k, q.get_context()); + clReleaseProgram(p); + clReleaseKernel(k); + return 0; } From 41e18265ae2260cad3dad77028a88745bef6f23e Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Thu, 7 Nov 2024 13:58:09 +0000 Subject: [PATCH 4/4] [Revert] Testing commit --- sycl/cmake/modules/FetchUnifiedRuntime.cmake | 2 +- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 2 +- sycl/source/detail/buffer_impl.cpp | 5 +++-- sycl/source/queue.cpp | 3 ++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index f598d52bb0501..a8d72788c00f6 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -124,7 +124,7 @@ if(SYCL_UR_USE_FETCH_CONTENT) # Merge pull request #2038 from GeorgeWeb/georgi/unsupported-max-coop-wgsize # [UR][hip][opencl] Mark urKernelSuggestMaxCooperativeGroupCountExp as unsupported # instead of returning misleading default value - set(UNIFIED_RUNTIME_TAG 18d3063ecae625ea11d88fa8b7e09fef73bab34b) + set(UNIFIED_RUNTIME_TAG 6806e8b65c6cd878b6194118b88144d4f68ff7e8) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") # Due to the use of dependentloadflag and no installer for UMF and hwloc we need diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index a7b68befa96bf..874543cabb343 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -4,4 +4,4 @@ # Date: Wed Nov 6 16:45:49 2024 +0000 # Merge pull request #2276 from rafbiels/rafbiels/fix-hip-evbase # Set the right HIP device before creating base event counter -set(UNIFIED_RUNTIME_TAG 2858a8a28d0b6524a3b2b0e25a597d1c8295ce9d) +set(UNIFIED_RUNTIME_TAG 6806e8b65c6cd878b6194118b88144d4f68ff7e8) diff --git a/sycl/source/detail/buffer_impl.cpp b/sycl/source/detail/buffer_impl.cpp index e6b17265b9f8b..ee28c69aad3e8 100644 --- a/sycl/source/detail/buffer_impl.cpp +++ b/sycl/source/detail/buffer_impl.cpp @@ -52,8 +52,9 @@ void buffer_impl::addInteropObject( const AdapterPtr &Adapter = getAdapter(); ur_native_handle_t NativeHandle = 0; Adapter->call(MInteropMemObject, nullptr, - &NativeHandle); - if (std::find(Handles.begin(), Handles.end(), NativeHandle) == Handles.end()) { + &NativeHandle); + if (std::find(Handles.begin(), Handles.end(), NativeHandle) == + Handles.end()) { Handles.push_back(NativeHandle); } } diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index f65b03a2acb80..3066a73eb5ebd 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -67,7 +67,8 @@ queue::queue(cl_command_queue clQueue, const context &SyclContext, auto Context = detail::getSyclObjImpl(SyclContext); auto Adapter = sycl::detail::ur::getAdapter(); - ur_queue_native_properties_t Properties[] = {UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr, 0}; + ur_queue_native_properties_t Properties[] = { + UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr, 0}; Adapter->call( detail::ur::cast(clQueue), Context->getHandleRef(), nullptr, Properties, &hQueue);