From a689b8d64b8539e8f7f704d5d38698b733daf227 Mon Sep 17 00:00:00 2001
From: Artur Gainullin
Date: Wed, 28 Aug 2024 13:56:51 -0700
Subject: [PATCH] [SYCL] Protect access to the native handle of a sycl::event (#15179)

Fix for https://github.com/intel/llvm/issues/14623

Currently event_impl exposes a reference to the underlying UR handle. As a
result, this handle can be updated or read at arbitrary moments by different
threads, causing a data race.

This PR removes the methods that expose the reference and replaces them with a
thread-safe getter and setter.
---
 sycl/source/detail/event_impl.cpp             |  81 ++++---
 sycl/source/detail/event_impl.hpp             |  19 +-
 sycl/source/detail/graph_impl.cpp             |   5 +-
 sycl/source/detail/memory_manager.cpp         |   9 +-
 sycl/source/detail/queue_impl.cpp             |  15 +-
 sycl/source/detail/queue_impl.hpp             |   5 +-
 sycl/source/detail/reduction.cpp              |   6 +-
 sycl/source/detail/scheduler/commands.cpp     | 207 ++++++++++--------
 sycl/source/detail/scheduler/commands.hpp     |   2 +-
 sycl/source/detail/scheduler/scheduler.cpp    |   2 +-
 sycl/source/handler.cpp                       |   4 +-
 sycl/test-e2e/ThreadSafety/event.cpp          |  65 ++++++
 sycl/unittests/buffer/BufferReleaseBase.cpp   |  20 +-
 sycl/unittests/queue/USM.cpp                  |   2 +-
 .../scheduler/CommandsWaitForEvents.cpp       |   2 +-
 .../scheduler/EnqueueWithDependsOnDeps.cpp    |  14 +-
 sycl/unittests/scheduler/GraphCleanup.cpp     |   4 +-
 sycl/unittests/scheduler/InOrderQueueDeps.cpp |   2 +-
 sycl/unittests/scheduler/QueueFlushing.cpp    |  12 +-
 19 files changed, 294 insertions(+), 182 deletions(-)
 create mode 100644 sycl/test-e2e/ThreadSafety/event.cpp

diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp
index 951f983c2fc5e..4121c1884fd5a 100644
--- a/sycl/source/detail/event_impl.cpp
+++ b/sycl/source/detail/event_impl.cpp
@@ -44,17 +44,19 @@ void event_impl::initContextIfNeeded() {
 
 event_impl::~event_impl() {
   try {
-    if (MEvent)
-      getPlugin()->call(urEventRelease, MEvent);
+    auto Handle = this->getHandle();
+    if (Handle)
+      getPlugin()->call(urEventRelease, Handle);
   } catch (std::exception &e) {
     __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~event_impl", e);
   }
 }
 
 void event_impl::waitInternal(bool *Success) {
-  if (!MIsHostEvent && MEvent) {
+  auto Handle = this->getHandle();
+  if (!MIsHostEvent && Handle) {
     // Wait for the native event
-    ur_result_t Err = getPlugin()->call_nocheck(urEventWait, 1, &MEvent);
+    ur_result_t Err = getPlugin()->call_nocheck(urEventWait, 1, &Handle);
     // TODO drop the UR_RESULT_ERROR_UKNOWN from here (this was waiting for
     // https://github.com/oneapi-src/unified-runtime/issues/1459 which is now
     // closed).
@@ -89,7 +91,7 @@ void event_impl::waitInternal(bool *Success) { } void event_impl::setComplete() { - if (MIsHostEvent || !MEvent) { + if (MIsHostEvent || !this->getHandle()) { { std::unique_lock lock(MMutex); #ifndef NDEBUG @@ -116,8 +118,11 @@ static uint64_t inline getTimestamp() { .count(); } -const ur_event_handle_t &event_impl::getHandleRef() const { return MEvent; } -ur_event_handle_t &event_impl::getHandleRef() { return MEvent; } +ur_event_handle_t event_impl::getHandle() const { return MEvent.load(); } + +void event_impl::setHandle(const ur_event_handle_t &UREvent) { + MEvent.store(UREvent); +} const ContextImplPtr &event_impl::getContextImpl() { initContextIfNeeded(); @@ -141,7 +146,7 @@ event_impl::event_impl(ur_event_handle_t Event, const context &SyclContext) MIsFlushed(true), MState(HES_Complete) { ur_context_handle_t TempContext; - getPlugin()->call(urEventGetInfo, MEvent, UR_EVENT_INFO_CONTEXT, + getPlugin()->call(urEventGetInfo, this->getHandle(), UR_EVENT_INFO_CONTEXT, sizeof(ur_context_handle_t), &TempContext, nullptr); if (MContext->getHandleRef() != TempContext) { @@ -183,7 +188,7 @@ void *event_impl::instrumentationProlog(std::string &Name, int32_t StreamID, // Create a string with the event address so it // can be associated with other debug data xpti::utils::StringHelper SH; - Name = SH.nameWithAddress("event.wait", MEvent); + Name = SH.nameWithAddress("event.wait", this->getHandle()); // We can emit the wait associated with the graph if the // event does not have a command object or associated with @@ -249,9 +254,10 @@ void event_impl::wait(std::shared_ptr Self, TelemetryEvent = instrumentationProlog(Name, StreamID, IId); #endif - if (MEvent) - // presence of MEvent means the command has been enqueued, so no need to - // go via the slow path event waiting in the scheduler + auto EventHandle = getHandle(); + if (EventHandle) + // presence of the native handle means the command has been enqueued, so no + // need to go via the slow path event waiting in the scheduler waitInternal(Success); else if (MCommand) detail::Scheduler::getInstance().waitForEvent(Self, Success); @@ -294,7 +300,7 @@ event_impl::get_profiling_info() { // For profiling tag events we rely on the submission time reported as // the start time has undefined behavior. return get_event_profiling_info( - this->getHandleRef(), this->getPlugin()); + this->getHandle(), this->getPlugin()); } // The delay between the submission and the actual start of a CommandBuffer @@ -311,10 +317,11 @@ event_impl::get_profiling_info() { // made by forcing the re-sync of submit time to start time is less than // 0.5ms. These timing values were obtained empirically using an integrated // Intel GPU). 
- if (MEventFromSubmittedExecCommandBuffer && !MIsHostEvent && MEvent) { + auto Handle = this->getHandle(); + if (MEventFromSubmittedExecCommandBuffer && !MIsHostEvent && Handle) { uint64_t StartTime = get_event_profiling_info( - this->getHandleRef(), this->getPlugin()); + Handle, this->getPlugin()); if (StartTime < MSubmitTime) MSubmitTime = StartTime; } @@ -326,16 +333,17 @@ uint64_t event_impl::get_profiling_info() { checkProfilingPreconditions(); if (!MIsHostEvent) { - if (MEvent) { + auto Handle = getHandle(); + if (Handle) { auto StartTime = get_event_profiling_info( - this->getHandleRef(), this->getPlugin()); + Handle, this->getPlugin()); if (!MFallbackProfiling) { return StartTime; } else { auto DeviceBaseTime = get_event_profiling_info( - this->getHandleRef(), this->getPlugin()); + Handle, this->getPlugin()); return MHostBaseTime - DeviceBaseTime + StartTime; } } @@ -353,16 +361,17 @@ template <> uint64_t event_impl::get_profiling_info() { checkProfilingPreconditions(); if (!MIsHostEvent) { - if (MEvent) { + auto Handle = this->getHandle(); + if (Handle) { auto EndTime = get_event_profiling_info( - this->getHandleRef(), this->getPlugin()); + Handle, this->getPlugin()); if (!MFallbackProfiling) { return EndTime; } else { auto DeviceBaseTime = get_event_profiling_info( - this->getHandleRef(), this->getPlugin()); + Handle, this->getPlugin()); return MHostBaseTime - DeviceBaseTime + EndTime; } } @@ -377,8 +386,9 @@ uint64_t event_impl::get_profiling_info() { } template <> uint32_t event_impl::get_info() { - if (!MIsHostEvent && MEvent) { - return get_event_info(this->getHandleRef(), + auto Handle = this->getHandle(); + if (!MIsHostEvent && Handle) { + return get_event_info(Handle, this->getPlugin()); } return 0; @@ -392,9 +402,10 @@ event_impl::get_info() { if (!MIsHostEvent) { // Command is enqueued and UrEvent is ready - if (MEvent) + auto Handle = this->getHandle(); + if (Handle) return get_event_info( - this->getHandleRef(), this->getPlugin()); + Handle, this->getPlugin()); // Command is blocked and not enqueued, UrEvent is not assigned yet else if (MCommand) return sycl::info::event_command_status::submitted; @@ -471,17 +482,20 @@ ur_native_handle_t event_impl::getNative() { initContextIfNeeded(); auto Plugin = getPlugin(); - if (MIsDefaultConstructed && !MEvent) { + auto Handle = getHandle(); + if (MIsDefaultConstructed && !Handle) { auto TempContext = MContext.get()->getHandleRef(); ur_event_native_properties_t NativeProperties{}; + ur_event_handle_t UREvent = nullptr; Plugin->call(urEventCreateWithNativeHandle, 0, TempContext, - &NativeProperties, &MEvent); + &NativeProperties, &UREvent); + this->setHandle(UREvent); } if (MContext->getBackend() == backend::opencl) - Plugin->call(urEventRetain, getHandleRef()); - ur_native_handle_t Handle; - Plugin->call(urEventGetNativeHandle, getHandleRef(), &Handle); - return Handle; + Plugin->call(urEventRetain, Handle); + ur_native_handle_t OutHandle; + Plugin->call(urEventGetNativeHandle, Handle, &OutHandle); + return OutHandle; } std::vector event_impl::getWaitList() { @@ -505,7 +519,8 @@ std::vector event_impl::getWaitList() { void event_impl::flushIfNeeded(const QueueImplPtr &UserQueue) { // Some events might not have a native handle underneath even at this point, // e.g. those produced by memset with 0 size (no UR call is made). 
-  if (MIsFlushed || !MEvent)
+  auto Handle = this->getHandle();
+  if (MIsFlushed || !Handle)
     return;
 
   QueueImplPtr Queue = MQueue.lock();
@@ -520,7 +535,7 @@ void event_impl::flushIfNeeded(const QueueImplPtr &UserQueue) {
 
   // Check if the task for this event has already been submitted.
   ur_event_status_t Status = UR_EVENT_STATUS_QUEUED;
-  getPlugin()->call(urEventGetInfo, MEvent,
+  getPlugin()->call(urEventGetInfo, Handle,
                     UR_EVENT_INFO_COMMAND_EXECUTION_STATUS,
                     sizeof(ur_event_status_t), &Status, nullptr);
   if (Status == UR_EVENT_STATUS_QUEUED) {
diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp
index fa25073dcfc82..07c33acb2b054 100644
--- a/sycl/source/detail/event_impl.hpp
+++ b/sycl/source/detail/event_impl.hpp
@@ -126,16 +126,11 @@ class event_impl {
   /// Marks this event as completed.
   void setComplete();
 
-  /// Returns raw interoperability event handle. Returned reference will be
-  /// invalid if event_impl was destroyed.
-  ///
-  /// \return a reference to an instance of plug-in event handle.
-  ur_event_handle_t &getHandleRef();
-  /// Returns raw interoperability event handle. Returned reference will be
-  /// invalid if event_impl was destroyed.
-  ///
-  /// \return a const reference to an instance of plug-in event handle.
-  const ur_event_handle_t &getHandleRef() const;
+  /// Returns raw interoperability event handle.
+  ur_event_handle_t getHandle() const;
+
+  /// Set event handle for this event object.
+  void setHandle(const ur_event_handle_t &UREvent);
 
   /// Returns context that is associated with this event.
   ///
@@ -240,7 +235,7 @@ class event_impl {
   /// have native handle.
   ///
   /// @return true if no associated command and no event handle.
-  bool isNOP() { return !MCommand && !getHandleRef(); }
+  bool isNOP() { return !MCommand && !getHandle(); }
 
   /// Calling this function queries the current device timestamp and sets it as
   /// submission time for the command associated with this event.
@@ -344,7 +339,7 @@ class event_impl {
                               int32_t StreamID, uint64_t IId) const;
   void checkProfilingPreconditions() const;
 
-  ur_event_handle_t MEvent = nullptr;
+  std::atomic<ur_event_handle_t> MEvent = nullptr;
   // Stores submission time of command associated with event
   uint64_t MSubmitTime = 0;
   uint64_t MHostBaseTime = 0;
diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp
index 085d0f8eeea9d..6836a86010cc4 100644
--- a/sycl/source/detail/graph_impl.cpp
+++ b/sycl/source/detail/graph_impl.cpp
@@ -910,7 +910,7 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue,
   }
 
   NewEvent = CreateNewEvent();
-  ur_event_handle_t *OutEvent = &NewEvent->getHandleRef();
+  ur_event_handle_t UREvent = nullptr;
   // Merge requirements from the nodes into requirements (if any) from the
   // handler.
CGData.MRequirements.insert(CGData.MRequirements.end(), @@ -927,7 +927,8 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, } ur_result_t Res = Queue->getPlugin()->call_nocheck( urCommandBufferEnqueueExp, CommandBuffer, Queue->getHandleRef(), 0, - nullptr, OutEvent); + nullptr, &UREvent); + NewEvent->setHandle(UREvent); if (Res == UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES) { throw sycl::exception( make_error_code(errc::invalid), diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 3853b97652592..437db6c877e2a 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -124,10 +124,9 @@ static void waitForEvents(const std::vector &Events) { if (!Events.empty()) { const PluginPtr &Plugin = Events[0]->getPlugin(); std::vector UrEvents(Events.size()); - std::transform(Events.begin(), Events.end(), UrEvents.begin(), - [](const EventImplPtr &EventImpl) { - return EventImpl->getHandleRef(); - }); + std::transform( + Events.begin(), Events.end(), UrEvents.begin(), + [](const EventImplPtr &EventImpl) { return EventImpl->getHandle(); }); if (!UrEvents.empty() && UrEvents[0]) { Plugin->call(urEventWait, UrEvents.size(), &UrEvents[0]); } @@ -313,7 +312,7 @@ void *MemoryManager::allocateInteropMemObject( // If memory object is created with interop c'tor return cl_mem as is. assert(TargetContext == InteropContext && "Expected matching contexts"); - OutEventToWait = InteropEvent->getHandleRef(); + OutEventToWait = InteropEvent->getHandle(); // Retain the event since it will be released during alloca command // destruction if (nullptr != OutEventToWait) { diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index df8eb6aa10402..8b4e51f340aed 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -49,8 +49,9 @@ getUrEvents(const std::vector &DepEvents) { std::vector RetUrEvents; for (const sycl::event &Event : DepEvents) { const EventImplPtr &EventImpl = detail::getSyclObjImpl(Event); - if (EventImpl->getHandleRef() != nullptr) - RetUrEvents.push_back(EventImpl->getHandleRef()); + auto Handle = EventImpl->getHandle(); + if (Handle != nullptr) + RetUrEvents.push_back(Handle); } return RetUrEvents; } @@ -307,7 +308,7 @@ void queue_impl::addEvent(const event &Event) { } // As long as the queue supports urQueueFinish we only need to store events // for unenqueued commands and host tasks. - else if (MEmulateOOO || EImpl->getHandleRef() == nullptr) { + else if (MEmulateOOO || EImpl->getHandle() == nullptr) { std::weak_ptr EventWeakPtr{EImpl}; std::lock_guard Lock{MMutex}; MEventsWeak.push_back(std::move(EventWeakPtr)); @@ -447,8 +448,10 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, auto EventImpl = detail::getSyclObjImpl(ResEvent); { NestedCallsTracker tracker; - MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents), - &EventImpl->getHandleRef(), EventImpl); + ur_event_handle_t UREvent = nullptr; + MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents), &UREvent, + EventImpl); + EventImpl->setHandle(UREvent); } if (isInOrder()) { @@ -603,7 +606,7 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { EventImplWeakPtrIt->lock()) { // A nullptr UR event indicates that urQueueFinish will not cover it, // either because it's a host task event or an unenqueued one. 
- if (!SupportsPiFinish || nullptr == EventImplSharedPtr->getHandleRef()) { + if (!SupportsPiFinish || nullptr == EventImplSharedPtr->getHandle()) { EventImplSharedPtr->wait(EventImplSharedPtr); } } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index c8cd178c460f8..c5777c368145d 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -737,9 +737,10 @@ class queue_impl { template EventImplPtr insertHelperBarrier(const HandlerType &Handler) { auto ResEvent = std::make_shared(Handler.MQueue); + ur_event_handle_t UREvent = nullptr; getPlugin()->call(urEnqueueEventsWaitWithBarrier, - Handler.MQueue->getHandleRef(), 0, nullptr, - &ResEvent->getHandleRef()); + Handler.MQueue->getHandleRef(), 0, nullptr, &UREvent); + ResEvent->setHandle(UREvent); return ResEvent; } diff --git a/sycl/source/detail/reduction.cpp b/sycl/source/detail/reduction.cpp index a8a839b65b8f4..4ad7d207fe6ec 100644 --- a/sycl/source/detail/reduction.cpp +++ b/sycl/source/detail/reduction.cpp @@ -172,8 +172,10 @@ addCounterInit(handler &CGH, std::shared_ptr &Queue, auto EventImpl = std::make_shared(Queue); EventImpl->setContextImpl(detail::getSyclObjImpl(Queue->get_context())); EventImpl->setStateIncomplete(); - MemoryManager::fill_usm(Counter.get(), Queue, sizeof(int), {0}, {}, - &EventImpl->getHandleRef(), EventImpl); + ur_event_handle_t UREvent = nullptr; + MemoryManager::fill_usm(Counter.get(), Queue, sizeof(int), {0}, {}, &UREvent, + EventImpl); + EventImpl->setHandle(UREvent); CGH.depends_on(createSyclObjFromImpl(EventImpl)); } diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 8f4c536fa0691..09f745c5fc4d8 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -236,7 +236,8 @@ std::vector Command::getUrEvents(const std::vector &EventImpls) const { std::vector RetUrEvents; for (auto &EventImpl : EventImpls) { - if (EventImpl->getHandleRef() == nullptr) + auto Handle = EventImpl->getHandle(); + if (Handle == nullptr) continue; // Do not add redundant event dependencies for in-order queues. 
@@ -247,7 +248,7 @@ Command::getUrEvents(const std::vector &EventImpls) const { MWorkerQueue->isInOrder() && !isHostTask()) continue; - RetUrEvents.push_back(EventImpl->getHandleRef()); + RetUrEvents.push_back(Handle); } return RetUrEvents; @@ -286,7 +287,7 @@ std::vector Command::getUrEventsBlocking( MWorkerQueue->isInOrder() && !isHostTask()) continue; - RetUrEvents.push_back(EventImpl->getHandleRef()); + RetUrEvents.push_back(EventImpl->getHandle()); } return RetUrEvents; @@ -830,7 +831,7 @@ Command *Command::addDep(EventImplPtr Event, // We need this for just the instrumentation, so guarding it will prevent // unused variable warnings when instrumentation is turned off Command *Cmd = (Command *)Event->getCommand(); - ur_event_handle_t &UrEventAddr = Event->getHandleRef(); + ur_event_handle_t UrEventAddr = Event->getHandle(); // Now make an edge for the dependent event emitEdgeEventForEventDependence(Cmd, UrEventAddr); #endif @@ -839,7 +840,7 @@ Command *Command::addDep(EventImplPtr Event, ToCleanUp); } -void Command::emitEnqueuedEventSignal(ur_event_handle_t &UrEventAddr) { +void Command::emitEnqueuedEventSignal(const ur_event_handle_t UrEventAddr) { #ifdef XPTI_ENABLE_INSTRUMENTATION emitInstrumentationGeneral( MStreamID, MInstanceID, static_cast(MTraceEvent), @@ -928,7 +929,7 @@ bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking, else { MEvent->setEnqueued(); if (MShouldCompleteEventIfPossible && - (MEvent->isHost() || MEvent->getHandleRef() == nullptr)) + (MEvent->isHost() || MEvent->getHandle() == nullptr)) MEvent->setComplete(); // Consider the command is successfully enqueued if return code is @@ -944,7 +945,7 @@ bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking, } // Emit this correlation signal before the task end - emitEnqueuedEventSignal(MEvent->getHandleRef()); + emitEnqueuedEventSignal(MEvent->getHandle()); #ifdef XPTI_ENABLE_INSTRUMENTATION emitInstrumentation(xpti::trace_task_end, nullptr); #endif @@ -1096,14 +1097,15 @@ ur_result_t AllocaCommand::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; - ur_event_handle_t &Event = MEvent->getHandleRef(); + ur_event_handle_t UREvent = nullptr; void *HostPtr = nullptr; if (!MIsLeaderAlloca) { if (!MQueue) { // Do not need to make allocation if we have a linked device allocation - Command::waitForEvents(MQueue, EventImpls, Event); + Command::waitForEvents(MQueue, EventImpls, UREvent); + MEvent->setHandle(UREvent); return UR_RESULT_SUCCESS; } @@ -1113,8 +1115,8 @@ ur_result_t AllocaCommand::enqueueImp() { // delete it RawEvents below. 
MMemAllocation = MemoryManager::allocate(getContext(MQueue), getSYCLMemObj(), MInitFromUserData, HostPtr, - std::move(EventImpls), Event); - + std::move(EventImpls), UREvent); + MEvent->setHandle(UREvent); return UR_RESULT_SUCCESS; } @@ -1186,12 +1188,13 @@ void *AllocaSubBufCommand::getMemAllocation() const { ur_result_t AllocaSubBufCommand::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; - ur_event_handle_t &Event = MEvent->getHandleRef(); + ur_event_handle_t UREvent = nullptr; MMemAllocation = MemoryManager::allocateMemSubBuffer( getContext(MQueue), MParentAlloca->getMemAllocation(), MRequirement.MElemSize, MRequirement.MOffsetInBytes, - MRequirement.MAccessRange, std::move(EventImpls), Event); + MRequirement.MAccessRange, std::move(EventImpls), UREvent); + MEvent->setHandle(UREvent); XPTIRegistry::bufferAssociateNotification(MParentAlloca->getSYCLMemObj(), MMemAllocation); @@ -1280,7 +1283,7 @@ ur_result_t ReleaseCommand::enqueueImp() { EventImplPtr UnmapEventImpl(new event_impl(Queue)); UnmapEventImpl->setContextImpl(getContext(Queue)); UnmapEventImpl->setStateIncomplete(); - ur_event_handle_t &UnmapEvent = UnmapEventImpl->getHandleRef(); + ur_event_handle_t UREvent = nullptr; void *Src = CurAllocaIsHost ? MAllocaCmd->getMemAllocation() @@ -1291,20 +1294,21 @@ ur_result_t ReleaseCommand::enqueueImp() { : MAllocaCmd->MLinkedAllocaCmd->getMemAllocation(); MemoryManager::unmap(MAllocaCmd->getSYCLMemObj(), Dst, Queue, Src, - RawEvents, UnmapEvent); - + RawEvents, UREvent); + UnmapEventImpl->setHandle(UREvent); std::swap(MAllocaCmd->MIsActive, MAllocaCmd->MLinkedAllocaCmd->MIsActive); EventImpls.clear(); EventImpls.push_back(UnmapEventImpl); } - ur_event_handle_t &Event = MEvent->getHandleRef(); + ur_event_handle_t UREvent = nullptr; if (SkipRelease) - Command::waitForEvents(MQueue, EventImpls, Event); + Command::waitForEvents(MQueue, EventImpls, UREvent); else { MemoryManager::release(getContext(MQueue), MAllocaCmd->getSYCLMemObj(), MAllocaCmd->getMemAllocation(), - std::move(EventImpls), Event); + std::move(EventImpls), UREvent); } + MEvent->setHandle(UREvent); return UR_RESULT_SUCCESS; } @@ -1366,12 +1370,12 @@ ur_result_t MapMemObject::enqueueImp() { std::vector RawEvents = getUrEvents(EventImpls); flushCrossQueueDeps(EventImpls, MWorkerQueue); - ur_event_handle_t &Event = MEvent->getHandleRef(); + ur_event_handle_t UREvent = nullptr; *MDstPtr = MemoryManager::map( MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(), MQueue, MMapMode, MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange, - MSrcReq.MOffset, MSrcReq.MElemSize, std::move(RawEvents), Event); - + MSrcReq.MOffset, MSrcReq.MElemSize, std::move(RawEvents), UREvent); + MEvent->setHandle(UREvent); return UR_RESULT_SUCCESS; } @@ -1436,7 +1440,7 @@ bool UnMapMemObject::producesPiEvent() const { // restores the old behavior in this case until this is resolved. 
return MQueue && (MQueue->getDeviceImplPtr()->getBackend() != backend::ext_oneapi_level_zero || - MEvent->getHandleRef() != nullptr); + MEvent->getHandle() != nullptr); } ur_result_t UnMapMemObject::enqueueImp() { @@ -1445,10 +1449,11 @@ ur_result_t UnMapMemObject::enqueueImp() { std::vector RawEvents = getUrEvents(EventImpls); flushCrossQueueDeps(EventImpls, MWorkerQueue); - ur_event_handle_t &Event = MEvent->getHandleRef(); + ur_event_handle_t UREvent = nullptr; MemoryManager::unmap(MDstAllocaCmd->getSYCLMemObj(), MDstAllocaCmd->getMemAllocation(), MQueue, *MSrcPtr, - std::move(RawEvents), Event); + std::move(RawEvents), UREvent); + MEvent->setHandle(UREvent); return UR_RESULT_SUCCESS; } @@ -1538,14 +1543,14 @@ bool MemCpyCommand::producesPiEvent() const { return !MQueue || MQueue->getDeviceImplPtr()->getBackend() != backend::ext_oneapi_level_zero || - MEvent->getHandleRef() != nullptr; + MEvent->getHandle() != nullptr; } ur_result_t MemCpyCommand::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; - ur_event_handle_t &Event = MEvent->getHandleRef(); + ur_event_handle_t UREvent = nullptr; auto RawEvents = getUrEvents(EventImpls); flushCrossQueueDeps(EventImpls, MWorkerQueue); @@ -1555,8 +1560,9 @@ ur_result_t MemCpyCommand::enqueueImp() { MSrcQueue, MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange, MSrcReq.MOffset, MSrcReq.MElemSize, MDstAllocaCmd->getMemAllocation(), MQueue, MDstReq.MDims, MDstReq.MMemoryRange, MDstReq.MAccessRange, - MDstReq.MOffset, MDstReq.MElemSize, std::move(RawEvents), Event, MEvent); - + MDstReq.MOffset, MDstReq.MElemSize, std::move(RawEvents), UREvent, + MEvent); + MEvent->setHandle(UREvent); return UR_RESULT_SUCCESS; } @@ -1605,8 +1611,9 @@ void ExecCGCommand::clearAuxiliaryResources() { ur_result_t UpdateHostRequirementCommand::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; - ur_event_handle_t &Event = MEvent->getHandleRef(); - Command::waitForEvents(MQueue, EventImpls, Event); + ur_event_handle_t UREvent = nullptr; + Command::waitForEvents(MQueue, EventImpls, UREvent); + MEvent->setHandle(UREvent); assert(MSrcAllocaCmd && "Expected valid alloca command"); assert(MSrcAllocaCmd->getMemAllocation() && "Expected valid source pointer"); @@ -1693,13 +1700,13 @@ ur_result_t MemCpyCommandHost::enqueueImp() { std::vector EventImpls = MPreparedDepsEvents; std::vector RawEvents = getUrEvents(EventImpls); - ur_event_handle_t &Event = MEvent->getHandleRef(); + ur_event_handle_t UREvent = nullptr; // Omit copying if mode is discard one. // TODO: Handle this at the graph building time by, for example, creating // empty node instead of memcpy. 
if (MDstReq.MAccessMode == access::mode::discard_read_write || MDstReq.MAccessMode == access::mode::discard_write) { - Command::waitForEvents(Queue, EventImpls, Event); + Command::waitForEvents(Queue, EventImpls, UREvent); return UR_RESULT_SUCCESS; } @@ -1712,8 +1719,8 @@ ur_result_t MemCpyCommandHost::enqueueImp() { MSrcQueue, MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange, MSrcReq.MOffset, MSrcReq.MElemSize, *MDstPtr, MQueue, MDstReq.MDims, MDstReq.MMemoryRange, MDstReq.MAccessRange, MDstReq.MOffset, - MDstReq.MElemSize, std::move(RawEvents), MEvent->getHandleRef(), - MEvent); + MDstReq.MElemSize, std::move(RawEvents), UREvent, MEvent); + MEvent->setHandle(UREvent); } catch (sycl::exception &e) { return static_cast(get_ur_error(e)); } @@ -1727,8 +1734,9 @@ EmptyCommand::EmptyCommand() : Command(CommandType::EMPTY_TASK, nullptr) { ur_result_t EmptyCommand::enqueueImp() { waitForPreparedHostEvents(); - waitForEvents(MQueue, MPreparedDepsEvents, MEvent->getHandleRef()); - + ur_event_handle_t UREvent = nullptr; + waitForEvents(MQueue, MPreparedDepsEvents, UREvent); + MEvent->setHandle(UREvent); return UR_RESULT_SUCCESS; } @@ -2407,13 +2415,19 @@ static ur_result_t SetKernelParamsAndLaunch( launch_property_value_cooperative}); } - return Plugin->call_nocheck( + ur_event_handle_t UREvent = nullptr; + ur_result_t Error = Plugin->call_nocheck( urEnqueueKernelLaunchCustomExp, Queue->getHandleRef(), Kernel, NDRDesc.Dims, &NDRDesc.GlobalSize[0], LocalSize, property_list.size(), property_list.data(), RawEvents.size(), RawEvents.empty() ? nullptr : &RawEvents[0], - OutEventImpl ? &OutEventImpl->getHandleRef() : nullptr); + OutEventImpl ? &UREvent : nullptr); + if (OutEventImpl) { + OutEventImpl->setHandle(UREvent); + } + return Error; } + ur_event_handle_t UREvent = nullptr; ur_result_t Error = [&](auto... Args) { if (IsCooperative) { @@ -2424,7 +2438,11 @@ static ur_result_t SetKernelParamsAndLaunch( }(Queue->getHandleRef(), Kernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], LocalSize, RawEvents.size(), RawEvents.empty() ? nullptr : &RawEvents[0], - OutEventImpl ? &OutEventImpl->getHandleRef() : nullptr); + OutEventImpl ? &UREvent : nullptr); + if (Error == UR_RESULT_SUCCESS && OutEventImpl) { + OutEventImpl->setHandle(UREvent); + } + return Error; } @@ -2682,7 +2700,8 @@ ur_result_t enqueueReadWriteHostPipe(const QueueImplPtr &Queue, ur_queue_handle_t ur_q = Queue->getHandleRef(); ur_result_t Error; - auto OutEvent = OutEventImpl ? &OutEventImpl->getHandleRef() : nullptr; + ur_event_handle_t UREvent = nullptr; + auto OutEvent = OutEventImpl ? &UREvent : nullptr; if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); if (read) { @@ -2696,7 +2715,9 @@ ur_result_t enqueueReadWriteHostPipe(const QueueImplPtr &Queue, size, RawEvents.size(), RawEvents.empty() ? nullptr : &RawEvents[0], OutEvent); } - + if (Error == UR_RESULT_SUCCESS && OutEventImpl) { + OutEventImpl->setHandle(UREvent); + } return Error; } @@ -2715,14 +2736,6 @@ ur_result_t ExecCGCommand::enqueueImpCommandBuffer() { MQueue->getPlugin()->call(urEventWait, RawEvents.size(), &RawEvents[0]); } - // We can omit creating a UR event and create a "discarded" event if either - // the queue has the discard property or the command has been explicitly - // marked as not needing an event, e.g. if the user did not ask for one, and - // if the queue supports discarded UR event and there are no requirements. 
- bool DiscardUrEvent = (MQueue->MDiscardEvents || !MEventNeeded) && - MQueue->supportsDiscardingPiEvents() && - MCommandGroup->getRequirements().size() == 0; - ur_event_handle_t *Event = DiscardUrEvent ? nullptr : &MEvent->getHandleRef(); ur_exp_command_buffer_sync_point_t OutSyncPoint; ur_exp_command_buffer_command_handle_t OutCommand = nullptr; switch (MCommandGroup->getType()) { @@ -2734,16 +2747,6 @@ ur_result_t ExecCGCommand::enqueueImpCommandBuffer() { return AllocaCmd->getMemAllocation(); }; - if (!Event) { - // Kernel only uses assert if it's non interop one - bool KernelUsesAssert = - !(ExecKernel->MSyclKernel && ExecKernel->MSyclKernel->isInterop()) && - ProgramManager::getInstance().kernelUsesAssert( - ExecKernel->MKernelName); - if (KernelUsesAssert) { - Event = &MEvent->getHandleRef(); - } - } auto result = enqueueImpCommandBufferKernel( MQueue->get_context(), MQueue->getDeviceImplPtr(), MCommandBuffer, *ExecKernel, MSyncPointDeps, &OutSyncPoint, &OutCommand, @@ -2877,7 +2880,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { MQueue->supportsDiscardingPiEvents() && MCommandGroup->getRequirements().size() == 0; - ur_event_handle_t *Event = DiscardUrEvent ? nullptr : &MEvent->getHandleRef(); + ur_event_handle_t UREvent = nullptr; + ur_event_handle_t *Event = DiscardUrEvent ? nullptr : &UREvent; detail::EventImplPtr EventImpl = DiscardUrEvent ? nullptr : MEvent; switch (MCommandGroup->getType()) { @@ -2897,7 +2901,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset, Req->MElemSize, Copy->getDst(), nullptr, Req->MDims, Req->MAccessRange, Req->MAccessRange, /*DstOffset=*/{0, 0, 0}, Req->MElemSize, - std::move(RawEvents), MEvent->getHandleRef(), MEvent); + std::move(RawEvents), UREvent, MEvent); + MEvent->setHandle(UREvent); return UR_RESULT_SUCCESS; } @@ -2906,13 +2911,13 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { Requirement *Req = (Requirement *)(Copy->getDst()); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); - MemoryManager::copy( - AllocaCmd->getSYCLMemObj(), Copy->getSrc(), nullptr, Req->MDims, - Req->MAccessRange, Req->MAccessRange, - /*SrcOffset*/ {0, 0, 0}, Req->MElemSize, AllocaCmd->getMemAllocation(), - MQueue, Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset, - Req->MElemSize, std::move(RawEvents), MEvent->getHandleRef(), MEvent); - + MemoryManager::copy(AllocaCmd->getSYCLMemObj(), Copy->getSrc(), nullptr, + Req->MDims, Req->MAccessRange, Req->MAccessRange, + /*SrcOffset*/ {0, 0, 0}, Req->MElemSize, + AllocaCmd->getMemAllocation(), MQueue, Req->MDims, + Req->MMemoryRange, Req->MAccessRange, Req->MOffset, + Req->MElemSize, std::move(RawEvents), UREvent, MEvent); + MEvent->setHandle(UREvent); return UR_RESULT_SUCCESS; } case CGType::CopyAccToAcc: { @@ -2928,9 +2933,9 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { ReqSrc->MDims, ReqSrc->MMemoryRange, ReqSrc->MAccessRange, ReqSrc->MOffset, ReqSrc->MElemSize, AllocaCmdDst->getMemAllocation(), MQueue, ReqDst->MDims, ReqDst->MMemoryRange, ReqDst->MAccessRange, - ReqDst->MOffset, ReqDst->MElemSize, std::move(RawEvents), - MEvent->getHandleRef(), MEvent); - + ReqDst->MOffset, ReqDst->MElemSize, std::move(RawEvents), UREvent, + MEvent); + MEvent->setHandle(UREvent); return UR_RESULT_SUCCESS; } case CGType::Fill: { @@ -2942,8 +2947,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { AllocaCmd->getSYCLMemObj(), AllocaCmd->getMemAllocation(), MQueue, Fill->MPattern.size(), Fill->MPattern.data(), Req->MDims, Req->MMemoryRange, 
Req->MAccessRange, Req->MOffset, Req->MElemSize, - std::move(RawEvents), MEvent->getHandleRef(), MEvent); - + std::move(RawEvents), UREvent, MEvent); + MEvent->setHandle(UREvent); return UR_RESULT_SUCCESS; } case CGType::Kernel: { @@ -2993,7 +2998,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { MemoryManager::copy_usm(Copy->getSrc(), MQueue, Copy->getLength(), Copy->getDst(), std::move(RawEvents), Event, MEvent); - + if (Event) + MEvent->setHandle(*Event); return UR_RESULT_SUCCESS; } case CGType::FillUSM: { @@ -3001,7 +3007,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { MemoryManager::fill_usm(Fill->getDst(), MQueue, Fill->getLength(), Fill->getPattern(), std::move(RawEvents), Event, MEvent); - + if (Event) + MEvent->setHandle(*Event); return UR_RESULT_SUCCESS; } case CGType::PrefetchUSM: { @@ -3009,7 +3016,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { MemoryManager::prefetch_usm(Prefetch->getDst(), MQueue, Prefetch->getLength(), std::move(RawEvents), Event, MEvent); - + if (Event) + MEvent->setHandle(*Event); return UR_RESULT_SUCCESS; } case CGType::AdviseUSM: { @@ -3017,7 +3025,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { MemoryManager::advise_usm(Advise->getDst(), MQueue, Advise->getLength(), Advise->getAdvice(), std::move(RawEvents), Event, MEvent); - + if (Event) + MEvent->setHandle(*Event); return UR_RESULT_SUCCESS; } case CGType::Copy2DUSM: { @@ -3026,6 +3035,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { Copy->getDst(), Copy->getDstPitch(), Copy->getWidth(), Copy->getHeight(), std::move(RawEvents), Event, MEvent); + if (Event) + MEvent->setHandle(*Event); return UR_RESULT_SUCCESS; } case CGType::Fill2DUSM: { @@ -3034,6 +3045,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { Fill->getWidth(), Fill->getHeight(), Fill->getPattern(), std::move(RawEvents), Event, MEvent); + if (Event) + MEvent->setHandle(*Event); return UR_RESULT_SUCCESS; } case CGType::Memset2DUSM: { @@ -3042,6 +3055,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { Memset->getWidth(), Memset->getHeight(), Memset->getValue(), std::move(RawEvents), Event, MEvent); + if (Event) + MEvent->setHandle(*Event); return UR_RESULT_SUCCESS; } case CGType::CodeplayHostTask: { @@ -3182,7 +3197,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { InteropFreeFunc, &CustomOpData, ReqMems.size(), ReqMems.data(), nullptr, RawEvents.size(), RawEvents.data(), Event); - + if (Event) + MEvent->setHandle(*Event); return UR_RESULT_SUCCESS; } case CGType::Barrier: { @@ -3192,7 +3208,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { MEvent->setHostEnqueueTime(); Plugin->call(urEnqueueEventsWaitWithBarrier, MQueue->getHandleRef(), 0, nullptr, Event); - + if (Event) + MEvent->setHandle(*Event); return UR_RESULT_SUCCESS; } case CGType::BarrierWaitlist: { @@ -3209,7 +3226,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { MEvent->setHostEnqueueTime(); Plugin->call(urEnqueueEventsWaitWithBarrier, MQueue->getHandleRef(), UrEvents.size(), &UrEvents[0], Event); - + if (Event) + MEvent->setHandle(*Event); return UR_RESULT_SUCCESS; } case CGType::ProfilingTag: { @@ -3233,7 +3251,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { /*blocking=*/false, /*num_events_in_wait_list=*/0, /*event_wait_list=*/nullptr, Event); - + if (Event) + MEvent->setHandle(*Event); return UR_RESULT_SUCCESS; } case CGType::CopyToDeviceGlobal: { @@ -3242,7 +3261,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { Copy->getDeviceGlobalPtr(), Copy->isDeviceImageScoped(), MQueue, Copy->getNumBytes(), Copy->getOffset(), Copy->getSrc(), 
std::move(RawEvents), Event, MEvent); - + if (Event) + MEvent->setHandle(*Event); return UR_RESULT_SUCCESS; } case CGType::CopyFromDeviceGlobal: { @@ -3252,7 +3272,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { Copy->getDeviceGlobalPtr(), Copy->isDeviceImageScoped(), MQueue, Copy->getNumBytes(), Copy->getOffset(), Copy->getDest(), std::move(RawEvents), Event, MEvent); - + if (Event) + MEvent->setHandle(*Event); return UR_RESULT_SUCCESS; } case CGType::ReadWriteHostPipe: { @@ -3277,10 +3298,14 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { static_cast(MCommandGroup.get()); if (MEvent != nullptr) MEvent->setHostEnqueueTime(); - return MQueue->getPlugin()->call_nocheck( + ur_result_t Err = MQueue->getPlugin()->call_nocheck( urCommandBufferEnqueueExp, CmdBufferCG->MCommandBuffer, MQueue->getHandleRef(), RawEvents.size(), RawEvents.empty() ? nullptr : &RawEvents[0], Event); + if (Event) + MEvent->setHandle(*Event); + + return Err; } case CGType::CopyImage: { CGCopyImage *Copy = (CGCopyImage *)MCommandGroup.get(); @@ -3290,6 +3315,9 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { Copy->getDstDesc(), Copy->getSrcFormat(), Copy->getDstFormat(), Copy->getCopyFlags(), Copy->getSrcOffset(), Copy->getDstOffset(), Copy->getCopyExtent(), std::move(RawEvents), Event); + if (Event) + MEvent->setHandle(*Event); + return UR_RESULT_SUCCESS; } case CGType::SemaphoreWait: { @@ -3366,7 +3394,9 @@ bool KernelFusionCommand::producesPiEvent() const { return false; } ur_result_t KernelFusionCommand::enqueueImp() { waitForPreparedHostEvents(); - waitForEvents(MQueue, MPreparedDepsEvents, MEvent->getHandleRef()); + ur_event_handle_t UREvent = nullptr; + waitForEvents(MQueue, MPreparedDepsEvents, UREvent); + MEvent->setHandle(UREvent); // We need to release the queue here because KernelFusionCommands are // held back by the scheduler thus prevent the deallocation of the queue. @@ -3477,8 +3507,9 @@ UpdateCommandBufferCommand::UpdateCommandBufferCommand( ur_result_t UpdateCommandBufferCommand::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; - ur_event_handle_t &Event = MEvent->getHandleRef(); - Command::waitForEvents(MQueue, EventImpls, Event); + ur_event_handle_t UREvent = nullptr; + Command::waitForEvents(MQueue, EventImpls, UREvent); + MEvent->setHandle(UREvent); for (auto &Node : MNodes) { auto CG = static_cast(Node->MCommandGroup.get()); diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 5b8e204e4733b..c7efe9aeb2aaf 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -199,7 +199,7 @@ class Command { void emitEdgeEventForEventDependence(Command *Cmd, ur_event_handle_t &EventAddr); /// Creates a signal event with the enqueued kernel event handle. - void emitEnqueuedEventSignal(ur_event_handle_t &UrEventAddr); + void emitEnqueuedEventSignal(const ur_event_handle_t UrEventAddr); /// Create a trace event of node_create type; this must be guarded by a /// check for xptiTraceEnabled(). /// Post Condition: MTraceEvent will be set to the event created. diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index aafe58b4f8ffe..95dd8e78522fe 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -741,7 +741,7 @@ bool CheckEventReadiness(const ContextImplPtr &Context, // A nullptr here means that the commmand does not produce a UR event or it // hasn't been enqueued yet. 
-  return SyclEventImplPtr->getHandleRef() != nullptr;
+  return SyclEventImplPtr->getHandle() != nullptr;
 }
 
 bool Scheduler::areEventsSafeForSchedulerBypass(
diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp
index 33431ebd4b47b..5ceb4724d3485 100644
--- a/sycl/source/handler.cpp
+++ b/sycl/source/handler.cpp
@@ -293,7 +293,7 @@ event handler::finalize() {
         if (NewEvent != nullptr) {
           detail::emitInstrumentationGeneral(
               StreamID, InstanceID, CmdTraceEvent, xpti::trace_signal,
-              static_cast(NewEvent->getHandleRef()));
+              static_cast(NewEvent->getHandle()));
         }
         detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent,
                                            xpti::trace_task_end, nullptr);
@@ -324,7 +324,7 @@ event handler::finalize() {
       NewEvent->setSubmissionTime();
 
       EnqueueKernel();
-      if (NewEvent->isHost() || NewEvent->getHandleRef() == nullptr)
+      if (NewEvent->isHost() || NewEvent->getHandle() == nullptr)
         NewEvent->setComplete();
       NewEvent->setEnqueued();
 
diff --git a/sycl/test-e2e/ThreadSafety/event.cpp b/sycl/test-e2e/ThreadSafety/event.cpp
new file mode 100644
index 0000000000000..4d3e6a6f28299
--- /dev/null
+++ b/sycl/test-e2e/ThreadSafety/event.cpp
@@ -0,0 +1,65 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+
+// This test checks the thread-safety of sycl::event's native handle data
+// member. To do that, we create a host task and a kernel task which depends on
+// the host task. After submission we yield in the main thread to let the host
+// task run, which eventually results in the creation of the kernel event's
+// handle, and we query the status of the kernel event in a loop to catch the
+// moment when the handle is modified. If reading and modifying the
+// sycl::event's handle is not thread-safe, this results in a segfault.
+
+#include
+#include
+#include
+#include
+
+int main() {
+  // Create a SYCL queue
+  sycl::queue queue;
+  if (!queue.get_device().has(sycl::aspect::usm_shared_allocations))
+    return 0;
+
+  // Define the size of the buffers
+  static constexpr size_t size = 1024;
+
+  // Allocate USM memory for source and destination buffers
+  int *src = sycl::malloc_shared<int>(size, queue);
+  int *dst = sycl::malloc_shared<int>(size, queue);
+
+  // Initialize the source buffer with some data
+  for (size_t i = 0; i < size; ++i) {
+    src[i] = i;
+  }
+
+  auto host_task_event = queue.submit([&](sycl::handler &cgh) {
+    cgh.host_task([=]() {
+      // Do some work in the host task
+      std::cout << "Host task is executing." << std::endl;
+      memcpy(dst, src, size * sizeof(int));
+      std::cout << "Host task completed." << std::endl;
+    });
+  });
+
+  sycl::event kernel_event = queue.submit([&](sycl::handler &cgh) {
+    cgh.depends_on(host_task_event);
+    cgh.memcpy(dst, src, size * sizeof(int));
+  });
+
+  // Let the host task thread work, which will result in kernel_event's handle
+  // being created at some random moment.
+  std::this_thread::yield();
+  // Use a number of iterations large enough to catch the moment when the
+  // handle is modified.
+ for (int i = 0; i < 100000; i++) { + std::ignore = + kernel_event.get_info(); + } + + kernel_event.wait(); + + // Free the USM memory + sycl::free(src, queue); + sycl::free(dst, queue); + return 0; +} diff --git a/sycl/unittests/buffer/BufferReleaseBase.cpp b/sycl/unittests/buffer/BufferReleaseBase.cpp index 172b005bf60af..8b0840300d235 100644 --- a/sycl/unittests/buffer/BufferReleaseBase.cpp +++ b/sycl/unittests/buffer/BufferReleaseBase.cpp @@ -227,12 +227,12 @@ TEST_F(BufferDestructionCheck, ReadyToReleaseLogic) { ReadCmd = new MockCmdWithReleaseTracking(sycl::detail::getSyclObjImpl(Q), MockReq); // These dummy handles are automatically cleaned up by the runtime - ReadCmd->getEvent()->getHandleRef() = reinterpret_cast( - mock::createDummyHandle()); + ReadCmd->getEvent()->setHandle(reinterpret_cast( + mock::createDummyHandle())); WriteCmd = new MockCmdWithReleaseTracking(sycl::detail::getSyclObjImpl(Q), MockReq); - WriteCmd->getEvent()->getHandleRef() = reinterpret_cast( - mock::createDummyHandle()); + WriteCmd->getEvent()->setHandle(reinterpret_cast( + mock::createDummyHandle())); ReadCmd->MEnqueueStatus = sycl::detail::EnqueueResultT::SyclEnqueueSuccess; WriteCmd->MEnqueueStatus = sycl::detail::EnqueueResultT::SyclEnqueueSuccess; @@ -247,23 +247,23 @@ TEST_F(BufferDestructionCheck, ReadyToReleaseLogic) { &replaceEventGetInfo); testing::InSequence S; - ExpectedEventStatus[ReadCmd->getEvent()->getHandleRef()] = + ExpectedEventStatus[ReadCmd->getEvent()->getHandle()] = UR_EVENT_STATUS_SUBMITTED; - ExpectedEventStatus[WriteCmd->getEvent()->getHandleRef()] = + ExpectedEventStatus[WriteCmd->getEvent()->getHandle()] = UR_EVENT_STATUS_SUBMITTED; EXPECT_FALSE(MockSchedulerPtr->checkLeavesCompletion(Rec)); - ExpectedEventStatus[ReadCmd->getEvent()->getHandleRef()] = + ExpectedEventStatus[ReadCmd->getEvent()->getHandle()] = UR_EVENT_STATUS_COMPLETE; - ExpectedEventStatus[WriteCmd->getEvent()->getHandleRef()] = + ExpectedEventStatus[WriteCmd->getEvent()->getHandle()] = UR_EVENT_STATUS_SUBMITTED; EXPECT_FALSE(MockSchedulerPtr->checkLeavesCompletion(Rec)); - ExpectedEventStatus[ReadCmd->getEvent()->getHandleRef()] = + ExpectedEventStatus[ReadCmd->getEvent()->getHandle()] = UR_EVENT_STATUS_COMPLETE; - ExpectedEventStatus[WriteCmd->getEvent()->getHandleRef()] = + ExpectedEventStatus[WriteCmd->getEvent()->getHandle()] = UR_EVENT_STATUS_COMPLETE; EXPECT_TRUE(MockSchedulerPtr->checkLeavesCompletion(Rec)); // previous expect_call is still valid and will generate failure if we recieve diff --git a/sycl/unittests/queue/USM.cpp b/sycl/unittests/queue/USM.cpp index 28a6f589b6650..26518f72a8db9 100644 --- a/sycl/unittests/queue/USM.cpp +++ b/sycl/unittests/queue/USM.cpp @@ -25,7 +25,7 @@ ur_event_handle_t MEMCPY = nullptr; ur_event_handle_t MEMSET = nullptr; template auto getVal(T obj) { - return detail::getSyclObjImpl(obj)->getHandleRef(); + return detail::getSyclObjImpl(obj)->getHandle(); } ur_result_t redefinedEnqueueEventsWaitAfter(void *pParams) { diff --git a/sycl/unittests/scheduler/CommandsWaitForEvents.cpp b/sycl/unittests/scheduler/CommandsWaitForEvents.cpp index d7ffcbb059c0b..8bb19359b8b8f 100644 --- a/sycl/unittests/scheduler/CommandsWaitForEvents.cpp +++ b/sycl/unittests/scheduler/CommandsWaitForEvents.cpp @@ -190,7 +190,7 @@ TEST_F(SchedulerTest, StreamAUXCmdsWait) { ur_event_handle_t UREvent = mock::createDummyHandle(); auto EventImpl = std::make_shared(QueueImpl); - EventImpl->getHandleRef() = UREvent; + EventImpl->setHandle(UREvent); QueueImplProxy->registerStreamServiceEvent(EventImpl); 
diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp index 5f360aa8c766d..d99ce5f6e0f3f 100644 --- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp +++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -113,7 +113,7 @@ class DependsOnTests : public ::testing::Test { ASSERT_EQ(PassedNumEvents.size(), 1u); auto [EventCount, EventArr] = PassedNumEvents[0]; ASSERT_EQ(EventCount, 1u); - EXPECT_EQ(*EventArr, Cmd3Event->getHandleRef()); + EXPECT_EQ(*EventArr, Cmd3Event->getHandle()); } void VerifyBlockedCommandsEnqueue( @@ -339,7 +339,7 @@ TEST_F(DependsOnTests, ShortcutFunctionWithWaitList) { }); std::shared_ptr SingleTaskEventImpl = detail::getSyclObjImpl(SingleTaskEvent); - EXPECT_EQ(SingleTaskEventImpl->getHandleRef(), nullptr); + EXPECT_EQ(SingleTaskEventImpl->getHandle(), nullptr); Cmd->MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueSuccess; EventsInWaitList.clear(); @@ -352,9 +352,9 @@ TEST_F(DependsOnTests, ShortcutFunctionWithWaitList) { QueueDevImpl->get_context()); auto ShortcutFuncEvent = Queue.memcpy( SecondBuf, FirstBuf, sizeof(int) * ArraySize, {SingleTaskEvent}); - EXPECT_NE(SingleTaskEventImpl->getHandleRef(), nullptr); + EXPECT_NE(SingleTaskEventImpl->getHandle(), nullptr); ASSERT_EQ(EventsInWaitList.size(), 1u); - EXPECT_EQ(EventsInWaitList[0], SingleTaskEventImpl->getHandleRef()); + EXPECT_EQ(EventsInWaitList[0], SingleTaskEventImpl->getHandle()); Queue.wait(); sycl::free(FirstBuf, Queue); sycl::free(SecondBuf, Queue); @@ -381,15 +381,15 @@ TEST_F(DependsOnTests, BarrierWithWaitList) { }); std::shared_ptr SingleTaskEventImpl = detail::getSyclObjImpl(SingleTaskEvent); - EXPECT_EQ(SingleTaskEventImpl->getHandleRef(), nullptr); + EXPECT_EQ(SingleTaskEventImpl->getHandle(), nullptr); Cmd->MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueSuccess; EventsInWaitList.clear(); Queue.ext_oneapi_submit_barrier(std::vector{SingleTaskEvent}); - EXPECT_NE(SingleTaskEventImpl->getHandleRef(), nullptr); + EXPECT_NE(SingleTaskEventImpl->getHandle(), nullptr); ASSERT_EQ(EventsInWaitList.size(), 1u); - EXPECT_EQ(EventsInWaitList[0], SingleTaskEventImpl->getHandleRef()); + EXPECT_EQ(EventsInWaitList[0], SingleTaskEventImpl->getHandle()); Queue.wait(); } } // anonymous namespace diff --git a/sycl/unittests/scheduler/GraphCleanup.cpp b/sycl/unittests/scheduler/GraphCleanup.cpp index d0ab78a2b54d6..c0e1dc136a2d8 100644 --- a/sycl/unittests/scheduler/GraphCleanup.cpp +++ b/sycl/unittests/scheduler/GraphCleanup.cpp @@ -154,8 +154,8 @@ static void checkCleanupOnEnqueue(MockScheduler &MS, // Check addCopyBack MockCmd = addNewMockCmds(); - LeafMockCmd->getEvent()->getHandleRef() = - reinterpret_cast(new int{}); + LeafMockCmd->getEvent()->setHandle( + reinterpret_cast(new int{})); MS.addCopyBack(&MockReq); verifyCleanup(Record, AllocaCmd, MockCmd, CommandDeleted); diff --git a/sycl/unittests/scheduler/InOrderQueueDeps.cpp b/sycl/unittests/scheduler/InOrderQueueDeps.cpp index 7e8bd6dd4c9a0..4e53f0ed73a18 100644 --- a/sycl/unittests/scheduler/InOrderQueueDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueDeps.cpp @@ -119,7 +119,7 @@ TEST_F(SchedulerTest, InOrderQueueIsolatedDeps) { { event E1 = submitKernel(Q1); event E2 = submitKernel(Q2); - ExpectedEvent = detail::getSyclObjImpl(E2)->getHandleRef(); + ExpectedEvent = detail::getSyclObjImpl(E2)->getHandle(); Q1.ext_oneapi_submit_barrier({E1, E2}); EXPECT_TRUE(BarrierCalled); } diff --git a/sycl/unittests/scheduler/QueueFlushing.cpp 
b/sycl/unittests/scheduler/QueueFlushing.cpp index 3a513ca98079c..82cda17f1fa95 100644 --- a/sycl/unittests/scheduler/QueueFlushing.cpp +++ b/sycl/unittests/scheduler/QueueFlushing.cpp @@ -54,7 +54,7 @@ static void addDepAndEnqueue(detail::Command *Cmd, ur_event_handle_t UREvent = mock::createDummyHandle(); - DepCmd.getEvent()->getHandleRef() = UREvent; + DepCmd.getEvent()->setHandle(UREvent); (void)Cmd->addDep(detail::DepDesc{&DepCmd, &MockReq, nullptr}, ToCleanUp); detail::EnqueueResultT Res; @@ -154,7 +154,7 @@ TEST_F(SchedulerTest, QueueFlushing) { ur_event_handle_t UREvent = mock::createDummyHandle(); - DepEvent->getHandleRef() = UREvent; + DepEvent->setHandle(UREvent); (void)Cmd.addDep(DepEvent, ToCleanUp); MockScheduler::enqueueCommand(&Cmd, Res, detail::NON_BLOCKING); EXPECT_TRUE(QueueFlushed); @@ -174,7 +174,7 @@ TEST_F(SchedulerTest, QueueFlushing) { ur_event_handle_t UREvent = mock::createDummyHandle(); - DepEvent->getHandleRef() = UREvent; + DepEvent->setHandle(UREvent); } (void)Cmd.addDep(DepEvent, ToCleanUp); MockScheduler::enqueueCommand(&Cmd, Res, detail::NON_BLOCKING); @@ -198,13 +198,13 @@ TEST_F(SchedulerTest, QueueFlushing) { ur_event_handle_t UREvent = mock::createDummyHandle(); - DepCmdA.getEvent()->getHandleRef() = UREvent; + DepCmdA.getEvent()->setHandle(UREvent); (void)Cmd.addDep(detail::DepDesc{&DepCmdA, &MockReq, nullptr}, ToCleanUp); MockCommand DepCmdB(QueueImplB); UREvent = mock::createDummyHandle(); - DepCmdB.getEvent()->getHandleRef() = UREvent; + DepCmdB.getEvent()->setHandle(UREvent); (void)Cmd.addDep(detail::DepDesc{&DepCmdB, &MockReq, nullptr}, ToCleanUp); // The check is performed in redefinedQueueFlush MockScheduler::enqueueCommand(&Cmd, Res, detail::NON_BLOCKING); @@ -219,7 +219,7 @@ TEST_F(SchedulerTest, QueueFlushing) { ur_event_handle_t UREvent = mock::createDummyHandle(); - DepCmd.getEvent()->getHandleRef() = UREvent; + DepCmd.getEvent()->setHandle(UREvent); (void)CmdA.addDep(detail::DepDesc{&DepCmd, &MockReq, nullptr}, ToCleanUp); MockScheduler::enqueueCommand(&CmdA, Res, detail::NON_BLOCKING);