diff --git a/sycl/doc/design/CommandGraph.md b/sycl/doc/design/CommandGraph.md index 9519067a00484..b0fe95f4e0c0c 100644 --- a/sycl/doc/design/CommandGraph.md +++ b/sycl/doc/design/CommandGraph.md @@ -519,6 +519,8 @@ The types of commands which are unsupported, and lead to this exception are: This corresponds to a memory buffer write command. * `handler::copy(src, dest)` or `handler::memcpy(dest, src)` - Where both `src` and `dest` are USM pointers. This corresponds to a USM copy command. +* `handler::fill(ptr, pattern, count)` - This corresponds to a USM memory + fill command. * `handler::memset(ptr, value, numBytes)` - This corresponds to a USM memory fill command. * `handler::prefetch()`. diff --git a/sycl/include/sycl/detail/cg.hpp b/sycl/include/sycl/detail/cg.hpp index 5c2ffdc712c00..08edae99c506f 100644 --- a/sycl/include/sycl/detail/cg.hpp +++ b/sycl/include/sycl/detail/cg.hpp @@ -299,7 +299,7 @@ class CGFillUSM : public CG { MPattern(std::move(Pattern)), MDst(DstPtr), MLength(Length) {} void *getDst() { return MDst; } size_t getLength() { return MLength; } - int getFill() { return MPattern[0]; } + const std::vector &getPattern() { return MPattern; } }; /// "Prefetch USM" command group class. diff --git a/sycl/include/sycl/detail/pi.def b/sycl/include/sycl/detail/pi.def index 195beb0a24861..b49f43d3d6ce6 100644 --- a/sycl/include/sycl/detail/pi.def +++ b/sycl/include/sycl/detail/pi.def @@ -128,7 +128,7 @@ _PI_API(piextUSMHostAlloc) _PI_API(piextUSMDeviceAlloc) _PI_API(piextUSMSharedAlloc) _PI_API(piextUSMFree) -_PI_API(piextUSMEnqueueMemset) +_PI_API(piextUSMEnqueueFill) _PI_API(piextUSMEnqueueMemcpy) _PI_API(piextUSMEnqueuePrefetch) _PI_API(piextUSMEnqueueMemAdvise) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 204fb56b67c15..2a3b832d2df18 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -172,9 +172,10 @@ // - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D // - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM // - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D +// 16.51 Replaced piextUSMEnqueueMemset with piextUSMEnqueueFill -#define _PI_H_VERSION_MAJOR 15 -#define _PI_H_VERSION_MINOR 50 +#define _PI_H_VERSION_MAJOR 16 +#define _PI_H_VERSION_MINOR 51 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -2060,22 +2061,22 @@ __SYCL_EXPORT pi_result piextUSMPitchedAlloc( /// \param ptr is the memory to be freed __SYCL_EXPORT pi_result piextUSMFree(pi_context context, void *ptr); -/// USM Memset API +/// USM Fill API /// /// \param queue is the queue to submit to -/// \param ptr is the ptr to memset -/// \param value is value to set. It is interpreted as an 8-bit value and the -/// upper -/// 24 bits are ignored -/// \param count is the size in bytes to memset +/// \param ptr is the ptr to fill +/// \param pattern is the ptr with the bytes of the pattern to set +/// \param patternSize is the size in bytes of the pattern to set +/// \param count is the size in bytes to fill /// \param num_events_in_waitlist is the number of events to wait on /// \param events_waitlist is an array of events to wait on /// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueMemset(pi_queue queue, void *ptr, - pi_int32 value, size_t count, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event); +__SYCL_EXPORT pi_result piextUSMEnqueueFill(pi_queue queue, void *ptr, + const void *pattern, + size_t patternSize, size_t count, + pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, + pi_event *event); /// USM Memcpy API /// diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 0a96126456439..44b44b766b831 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -2867,14 +2867,9 @@ class __SYCL_EXPORT handler { /// device copyable. /// \param Count is the number of times to fill Pattern into Ptr. template void fill(void *Ptr, const T &Pattern, size_t Count) { - throwIfActionIsCreated(); - setUserFacingNodeType(ext::oneapi::experimental::node_type::memfill); static_assert(is_device_copyable::value, "Pattern must be device copyable"); - parallel_for<__usmfill>(range<1>(Count), [=](id<1> Index) { - T *CastedPtr = static_cast(Ptr); - CastedPtr[Index] = Pattern; - }); + this->fill_impl(Ptr, &Pattern, sizeof(T), Count); } /// Prevents any commands submitted afterward to this queue from executing @@ -3574,6 +3569,9 @@ class __SYCL_EXPORT handler { }); } + // Implementation of USM fill using command for native fill. + void fill_impl(void *Dest, const void *Value, size_t ValueSize, size_t Count); + // Implementation of ext_oneapi_memcpy2d using command for native 2D memcpy. void ext_oneapi_memcpy2d_impl(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height); diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 8bf4eea26620c..af2a70cd30158 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -902,12 +902,12 @@ pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); } -pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, - size_t Count, pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemset( - Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); +pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, + size_t PatternSize, size_t Count, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, pi_event *Event) { + return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, + NumEventsInWaitlist, EventsWaitlist, Event); } pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 609750a4892b7..d52069eaa5ff7 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -905,12 +905,12 @@ pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); } -pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, - size_t Count, pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemset( - Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); +pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, + size_t PatternSize, size_t Count, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, pi_event *Event) { + return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, + NumEventsInWaitlist, EventsWaitlist, Event); } pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 8e6224ba5794a..a65fa3beab0fe 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -929,23 +929,22 @@ pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); } -/// USM Memset API +/// USM Fill API /// /// @param Queue is the queue to submit to -/// @param Ptr is the ptr to memset -/// @param Value is value to set. It is interpreted as an 8-bit value and the -/// upper -/// 24 bits are ignored -/// @param Count is the size in bytes to memset +/// @param Ptr is the ptr to fill +/// \param Pattern is the ptr with the bytes of the pattern to set +/// \param PatternSize is the size in bytes of the pattern to set +/// @param Count is the size in bytes to fill /// @param NumEventsInWaitlist is the number of events to wait on /// @param EventsWaitlist is an array of events to wait on /// @param Event is the event that represents this operation -pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, - size_t Count, pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemset( - Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); +pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, + size_t PatternSize, size_t Count, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, pi_event *Event) { + return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, + NumEventsInWaitlist, EventsWaitlist, Event); } pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, diff --git a/sycl/plugins/native_cpu/pi_native_cpu.cpp b/sycl/plugins/native_cpu/pi_native_cpu.cpp index 35f17a5316bac..f71833a30134a 100644 --- a/sycl/plugins/native_cpu/pi_native_cpu.cpp +++ b/sycl/plugins/native_cpu/pi_native_cpu.cpp @@ -905,12 +905,12 @@ pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); } -pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, - size_t Count, pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemset( - Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); +pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, + size_t PatternSize, size_t Count, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, pi_event *Event) { + return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, + NumEventsInWaitlist, EventsWaitlist, Event); } pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 9441e29804021..479f888035b22 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -864,12 +864,12 @@ pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); } -pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, - size_t Count, pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemset( - Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); +pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, + size_t PatternSize, size_t Count, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, pi_event *Event) { + return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, + NumEventsInWaitlist, EventsWaitlist, Event); } pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 889518aa76dba..afd574455cffd 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -3889,11 +3889,12 @@ inline pi_result piEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, return PI_SUCCESS; } -inline pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, - pi_int32 Value, size_t Count, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { +inline pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, + const void *Pattern, size_t PatternSize, + size_t Count, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, + pi_event *OutEvent) { PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); if (!Ptr) { return PI_ERROR_INVALID_VALUE; @@ -3905,8 +3906,7 @@ inline pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - size_t PatternSize = 1; - HANDLE_ERRORS(urEnqueueUSMFill(UrQueue, Ptr, PatternSize, &Value, Count, + HANDLE_ERRORS(urEnqueueUSMFill(UrQueue, Ptr, PatternSize, Pattern, Count, NumEventsInWaitList, UrEventsWaitList, UREvent)); diff --git a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp index 0f42f21d39093..c7a75fa3845a4 100644 --- a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp +++ b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp @@ -437,24 +437,24 @@ __SYCL_EXPORT pi_result piQueueGetInfo(pi_queue Queue, pi_queue_info ParamName, ParamValueSizeRet); } -/// USM Memset API +/// USM Fill API /// -/// @param Queue is the queue to submit to -/// @param Ptr is the ptr to memset -/// @param Value is value to set. It is interpreted as an 8-bit value and the -/// upper -/// 24 bits are ignored -/// @param Count is the size in bytes to memset -/// @param NumEventsInWaitlist is the number of events to wait on -/// @param EventsWaitlist is an array of events to wait on -/// @param Event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, - pi_int32 Value, size_t Count, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemset( - Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); +/// \param queue is the queue to submit to +/// \param ptr is the ptr to fill +/// \param pattern is the ptr with the bytes of the pattern to set +/// \param patternSize is the size in bytes of the pattern to set +/// \param count is the size in bytes to fill +/// \param num_events_in_waitlist is the number of events to wait on +/// \param events_waitlist is an array of events to wait on +/// \param event is the event that represents this operation +__SYCL_EXPORT pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, + const void *Pattern, + size_t PatternSize, size_t Count, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, + pi_event *Event) { + return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, + NumEventsInWaitlist, EventsWaitlist, Event); } __SYCL_EXPORT pi_result piEnqueueMemBufferCopyRect( @@ -1490,7 +1490,7 @@ __SYCL_EXPORT pi_result piPluginInit(pi_plugin *PluginInit) { _PI_API(piEnqueueMemBufferMap) _PI_API(piEnqueueMemUnmap) _PI_API(piEnqueueMemBufferFill) - _PI_API(piextUSMEnqueueMemset) + _PI_API(piextUSMEnqueueFill) _PI_API(piEnqueueMemBufferCopyRect) _PI_API(piEnqueueMemBufferCopy) _PI_API(piextUSMEnqueueMemcpy) diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index fdd3dcbea2a85..a1057517c9121 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -684,8 +684,10 @@ class node_impl { sycl::detail::CGFillUSM *FillUSM = static_cast(MCommandGroup.get()); Stream << "Dst: " << FillUSM->getDst() - << " Length: " << FillUSM->getLength() - << " Pattern: " << FillUSM->getFill() << "\\n"; + << " Length: " << FillUSM->getLength() << " Pattern: "; + for (auto byte : FillUSM->getPattern()) + Stream << byte; + Stream << "\\n"; } break; case sycl::detail::CG::CGTYPE::PrefetchUSM: diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index c5cab50a2f48a..75368f489537c 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -945,7 +945,7 @@ void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { assert(!SrcQueue->getContextImplPtr()->is_host() && - "Host queue not supported in fill_usm."); + "Host queue not supported in copy_usm."); if (!Len) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { @@ -981,7 +981,7 @@ void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, } void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, - int Pattern, + const std::vector &Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { @@ -1004,9 +1004,21 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); const PluginPtr &Plugin = Queue->getPlugin(); - Plugin->call( - Queue->getHandleRef(), Mem, Pattern, Length, DepEvents.size(), - DepEvents.data(), OutEvent); + Plugin->call( + Queue->getHandleRef(), Mem, Pattern.data(), Pattern.size(), Length, + DepEvents.size(), DepEvents.data(), OutEvent); +} + +// TODO: This function will remain until ABI-breaking change +void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, + int Pattern, + std::vector DepEvents, + sycl::detail::pi::PiEvent *OutEvent, + const detail::EventImplPtr &OutEventImpl) { + std::vector vecPattern(sizeof(Pattern)); + std::memcpy(vecPattern.data(), &Pattern, sizeof(Pattern)); + MemoryManager::fill_usm(Mem, Queue, Length, vecPattern, DepEvents, OutEvent, + OutEventImpl); } // TODO: This function will remain until ABI-breaking change @@ -1014,7 +1026,9 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, int Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent) { - MemoryManager::fill_usm(Mem, Queue, Length, Pattern, DepEvents, OutEvent, + std::vector vecPattern(sizeof(Pattern)); + std::memcpy(vecPattern.data(), &Pattern, sizeof(Pattern)); + MemoryManager::fill_usm(Mem, Queue, Length, vecPattern, DepEvents, OutEvent, nullptr); // OutEventImpl); } @@ -1680,7 +1694,8 @@ void MemoryManager::ext_oneapi_copy_usm_cmd_buffer( void MemoryManager::ext_oneapi_fill_usm_cmd_buffer( sycl::detail::ContextImplPtr Context, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem, - size_t Len, int Pattern, std::vector Deps, + size_t Len, const std::vector &Pattern, + std::vector Deps, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { if (!DstMem) @@ -1688,10 +1703,9 @@ void MemoryManager::ext_oneapi_fill_usm_cmd_buffer( PI_ERROR_INVALID_VALUE); const PluginPtr &Plugin = Context->getPlugin(); - // Pattern is interpreted as an unsigned char so pattern size is always 1. - size_t PatternSize = 1; + Plugin->call( - CommandBuffer, DstMem, &Pattern, PatternSize, Len, Deps.size(), + CommandBuffer, DstMem, Pattern.data(), Pattern.size(), Len, Deps.size(), Deps.data(), OutSyncPoint); } diff --git a/sycl/source/detail/memory_manager.hpp b/sycl/source/detail/memory_manager.hpp index 1d2800bf9dadc..0636fe6f3d1c0 100644 --- a/sycl/source/detail/memory_manager.hpp +++ b/sycl/source/detail/memory_manager.hpp @@ -171,6 +171,13 @@ class __SYCL_EXPORT MemoryManager { std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent); + static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, + const std::vector &Pattern, + std::vector DepEvents, + sycl::detail::pi::PiEvent *OutEvent, + const detail::EventImplPtr &OutEventImpl); + + // TODO: This function will remain until ABI-breaking change static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, int Pattern, std::vector DepEvents, @@ -319,7 +326,7 @@ class __SYCL_EXPORT MemoryManager { static void ext_oneapi_fill_usm_cmd_buffer( sycl::detail::ContextImplPtr Context, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem, - size_t Len, int Pattern, + size_t Len, const std::vector &Pattern, std::vector Deps, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint); diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 5592619b2316b..d3adabe185802 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -157,11 +157,11 @@ event queue_impl::memset(const std::shared_ptr &Self, // Emit a begin/end scope for this call PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin); #endif - + const std::vector Pattern{static_cast(Value)}; return submitMemOpHelper( Self, DepEvents, [&](handler &CGH) { CGH.memset(Ptr, Value, Count); }, [](const auto &...Args) { MemoryManager::fill_usm(Args...); }, Ptr, Self, - Count, Value); + Count, Pattern); } void report(const code_location &CodeLoc) { diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 3a48fd50f8259..a335a38e810ae 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2851,7 +2851,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { CGFillUSM *Fill = (CGFillUSM *)MCommandGroup.get(); MemoryManager::ext_oneapi_fill_usm_cmd_buffer( MQueue->getContextImplPtr(), MCommandBuffer, Fill->getDst(), - Fill->getLength(), Fill->getFill(), std::move(MSyncPointDeps), + Fill->getLength(), Fill->getPattern(), std::move(MSyncPointDeps), &OutSyncPoint); MEvent->setSyncPoint(OutSyncPoint); return PI_SUCCESS; @@ -3045,7 +3045,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { case CG::CGTYPE::FillUSM: { CGFillUSM *Fill = (CGFillUSM *)MCommandGroup.get(); MemoryManager::fill_usm(Fill->getDst(), MQueue, Fill->getLength(), - Fill->getFill(), std::move(RawEvents), Event, + Fill->getPattern(), std::move(RawEvents), Event, MEvent); return PI_SUCCESS; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 7d7f094e8d4a2..d09448ce2c7d4 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -975,6 +975,17 @@ void handler::mem_advise(const void *Ptr, size_t Count, int Advice) { setType(detail::CG::AdviseUSM); } +void handler::fill_impl(void *Dest, const void *Value, size_t ValueSize, + size_t Count) { + throwIfActionIsCreated(); + MDstPtr = Dest; + MPattern.resize(ValueSize); + std::memcpy(MPattern.data(), Value, ValueSize); + MLength = Count * ValueSize; + setUserFacingNodeType(ext::oneapi::experimental::node_type::memfill); + setType(detail::CG::FillUSM); +} + void handler::ext_oneapi_memcpy2d_impl(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height) { diff --git a/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp b/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp index 48ab65c68896c..269706b10321d 100644 --- a/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp +++ b/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp @@ -12,7 +12,7 @@ // Since it is a warning it is safe to ignore for this test. // // Everything that follows TestQueueOperations() -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // @@ -20,8 +20,7 @@ // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // -// Q.fill don't use piEnqueueMemBufferFill -// CHECK: ---> piEnqueueKernelLaunch( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // @@ -47,7 +46,7 @@ // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // // RegularQueue -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -57,7 +56,7 @@ // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // // Everything that follows TestQueueOperationsViaSubmit() -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // @@ -65,8 +64,7 @@ // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // -// Q.fill don't use piEnqueueMemBufferFill -// CHECK: ---> piEnqueueKernelLaunch( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // @@ -92,7 +90,7 @@ // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // // RegularQueue -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS diff --git a/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp b/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp index 96d53a632beb6..fb7555bab9d9f 100644 --- a/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp +++ b/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp @@ -12,7 +12,7 @@ // Since it is a warning it is safe to ignore for this test. // // Everything that follows TestQueueOperations() -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -22,8 +22,7 @@ // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS // -// Q.fill don't use piEnqueueMemBufferFill -// CHECK: ---> piEnqueueKernelLaunch( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -58,7 +57,7 @@ // CHECK: ---> pi_result : PI_SUCCESS // // RegularQueue -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -69,7 +68,7 @@ // CHECK: ---> pi_result : PI_SUCCESS // // Everything that follows TestQueueOperationsViaSubmit() -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -79,8 +78,7 @@ // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS // -// Q.fill don't use piEnqueueMemBufferFill -// CHECK: ---> piEnqueueKernelLaunch( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -115,7 +113,7 @@ // CHECK: ---> pi_result : PI_SUCCESS // // RegularQueue -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS diff --git a/sycl/test-e2e/Graph/Explicit/usm_fill.cpp b/sycl/test-e2e/Graph/Explicit/usm_fill.cpp index d2aff0dee2c20..26d3b88e64ff9 100644 --- a/sycl/test-e2e/Graph/Explicit/usm_fill.cpp +++ b/sycl/test-e2e/Graph/Explicit/usm_fill.cpp @@ -5,6 +5,8 @@ // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl #define GRAPH_E2E_EXPLICIT diff --git a/sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp b/sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp index 7acd5143f6f54..e005170a5feff 100644 --- a/sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp +++ b/sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp @@ -7,6 +7,9 @@ // REQUIRES: aspect-usm_host_allocations +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl + #define GRAPH_E2E_EXPLICIT #include "../Inputs/usm_fill_host.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp b/sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp index 8b8c623f48107..bb40a5683843c 100644 --- a/sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp +++ b/sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp @@ -7,6 +7,9 @@ // REQUIRES: aspect-usm_shared_allocations +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl + #define GRAPH_E2E_EXPLICIT #include "../Inputs/usm_fill_shared.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp index 16bf82ac572ae..d2925683837c2 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp @@ -5,6 +5,8 @@ // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl #define GRAPH_E2E_RECORD_REPLAY diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp index 0e9b3dbdb39e8..c08b70c695e4f 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp @@ -7,6 +7,9 @@ // REQUIRES: aspect-usm_host_allocations +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl + #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/usm_fill_host.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp index 950afd3da8b97..f40f8d3aa4930 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp @@ -7,6 +7,9 @@ // REQUIRES: aspect-usm_shared_allocations +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl + #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/usm_fill_shared.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp index 9506e99cd73ad..84a5d42f0da04 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp @@ -2,6 +2,8 @@ // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{ %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl // // Tests adding a USM memset queue shortcut operation as a graph node. diff --git a/sycl/test/abi/pi_cuda_symbol_check.dump b/sycl/test/abi/pi_cuda_symbol_check.dump index ed3834ddfd9fe..462c9a6d5738b 100644 --- a/sycl/test/abi/pi_cuda_symbol_check.dump +++ b/sycl/test/abi/pi_cuda_symbol_check.dump @@ -154,11 +154,11 @@ piextQueueCreateWithNativeHandle piextQueueGetNativeHandle piextSignalExternalSemaphore piextUSMDeviceAlloc +piextUSMEnqueueFill piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise piextUSMEnqueueMemcpy piextUSMEnqueueMemcpy2D -piextUSMEnqueueMemset piextUSMEnqueueMemset2D piextUSMEnqueuePrefetch piextUSMFree diff --git a/sycl/test/abi/pi_hip_symbol_check.dump b/sycl/test/abi/pi_hip_symbol_check.dump index 327c514ebdddd..336d792a52f10 100644 --- a/sycl/test/abi/pi_hip_symbol_check.dump +++ b/sycl/test/abi/pi_hip_symbol_check.dump @@ -154,11 +154,11 @@ piextQueueCreateWithNativeHandle piextQueueGetNativeHandle piextSignalExternalSemaphore piextUSMDeviceAlloc +piextUSMEnqueueFill piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise piextUSMEnqueueMemcpy piextUSMEnqueueMemcpy2D -piextUSMEnqueueMemset piextUSMEnqueueMemset2D piextUSMEnqueuePrefetch piextUSMFree diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index 2ebc6b56078a4..b912cba8d14c3 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -153,11 +153,11 @@ piextQueueCreateWithNativeHandle piextQueueGetNativeHandle piextSignalExternalSemaphore piextUSMDeviceAlloc +piextUSMEnqueueFill piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise piextUSMEnqueueMemcpy piextUSMEnqueueMemcpy2D -piextUSMEnqueueMemset piextUSMEnqueueMemset2D piextUSMEnqueuePrefetch piextUSMFree diff --git a/sycl/test/abi/pi_nativecpu_symbol_check.dump b/sycl/test/abi/pi_nativecpu_symbol_check.dump index 0bb2568eb3ff3..4be1842ff26ce 100644 --- a/sycl/test/abi/pi_nativecpu_symbol_check.dump +++ b/sycl/test/abi/pi_nativecpu_symbol_check.dump @@ -154,11 +154,11 @@ piextQueueCreateWithNativeHandle piextQueueGetNativeHandle piextSignalExternalSemaphore piextUSMDeviceAlloc +piextUSMEnqueueFill piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise piextUSMEnqueueMemcpy piextUSMEnqueueMemcpy2D -piextUSMEnqueueMemset piextUSMEnqueueMemset2D piextUSMEnqueuePrefetch piextUSMFree diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index 86860b50e57b6..71d1b60b17b79 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -153,11 +153,11 @@ piextQueueCreateWithNativeHandle piextQueueGetNativeHandle piextSignalExternalSemaphore piextUSMDeviceAlloc +piextUSMEnqueueFill piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise piextUSMEnqueueMemcpy piextUSMEnqueueMemcpy2D -piextUSMEnqueueMemset piextUSMEnqueueMemset2D piextUSMEnqueuePrefetch piextUSMFree diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index a1985cf5d841e..2bb19a0022008 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3292,7 +3292,7 @@ _ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyD2D_cmd_bufferESt10shared_ptr _ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyD2H_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPvjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjPcjSC_SE_jSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyH2D_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPcjNS0_5rangeILi3EEENS0_2idILi3EEEjPvjSC_SC_SE_jSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager30ext_oneapi_copy_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEPKvP22_pi_ext_command_buffermPvSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager30ext_oneapi_fill_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmiSt6vectorIjSaIjEEPj +_ZN4sycl3_V16detail13MemoryManager30ext_oneapi_fill_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmRKSt6vectorIcSaIcEES9_IjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager32ext_oneapi_advise_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPKvm14_pi_mem_adviceSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager34ext_oneapi_prefetch_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager3mapEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEENS0_6access4modeEjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_ @@ -3305,6 +3305,7 @@ _ZN4sycl3_V16detail13MemoryManager7releaseESt10shared_ptrINS1_12context_implEEPN _ZN4sycl3_V16detail13MemoryManager8allocateESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEbPvSt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event _ZN4sycl3_V16detail13MemoryManager8copy_usmEPKvSt10shared_ptrINS1_10queue_implEEmPvSt6vectorIP9_pi_eventSaISB_EEPSB_ _ZN4sycl3_V16detail13MemoryManager8copy_usmEPKvSt10shared_ptrINS1_10queue_implEEmPvSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE +_ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmRKSt6vectorIcSaIcEES7_IP9_pi_eventSaISD_EEPSD_RKS4_INS1_10event_implEE _ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmiSt6vectorIP9_pi_eventSaIS9_EEPS9_ _ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmiSt6vectorIP9_pi_eventSaIS9_EEPS9_RKS4_INS1_10event_implEE _ZN4sycl3_V16detail13host_pipe_map3addEPKvPKc @@ -3624,6 +3625,7 @@ _ZN4sycl3_V17handler6memcpyEPvPKvm _ZN4sycl3_V17handler6memsetEPvim _ZN4sycl3_V17handler8finalizeEv _ZN4sycl3_V17handler8prefetchEPKvm +_ZN4sycl3_V17handler9fill_implEPvPKvmm _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_3ext6oneapi12experimental6detail10graph_implEE _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEES5_S5_b _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEEb diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 69473362c1985..f41cbd3ac062c 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -4100,7 +4100,7 @@ ?ext_oneapi_enable_peer_access@device@_V1@sycl@@QEAAXAEBV123@@Z ?ext_oneapi_fill2d_impl@handler@_V1@sycl@@AEAAXPEAX_KPEBX111@Z ?ext_oneapi_fill_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAVSYCLMemObjI@234@PEAX_KPEBDIV?$range@$02@34@6V?$id@$02@34@IV?$vector@IV?$allocator@I@std@@@6@PEAI@Z -?ext_oneapi_fill_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAX_KHV?$vector@IV?$allocator@I@std@@@6@PEAI@Z +?ext_oneapi_fill_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAX_KAEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@IV?$allocator@I@std@@@6@PEAI@Z ?ext_oneapi_get_composite_devices@platform@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ ?ext_oneapi_get_default_context@platform@_V1@sycl@@QEBA?AVcontext@23@XZ ?ext_oneapi_get_graph@queue@_V1@sycl@@QEBA?AV?$command_graph@$0A@@experimental@oneapi@ext@23@XZ @@ -4151,6 +4151,8 @@ ?fill@MemoryManager@detail@_V1@sycl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KPEBDIV?$range@$02@34@5V?$id@$02@34@IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@7@@Z ?fill_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22AEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z ?fill_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22AEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z +?fill_impl@handler@_V1@sycl@@AEAAXPEAXPEBX_K2@Z +?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KAEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KHV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z ?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KHV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?finalize@handler@_V1@sycl@@AEAA?AVevent@23@XZ diff --git a/sycl/tools/xpti_helpers/usm_analyzer.hpp b/sycl/tools/xpti_helpers/usm_analyzer.hpp index 9cdf27dd98ddc..1eee3474c51ed 100644 --- a/sycl/tools/xpti_helpers/usm_analyzer.hpp +++ b/sycl/tools/xpti_helpers/usm_analyzer.hpp @@ -214,8 +214,8 @@ class USMAnalyzer { USMAnalyzer::handleUSMSharedAlloc); ArgHandlerPreCall.set_piextUSMFree(USMAnalyzer::handleUSMFree); ArgHandlerPreCall.set_piMemBufferCreate(USMAnalyzer::handleMemBufferCreate); - ArgHandlerPreCall.set_piextUSMEnqueueMemset( - USMAnalyzer::handleUSMEnqueueMemset); + ArgHandlerPreCall.set_piextUSMEnqueueFill( + USMAnalyzer::handleUSMEnqueueFill); ArgHandlerPreCall.set_piextUSMEnqueueMemcpy( USMAnalyzer::handleUSMEnqueueMemcpy); ArgHandlerPreCall.set_piextUSMEnqueuePrefetch( @@ -350,11 +350,11 @@ class USMAnalyzer { } } - static void handleUSMEnqueueMemset(const pi_plugin &, - std::optional, pi_queue, - void *ptr, pi_int32, size_t numBytes, - pi_uint32, const pi_event *, pi_event *) { - CheckPointerValidness("input parameter", ptr, numBytes, "memset"); + static void handleUSMEnqueueFill(const pi_plugin &, std::optional, + pi_queue, void *ptr, const void *, size_t, + size_t numBytes, pi_uint32, const pi_event *, + pi_event *) { + CheckPointerValidness("input parameter", ptr, numBytes, "fill"); } static void handleUSMEnqueueMemcpy(const pi_plugin &, diff --git a/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp b/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp index 27967973c1363..3860833256048 100644 --- a/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp +++ b/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp @@ -75,9 +75,9 @@ pi_result redefinedEventGetInfo(pi_event event, pi_event_info param_name, return PI_SUCCESS; } -static pi_result redefinedUSMEnqueueMemset(pi_queue, void *, pi_int32, size_t, - pi_uint32, const pi_event *, - pi_event *event) { +static pi_result redefinedUSMEnqueueMemset(pi_queue, void *, const void *, + size_t, size_t, pi_uint32, + const pi_event *, pi_event *event) { *event = reinterpret_cast(new int{}); return PI_SUCCESS; } @@ -97,7 +97,7 @@ TEST(GetNative, GetNativeHandle) { Mock.redefineBefore(redefinedMemRetain); Mock.redefineBefore( redefinedMemBufferCreate); - Mock.redefineBefore( + Mock.redefineBefore( redefinedUSMEnqueueMemset); context Context(Plt); diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index a0f267bd97d50..4bb199eb748c9 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1201,11 +1201,12 @@ inline pi_result mock_piextUSMFree(pi_context context, void *ptr) { return PI_SUCCESS; } -inline pi_result mock_piextUSMEnqueueMemset(pi_queue queue, void *ptr, - pi_int32 value, size_t count, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { +inline pi_result mock_piextUSMEnqueueFill(pi_queue queue, void *ptr, + const void *pattern, + size_t patternSize, size_t count, + pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, + pi_event *event) { *event = createDummyHandle(); return PI_SUCCESS; } diff --git a/sycl/unittests/queue/USM.cpp b/sycl/unittests/queue/USM.cpp index 30cdd2b37393e..03ff8c538d7ed 100644 --- a/sycl/unittests/queue/USM.cpp +++ b/sycl/unittests/queue/USM.cpp @@ -48,8 +48,8 @@ pi_result redefinedUSMEnqueueMemcpyAfter(pi_queue, pi_bool, void *, return PI_SUCCESS; } -pi_result redefinedUSMEnqueueMemsetAfter(pi_queue, void *, pi_int32, size_t, - pi_uint32, const pi_event *, +pi_result redefinedUSMEnqueueMemsetAfter(pi_queue, void *, const void *, size_t, + size_t, pi_uint32, const pi_event *, pi_event *Event) { // Set MEMSET to the event produced by the original USMEnqueueMemcpy MEMSET = *Event; @@ -64,7 +64,7 @@ TEST(USM, NoOpPreservesDependencyChain) { redefinedEnqueueEventsWaitAfter); Mock.redefineAfter( redefinedUSMEnqueueMemcpyAfter); - Mock.redefineAfter( + Mock.redefineAfter( redefinedUSMEnqueueMemsetAfter); context Ctx{Plt.get_devices()[0]}; diff --git a/sycl/unittests/queue/Wait.cpp b/sycl/unittests/queue/Wait.cpp index 8b2d72055d847..f9ec3fb6083ac 100644 --- a/sycl/unittests/queue/Wait.cpp +++ b/sycl/unittests/queue/Wait.cpp @@ -37,7 +37,8 @@ pi_result redefinedQueueCreateEx(pi_context context, pi_device device, return PI_SUCCESS; } -pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, +pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, + const void *Pattern, size_t PatternSize, size_t Count, pi_uint32 Num_events_in_waitlist, const pi_event *Events_waitlist, @@ -88,7 +89,7 @@ TEST(QueueWait, QueueWaitTest) { Mock.redefineBefore( redefinedQueueCreateEx); Mock.redefineBefore(redefinedQueueFinish); - Mock.redefineBefore( + Mock.redefineBefore( redefinedUSMEnqueueMemset); Mock.redefineBefore(redefinedEventsWait); Mock.redefineBefore( diff --git a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp index 8693ff5e4c52b..07d36d479d3c0 100644 --- a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp @@ -61,7 +61,8 @@ inline pi_result customEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, ExecutedCommands.push_back({CommandType::KERNEL, EventsCount}); return PI_SUCCESS; } -inline pi_result customextUSMEnqueueMemset(pi_queue, void *, pi_int32, size_t, +inline pi_result customextUSMEnqueueMemset(pi_queue, void *, const void *, + size_t, size_t, pi_uint32 EventsCount, const pi_event *, pi_event *) { ExecutedCommands.push_back({CommandType::MEMSET, EventsCount}); @@ -73,7 +74,7 @@ TEST_F(SchedulerTest, InOrderQueueCrossDeps) { sycl::unittest::PiMock Mock; Mock.redefineBefore( customEnqueueKernelLaunch); - Mock.redefineBefore( + Mock.redefineBefore( customextUSMEnqueueMemset); sycl::platform Plt = Mock.getPlatform(); @@ -126,7 +127,7 @@ TEST_F(SchedulerTest, InOrderQueueCrossDepsShortcutFuncs) { sycl::unittest::PiMock Mock; Mock.redefineBefore( customEnqueueKernelLaunch); - Mock.redefineBefore( + Mock.redefineBefore( customextUSMEnqueueMemset); sycl::platform Plt = Mock.getPlatform(); diff --git a/sycl/unittests/xpti_trace/QueueApiFailures.cpp b/sycl/unittests/xpti_trace/QueueApiFailures.cpp index 88c5fae49394c..c634f2dfce299 100644 --- a/sycl/unittests/xpti_trace/QueueApiFailures.cpp +++ b/sycl/unittests/xpti_trace/QueueApiFailures.cpp @@ -145,7 +145,8 @@ TEST_F(QueueApiFailures, QueueSingleTask) { EXPECT_FALSE(queryReceivedNotifications(TraceType, Message)); } -pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, +pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, + const void *Pattern, size_t PatternSize, size_t Count, pi_uint32 Num_events_in_waitlist, const pi_event *Events_waitlist, @@ -154,7 +155,7 @@ pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, } TEST_F(QueueApiFailures, QueueMemset) { - MockPlugin.redefine( + MockPlugin.redefine( redefinedUSMEnqueueMemset); MockPlugin.redefine( redefinedPluginGetLastError); @@ -241,18 +242,17 @@ TEST_F(QueueApiFailures, QueueCopy) { EXPECT_FALSE(queryReceivedNotifications(TraceType, Message)); } -pi_result redefinedEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, - const void *Pattern, size_t PatternSize, - size_t Offset, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { +pi_result redefinedUSMEnqueueFill(pi_queue Queue, void *Ptr, + const void *Pattern, size_t PatternSize, + size_t Count, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } TEST_F(QueueApiFailures, QueueFill) { - MockPlugin.redefine( - redefinedEnqueueMemBufferFill); + MockPlugin.redefine( + redefinedUSMEnqueueFill); MockPlugin.redefine( redefinedPluginGetLastError); sycl::queue Q;