diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index ab066ed1906a4..a24905d4214da 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -118,11 +118,6 @@ event queue_impl::memset(const std::shared_ptr &Self, // Emit a begin/end scope for this call PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin); #endif - if (MGraph.lock()) { - throw sycl::exception(make_error_code(errc::invalid), - "The memset feature is not yet available " - "for use with the SYCL Graph extension."); - } return submitMemOpHelper( Self, DepEvents, [&](handler &CGH) { CGH.memset(Ptr, Value, Count); }, @@ -169,11 +164,6 @@ event queue_impl::memcpy(const std::shared_ptr &Self, // Emit a begin/end scope for this call PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin); #endif - // If we have a command graph set we need to capture the copy through normal - // queue submission rather than execute the copy directly. - auto HandlerFunc = [&](handler &CGH) { CGH.memcpy(Dest, Src, Count); }; - if (MGraph.lock()) - return submitWithHandler(Self, DepEvents, HandlerFunc); if ((!Src || !Dest) && Count != 0) { report(CodeLoc); @@ -181,7 +171,7 @@ event queue_impl::memcpy(const std::shared_ptr &Self, PI_ERROR_INVALID_VALUE); } return submitMemOpHelper( - Self, DepEvents, HandlerFunc, + Self, DepEvents, [&](handler &CGH) { CGH.memcpy(Dest, Src, Count); }, [](const auto &...Args) { MemoryManager::copy_usm(Args...); }, Src, Self, Count, Dest); } @@ -190,14 +180,9 @@ event queue_impl::mem_advise(const std::shared_ptr &Self, const void *Ptr, size_t Length, pi_mem_advice Advice, const std::vector &DepEvents) { - // If we have a command graph set we need to capture the advise through normal - // queue submission. - auto HandlerFunc = [&](handler &CGH) { CGH.mem_advise(Ptr, Length, Advice); }; - if (MGraph.lock()) - return submitWithHandler(Self, DepEvents, HandlerFunc); - return submitMemOpHelper( - Self, DepEvents, HandlerFunc, + Self, DepEvents, + [&](handler &CGH) { CGH.mem_advise(Ptr, Length, Advice); }, [](const auto &...Args) { MemoryManager::advise_usm(Args...); }, Ptr, Self, Length, Advice); } @@ -353,7 +338,10 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, const std::vector &ExpandedDepEvents = getExtendDependencyList(DepEvents, MutableDepEvents, Lock); - if (areEventsSafeForSchedulerBypass(ExpandedDepEvents, MContext)) { + // If we have a command graph set we need to capture the op through the + // handler rather than by-passing the scheduler. + if (!MGraph.lock() && + areEventsSafeForSchedulerBypass(ExpandedDepEvents, MContext)) { if (MSupportsDiscardingPiEvents) { MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents), /*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr); diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp new file mode 100644 index 0000000000000..d3936a2cfe74d --- /dev/null +++ b/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp @@ -0,0 +1,48 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env UR_L0_LEAKS_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// +// Tests adding a USM memset queue shortcut operation as a graph node. + +#include "../graph_common.hpp" + +int main() { + + queue Queue; + + if (!are_graphs_supported(Queue)) { + return 0; + } + + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 10; + unsigned char *Arr = malloc_device(N, Queue); + + int Value = 77; + Graph.begin_recording(Queue); + auto Init = Queue.memset(Arr, Value, N); + Queue.submit([&](handler &CGH) { + CGH.depends_on(Init); + CGH.single_task([=]() { + for (int i = 0; i < Size; i++) + Arr[i] = 2 * Arr[i]; + }); + }); + + Graph.end_recording(); + + auto ExecGraph = Graph.finalize(); + + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); }).wait(); + + std::vector Output(N); + Queue.memcpy(Output.data(), Arr, N).wait(); + for (int i = 0; i < N; i++) + assert(Output[i] == (Value * 2)); + + sycl::free(Arr, Queue); + + return 0; +} diff --git a/sycl/unittests/Extensions/CommandGraph.cpp b/sycl/unittests/Extensions/CommandGraph.cpp index ce7bb02e278b2..859e14d3ea604 100644 --- a/sycl/unittests/Extensions/CommandGraph.cpp +++ b/sycl/unittests/Extensions/CommandGraph.cpp @@ -1848,25 +1848,6 @@ TEST_F(CommandGraphTest, FusionExtensionExceptionCheck) { ASSERT_EQ(ExceptionCode, sycl::errc::invalid); } -TEST_F(CommandGraphTest, USMMemsetShortcutExceptionCheck) { - - const size_t N = 10; - unsigned char *Arr = malloc_device(N, Queue); - int Value = 77; - - Graph.begin_recording(Queue); - - std::error_code ExceptionCode = make_error_code(sycl::errc::success); - try { - Queue.memset(Arr, Value, N); - } catch (exception &Exception) { - ExceptionCode = Exception.code(); - } - ASSERT_EQ(ExceptionCode, sycl::errc::invalid); - - Graph.end_recording(Queue); -} - TEST_F(CommandGraphTest, Memcpy2DExceptionCheck) { constexpr size_t RECT_WIDTH = 30; constexpr size_t RECT_HEIGHT = 21;