diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 329eab2aaf832..b3ee01843c77f 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -1236,18 +1236,22 @@ void exec_graph_impl::update( sycl::make_error_code(errc::invalid), "Node passed to update() is not part of the graph."); } - if (Node->MCGType != sycl::detail::CG::Kernel) { - throw sycl::exception(errc::invalid, "Cannot update non-kernel nodes"); - } - if (Node->MCommandGroup->getRequirements().size() == 0) { - continue; + if (!(Node->isEmpty() || Node->MCGType == sycl::detail::CG::Kernel || + Node->MCGType == sycl::detail::CG::Barrier)) { + throw sycl::exception(errc::invalid, + "Cannot update node type. Node must be be of " + "kernel, empty, or barrier type."); } - NeedScheduledUpdate = true; - UpdateRequirements.insert(UpdateRequirements.end(), - Node->MCommandGroup->getRequirements().begin(), - Node->MCommandGroup->getRequirements().end()); + if (const auto &CG = Node->MCommandGroup; + CG->getRequirements().size() != 0) { + NeedScheduledUpdate = true; + + UpdateRequirements.insert(UpdateRequirements.end(), + Node->MCommandGroup->getRequirements().begin(), + Node->MCommandGroup->getRequirements().end()); + } } // Clean up any execution events which have finished so we don't pass them to @@ -1290,6 +1294,11 @@ void exec_graph_impl::update( } void exec_graph_impl::updateImpl(std::shared_ptr Node) { + // Kernel node update is the only command type supported in UR for update. + // Updating any other types of nodes, e.g. empty & barrier nodes is a no-op. + if (Node->MCGType != sycl::detail::CG::Kernel) { + return; + } auto ContextImpl = sycl::detail::getSyclObjImpl(MContext); const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); auto DeviceImpl = sycl::detail::getSyclObjImpl(MGraphImpl->getDevice()); diff --git a/sycl/test-e2e/Graph/Update/whole_update_barrier_node.cpp b/sycl/test-e2e/Graph/Update/whole_update_barrier_node.cpp new file mode 100644 index 0000000000000..86bc166726e1a --- /dev/null +++ b/sycl/test-e2e/Graph/Update/whole_update_barrier_node.cpp @@ -0,0 +1,119 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// REQUIRES: aspect-usm_shared_allocations + +// Tests that whole graph update works when a graph contains a barrier node. + +#include "../graph_common.hpp" + +// Queue submissions that can be recorded to a graph, with a barrier node +// separating initialization and computation kernel nodes +template +void RecordGraph(queue &Queue, size_t Size, T *Input1, T *Input2, T *Output) { + Queue.submit([&](handler &CGH) { + CGH.single_task([=]() { + for (int i = 0; i < Size; i++) { + Input1[i] += i; + } + }); + }); + + Queue.submit([&](handler &CGH) { + CGH.single_task([=]() { + for (int i = 0; i < Size; i++) { + Input2[i] += i; + } + }); + }); + + Queue.ext_oneapi_submit_barrier(); + + Queue.submit([&](handler &CGH) { + CGH.single_task([=]() { + for (int i = 0; i < Size; i++) { + Output[i] = Input1[i] * Input2[i]; + } + }); + }); +} + +int main() { + queue Queue{}; + + using T = int; + + // USM allocations for GraphA + T *InputA1 = malloc_shared(Size, Queue); + T *InputA2 = malloc_shared(Size, Queue); + T *OutputA = malloc_shared(Size, Queue); + + // Initialize USM allocations + T Pattern1 = 0xA; + T Pattern2 = 0x42; + T PatternZero = 0; + + Queue.fill(InputA1, Pattern1, Size); + Queue.fill(InputA2, Pattern2, Size); + Queue.fill(OutputA, PatternZero, Size); + Queue.wait(); + + // Define GraphA + exp_ext::command_graph GraphA{Queue}; + GraphA.begin_recording(Queue); + RecordGraph(Queue, Size, InputA1, InputA2, OutputA); + GraphA.end_recording(); + + // Finalize, run, and validate GraphA + auto GraphExecA = GraphA.finalize(exp_ext::property::graph::updatable{}); + Queue.ext_oneapi_graph(GraphExecA).wait(); + + for (int i = 0; i < Size; i++) { + T Ref = (Pattern1 + i) * (Pattern2 + i); + assert(check_value(i, Ref, OutputA[i], "OutputA")); + } + + // Create GraphB which will be used to update GraphA + exp_ext::command_graph GraphB{Queue}; + + // USM allocations for GraphB + T *InputB1 = malloc_shared(Size, Queue); + T *InputB2 = malloc_shared(Size, Queue); + T *OutputB = malloc_shared(Size, Queue); + + // Initialize GraphB allocations + Pattern1 = -42; + Pattern2 = 0xF; + + Queue.fill(InputB1, Pattern1, Size); + Queue.fill(InputB2, Pattern2, Size); + Queue.fill(OutputB, PatternZero, Size); + Queue.wait(); + + // Create GraphB + GraphB.begin_recording(Queue); + RecordGraph(Queue, Size, InputB1, InputB2, OutputB); + GraphB.end_recording(); + + // Update executable GraphA with GraphB, run, and validate + GraphExecA.update(GraphB); + Queue.ext_oneapi_graph(GraphExecA).wait(); + + for (int i = 0; i < Size; i++) { + T Ref = (Pattern1 + i) * (Pattern2 + i); + assert(check_value(i, Ref, OutputB[i], "OutputB")); + } + + free(InputA1, Queue); + free(InputA2, Queue); + free(OutputA, Queue); + + free(InputB1, Queue); + free(InputB2, Queue); + free(OutputB, Queue); + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/whole_update_empty_node.cpp b/sycl/test-e2e/Graph/Update/whole_update_empty_node.cpp new file mode 100644 index 0000000000000..8816eb385936f --- /dev/null +++ b/sycl/test-e2e/Graph/Update/whole_update_empty_node.cpp @@ -0,0 +1,120 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// REQUIRES: aspect-usm_shared_allocations + +// Tests that whole graph update works when a graph contain an empty node. + +#include "../graph_common.hpp" + +// Creates a graph with an empty node separating initialization and computation +// kernel nodes +template +void CreateGraph( + exp_ext::command_graph &Graph, + size_t Size, T *Input1, T *Input2, T *Output) { + Graph.add([&](handler &CGH) { + CGH.single_task([=]() { + for (int i = 0; i < Size; i++) { + Input1[i] += i; + } + }); + }); + + Graph.add([&](handler &CGH) { + CGH.single_task([=]() { + for (int i = 0; i < Size; i++) { + Input2[i] += i; + } + }); + }); + + auto EmptyNodeA = + Graph.add({exp_ext::property::node::depends_on_all_leaves()}); + + Graph.add( + [&](handler &CGH) { + CGH.single_task([=]() { + for (int i = 0; i < Size; i++) { + Output[i] = Input1[i] * Input2[i]; + } + }); + }, + {exp_ext::property::node::depends_on(EmptyNodeA)}); +} + +int main() { + queue Queue{}; + + using T = int; + + // USM allocations for GraphA + T *InputA1 = malloc_shared(Size, Queue); + T *InputA2 = malloc_shared(Size, Queue); + T *OutputA = malloc_shared(Size, Queue); + + // Initialize USM allocations + T Pattern1 = 0xA; + T Pattern2 = 0x42; + T PatternZero = 0; + + Queue.fill(InputA1, Pattern1, Size); + Queue.fill(InputA2, Pattern2, Size); + Queue.fill(OutputA, PatternZero, Size); + Queue.wait(); + + // Construct GraphA + exp_ext::command_graph GraphA{Queue}; + CreateGraph(GraphA, Size, InputA1, InputA2, OutputA); + + // Finalize, run, and validate GraphA + auto GraphExecA = GraphA.finalize(exp_ext::property::graph::updatable{}); + Queue.ext_oneapi_graph(GraphExecA).wait(); + + for (int i = 0; i < Size; i++) { + T Ref = (Pattern1 + i) * (Pattern2 + i); + assert(check_value(i, Ref, OutputA[i], "OutputA")); + } + + // Create GraphB which will be used to update GraphA + exp_ext::command_graph GraphB{Queue}; + + // USM allocations for GraphB + T *InputB1 = malloc_shared(Size, Queue); + T *InputB2 = malloc_shared(Size, Queue); + T *OutputB = malloc_shared(Size, Queue); + + // Initialize GraphB + Pattern1 = -42; + Pattern2 = 0xF; + + Queue.fill(InputB1, Pattern1, Size); + Queue.fill(InputB2, Pattern2, Size); + Queue.fill(OutputB, PatternZero, Size); + Queue.wait(); + + // Construct GraphB + CreateGraph(GraphB, Size, InputB1, InputB2, OutputB); + + // Update executable GraphA with GraphB, run, and validate + GraphExecA.update(GraphB); + Queue.ext_oneapi_graph(GraphExecA).wait(); + + for (int i = 0; i < Size; i++) { + T Ref = (Pattern1 + i) * (Pattern2 + i); + assert(check_value(i, Ref, OutputB[i], "OutputB")); + } + + free(InputA1, Queue); + free(InputA2, Queue); + free(OutputA, Queue); + + free(InputB1, Queue); + free(InputB2, Queue); + free(OutputB, Queue); + return 0; +} diff --git a/sycl/unittests/Extensions/CommandGraph/Update.cpp b/sycl/unittests/Extensions/CommandGraph/Update.cpp index bb813cf211246..59182ed5b5226 100644 --- a/sycl/unittests/Extensions/CommandGraph/Update.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Update.cpp @@ -109,6 +109,11 @@ TEST_F(CommandGraphTest, UpdateNodeTypeExceptions) { cgh.host_task([]() {}); })); + ASSERT_ANY_THROW(auto NodeBarrier = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynamicParam); + cgh.ext_oneapi_barrier(); + })); + auto NodeEmpty = Graph.add(); experimental::command_graph Subgraph(Queue.get_context(), Dev); @@ -375,3 +380,33 @@ TEST_F(WholeGraphUpdateTest, MissingUpdatableProperty) { auto GraphExec = Graph.finalize(); EXPECT_THROW(GraphExec.update(UpdateGraph), sycl::exception); } + +TEST_F(WholeGraphUpdateTest, EmptyNode) { + // Test that updating a graph that has an empty node is not an error + auto NodeEmpty = Graph.add(); + auto UpdateNodeEmpty = UpdateGraph.add(); + + auto NodeKernel = Graph.add(EmptyKernel); + auto UpdateNodeKernel = UpdateGraph.add(EmptyKernel); + + auto GraphExec = Graph.finalize(experimental::property::graph::updatable{}); + GraphExec.update(UpdateGraph); +} + +TEST_F(WholeGraphUpdateTest, BarrierNode) { + // Test that updating a graph that has a barrier node is not an error + Graph.begin_recording(Queue); + auto NodeKernel = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + Queue.ext_oneapi_submit_barrier({NodeKernel}); + Graph.end_recording(Queue); + + UpdateGraph.begin_recording(Queue); + auto UpdateNodeKernel = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + Queue.ext_oneapi_submit_barrier({UpdateNodeKernel}); + UpdateGraph.end_recording(Queue); + + auto GraphExec = Graph.finalize(experimental::property::graph::updatable{}); + GraphExec.update(UpdateGraph); +}