Skip to content

Commit

Permalink
[SYCL][Graph] Permit empty & barrier nodes in WGU
Browse files Browse the repository at this point in the history
In order to enable the minimum viable real life usecase
for the Whole Graph Update feature. Allow graphs to contain
empty nodes and barrier nodes during update.

See discussion thread
intel#13253 (comment)
on SYCL-Graph spec PR for publicizing the availability of the
Whole Graph Update feature.
  • Loading branch information
EwanC committed Jun 20, 2024
1 parent 82f77d1 commit 768e8d5
Show file tree
Hide file tree
Showing 4 changed files with 292 additions and 9 deletions.
27 changes: 18 additions & 9 deletions sycl/source/detail/graph_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1236,18 +1236,22 @@ void exec_graph_impl::update(
sycl::make_error_code(errc::invalid),
"Node passed to update() is not part of the graph.");
}
if (Node->MCGType != sycl::detail::CG::Kernel) {
throw sycl::exception(errc::invalid, "Cannot update non-kernel nodes");
}

if (Node->MCommandGroup->getRequirements().size() == 0) {
continue;
if (!(Node->isEmpty() || Node->MCGType == sycl::detail::CG::Kernel ||
Node->MCGType == sycl::detail::CG::Barrier)) {
throw sycl::exception(errc::invalid,
"Cannot update node type. Node must be be of "
"kernel, empty, or barrier type.");
}
NeedScheduledUpdate = true;

UpdateRequirements.insert(UpdateRequirements.end(),
Node->MCommandGroup->getRequirements().begin(),
Node->MCommandGroup->getRequirements().end());
if (const auto &CG = Node->MCommandGroup;
CG->getRequirements().size() != 0) {
NeedScheduledUpdate = true;

UpdateRequirements.insert(UpdateRequirements.end(),
Node->MCommandGroup->getRequirements().begin(),
Node->MCommandGroup->getRequirements().end());
}
}

// Clean up any execution events which have finished so we don't pass them to
Expand Down Expand Up @@ -1290,6 +1294,11 @@ void exec_graph_impl::update(
}

void exec_graph_impl::updateImpl(std::shared_ptr<node_impl> Node) {
// Kernel node update is the only command type supported in UR for update.
// Updating any other types of nodes, e.g. empty & barrier nodes is a no-op.
if (Node->MCGType != sycl::detail::CG::Kernel) {
return;
}
auto ContextImpl = sycl::detail::getSyclObjImpl(MContext);
const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin();
auto DeviceImpl = sycl::detail::getSyclObjImpl(MGraphImpl->getDevice());
Expand Down
119 changes: 119 additions & 0 deletions sycl/test-e2e/Graph/Update/whole_update_barrier_node.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
// Extra run to check for immediate-command-list in Level Zero
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}

// REQUIRES: aspect-usm_shared_allocations

// Tests that whole graph update works when a graph contains a barrier node.

#include "../graph_common.hpp"

// Queue submissions that can be recorded to a graph, with a barrier node
// separating initialization and computation kernel nodes
template <class T>
void RecordGraph(queue &Queue, size_t Size, T *Input1, T *Input2, T *Output) {
Queue.submit([&](handler &CGH) {
CGH.single_task([=]() {
for (int i = 0; i < Size; i++) {
Input1[i] += i;
}
});
});

Queue.submit([&](handler &CGH) {
CGH.single_task([=]() {
for (int i = 0; i < Size; i++) {
Input2[i] += i;
}
});
});

Queue.ext_oneapi_submit_barrier();

Queue.submit([&](handler &CGH) {
CGH.single_task([=]() {
for (int i = 0; i < Size; i++) {
Output[i] = Input1[i] * Input2[i];
}
});
});
}

int main() {
queue Queue{};

using T = int;

// USM allocations for GraphA
T *InputA1 = malloc_shared<T>(Size, Queue);
T *InputA2 = malloc_shared<T>(Size, Queue);
T *OutputA = malloc_shared<T>(Size, Queue);

// Initialize USM allocations
T Pattern1 = 0xA;
T Pattern2 = 0x42;
T PatternZero = 0;

Queue.fill(InputA1, Pattern1, Size);
Queue.fill(InputA2, Pattern2, Size);
Queue.fill(OutputA, PatternZero, Size);
Queue.wait();

// Define GraphA
exp_ext::command_graph GraphA{Queue};
GraphA.begin_recording(Queue);
RecordGraph(Queue, Size, InputA1, InputA2, OutputA);
GraphA.end_recording();

// Finalize, run, and validate GraphA
auto GraphExecA = GraphA.finalize(exp_ext::property::graph::updatable{});
Queue.ext_oneapi_graph(GraphExecA).wait();

for (int i = 0; i < Size; i++) {
T Ref = (Pattern1 + i) * (Pattern2 + i);
assert(check_value(i, Ref, OutputA[i], "OutputA"));
}

// Create GraphB which will be used to update GraphA
exp_ext::command_graph GraphB{Queue};

// USM allocations for GraphB
T *InputB1 = malloc_shared<T>(Size, Queue);
T *InputB2 = malloc_shared<T>(Size, Queue);
T *OutputB = malloc_shared<T>(Size, Queue);

// Initialize GraphB allocations
Pattern1 = -42;
Pattern2 = 0xF;

Queue.fill(InputB1, Pattern1, Size);
Queue.fill(InputB2, Pattern2, Size);
Queue.fill(OutputB, PatternZero, Size);
Queue.wait();

// Create GraphB
GraphB.begin_recording(Queue);
RecordGraph(Queue, Size, InputB1, InputB2, OutputB);
GraphB.end_recording();

// Update executable GraphA with GraphB, run, and validate
GraphExecA.update(GraphB);
Queue.ext_oneapi_graph(GraphExecA).wait();

for (int i = 0; i < Size; i++) {
T Ref = (Pattern1 + i) * (Pattern2 + i);
assert(check_value(i, Ref, OutputB[i], "OutputB"));
}

free(InputA1, Queue);
free(InputA2, Queue);
free(OutputA, Queue);

free(InputB1, Queue);
free(InputB2, Queue);
free(OutputB, Queue);
return 0;
}
120 changes: 120 additions & 0 deletions sycl/test-e2e/Graph/Update/whole_update_empty_node.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
// Extra run to check for immediate-command-list in Level Zero
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}

// REQUIRES: aspect-usm_shared_allocations

// Tests that whole graph update works when a graph contain an empty node.

#include "../graph_common.hpp"

// Creates a graph with an empty node separating initialization and computation
// kernel nodes
template <class T>
void CreateGraph(
exp_ext::command_graph<exp_ext::graph_state::modifiable> &Graph,
size_t Size, T *Input1, T *Input2, T *Output) {
Graph.add([&](handler &CGH) {
CGH.single_task([=]() {
for (int i = 0; i < Size; i++) {
Input1[i] += i;
}
});
});

Graph.add([&](handler &CGH) {
CGH.single_task([=]() {
for (int i = 0; i < Size; i++) {
Input2[i] += i;
}
});
});

auto EmptyNodeA =
Graph.add({exp_ext::property::node::depends_on_all_leaves()});

Graph.add(
[&](handler &CGH) {
CGH.single_task([=]() {
for (int i = 0; i < Size; i++) {
Output[i] = Input1[i] * Input2[i];
}
});
},
{exp_ext::property::node::depends_on(EmptyNodeA)});
}

int main() {
queue Queue{};

using T = int;

// USM allocations for GraphA
T *InputA1 = malloc_shared<T>(Size, Queue);
T *InputA2 = malloc_shared<T>(Size, Queue);
T *OutputA = malloc_shared<T>(Size, Queue);

// Initialize USM allocations
T Pattern1 = 0xA;
T Pattern2 = 0x42;
T PatternZero = 0;

Queue.fill(InputA1, Pattern1, Size);
Queue.fill(InputA2, Pattern2, Size);
Queue.fill(OutputA, PatternZero, Size);
Queue.wait();

// Construct GraphA
exp_ext::command_graph GraphA{Queue};
CreateGraph(GraphA, Size, InputA1, InputA2, OutputA);

// Finalize, run, and validate GraphA
auto GraphExecA = GraphA.finalize(exp_ext::property::graph::updatable{});
Queue.ext_oneapi_graph(GraphExecA).wait();

for (int i = 0; i < Size; i++) {
T Ref = (Pattern1 + i) * (Pattern2 + i);
assert(check_value(i, Ref, OutputA[i], "OutputA"));
}

// Create GraphB which will be used to update GraphA
exp_ext::command_graph GraphB{Queue};

// USM allocations for GraphB
T *InputB1 = malloc_shared<T>(Size, Queue);
T *InputB2 = malloc_shared<T>(Size, Queue);
T *OutputB = malloc_shared<T>(Size, Queue);

// Initialize GraphB
Pattern1 = -42;
Pattern2 = 0xF;

Queue.fill(InputB1, Pattern1, Size);
Queue.fill(InputB2, Pattern2, Size);
Queue.fill(OutputB, PatternZero, Size);
Queue.wait();

// Construct GraphB
CreateGraph(GraphB, Size, InputB1, InputB2, OutputB);

// Update executable GraphA with GraphB, run, and validate
GraphExecA.update(GraphB);
Queue.ext_oneapi_graph(GraphExecA).wait();

for (int i = 0; i < Size; i++) {
T Ref = (Pattern1 + i) * (Pattern2 + i);
assert(check_value(i, Ref, OutputB[i], "OutputB"));
}

free(InputA1, Queue);
free(InputA2, Queue);
free(OutputA, Queue);

free(InputB1, Queue);
free(InputB2, Queue);
free(OutputB, Queue);
return 0;
}
35 changes: 35 additions & 0 deletions sycl/unittests/Extensions/CommandGraph/Update.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,11 @@ TEST_F(CommandGraphTest, UpdateNodeTypeExceptions) {
cgh.host_task([]() {});
}));

ASSERT_ANY_THROW(auto NodeBarrier = Graph.add([&](sycl::handler &cgh) {
cgh.set_arg(0, DynamicParam);
cgh.ext_oneapi_barrier();
}));

auto NodeEmpty = Graph.add();

experimental::command_graph Subgraph(Queue.get_context(), Dev);
Expand Down Expand Up @@ -375,3 +380,33 @@ TEST_F(WholeGraphUpdateTest, MissingUpdatableProperty) {
auto GraphExec = Graph.finalize();
EXPECT_THROW(GraphExec.update(UpdateGraph), sycl::exception);
}

TEST_F(WholeGraphUpdateTest, EmptyNode) {
// Test that updating a graph that has an empty node is not an error
auto NodeEmpty = Graph.add();
auto UpdateNodeEmpty = UpdateGraph.add();

auto NodeKernel = Graph.add(EmptyKernel);
auto UpdateNodeKernel = UpdateGraph.add(EmptyKernel);

auto GraphExec = Graph.finalize(experimental::property::graph::updatable{});
GraphExec.update(UpdateGraph);
}

TEST_F(WholeGraphUpdateTest, BarrierNode) {
// Test that updating a graph that has a barrier node is not an error
Graph.begin_recording(Queue);
auto NodeKernel = Queue.submit(
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
Queue.ext_oneapi_submit_barrier({NodeKernel});
Graph.end_recording(Queue);

UpdateGraph.begin_recording(Queue);
auto UpdateNodeKernel = Queue.submit(
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
Queue.ext_oneapi_submit_barrier({UpdateNodeKernel});
UpdateGraph.end_recording(Queue);

auto GraphExec = Graph.finalize(experimental::property::graph::updatable{});
GraphExec.update(UpdateGraph);
}

0 comments on commit 768e8d5

Please sign in to comment.