Skip to content

Commit

Permalink
[SYCL][Graph] Fix in_order queue with empty nodes
Browse files Browse the repository at this point in the history
Adding a empty node to a recorded in-order queue resulted
in inconsistent dependencies between nodes.
This patch fixes this issues and simplifies the adding of empty nodes.
Unitests have been added to check node dependencies when
recording an in_order queue with and without empty nodes.

Fixes Issue: #239
  • Loading branch information
mfrancepillois committed Jul 5, 2023
1 parent 56929b2 commit 7f683f8
Show file tree
Hide file tree
Showing 3 changed files with 303 additions and 20 deletions.
31 changes: 11 additions & 20 deletions sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -380,27 +380,18 @@ event handler::finalize() {
if (detail::pi::trace(detail::pi::TraceLevel::PI_TRACE_ALL)) {
std::cout << "WARNING: An empty command group is submitted." << std::endl;
}
if (MGraph) {
CommandGroup.reset(
new detail::CG(detail::CG::None, std::move(CGData), MCodeLoc));
MGraphNodeCG = std::move(CommandGroup);
} else if (auto QueueGraph = MQueue->getCommandGraph(); QueueGraph) {
auto EventImpl = std::make_shared<detail::event_impl>();

// Extract relevant data from the handler and pass to graph to create a
// new node representing this command group.
std::shared_ptr<ext::oneapi::experimental::detail::node_impl> NodeImpl =
QueueGraph->add(CGData.MEvents);

// Associate an event with this new node and return the event.
QueueGraph->addEventForNode(EventImpl, NodeImpl);

return detail::createSyclObjFromImpl<event>(EventImpl);
}

detail::EventImplPtr Event = std::make_shared<sycl::detail::event_impl>();
MLastEvent = detail::createSyclObjFromImpl<event>(Event);
return MLastEvent;
// Empty nodes are handled by Graph like standard nodes
// For Standard mode (non-graph),
// empty nodes are not sent to the scheduler to save time
if(MGraph || MQueue->getCommandGraph()){
CommandGroup.reset(new detail::CG(detail::CG::None, std::move(CGData), MCodeLoc));
}else{
detail::EventImplPtr Event = std::make_shared<sycl::detail::event_impl>();
MLastEvent = detail::createSyclObjFromImpl<event>(Event);
return MLastEvent;
}
break;
}

if (!MSubgraphNode && !CommandGroup)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// REQUIRES: level_zero, gpu
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// RUN: %if ext_oneapi_level_zero %{env ZE_DEBUG=4 %{run} %t.out 2>&1 | FileCheck %s %}
//
// CHECK-NOT: LEAK

// Tests a dotp operation using device USM and an in-order queue with empty nodes.
// The second run is to check that there are no leaks reported with the embedded
// ZE_DEBUG=4 testing capability.

#include "../graph_common.hpp"

int main() {
property_list Properties{property::queue::in_order()};
queue Queue{gpu_selector_v, Properties};

exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};

float *Dotp = malloc_device<float>(1, Queue);

const size_t N = 10;
float *X = malloc_device<float>(N, Queue);
float *Y = malloc_device<float>(N, Queue);
float *Z = malloc_device<float>(N, Queue);

Graph.begin_recording(Queue);

auto InitEvent = Queue.submit([&](handler &CGH) {
CGH.parallel_for(N, [=](id<1> it) {
const size_t i = it[0];
X[i] = 1.0f;
Y[i] = 2.0f;
Z[i] = 3.0f;
});
});

auto Empty1 = Queue.submit([&](handler &) {});

auto EventA = Queue.submit([&](handler &CGH) {
CGH.parallel_for(range<1>{N}, [=](id<1> it) {
const size_t i = it[0];
X[i] = Alpha * X[i] + Beta * Y[i];
});
});

auto EventB = Queue.submit([&](handler &CGH) {
CGH.parallel_for(range<1>{N}, [=](id<1> it) {
const size_t i = it[0];
Z[i] = Gamma * Z[i] + Beta * Y[i];
});
});

auto Empty2 = Queue.submit([&](handler &) {});

Queue.submit([&](handler &CGH) {
CGH.single_task([=]() {
for (size_t j = 0; j < N; j++) {
Dotp[0] += X[j] * Z[j];
}
});
});

Graph.end_recording();

auto ExecGraph = Graph.finalize();

Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); });

float Output;
Queue.memcpy(&Output, Dotp, sizeof(float)).wait();

assert(Output == dotp_reference_result(N));

sycl::free(Dotp, Queue);
sycl::free(X, Queue);
sycl::free(Y, Queue);
sycl::free(Z, Queue);

return 0;
}
211 changes: 211 additions & 0 deletions sycl/unittests/Extensions/CommandGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,3 +361,214 @@ TEST_F(CommandGraphTest, RecordSubGraph) {
sycl::detail::getSyclObjImpl(Node3MainGraph));
ASSERT_EQ(Queue.get_context(), MainGraphExecImpl->getContext());
}

TEST_F(CommandGraphTest, InOrderQueue) {
sycl::property_list Properties{sycl::property::queue::in_order()};
sycl::queue InOrderQueue{Dev, Properties};

// Record in-order queue with three nodes
Graph.begin_recording(InOrderQueue);
auto Node1Graph = InOrderQueue.submit(
[&](sycl::handler &cgh) { cgh.single_task<class TestKernel>([]() {}); });

auto PtrNode1 = sycl::detail::getSyclObjImpl(Graph)->getLastInorderNode(
sycl::detail::getSyclObjImpl(InOrderQueue));
ASSERT_NE(PtrNode1, nullptr);
ASSERT_TRUE(PtrNode1->MPredecessors.empty());

auto Node2Graph = InOrderQueue.submit(
[&](sycl::handler &cgh) { cgh.single_task<class TestKernel>([]() {}); });

auto PtrNode2 = sycl::detail::getSyclObjImpl(Graph)->getLastInorderNode(
sycl::detail::getSyclObjImpl(InOrderQueue));
ASSERT_NE(PtrNode2, nullptr);
ASSERT_NE(PtrNode2, PtrNode1);
ASSERT_EQ(PtrNode1->MSuccessors.size(), 1lu);
ASSERT_EQ(PtrNode1->MSuccessors.front(), PtrNode2);
ASSERT_EQ(PtrNode2->MPredecessors.size(), 1lu);
ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1);

auto Node3Graph = InOrderQueue.submit(
[&](sycl::handler &cgh) { cgh.single_task<class TestKernel>([]() {}); });

auto PtrNode3 = sycl::detail::getSyclObjImpl(Graph)->getLastInorderNode(
sycl::detail::getSyclObjImpl(InOrderQueue));
ASSERT_NE(PtrNode3, nullptr);
ASSERT_NE(PtrNode3, PtrNode2);
ASSERT_EQ(PtrNode2->MSuccessors.size(), 1lu);
ASSERT_EQ(PtrNode2->MSuccessors.front(), PtrNode3);
ASSERT_EQ(PtrNode3->MPredecessors.size(), 1lu);
ASSERT_EQ(PtrNode3->MPredecessors.front().lock(), PtrNode2);

Graph.end_recording(InOrderQueue);

// Finalize main graph and check schedule
auto GraphExec = Graph.finalize();
auto GraphExecImpl = sycl::detail::getSyclObjImpl(GraphExec);
auto Schedule = GraphExecImpl->getSchedule();
auto ScheduleIt = Schedule.begin();
ASSERT_EQ(Schedule.size(), 3ul);
ASSERT_EQ(*ScheduleIt, PtrNode1);
ScheduleIt++;
ASSERT_EQ(*ScheduleIt, PtrNode2);
ScheduleIt++;
ASSERT_EQ(*ScheduleIt, PtrNode3);
ASSERT_EQ(InOrderQueue.get_context(), GraphExecImpl->getContext());
}

TEST_F(CommandGraphTest, InOrderQueueWithEmpty) {
sycl::property_list Properties{sycl::property::queue::in_order()};
sycl::queue InOrderQueue{Dev, Properties};

// Record in-order queue with a regular node then empty node then a regular
// node
Graph.begin_recording(InOrderQueue);
auto Node1Graph = InOrderQueue.submit(
[&](sycl::handler &cgh) { cgh.single_task<class TestKernel>([]() {}); });

auto PtrNode1 = sycl::detail::getSyclObjImpl(Graph)->getLastInorderNode(
sycl::detail::getSyclObjImpl(InOrderQueue));
ASSERT_NE(PtrNode1, nullptr);
ASSERT_TRUE(PtrNode1->MPredecessors.empty());

auto Node2Graph = InOrderQueue.submit([&](sycl::handler &cgh) {});

auto PtrNode2 = sycl::detail::getSyclObjImpl(Graph)->getLastInorderNode(
sycl::detail::getSyclObjImpl(InOrderQueue));
ASSERT_NE(PtrNode2, nullptr);
ASSERT_NE(PtrNode2, PtrNode1);
ASSERT_EQ(PtrNode1->MSuccessors.size(), 1lu);
ASSERT_EQ(PtrNode1->MSuccessors.front(), PtrNode2);
ASSERT_EQ(PtrNode2->MPredecessors.size(), 1lu);
ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1);

auto Node3Graph = InOrderQueue.submit(
[&](sycl::handler &cgh) { cgh.single_task<class TestKernel>([]() {}); });

auto PtrNode3 = sycl::detail::getSyclObjImpl(Graph)->getLastInorderNode(
sycl::detail::getSyclObjImpl(InOrderQueue));
ASSERT_NE(PtrNode3, nullptr);
ASSERT_NE(PtrNode3, PtrNode2);
ASSERT_EQ(PtrNode2->MSuccessors.size(), 1lu);
ASSERT_EQ(PtrNode2->MSuccessors.front(), PtrNode3);
ASSERT_EQ(PtrNode3->MPredecessors.size(), 1lu);
ASSERT_EQ(PtrNode3->MPredecessors.front().lock(), PtrNode2);

Graph.end_recording(InOrderQueue);

// Finalize main graph and check schedule
// Note that empty nodes are not scheduled
auto GraphExec = Graph.finalize();
auto GraphExecImpl = sycl::detail::getSyclObjImpl(GraphExec);
auto Schedule = GraphExecImpl->getSchedule();
auto ScheduleIt = Schedule.begin();
ASSERT_EQ(Schedule.size(), 2ul);
ASSERT_EQ(*ScheduleIt, PtrNode1);
ScheduleIt++;
ASSERT_EQ(*ScheduleIt, PtrNode3);
ASSERT_EQ(InOrderQueue.get_context(), GraphExecImpl->getContext());
}

TEST_F(CommandGraphTest, InOrderQueueWithEmptyFirst) {
sycl::property_list Properties{sycl::property::queue::in_order()};
sycl::queue InOrderQueue{Dev, Properties};

// Record in-order queue with an empty node then two regular nodes
Graph.begin_recording(InOrderQueue);
auto Node1Graph = InOrderQueue.submit([&](sycl::handler &cgh) {});

auto PtrNode1 = sycl::detail::getSyclObjImpl(Graph)->getLastInorderNode(
sycl::detail::getSyclObjImpl(InOrderQueue));
ASSERT_NE(PtrNode1, nullptr);
ASSERT_TRUE(PtrNode1->MPredecessors.empty());

auto Node2Graph = InOrderQueue.submit(
[&](sycl::handler &cgh) { cgh.single_task<class TestKernel>([]() {}); });

auto PtrNode2 = sycl::detail::getSyclObjImpl(Graph)->getLastInorderNode(
sycl::detail::getSyclObjImpl(InOrderQueue));
ASSERT_NE(PtrNode2, nullptr);
ASSERT_NE(PtrNode2, PtrNode1);
ASSERT_EQ(PtrNode1->MSuccessors.size(), 1lu);
ASSERT_EQ(PtrNode1->MSuccessors.front(), PtrNode2);
ASSERT_EQ(PtrNode2->MPredecessors.size(), 1lu);
ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1);

auto Node3Graph = InOrderQueue.submit(
[&](sycl::handler &cgh) { cgh.single_task<class TestKernel>([]() {}); });

auto PtrNode3 = sycl::detail::getSyclObjImpl(Graph)->getLastInorderNode(
sycl::detail::getSyclObjImpl(InOrderQueue));
ASSERT_NE(PtrNode3, nullptr);
ASSERT_NE(PtrNode3, PtrNode2);
ASSERT_EQ(PtrNode2->MSuccessors.size(), 1lu);
ASSERT_EQ(PtrNode2->MSuccessors.front(), PtrNode3);
ASSERT_EQ(PtrNode3->MPredecessors.size(), 1lu);
ASSERT_EQ(PtrNode3->MPredecessors.front().lock(), PtrNode2);

Graph.end_recording(InOrderQueue);

// Finalize main graph and check schedule
// Note that empty nodes are not scheduled
auto GraphExec = Graph.finalize();
auto GraphExecImpl = sycl::detail::getSyclObjImpl(GraphExec);
auto Schedule = GraphExecImpl->getSchedule();
auto ScheduleIt = Schedule.begin();
ASSERT_EQ(Schedule.size(), 2ul);
ASSERT_EQ(*ScheduleIt, PtrNode2);
ScheduleIt++;
ASSERT_EQ(*ScheduleIt, PtrNode3);
ASSERT_EQ(InOrderQueue.get_context(), GraphExecImpl->getContext());
}

TEST_F(CommandGraphTest, InOrderQueueWithEmptyLast) {
sycl::property_list Properties{sycl::property::queue::in_order()};
sycl::queue InOrderQueue{Dev, Properties};

// Record in-order queue with two regular nodes then an empty node
Graph.begin_recording(InOrderQueue);
auto Node1Graph = InOrderQueue.submit(
[&](sycl::handler &cgh) { cgh.single_task<class TestKernel>([]() {}); });

auto PtrNode1 = sycl::detail::getSyclObjImpl(Graph)->getLastInorderNode(
sycl::detail::getSyclObjImpl(InOrderQueue));
ASSERT_NE(PtrNode1, nullptr);
ASSERT_TRUE(PtrNode1->MPredecessors.empty());

auto Node2Graph = InOrderQueue.submit(
[&](sycl::handler &cgh) { cgh.single_task<class TestKernel>([]() {}); });

auto PtrNode2 = sycl::detail::getSyclObjImpl(Graph)->getLastInorderNode(
sycl::detail::getSyclObjImpl(InOrderQueue));
ASSERT_NE(PtrNode2, nullptr);
ASSERT_NE(PtrNode2, PtrNode1);
ASSERT_EQ(PtrNode1->MSuccessors.size(), 1lu);
ASSERT_EQ(PtrNode1->MSuccessors.front(), PtrNode2);
ASSERT_EQ(PtrNode2->MPredecessors.size(), 1lu);
ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1);

auto Node3Graph = InOrderQueue.submit([&](sycl::handler &cgh) {});

auto PtrNode3 = sycl::detail::getSyclObjImpl(Graph)->getLastInorderNode(
sycl::detail::getSyclObjImpl(InOrderQueue));
ASSERT_NE(PtrNode3, nullptr);
ASSERT_NE(PtrNode3, PtrNode2);
ASSERT_EQ(PtrNode2->MSuccessors.size(), 1lu);
ASSERT_EQ(PtrNode2->MSuccessors.front(), PtrNode3);
ASSERT_EQ(PtrNode3->MPredecessors.size(), 1lu);
ASSERT_EQ(PtrNode3->MPredecessors.front().lock(), PtrNode2);

Graph.end_recording(InOrderQueue);

// Finalize main graph and check schedule
// Note that empty nodes are not scheduled
auto GraphExec = Graph.finalize();
auto GraphExecImpl = sycl::detail::getSyclObjImpl(GraphExec);
auto Schedule = GraphExecImpl->getSchedule();
auto ScheduleIt = Schedule.begin();
ASSERT_EQ(Schedule.size(), 2ul);
ASSERT_EQ(*ScheduleIt, PtrNode1);
ScheduleIt++;
ASSERT_EQ(*ScheduleIt, PtrNode2);
ASSERT_EQ(InOrderQueue.get_context(), GraphExecImpl->getContext());
}

0 comments on commit 7f683f8

Please sign in to comment.