Skip to content

Commit

Permalink
[SYCL][Graph] Fix memset queue shortcut when queue is recorded (intel…
Browse files Browse the repository at this point in the history
…#12508)

Memset queue shortcut `queue::memset()` manages the memset direclty from
the host (without going through the normal path, i.e. the handler).
We added a specific case when the queue is recorded to use the normal
path instead of the optimized path.
  • Loading branch information
mfrancepillois committed Feb 13, 2024
1 parent c872cad commit 91087b9
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 38 deletions.
26 changes: 7 additions & 19 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,6 @@ event queue_impl::memset(const std::shared_ptr<detail::queue_impl> &Self,
// Emit a begin/end scope for this call
PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin);
#endif
if (MGraph.lock()) {
throw sycl::exception(make_error_code(errc::invalid),
"The memset feature is not yet available "
"for use with the SYCL Graph extension.");
}

return submitMemOpHelper(
Self, DepEvents, [&](handler &CGH) { CGH.memset(Ptr, Value, Count); },
Expand Down Expand Up @@ -169,19 +164,14 @@ event queue_impl::memcpy(const std::shared_ptr<detail::queue_impl> &Self,
// Emit a begin/end scope for this call
PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin);
#endif
// If we have a command graph set we need to capture the copy through normal
// queue submission rather than execute the copy directly.
auto HandlerFunc = [&](handler &CGH) { CGH.memcpy(Dest, Src, Count); };
if (MGraph.lock())
return submitWithHandler(Self, DepEvents, HandlerFunc);

if ((!Src || !Dest) && Count != 0) {
report(CodeLoc);
throw runtime_error("NULL pointer argument in memory copy operation.",
PI_ERROR_INVALID_VALUE);
}
return submitMemOpHelper(
Self, DepEvents, HandlerFunc,
Self, DepEvents, [&](handler &CGH) { CGH.memcpy(Dest, Src, Count); },
[](const auto &...Args) { MemoryManager::copy_usm(Args...); }, Src, Self,
Count, Dest);
}
Expand All @@ -190,14 +180,9 @@ event queue_impl::mem_advise(const std::shared_ptr<detail::queue_impl> &Self,
const void *Ptr, size_t Length,
pi_mem_advice Advice,
const std::vector<event> &DepEvents) {
// If we have a command graph set we need to capture the advise through normal
// queue submission.
auto HandlerFunc = [&](handler &CGH) { CGH.mem_advise(Ptr, Length, Advice); };
if (MGraph.lock())
return submitWithHandler(Self, DepEvents, HandlerFunc);

return submitMemOpHelper(
Self, DepEvents, HandlerFunc,
Self, DepEvents,
[&](handler &CGH) { CGH.mem_advise(Ptr, Length, Advice); },
[](const auto &...Args) { MemoryManager::advise_usm(Args...); }, Ptr,
Self, Length, Advice);
}
Expand Down Expand Up @@ -353,7 +338,10 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
const std::vector<event> &ExpandedDepEvents =
getExtendDependencyList(DepEvents, MutableDepEvents, Lock);

if (areEventsSafeForSchedulerBypass(ExpandedDepEvents, MContext)) {
// If we have a command graph set we need to capture the op through the
// handler rather than by-passing the scheduler.
if (!MGraph.lock() &&
areEventsSafeForSchedulerBypass(ExpandedDepEvents, MContext)) {
if (MSupportsDiscardingPiEvents) {
MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents),
/*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr);
Expand Down
48 changes: 48 additions & 0 deletions sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env UR_L0_LEAKS_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
//
// Tests adding a USM memset queue shortcut operation as a graph node.

#include "../graph_common.hpp"

int main() {

queue Queue;

if (!are_graphs_supported(Queue)) {
return 0;
}

exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};

const size_t N = 10;
unsigned char *Arr = malloc_device<unsigned char>(N, Queue);

int Value = 77;
Graph.begin_recording(Queue);
auto Init = Queue.memset(Arr, Value, N);
Queue.submit([&](handler &CGH) {
CGH.depends_on(Init);
CGH.single_task<class double_dest>([=]() {
for (int i = 0; i < Size; i++)
Arr[i] = 2 * Arr[i];
});
});

Graph.end_recording();

auto ExecGraph = Graph.finalize();

Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); }).wait();

std::vector<unsigned char> Output(N);
Queue.memcpy(Output.data(), Arr, N).wait();
for (int i = 0; i < N; i++)
assert(Output[i] == (Value * 2));

sycl::free(Arr, Queue);

return 0;
}
19 changes: 0 additions & 19 deletions sycl/unittests/Extensions/CommandGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1848,25 +1848,6 @@ TEST_F(CommandGraphTest, FusionExtensionExceptionCheck) {
ASSERT_EQ(ExceptionCode, sycl::errc::invalid);
}

TEST_F(CommandGraphTest, USMMemsetShortcutExceptionCheck) {

const size_t N = 10;
unsigned char *Arr = malloc_device<unsigned char>(N, Queue);
int Value = 77;

Graph.begin_recording(Queue);

std::error_code ExceptionCode = make_error_code(sycl::errc::success);
try {
Queue.memset(Arr, Value, N);
} catch (exception &Exception) {
ExceptionCode = Exception.code();
}
ASSERT_EQ(ExceptionCode, sycl::errc::invalid);

Graph.end_recording(Queue);
}

TEST_F(CommandGraphTest, Memcpy2DExceptionCheck) {
constexpr size_t RECT_WIDTH = 30;
constexpr size_t RECT_HEIGHT = 21;
Expand Down

0 comments on commit 91087b9

Please sign in to comment.