Skip to content

Commit

Permalink
[SYCL][Graph] Support for 2d and 3d buffer fill ops (#12500)
Browse files Browse the repository at this point in the history
Adds support for 2d and 3d buffer fill ops (implementation based on the
regular buffer fill submission).
Adds tests that verify 2d and 3d buffer fill.
  • Loading branch information
mfrancepillois authored Feb 2, 2024
1 parent faad41d commit 78c30da
Show file tree
Hide file tree
Showing 7 changed files with 239 additions and 3 deletions.
16 changes: 13 additions & 3 deletions sycl/source/detail/memory_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1700,21 +1700,31 @@ void MemoryManager::ext_oneapi_fill_cmd_buffer(
unsigned int ElementSize,
std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
(void)Size;
assert(SYCLMemObj && "The SYCLMemObj is nullptr");

const PluginPtr &Plugin = Context->getPlugin();
if (SYCLMemObj->getType() != detail::SYCLMemObjI::MemObjType::Buffer) {
throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
"Images are not supported in Graphs");
}
if (Dim <= 1) {

// 2D and 3D buffers accessors can't have custom range or the data will
// likely be discontiguous.
bool RangesUsable = (Dim <= 1) || (Size == AccessRange);
// For 2D and 3D buffers, the offset must be 0, or the data will be
// discontiguous.
bool OffsetUsable = (Dim <= 1) || (AccessOffset == sycl::id<3>{0, 0, 0});
size_t RangeMultiplier = AccessRange[0] * AccessRange[1] * AccessRange[2];

if (RangesUsable && OffsetUsable) {
Plugin->call<PiApiKind::piextCommandBufferMemBufferFill>(
CommandBuffer, pi::cast<sycl::detail::pi::PiMem>(Mem), Pattern,
PatternSize, AccessOffset[0] * ElementSize,
AccessRange[0] * ElementSize, Deps.size(), Deps.data(), OutSyncPoint);
RangeMultiplier * ElementSize, Deps.size(), Deps.data(), OutSyncPoint);
return;
}
// The sycl::handler uses a parallel_for kernel in the case of unusable
// Range or Offset, not CG:Fill. So we should not be here.
throw runtime_error("Not supported configuration of fill requested",
PI_ERROR_INVALID_OPERATION);
}
Expand Down
11 changes: 11 additions & 0 deletions sycl/test-e2e/Graph/Explicit/buffer_fill_2d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env UR_L0_LEAKS_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --implicit-check-not=LEAK %s %}
//
// TODO enable cuda once buffer issue investigated and fixed
// UNSUPPORTED: cuda

#define GRAPH_E2E_EXPLICIT

#include "../Inputs/buffer_fill_2d.cpp"
11 changes: 11 additions & 0 deletions sycl/test-e2e/Graph/Explicit/buffer_fill_3d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env UR_L0_LEAKS_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --implicit-check-not=LEAK %s %}
//
// TODO enable cuda once buffer issue investigated and fixed
// UNSUPPORTED: cuda

#define GRAPH_E2E_EXPLICIT

#include "../Inputs/buffer_fill_3d.cpp"
90 changes: 90 additions & 0 deletions sycl/test-e2e/Graph/Inputs/buffer_fill_2d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// Tests adding a 2d Buffer fill operation as a graph node.

#include "../graph_common.hpp"

int main() {

queue Queue{{sycl::ext::intel::property::queue::no_immediate_command_list{}}};

if (!are_graphs_supported(Queue)) {
return 0;
}

const size_t N = 10;
const float Pattern = 3.14f;
std::vector<float> Data(N * N);
buffer<float, 2> Buffer{Data.data(), range<2>(N, N)};

const uint64_t PatternI64 = 0x3333333355555555;
std::vector<uint64_t> DataI64(N * N);
buffer<uint64_t, 2> BufferI64{DataI64.data(), range<2>(N, N)};

const uint32_t PatternI32 = 888;
std::vector<uint32_t> DataI32(N * N);
buffer<uint32_t, 2> BufferI32{DataI32.data(), range<2>(N, N)};

const uint16_t PatternI16 = 777;
std::vector<uint16_t> DataI16(N * N);
buffer<uint16_t, 2> BufferI16{DataI16.data(), range<2>(N, N)};

const uint8_t PatternI8 = 33;
std::vector<uint8_t> DataI8(N * N);
buffer<uint8_t, 2> BufferI8{DataI8.data(), range<2>(N, N)};

Buffer.set_write_back(false);
BufferI64.set_write_back(false);
BufferI32.set_write_back(false);
BufferI16.set_write_back(false);
BufferI8.set_write_back(false);
{
exp_ext::command_graph Graph{
Queue.get_context(),
Queue.get_device(),
{exp_ext::property::graph::assume_buffer_outlives_graph{}}};

add_node(Graph, Queue, [&](handler &CGH) {
auto Acc = Buffer.get_access(CGH);
CGH.fill(Acc, Pattern);
});

add_node(Graph, Queue, [&](handler &CGH) {
auto Acc = BufferI64.get_access(CGH);
CGH.fill(Acc, PatternI64);
});

add_node(Graph, Queue, [&](handler &CGH) {
auto Acc = BufferI32.get_access(CGH);
CGH.fill(Acc, PatternI32);
});

add_node(Graph, Queue, [&](handler &CGH) {
auto Acc = BufferI16.get_access(CGH);
CGH.fill(Acc, PatternI16);
});

add_node(Graph, Queue, [&](handler &CGH) {
auto Acc = BufferI8.get_access(CGH);
CGH.fill(Acc, PatternI8);
});

auto ExecGraph = Graph.finalize();

Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); }).wait();
}
host_accessor HostData(Buffer);
host_accessor HostDataI64(BufferI64);
host_accessor HostDataI32(BufferI32);
host_accessor HostDataI16(BufferI16);
host_accessor HostDataI8(BufferI8);
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
assert(HostData[i][j] == Pattern);
assert(HostDataI64[i][j] == PatternI64);
assert(HostDataI32[i][j] == PatternI32);
assert(HostDataI16[i][j] == PatternI16);
assert(HostDataI8[i][j] == PatternI8);
}

return 0;
}
}
92 changes: 92 additions & 0 deletions sycl/test-e2e/Graph/Inputs/buffer_fill_3d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Tests adding a 3d Buffer fill operation as a graph node.

#include "../graph_common.hpp"

int main() {

queue Queue{{sycl::ext::intel::property::queue::no_immediate_command_list{}}};

if (!are_graphs_supported(Queue)) {
return 0;
}

const size_t N = 10;
const float Pattern = 3.14f;
std::vector<float> Data(N * N * N);
buffer<float, 3> Buffer{Data.data(), range<3>(N, N, N)};

const uint64_t PatternI64 = 0x3333333355555555;
std::vector<uint64_t> DataI64(N * N * N);
buffer<uint64_t, 3> BufferI64{DataI64.data(), range<3>(N, N, N)};

const uint32_t PatternI32 = 888;
std::vector<uint32_t> DataI32(N * N * N);
buffer<uint32_t, 3> BufferI32{DataI32.data(), range<3>(N, N, N)};

const uint16_t PatternI16 = 777;
std::vector<uint16_t> DataI16(N * N * N);
buffer<uint16_t, 3> BufferI16{DataI16.data(), range<3>(N, N, N)};

const uint8_t PatternI8 = 33;
std::vector<uint8_t> DataI8(N * N * N);
buffer<uint8_t, 3> BufferI8{DataI8.data(), range<3>(N, N, N)};

Buffer.set_write_back(false);
BufferI64.set_write_back(false);
BufferI32.set_write_back(false);
BufferI16.set_write_back(false);
BufferI8.set_write_back(false);
{
exp_ext::command_graph Graph{
Queue.get_context(),
Queue.get_device(),
{exp_ext::property::graph::assume_buffer_outlives_graph{}}};

add_node(Graph, Queue, [&](handler &CGH) {
auto Acc = Buffer.get_access(CGH);
CGH.fill(Acc, Pattern);
});

add_node(Graph, Queue, [&](handler &CGH) {
auto Acc = BufferI64.get_access(CGH);
CGH.fill(Acc, PatternI64);
});

add_node(Graph, Queue, [&](handler &CGH) {
auto Acc = BufferI32.get_access(CGH);
CGH.fill(Acc, PatternI32);
});

add_node(Graph, Queue, [&](handler &CGH) {
auto Acc = BufferI16.get_access(CGH);
CGH.fill(Acc, PatternI16);
});

add_node(Graph, Queue, [&](handler &CGH) {
auto Acc = BufferI8.get_access(CGH);
CGH.fill(Acc, PatternI8);
});

auto ExecGraph = Graph.finalize();

Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); }).wait();
}
host_accessor HostData(Buffer);
host_accessor HostDataI64(BufferI64);
host_accessor HostDataI32(BufferI32);
host_accessor HostDataI16(BufferI16);
host_accessor HostDataI8(BufferI8);
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
for (int z = 0; z < N; z++) {
assert(HostData[i][j][z] == Pattern);
assert(HostDataI64[i][j][z] == PatternI64);
assert(HostDataI32[i][j][z] == PatternI32);
assert(HostDataI16[i][j][z] == PatternI16);
assert(HostDataI8[i][j][z] == PatternI8);
}

return 0;
}
}
}
11 changes: 11 additions & 0 deletions sycl/test-e2e/Graph/RecordReplay/buffer_fill_2d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env UR_L0_LEAKS_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --implicit-check-not=LEAK %s %}
//
// TODO enable cuda once buffer issue investigated and fixed
// UNSUPPORTED: cuda

#define GRAPH_E2E_RECORD_REPLAY

#include "../Inputs/buffer_fill_2d.cpp"
11 changes: 11 additions & 0 deletions sycl/test-e2e/Graph/RecordReplay/buffer_fill_3d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env UR_L0_LEAKS_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --implicit-check-not=LEAK %s %}
//
// TODO enable cuda once buffer issue investigated and fixed
// UNSUPPORTED: cuda

#define GRAPH_E2E_RECORD_REPLAY

#include "../Inputs/buffer_fill_3d.cpp"

0 comments on commit 78c30da

Please sign in to comment.