forked from intel/llvm
-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SYCL][Graph] Support for sycl_ext_oneapi_enqueue_barrier extension (#…
…301) * [SYCL][Graph] Support for sycl_ext_oneapi_enqueue_barrier extension Adds support to handle barrier enqueuing with the Record & Replay API. Barriers are implemented as empty nodes enforcing the required dependencies. Adds tests that check 1) correctness of graph structure when barriers have been enqueued, 2) processing behavior, 3) exception throwing if barriers are used with the explicit API. Notes: 1) Multi-queue barriers are not supported since they do not make sense with asynchronous graph execution. 2) Barriers can only be used with the Record & Replay API, since barriers rely on events to enforce dependencies. * [SYCL][Graph] Adds unit test with multiple barriers and test-e2e Adds unit test with multiple barriers and test-e2e. Corrects some typos. * Update sycl/source/detail/graph_impl.cpp Co-authored-by: Ben Tracy <ben.tracy@codeplay.com> --------- Co-authored-by: Ben Tracy <ben.tracy@codeplay.com>
- Loading branch information
1 parent
5318388
commit adaaaed
Showing
6 changed files
with
377 additions
and
106 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
// REQUIRES: level_zero, gpu | ||
// RUN: %{build} -o %t.out | ||
// RUN: %{run} %t.out | ||
// Extra run to check for leaks in Level Zero using ZE_DEBUG | ||
// RUN: %if ext_oneapi_level_zero %{env ZE_DEBUG=4 %{run} %t.out 2>&1 | FileCheck %s %} | ||
// | ||
// CHECK-NOT: LEAK | ||
|
||
#include "../graph_common.hpp" | ||
|
||
//// Test Explicit API graph construction with USM. | ||
/// | ||
/// @param Q Command-queue to make kernel submissions to. | ||
/// @param Size Number of elements in the buffers. | ||
/// @param DataA Pointer to first USM allocation to use in kernels. | ||
/// @param DataB Pointer to second USM allocation to use in kernels. | ||
/// @param DataC Pointer to third USM allocation to use in kernels. | ||
/// | ||
/// @return Event corresponding to the exit node of the submission sequence. | ||
template <typename T> | ||
event run_kernels_usm_with_barrier(queue Q, const size_t Size, T *DataA, | ||
T *DataB, T *DataC) { | ||
// Read & write Buffer A | ||
auto EventA = Q.submit([&](handler &CGH) { | ||
CGH.parallel_for(range<1>(Size), [=](item<1> Id) { | ||
auto LinID = Id.get_linear_id(); | ||
DataA[LinID]++; | ||
}); | ||
}); | ||
|
||
Q.ext_oneapi_submit_barrier(); | ||
|
||
// Reads Buffer A | ||
// Read & Write Buffer B | ||
auto EventB = Q.submit([&](handler &CGH) { | ||
CGH.parallel_for(range<1>(Size), [=](item<1> Id) { | ||
auto LinID = Id.get_linear_id(); | ||
DataB[LinID] += DataA[LinID]; | ||
}); | ||
}); | ||
|
||
// Reads Buffer A | ||
// Read & writes Buffer C | ||
auto EventC = Q.submit([&](handler &CGH) { | ||
CGH.parallel_for(range<1>(Size), [=](item<1> Id) { | ||
auto LinID = Id.get_linear_id(); | ||
DataC[LinID] -= DataA[LinID]; | ||
}); | ||
}); | ||
|
||
Q.ext_oneapi_submit_barrier(); | ||
|
||
// Read & write Buffers B and C | ||
auto ExitEvent = Q.submit([&](handler &CGH) { | ||
CGH.parallel_for(range<1>(Size), [=](item<1> Id) { | ||
auto LinID = Id.get_linear_id(); | ||
DataB[LinID]--; | ||
DataC[LinID]--; | ||
}); | ||
}); | ||
return ExitEvent; | ||
} | ||
|
||
int main() { | ||
queue Queue; | ||
|
||
using T = int; | ||
|
||
std::vector<T> DataA(Size), DataB(Size), DataC(Size); | ||
|
||
std::iota(DataA.begin(), DataA.end(), 1); | ||
std::iota(DataB.begin(), DataB.end(), 10); | ||
std::iota(DataC.begin(), DataC.end(), 1000); | ||
|
||
std::vector<T> ReferenceA(DataA), ReferenceB(DataB), ReferenceC(DataC); | ||
calculate_reference_data(Iterations, Size, ReferenceA, ReferenceB, | ||
ReferenceC); | ||
|
||
exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; | ||
|
||
T *PtrA = malloc_device<T>(Size, Queue); | ||
T *PtrB = malloc_device<T>(Size, Queue); | ||
T *PtrC = malloc_device<T>(Size, Queue); | ||
|
||
Queue.copy(DataA.data(), PtrA, Size); | ||
Queue.copy(DataB.data(), PtrB, Size); | ||
Queue.copy(DataC.data(), PtrC, Size); | ||
Queue.wait_and_throw(); | ||
|
||
// Add commands to graph | ||
Graph.begin_recording(Queue); | ||
auto ev = run_kernels_usm_with_barrier(Queue, Size, PtrA, PtrB, PtrC); | ||
Graph.end_recording(Queue); | ||
|
||
auto GraphExec = Graph.finalize(); | ||
|
||
event Event; | ||
for (unsigned n = 0; n < Iterations; n++) { | ||
Event = Queue.submit([&](handler &CGH) { | ||
CGH.depends_on(Event); | ||
CGH.ext_oneapi_graph(GraphExec); | ||
}); | ||
} | ||
Queue.wait_and_throw(); | ||
|
||
Queue.copy(PtrA, DataA.data(), Size); | ||
Queue.copy(PtrB, DataB.data(), Size); | ||
Queue.copy(PtrC, DataC.data(), Size); | ||
Queue.wait_and_throw(); | ||
|
||
free(PtrA, Queue); | ||
free(PtrB, Queue); | ||
free(PtrC, Queue); | ||
|
||
assert(ReferenceA == DataA); | ||
assert(ReferenceB == DataB); | ||
assert(ReferenceC == DataC); | ||
|
||
return 0; | ||
} |
Oops, something went wrong.