forked from intel/llvm
-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SYCL][Graph] Fix in_order queue with empty nodes
Adding an empty node to a recorded in-order queue resulted in inconsistent dependencies between nodes. This patch fixes this issue and simplifies the adding of empty nodes. Unit tests have been added to check node dependencies when recording an in_order queue with and without empty nodes. Fixes Issue: #239
- Loading branch information
1 parent
56929b2
commit 7f683f8
Showing
3 changed files
with
303 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
81 changes: 81 additions & 0 deletions
81
sycl/test-e2e/Graph/RecordReplay/dotp_in_order_with_empty_nodes.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
// REQUIRES: level_zero, gpu | ||
// RUN: %{build} -o %t.out | ||
// RUN: %{run} %t.out | ||
// RUN: %if ext_oneapi_level_zero %{env ZE_DEBUG=4 %{run} %t.out 2>&1 | FileCheck %s %} | ||
// | ||
// CHECK-NOT: LEAK | ||
|
||
// Tests a dotp operation using device USM and an in-order queue with empty nodes. | ||
// The second run is to check that there are no leaks reported with the embedded | ||
// ZE_DEBUG=4 testing capability. | ||
|
||
#include "../graph_common.hpp" | ||
|
||
int main() { | ||
property_list Properties{property::queue::in_order()}; | ||
queue Queue{gpu_selector_v, Properties}; | ||
|
||
exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; | ||
|
||
float *Dotp = malloc_device<float>(1, Queue); | ||
|
||
const size_t N = 10; | ||
float *X = malloc_device<float>(N, Queue); | ||
float *Y = malloc_device<float>(N, Queue); | ||
float *Z = malloc_device<float>(N, Queue); | ||
|
||
Graph.begin_recording(Queue); | ||
|
||
auto InitEvent = Queue.submit([&](handler &CGH) { | ||
CGH.parallel_for(N, [=](id<1> it) { | ||
const size_t i = it[0]; | ||
X[i] = 1.0f; | ||
Y[i] = 2.0f; | ||
Z[i] = 3.0f; | ||
}); | ||
}); | ||
|
||
auto Empty1 = Queue.submit([&](handler &) {}); | ||
|
||
auto EventA = Queue.submit([&](handler &CGH) { | ||
CGH.parallel_for(range<1>{N}, [=](id<1> it) { | ||
const size_t i = it[0]; | ||
X[i] = Alpha * X[i] + Beta * Y[i]; | ||
}); | ||
}); | ||
|
||
auto EventB = Queue.submit([&](handler &CGH) { | ||
CGH.parallel_for(range<1>{N}, [=](id<1> it) { | ||
const size_t i = it[0]; | ||
Z[i] = Gamma * Z[i] + Beta * Y[i]; | ||
}); | ||
}); | ||
|
||
auto Empty2 = Queue.submit([&](handler &) {}); | ||
|
||
Queue.submit([&](handler &CGH) { | ||
CGH.single_task([=]() { | ||
for (size_t j = 0; j < N; j++) { | ||
Dotp[0] += X[j] * Z[j]; | ||
} | ||
}); | ||
}); | ||
|
||
Graph.end_recording(); | ||
|
||
auto ExecGraph = Graph.finalize(); | ||
|
||
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); }); | ||
|
||
float Output; | ||
Queue.memcpy(&Output, Dotp, sizeof(float)).wait(); | ||
|
||
assert(Output == dotp_reference_result(N)); | ||
|
||
sycl::free(Dotp, Queue); | ||
sycl::free(X, Queue); | ||
sycl::free(Y, Queue); | ||
sycl::free(Z, Queue); | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters