forked from intel/llvm
-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SYCL][Graph] Fix bug when host-task is submitted to in-order queue (#…
…322) * [SYCL][Graph] Fix bug when host-task is submitted to in-order queue When a host-task is submitted to an in-order queue, the dependency between this host-task and its successor is explicitly handled. However, when we record an in-order queue, the recorded CGs are not part of the regular in-order queue execution sequence; instead, inter-CG dependencies are managed by the graph implementation. This PR implements this point and ensures that recording an in-order queue does not impact the normal execution sequence. Tests (e2e and unit tests) have been added to check it. Adds and renames tests (e2e and unit tests)
- Loading branch information
1 parent
7429d3f
commit 52a1117
Showing
6 changed files
with
499 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
65 changes: 65 additions & 0 deletions
65
sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
// REQUIRES: cuda || level_zero, gpu | ||
// RUN: %{build} -o %t.out | ||
// RUN: %{run} %t.out | ||
|
||
// Tests submitting a host task to an in-order queue before recording | ||
// commands from it. | ||
|
||
#include "../graph_common.hpp" | ||
|
||
int main() { | ||
using T = int; | ||
|
||
queue Queue{{sycl::ext::intel::property::queue::no_immediate_command_list{}, | ||
sycl::property::queue::in_order{}}}; | ||
|
||
// Check if device has usm shared allocation | ||
if (!Queue.get_device().has(sycl::aspect::usm_shared_allocations)) | ||
return 0; | ||
|
||
T *TestData = sycl::malloc_shared<T>(Size, Queue); | ||
|
||
ext::oneapi::experimental::command_graph Graph{Queue.get_context(), | ||
Queue.get_device()}; | ||
|
||
Queue.submit([&](handler &CGH) { | ||
CGH.host_task([=]() { | ||
for (size_t i = 0; i < Size; i++) { | ||
TestData[i] = static_cast<T>(i); | ||
} | ||
}); | ||
}); | ||
|
||
Graph.begin_recording(Queue); | ||
|
||
auto GraphEvent = Queue.submit([&](handler &CGH) { | ||
CGH.single_task<class TestKernel2>([=]() { | ||
for (size_t i = 0; i < Size; i++) { | ||
TestData[i] += static_cast<T>(i); | ||
} | ||
}); | ||
}); | ||
|
||
Graph.end_recording(Queue); | ||
|
||
auto GraphExec = Graph.finalize(); | ||
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); | ||
|
||
Queue.submit([&](handler &CGH) { | ||
CGH.single_task<class TestKernel3>([=]() { | ||
for (size_t i = 0; i < Size; i++) { | ||
TestData[i] *= static_cast<T>(i); | ||
} | ||
}); | ||
}); | ||
|
||
Queue.wait_and_throw(); | ||
|
||
for (size_t i = 0; i < Size; i++) { | ||
assert(TestData[i] == ((i + i) * i)); | ||
} | ||
|
||
sycl::free(TestData, Queue); | ||
|
||
return 0; | ||
} |
63 changes: 63 additions & 0 deletions
63
sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies_memcpy.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
// REQUIRES: cuda || level_zero, gpu | ||
// RUN: %{build} -o %t.out | ||
// RUN: %{run} %t.out | ||
|
||
// Tests submitting memcpy to an in-order queue before recording | ||
// commands from it. | ||
|
||
#include "../graph_common.hpp" | ||
|
||
int main() { | ||
using T = int; | ||
|
||
queue Queue{{sycl::ext::intel::property::queue::no_immediate_command_list{}, | ||
sycl::property::queue::in_order{}}}; | ||
|
||
// Check if device has usm shared allocation | ||
if (!Queue.get_device().has(sycl::aspect::usm_shared_allocations)) | ||
return 0; | ||
|
||
std::vector<T> TestDataIn(Size); | ||
T *TestData = sycl::malloc_shared<T>(Size, Queue); | ||
T *TestDataOut = sycl::malloc_shared<T>(Size, Queue); | ||
|
||
ext::oneapi::experimental::command_graph Graph{Queue.get_context(), | ||
Queue.get_device()}; | ||
|
||
std::memset(TestDataIn.data(), 1, Size * sizeof(T)); | ||
Queue.memcpy(TestData, TestDataIn.data(), Size * sizeof(T)); | ||
|
||
Graph.begin_recording(Queue); | ||
|
||
auto GraphEvent = Queue.submit([&](handler &CGH) { | ||
CGH.single_task<class TestKernel2>([=]() { | ||
for (size_t i = 0; i < Size; i++) { | ||
TestData[i] += static_cast<T>(i); | ||
} | ||
}); | ||
}); | ||
|
||
Graph.end_recording(Queue); | ||
|
||
auto GraphExec = Graph.finalize(); | ||
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); | ||
|
||
Queue.memcpy(TestDataOut, TestData, Size * sizeof(T)); | ||
|
||
Queue.wait_and_throw(); | ||
|
||
std::vector<T> Reference(Size); | ||
std::memset(Reference.data(), 1, Size * sizeof(T)); | ||
for (size_t i = 0; i < Size; i++) { | ||
Reference[i] += i; | ||
} | ||
|
||
// Check Outputs | ||
for (size_t i = 0; i < Size; i++) { | ||
assert(TestDataOut[i] == Reference[i]); | ||
} | ||
|
||
sycl::free(TestData, Queue); | ||
|
||
return 0; | ||
} |
61 changes: 61 additions & 0 deletions
61
sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies_memset.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
// REQUIRES: cuda || level_zero, gpu | ||
// RUN: %{build} -o %t.out | ||
// RUN: %{run} %t.out | ||
|
||
// Tests submitting memset to an in-order queue before recording | ||
// commands from it. | ||
|
||
#include "../graph_common.hpp" | ||
|
||
int main() { | ||
using T = int; | ||
|
||
queue Queue{{sycl::ext::intel::property::queue::no_immediate_command_list{}, | ||
sycl::property::queue::in_order{}}}; | ||
|
||
// Check if device has usm shared allocation | ||
if (!Queue.get_device().has(sycl::aspect::usm_shared_allocations)) | ||
return 0; | ||
|
||
T *TestData = sycl::malloc_shared<T>(Size, Queue); | ||
T *TestDataOut = sycl::malloc_shared<T>(Size, Queue); | ||
|
||
ext::oneapi::experimental::command_graph Graph{Queue.get_context(), | ||
Queue.get_device()}; | ||
|
||
Queue.memset(TestData, 1, Size * sizeof(T)); | ||
|
||
Graph.begin_recording(Queue); | ||
|
||
auto GraphEvent = Queue.submit([&](handler &CGH) { | ||
CGH.single_task<class TestKernel2>([=]() { | ||
for (size_t i = 0; i < Size; i++) { | ||
TestData[i] += static_cast<T>(i); | ||
} | ||
}); | ||
}); | ||
|
||
Graph.end_recording(Queue); | ||
|
||
auto GraphExec = Graph.finalize(); | ||
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); | ||
|
||
Queue.memcpy(TestDataOut, TestData, Size * sizeof(T)); | ||
|
||
Queue.wait_and_throw(); | ||
|
||
std::vector<T> Reference(Size); | ||
std::memset(Reference.data(), 1, Size * sizeof(T)); | ||
for (size_t i = 0; i < Size; i++) { | ||
Reference[i] += i; | ||
} | ||
|
||
// Check Outputs | ||
for (size_t i = 0; i < Size; i++) { | ||
assert(TestDataOut[i] == Reference[i]); | ||
} | ||
|
||
sycl::free(TestData, Queue); | ||
|
||
return 0; | ||
} |
Oops, something went wrong.