From 71a594ae901ec6cd16143678f0748e893cae050e Mon Sep 17 00:00:00 2001 From: Guo Yejun Date: Tue, 20 Feb 2024 03:42:03 -0800 Subject: [PATCH] [SYCL][Graph] submit.cpp: check test result according to spec --- sycl/test-e2e/Graph/Threading/submit.cpp | 41 ++++++++++++++---------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/sycl/test-e2e/Graph/Threading/submit.cpp b/sycl/test-e2e/Graph/Threading/submit.cpp index 1a815b188630e..5029647b5bdcb 100644 --- a/sycl/test-e2e/Graph/Threading/submit.cpp +++ b/sycl/test-e2e/Graph/Threading/submit.cpp @@ -4,11 +4,11 @@ // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// Test submitting a graph in a threaded situation. -// Note that we do not check the outputs because multiple concurrent executions -// is indeterministic (and depends on the backend command management). -// However, this test verifies that concurrent graph submissions do not trigger -// errors nor memory leaks. +// Test submitting a graph multiple times in a threaded situation. +// According to the spec, if a graph is submitted multiple times, the runtime +// automatically adds dependencies to prevent concurrent executions of the same +// graph, so the result is deterministic and the outputs can be checked against +// the reference data. 
#include "../graph_common.hpp" @@ -20,6 +20,7 @@ int main() { using T = int; const unsigned NumThreads = std::thread::hardware_concurrency(); + const unsigned SubmitsPerThread = 128; std::vector DataA(Size), DataB(Size), DataC(Size); std::iota(DataA.begin(), DataA.end(), 1); @@ -27,8 +28,8 @@ int main() { std::iota(DataC.begin(), DataC.end(), 1000); std::vector ReferenceA(DataA), ReferenceB(DataB), ReferenceC(DataC); - calculate_reference_data(NumThreads, Size, ReferenceA, ReferenceB, - ReferenceC); + calculate_reference_data(NumThreads * SubmitsPerThread, Size, ReferenceA, + ReferenceB, ReferenceC); exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; @@ -45,34 +46,40 @@ int main() { run_kernels_usm(Queue, Size, PtrA, PtrB, PtrC); Graph.end_recording(); - std::vector> - GraphExecs; - for (unsigned i = 0; i < NumThreads; ++i) { - GraphExecs.push_back(Graph.finalize()); - } + auto GraphExec = Graph.finalize(); Barrier SyncPoint{NumThreads}; - auto SubmitGraph = [&](int ThreadNum) { + auto SubmitGraph = [&]() { SyncPoint.wait(); - Queue.submit([&](sycl::handler &CGH) { - CGH.ext_oneapi_graph(GraphExecs[ThreadNum]); - }); + for (unsigned i = 0; i < SubmitsPerThread; ++i) { + Queue.submit( + [&](sycl::handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); + } }; std::vector Threads; Threads.reserve(NumThreads); for (unsigned i = 0; i < NumThreads; ++i) { - Threads.emplace_back(SubmitGraph, i); + Threads.emplace_back(SubmitGraph); } for (unsigned i = 0; i < NumThreads; ++i) { Threads[i].join(); } + Queue.copy(PtrA, DataA.data(), Size); + Queue.copy(PtrB, DataB.data(), Size); + Queue.copy(PtrC, DataC.data(), Size); Queue.wait_and_throw(); + for (int i = 0; i < Size; ++i) { + check_value(i, ReferenceA[i], DataA[i], "A"); + check_value(i, ReferenceB[i], DataB[i], "B"); + check_value(i, ReferenceC[i], DataC[i], "C"); + } + free(PtrA, Queue); free(PtrB, Queue); free(PtrC, Queue);