From 779cbce57b7c02e37c6892127fdaec2fd8e28078 Mon Sep 17 00:00:00 2001 From: "Xue, Bowen" Date: Tue, 27 Feb 2024 14:36:26 -0800 Subject: [PATCH] add loops to get around task sequence destructor loop getting optimized away without balanced property for simple designs --- .../task_sequence_intel_balanced.cpp | 30 +++++++++++-------- .../task_sequence_intel_explicit_get.cpp | 30 +++++++++++-------- .../task_sequence_intel_no_explicit_get.cpp | 18 ++++++----- 3 files changed, 44 insertions(+), 34 deletions(-) diff --git a/sycl/test/check_device_code/task_sequence_intel_balanced.cpp b/sycl/test/check_device_code/task_sequence_intel_balanced.cpp index 05eb8fbdb6736..c888ec1eed9d5 100644 --- a/sycl/test/check_device_code/task_sequence_intel_balanced.cpp +++ b/sycl/test/check_device_code/task_sequence_intel_balanced.cpp @@ -1,6 +1,6 @@ // RUN: %clangxx -fsycl -fsycl-device-only -S -emit-llvm -Xclang -no-enable-noundef-analysis %s -o - | FileCheck %s -// CHECK: [[TASK_SEQUENCE:%.*]] ={{.*}} call spir_func target("spirv.TaskSequenceINTEL") @_Z31__spirv_TaskSequenceCreateINTEL{{.*}}(ptr{{.*}}@_Z8arrayAdd{{.*}}, i32 -1, i32 -1, i32 2, i32 2) +// CHECK: [[TASK_SEQUENCE:%.*]] ={{.*}} call spir_func target("spirv.TaskSequenceINTEL") @_Z31__spirv_TaskSequenceCreateINTEL{{.*}}(ptr{{.*}}@_Z8arrayAdd{{.*}}, i32 -1, i32 -1, i32 0, i32 128) // CHECK: call spir_func void @_Z30__spirv_TaskSequenceAsyncINTEL{{.*}}(target("spirv.TaskSequenceINTEL") [[TASK_SEQUENCE]], ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 128) // CHECK-COUNT-1: call spir_func i32 @_Z28__spirv_TaskSequenceGetINTEL{{.*}}(target("spirv.TaskSequenceINTEL") [[TASK_SEQUENCE]]) // CHECK: call spir_func void @_Z32__spirv_TaskSequenceReleaseINTEL{{.*}}(target("spirv.TaskSequenceINTEL") [[TASK_SEQUENCE]]) @@ -10,7 +10,7 @@ using namespace sycl::ext::intel::experimental; using namespace sycl::ext::oneapi::experimental; -constexpr int NSIZE = 128; +constexpr int kSize = 128; int arrayAdd(int *data1, int *data2, int N) { int ret = 0; @@ -23,20 +23,24 @@ int arrayAdd(int *data1, int *data2, int N) { int main() { sycl::queue myQueue; - int result = 0; + std::vector results(kSize); myQueue.submit([&](sycl::handler &cgh) { - sycl::buffer result_sycl(&result, sycl::range<1>(1)); - auto result_acc = result_sycl.get_access(cgh); - cgh.single_task([=](sycl::kernel_handler kh) { - int d1[NSIZE], d2[NSIZE]; + sycl::buffer buffer_results(results); + sycl::accessor results_acc(buffer_results, sycl::write_only, sycl::no_init); + cgh.single_task([=]() { + int d1[kSize], d2[kSize]; task_sequence, - response_capacity<2>})> - sot_object; - sot_object.async(d1, d2, NSIZE); - result_acc[0] = sot_object.get(); + decltype(properties{balanced, invocation_capacity})> + arrayAddTask; + for (int i = 0; i < kSize; i++) { + arrayAddTask.async(d1, d2, kSize); + } + + for (int i = 0; i < kSize; i++) { + results_acc[i] = arrayAddTask.get(); + } }); }); myQueue.wait(); - return result; + return 0; } \ No newline at end of file diff --git a/sycl/test/check_device_code/task_sequence_intel_explicit_get.cpp b/sycl/test/check_device_code/task_sequence_intel_explicit_get.cpp index 9bf6b29e48cb3..2064aa789f393 100644 --- a/sycl/test/check_device_code/task_sequence_intel_explicit_get.cpp +++ b/sycl/test/check_device_code/task_sequence_intel_explicit_get.cpp @@ -1,6 +1,6 @@ // RUN: %clangxx -fsycl -fsycl-device-only -S -emit-llvm -Xclang -no-enable-noundef-analysis %s -o - | FileCheck %s -// CHECK: [[TASK_SEQUENCE:%.*]] ={{.*}} call spir_func target("spirv.TaskSequenceINTEL") @_Z31__spirv_TaskSequenceCreateINTEL{{.*}}(ptr{{.*}}@_Z8arrayAdd{{.*}}, i32 -1, i32 -1, i32 2, i32 2) +// CHECK: [[TASK_SEQUENCE:%.*]] ={{.*}} call spir_func target("spirv.TaskSequenceINTEL") @_Z31__spirv_TaskSequenceCreateINTEL{{.*}}(ptr{{.*}}@_Z8arrayAdd{{.*}}, i32 -1, i32 -1, i32 0, i32 128) // CHECK: call spir_func void @_Z30__spirv_TaskSequenceAsyncINTEL{{.*}}(target("spirv.TaskSequenceINTEL") [[TASK_SEQUENCE]], ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 128) // CHECK-COUNT-2: call spir_func i32 @_Z28__spirv_TaskSequenceGetINTEL{{.*}}(target("spirv.TaskSequenceINTEL") [[TASK_SEQUENCE]]) // CHECK: call spir_func void @_Z32__spirv_TaskSequenceReleaseINTEL{{.*}}(target("spirv.TaskSequenceINTEL") [[TASK_SEQUENCE]]) @@ -10,7 +10,7 @@ using namespace sycl::ext::intel::experimental; using namespace sycl::ext::oneapi::experimental; -constexpr int NSIZE = 128; +constexpr int kSize = 128; int arrayAdd(int *data1, int *data2, int N) { int ret = 0; @@ -23,19 +23,23 @@ int arrayAdd(int *data1, int *data2, int N) { int main() { sycl::queue myQueue; - int result = 0; + std::vector results(kSize); myQueue.submit([&](sycl::handler &cgh) { - sycl::buffer result_sycl(&result, sycl::range<1>(1)); - auto result_acc = result_sycl.get_access(cgh); - cgh.single_task([=](sycl::kernel_handler kh) { - int d1[NSIZE], d2[NSIZE]; - task_sequence, - response_capacity<2>})> - sot_object; - sot_object.async(d1, d2, NSIZE); - result_acc[0] = sot_object.get(); + sycl::buffer buffer_results(results); + sycl::accessor results_acc(buffer_results, sycl::write_only, sycl::no_init); + cgh.single_task([=]() { + int d1[kSize], d2[kSize]; + task_sequence})> + arrayAddTask; + for (int i = 0; i < kSize; i++) { + arrayAddTask.async(d1, d2, kSize); + } + + for (int i = 0; i < kSize; i++) { + results_acc[i] = arrayAddTask.get(); + } }); }); myQueue.wait(); - return result; + return 0; } \ No newline at end of file diff --git a/sycl/test/check_device_code/task_sequence_intel_no_explicit_get.cpp b/sycl/test/check_device_code/task_sequence_intel_no_explicit_get.cpp index 03119283c276a..745e28e63d582 100644 --- a/sycl/test/check_device_code/task_sequence_intel_no_explicit_get.cpp +++ b/sycl/test/check_device_code/task_sequence_intel_no_explicit_get.cpp @@ -1,6 +1,6 @@ // RUN: %clangxx -fsycl -fsycl-device-only -S -emit-llvm -Xclang -no-enable-noundef-analysis %s -o - | FileCheck %s -// CHECK: [[TASK_SEQUENCE:%.*]] ={{.*}} call spir_func target("spirv.TaskSequenceINTEL") @_Z31__spirv_TaskSequenceCreateINTEL{{.*}}(ptr{{.*}}@_Z8arrayAdd{{.*}}, i32 -1, i32 -1, i32 2, i32 2) +// CHECK: [[TASK_SEQUENCE:%.*]] ={{.*}} call spir_func target("spirv.TaskSequenceINTEL") @_Z31__spirv_TaskSequenceCreateINTEL{{.*}}(ptr{{.*}}@_Z8arrayAdd{{.*}}, i32 0, i32 1, i32 1, i32 1) // CHECK: call spir_func void @_Z30__spirv_TaskSequenceAsyncINTEL{{.*}}(target("spirv.TaskSequenceINTEL") [[TASK_SEQUENCE]], ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 128) // CHECK-COUNT-1: call spir_func i32 @_Z28__spirv_TaskSequenceGetINTEL{{.*}}(target("spirv.TaskSequenceINTEL") [[TASK_SEQUENCE]]) // CHECK: call spir_func void @_Z32__spirv_TaskSequenceReleaseINTEL{{.*}}(target("spirv.TaskSequenceINTEL") [[TASK_SEQUENCE]]) @@ -10,7 +10,7 @@ using namespace sycl::ext::intel::experimental; using namespace sycl::ext::oneapi::experimental; -constexpr int NSIZE = 128; +constexpr int kSize = 128; int arrayAdd(int *data1, int *data2, int N) { int ret = 0; @@ -25,12 +25,14 @@ int main() { sycl::queue myQueue; myQueue.submit([&](sycl::handler &cgh) { - cgh.single_task([=](sycl::kernel_handler kh) { - int d1[NSIZE], d2[NSIZE]; - task_sequence, - response_capacity<2>})> - sot_object; - sot_object.async(d1, d2, NSIZE); + cgh.single_task([=]() { + int d1[kSize], d2[kSize]; + task_sequence, use_stall_enable_clusters, + invocation_capacity<1>, + response_capacity<1>})> + arrayAddTask; + arrayAddTask.async(d1, d2, kSize); }); }); myQueue.wait();