diff --git a/sycl/test-e2e/KernelFusion/cancel_fusion.cpp b/sycl/test-e2e/KernelFusion/cancel_fusion.cpp index 9be8d8cbbd140..8141d22534fbb 100644 --- a/sycl/test-e2e/KernelFusion/cancel_fusion.cpp +++ b/sycl/test-e2e/KernelFusion/cancel_fusion.cpp @@ -1,8 +1,12 @@ -// RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: %{build} -o %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s // Test cancel fusion +// As fusion is cancelled, this test launches two kernels. +// CHECK-COUNT-2: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include #include diff --git a/sycl/test-e2e/KernelFusion/complete_fusion.cpp b/sycl/test-e2e/KernelFusion/complete_fusion.cpp index f49bb70fc6d9b..e9c79048c77cf 100644 --- a/sycl/test-e2e/KernelFusion/complete_fusion.cpp +++ b/sycl/test-e2e/KernelFusion/complete_fusion.cpp @@ -1,8 +1,12 @@ // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s // Test complete fusion without any internalization +// The two kernels are fused, so only a single, fused kernel is launched. +// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include #include diff --git a/sycl/test-e2e/KernelFusion/cooperative_kernel.cpp b/sycl/test-e2e/KernelFusion/cooperative_kernel.cpp index 0faf05d84e2e0..ed9d4738b5c0d 100644 --- a/sycl/test-e2e/KernelFusion/cooperative_kernel.cpp +++ b/sycl/test-e2e/KernelFusion/cooperative_kernel.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=2 %{run} %t.out 2>&1 | FileCheck %s // Test cooperative kernels are not fused diff --git a/sycl/test-e2e/KernelFusion/different_nd_ranges.cpp b/sycl/test-e2e/KernelFusion/different_nd_ranges.cpp index d9c99f4a9271f..ce7cbd0432b34 100644 --- a/sycl/test-e2e/KernelFusion/different_nd_ranges.cpp +++ b/sycl/test-e2e/KernelFusion/different_nd_ranges.cpp @@ -1,11 +1,13 @@ -// RUN: %{build} -o %t.out -// RUN: env SYCL_RT_WARNING_LEVEL=1 SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS=16:32:64 \ -// RUN: %{run} %t.out 2>&1 | FileCheck %s +// RUN: %{build} %{embed-ir} -o %t.out +// RUN: env SYCL_PI_TRACE=2 env SYCL_RT_WARNING_LEVEL=1 \ +// RUN: SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS=16:32:64 %{run} %t.out 2>&1 \ +// RUN: | FileCheck %s --implicit-check-not "ERROR: JIT compilation for kernel fusion failed with message:" // Test complete fusion of kernels with different ND-ranges. // Kernels with different ND-ranges should be fused. -// CHECK-NOT: Cannot fuse kernels with different offsets or local sizes +// CHECK-COUNT-26: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch #include @@ -252,22 +254,6 @@ int main() { const auto R5 = {5ul}; test({RangeDesc{{10}, R5}, RangeDesc{{20}, R5}, RangeDesc{{30}, R5}}); - // Two 1-D kernels with different global sizes and a 2-D kernel with more - // work-items and specified (equal) local size. - test({RangeDesc{{10}, R2}, RangeDesc{{20}, R2}, RangeDesc{{10, 10}, {2, 1}}}); - - // Three 2-D kernels with different global sizes. - test({RangeDesc{{10, 15}, {2, 5}}, RangeDesc{{20, 10}, {2, 5}}, - RangeDesc{{10, 5}, {2, 5}}}); - - // Three 3-D kernels with different global sizes. - test({RangeDesc{{10, 4, 2}, {5, 2, 1}}, RangeDesc{{20, 2, 4}, {5, 2, 1}}, - RangeDesc{{10, 2, 4}, {5, 2, 1}}}); - - // 1-D, 2-D and 3-D kernels with different global sizes. - test({RangeDesc{{10}, R5}, RangeDesc{{10, 1}, {5, 1}}, - RangeDesc{{10, 1, 1}, {5, 1, 1}}}); - // Test global sizes that trigger the rounded range kernel insertion. // Note that we lower the RR threshold when running this test. test({RangeDesc{67}, RangeDesc{87}, RangeDesc{64}}); diff --git a/sycl/test-e2e/KernelFusion/event_wait_cancel.cpp b/sycl/test-e2e/KernelFusion/event_wait_cancel.cpp index 76b75d8c3a44d..1ad1b9cc5ba25 100644 --- a/sycl/test-e2e/KernelFusion/event_wait_cancel.cpp +++ b/sycl/test-e2e/KernelFusion/event_wait_cancel.cpp @@ -1,5 +1,5 @@ // REQUIRES: aspect-usm_shared_allocations -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: %{run} %t.out // Test validity of events after cancel_fusion. diff --git a/sycl/test-e2e/KernelFusion/internal_explicit_dependency.cpp b/sycl/test-e2e/KernelFusion/internal_explicit_dependency.cpp index 693abaf37f916..67285414e2a26 100644 --- a/sycl/test-e2e/KernelFusion/internal_explicit_dependency.cpp +++ b/sycl/test-e2e/KernelFusion/internal_explicit_dependency.cpp @@ -1,10 +1,14 @@ // REQUIRES: aspect-usm_shared_allocations // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s // Test complete fusion where one kernel in the fusion list specifies an // explicit dependency (via events) on another kernel in the fusion list. +// The two kernels are fused, so only a single, fused kernel is launched. +// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include "fusion_event_test_common.h" #include diff --git a/sycl/test-e2e/KernelFusion/jit_caching_multitarget.cpp b/sycl/test-e2e/KernelFusion/jit_caching_multitarget.cpp index cfb192ecb276b..57c13be6907d6 100644 --- a/sycl/test-e2e/KernelFusion/jit_caching_multitarget.cpp +++ b/sycl/test-e2e/KernelFusion/jit_caching_multitarget.cpp @@ -1,5 +1,5 @@ // REQUIRES: (gpu && (hip || cuda)), cpu -// RUN: %{build} -fsycl-embed-ir -O2 -o %t.out +// RUN: %{build} %{embed-ir} -O2 -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run-unfiltered-devices} %t.out 2>&1 | FileCheck %s --implicit-check-not "WRONG a VALUE" --implicit-check-not "WRONG b VALUE" // Test caching for JIT fused kernels when devices with different architectures diff --git a/sycl/test-e2e/KernelFusion/math_function.cpp b/sycl/test-e2e/KernelFusion/math_function.cpp index 49f2fd04c5b93..bb4a76f9fbfb7 100644 --- a/sycl/test-e2e/KernelFusion/math_function.cpp +++ b/sycl/test-e2e/KernelFusion/math_function.cpp @@ -1,8 +1,12 @@ // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s // Test fusion of a kernel using a math function. +// The two kernels are fused, so only a single, fused kernel is launched. +// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include diff --git a/sycl/test-e2e/KernelFusion/non-kernel-cg.cpp b/sycl/test-e2e/KernelFusion/non-kernel-cg.cpp index 3ae4b5be72712..60a4fea7837df 100644 --- a/sycl/test-e2e/KernelFusion/non-kernel-cg.cpp +++ b/sycl/test-e2e/KernelFusion/non-kernel-cg.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=2 %{run} %t.out 2>&1 | FileCheck %s // Test non-kernel device command groups are not fused diff --git a/sycl/test-e2e/KernelFusion/non_unit_local_size.cpp b/sycl/test-e2e/KernelFusion/non_unit_local_size.cpp index db369b8ee0f37..1cc03bc7ad0c3 100644 --- a/sycl/test-e2e/KernelFusion/non_unit_local_size.cpp +++ b/sycl/test-e2e/KernelFusion/non_unit_local_size.cpp @@ -1,9 +1,13 @@ // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s // Test complete fusion with local internalization specified on the // accessors, where each work-item processes multiple data-items. +// The two kernels are fused, so only a single, fused kernel is launched. +// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include #include diff --git a/sycl/test-e2e/KernelFusion/sync_acc_mem_op.cpp b/sycl/test-e2e/KernelFusion/sync_acc_mem_op.cpp index 825b0818eb344..80a67c7d9dc47 100644 --- a/sycl/test-e2e/KernelFusion/sync_acc_mem_op.cpp +++ b/sycl/test-e2e/KernelFusion/sync_acc_mem_op.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/KernelFusion/sync_buffer_destruction.cpp b/sycl/test-e2e/KernelFusion/sync_buffer_destruction.cpp index a221568e4e4f8..a0564b8d9adc1 100644 --- a/sycl/test-e2e/KernelFusion/sync_buffer_destruction.cpp +++ b/sycl/test-e2e/KernelFusion/sync_buffer_destruction.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/KernelFusion/sync_event_wait.cpp b/sycl/test-e2e/KernelFusion/sync_event_wait.cpp index 9f93852dbcc82..eb70f197cde3d 100644 --- a/sycl/test-e2e/KernelFusion/sync_event_wait.cpp +++ b/sycl/test-e2e/KernelFusion/sync_event_wait.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Test fusion cancellation on event::wait() happening before diff --git a/sycl/test-e2e/KernelFusion/sync_host_accessor.cpp b/sycl/test-e2e/KernelFusion/sync_host_accessor.cpp index 7aa702767d93e..4e45646e8880b 100644 --- a/sycl/test-e2e/KernelFusion/sync_host_accessor.cpp +++ b/sycl/test-e2e/KernelFusion/sync_host_accessor.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/KernelFusion/sync_host_task.cpp b/sycl/test-e2e/KernelFusion/sync_host_task.cpp index 5b1eaaac745b8..40754d4e7a8d9 100644 --- a/sycl/test-e2e/KernelFusion/sync_host_task.cpp +++ b/sycl/test-e2e/KernelFusion/sync_host_task.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/KernelFusion/sync_queue_destruction.cpp b/sycl/test-e2e/KernelFusion/sync_queue_destruction.cpp index 24d50197571c3..a4063e40de041 100644 --- a/sycl/test-e2e/KernelFusion/sync_queue_destruction.cpp +++ b/sycl/test-e2e/KernelFusion/sync_queue_destruction.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/KernelFusion/sync_queue_wait.cpp b/sycl/test-e2e/KernelFusion/sync_queue_wait.cpp index 7cf6197c7d1ec..f073e28933e4a 100644 --- a/sycl/test-e2e/KernelFusion/sync_queue_wait.cpp +++ b/sycl/test-e2e/KernelFusion/sync_queue_wait.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/KernelFusion/sync_second_queue.cpp b/sycl/test-e2e/KernelFusion/sync_second_queue.cpp index 836d9984e824f..66ad1f7561f11 100644 --- a/sycl/test-e2e/KernelFusion/sync_second_queue.cpp +++ b/sycl/test-e2e/KernelFusion/sync_second_queue.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/KernelFusion/sync_two_queues_requirement.cpp b/sycl/test-e2e/KernelFusion/sync_two_queues_requirement.cpp index 9bc209552c35a..969f45625df82 100644 --- a/sycl/test-e2e/KernelFusion/sync_two_queues_requirement.cpp +++ b/sycl/test-e2e/KernelFusion/sync_two_queues_requirement.cpp @@ -1,5 +1,5 @@ // For this test, complete_fusion must be supported. -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Test fusion cancellation for requirement between two active fusions. diff --git a/sycl/test-e2e/KernelFusion/sync_usm_mem_op.cpp b/sycl/test-e2e/KernelFusion/sync_usm_mem_op.cpp index fa0ae3f1b31d9..92fb80af18b44 100644 --- a/sycl/test-e2e/KernelFusion/sync_usm_mem_op.cpp +++ b/sycl/test-e2e/KernelFusion/sync_usm_mem_op.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/KernelFusion/usm_no_dependencies.cpp b/sycl/test-e2e/KernelFusion/usm_no_dependencies.cpp index f3152cee4c413..6ad81d579226d 100644 --- a/sycl/test-e2e/KernelFusion/usm_no_dependencies.cpp +++ b/sycl/test-e2e/KernelFusion/usm_no_dependencies.cpp @@ -1,9 +1,13 @@ // REQUIRES: aspect-usm_shared_allocations // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s // Test complete fusion using USM pointers. +// The two kernels are fused, so only a single, fused kernel is launched. +// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include #include diff --git a/sycl/test-e2e/KernelFusion/work_group_barrier.cpp b/sycl/test-e2e/KernelFusion/work_group_barrier.cpp index 39a5a4cdf57ca..fe09e4fb3050e 100644 --- a/sycl/test-e2e/KernelFusion/work_group_barrier.cpp +++ b/sycl/test-e2e/KernelFusion/work_group_barrier.cpp @@ -1,9 +1,13 @@ // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s // Test complete fusion with a combination of kernels that require a work-group // barrier to be inserted by fusion. +// The two kernels are fused, so only a single, fused kernel is launched. +// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include #include diff --git a/sycl/test-e2e/KernelFusion/wrapped_usm.cpp b/sycl/test-e2e/KernelFusion/wrapped_usm.cpp index 1254026223aa7..f011ad3b297be 100644 --- a/sycl/test-e2e/KernelFusion/wrapped_usm.cpp +++ b/sycl/test-e2e/KernelFusion/wrapped_usm.cpp @@ -1,9 +1,13 @@ // REQUIRES: aspect-usm_shared_allocations // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s // Test complete fusion using an wrapped USM pointer as kernel functor argument. +// The two kernels are fused, so only a single, fused kernel is launched. +// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include #include