Skip to content

Commit

Permalink
[SYCL][Fusion] Improve kernel fusion e2e tests (#14366)
Browse files Browse the repository at this point in the history
Improve the kernel fusion end-to-end tests:
* Remove the flag to embed IR on the CUDA and AMD backends from the tests
that abort execution before requiring access to IR.
* Check for the number of kernel launches to make sure fusion did not
fail silently.

---------

Signed-off-by: Lukas Sommer <lukas.sommer@codeplay.com>
  • Loading branch information
sommerlukas authored Jul 3, 2024
1 parent 403cea0 commit 2af069d
Show file tree
Hide file tree
Showing 23 changed files with 61 additions and 43 deletions.
8 changes: 6 additions & 2 deletions sycl/test-e2e/KernelFusion/cancel_fusion.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{run} %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s

// Test cancel fusion

// As fusion is cancelled, this test launches two kernels.
// CHECK-COUNT-2: piEnqueueKernelLaunch
// CHECK-NOT: piEnqueueKernelLaunch

#include <sycl/detail/core.hpp>
#include <sycl/ext/codeplay/experimental/fusion_wrapper.hpp>
#include <sycl/properties/all_properties.hpp>
Expand Down
6 changes: 5 additions & 1 deletion sycl/test-e2e/KernelFusion/complete_fusion.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{run} %t.out
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s

// Test complete fusion without any internalization

// The two kernels are fused, so only a single, fused kernel is launched.
// CHECK-COUNT-1: piEnqueueKernelLaunch
// CHECK-NOT: piEnqueueKernelLaunch

#include <sycl/detail/core.hpp>
#include <sycl/ext/codeplay/experimental/fusion_wrapper.hpp>
#include <sycl/properties/all_properties.hpp>
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/cooperative_kernel.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=2 %{run} %t.out 2>&1 | FileCheck %s

// Test cooperative kernels are not fused
Expand Down
26 changes: 6 additions & 20 deletions sycl/test-e2e/KernelFusion/different_nd_ranges.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS=16:32:64 \
// RUN: %{run} %t.out 2>&1 | FileCheck %s
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: env SYCL_PI_TRACE=2 env SYCL_RT_WARNING_LEVEL=1 \
// RUN: SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS=16:32:64 %{run} %t.out 2>&1 \
// RUN: | FileCheck %s --implicit-check-not "ERROR: JIT compilation for kernel fusion failed with message:"

// Test complete fusion of kernels with different ND-ranges.

// Kernels with different ND-ranges should be fused.
// CHECK-NOT: Cannot fuse kernels with different offsets or local sizes
// CHECK-COUNT-26: piEnqueueKernelLaunch
// CHECK-NOT: piEnqueueKernelLaunch

#include <sycl/detail/core.hpp>

Expand Down Expand Up @@ -252,22 +254,6 @@ int main() {
const auto R5 = {5ul};
test({RangeDesc{{10}, R5}, RangeDesc{{20}, R5}, RangeDesc{{30}, R5}});

// Two 1-D kernels with different global sizes and a 2-D kernel with more
// work-items and specified (equal) local size.
test({RangeDesc{{10}, R2}, RangeDesc{{20}, R2}, RangeDesc{{10, 10}, {2, 1}}});

// Three 2-D kernels with different global sizes.
test({RangeDesc{{10, 15}, {2, 5}}, RangeDesc{{20, 10}, {2, 5}},
RangeDesc{{10, 5}, {2, 5}}});

// Three 3-D kernels with different global sizes.
test({RangeDesc{{10, 4, 2}, {5, 2, 1}}, RangeDesc{{20, 2, 4}, {5, 2, 1}},
RangeDesc{{10, 2, 4}, {5, 2, 1}}});

// 1-D, 2-D and 3-D kernels with different global sizes.
test({RangeDesc{{10}, R5}, RangeDesc{{10, 1}, {5, 1}},
RangeDesc{{10, 1, 1}, {5, 1, 1}}});

// Test global sizes that trigger the rounded range kernel insertion.
// Note that we lower the RR threshold when running this test.
test({RangeDesc{67}, RangeDesc{87}, RangeDesc{64}});
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/event_wait_cancel.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// REQUIRES: aspect-usm_shared_allocations
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

// Test validity of events after cancel_fusion.
Expand Down
6 changes: 5 additions & 1 deletion sycl/test-e2e/KernelFusion/internal_explicit_dependency.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
// REQUIRES: aspect-usm_shared_allocations
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{run} %t.out
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s

// Test complete fusion where one kernel in the fusion list specifies an
// explicit dependency (via events) on another kernel in the fusion list.

// The two kernels are fused, so only a single, fused kernel is launched.
// CHECK-COUNT-1: piEnqueueKernelLaunch
// CHECK-NOT: piEnqueueKernelLaunch

#include "fusion_event_test_common.h"

#include <sycl/detail/core.hpp>
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/jit_caching_multitarget.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// REQUIRES: (gpu && (hip || cuda)), cpu
// RUN: %{build} -fsycl-embed-ir -O2 -o %t.out
// RUN: %{build} %{embed-ir} -O2 -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run-unfiltered-devices} %t.out 2>&1 | FileCheck %s --implicit-check-not "WRONG a VALUE" --implicit-check-not "WRONG b VALUE"

// Test caching for JIT fused kernels when devices with different architectures
Expand Down
6 changes: 5 additions & 1 deletion sycl/test-e2e/KernelFusion/math_function.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{run} %t.out
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s

// Test fusion of a kernel using a math function.

// The two kernels are fused, so only a single, fused kernel is launched.
// CHECK-COUNT-1: piEnqueueKernelLaunch
// CHECK-NOT: piEnqueueKernelLaunch

#include <sycl/detail/core.hpp>

#include <sycl/builtins.hpp>
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/non-kernel-cg.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=2 %{run} %t.out 2>&1 | FileCheck %s

// Test non-kernel device command groups are not fused
Expand Down
6 changes: 5 additions & 1 deletion sycl/test-e2e/KernelFusion/non_unit_local_size.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{run} %t.out
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s

// Test complete fusion with local internalization specified on the
// accessors, where each work-item processes multiple data-items.

// The two kernels are fused, so only a single, fused kernel is launched.
// CHECK-COUNT-1: piEnqueueKernelLaunch
// CHECK-NOT: piEnqueueKernelLaunch

#include <sycl/detail/core.hpp>
#include <sycl/ext/codeplay/experimental/fusion_wrapper.hpp>
#include <sycl/properties/all_properties.hpp>
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/sync_acc_mem_op.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s

// Windows doesn't yet have full shutdown().
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/sync_buffer_destruction.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s

// Windows doesn't yet have full shutdown().
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/sync_event_wait.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s

// Test fusion cancellation on event::wait() happening before
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/sync_host_accessor.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s

// Windows doesn't yet have full shutdown().
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/sync_host_task.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s

// Windows doesn't yet have full shutdown().
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/sync_queue_destruction.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s

// Windows doesn't yet have full shutdown().
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/sync_queue_wait.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s

// Windows doesn't yet have full shutdown().
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/sync_second_queue.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s

// Windows doesn't yet have full shutdown().
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/sync_two_queues_requirement.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// For this test, complete_fusion must be supported.
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s

// Test fusion cancellation for requirement between two active fusions.
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/sync_usm_mem_op.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s
// Windows doesn't yet have full shutdown().
// UNSUPPORTED: ze_debug && windows
Expand Down
6 changes: 5 additions & 1 deletion sycl/test-e2e/KernelFusion/usm_no_dependencies.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
// REQUIRES: aspect-usm_shared_allocations
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{run} %t.out
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s

// Test complete fusion using USM pointers.

// The two kernels are fused, so only a single, fused kernel is launched.
// CHECK-COUNT-1: piEnqueueKernelLaunch
// CHECK-NOT: piEnqueueKernelLaunch

#include <sycl/detail/core.hpp>
#include <sycl/ext/codeplay/experimental/fusion_wrapper.hpp>
#include <sycl/properties/all_properties.hpp>
Expand Down
6 changes: 5 additions & 1 deletion sycl/test-e2e/KernelFusion/work_group_barrier.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{run} %t.out
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s

// Test complete fusion with a combination of kernels that require a work-group
// barrier to be inserted by fusion.

// The two kernels are fused, so only a single, fused kernel is launched.
// CHECK-COUNT-1: piEnqueueKernelLaunch
// CHECK-NOT: piEnqueueKernelLaunch

#include <sycl/detail/core.hpp>
#include <sycl/ext/codeplay/experimental/fusion_wrapper.hpp>
#include <sycl/properties/all_properties.hpp>
Expand Down
6 changes: 5 additions & 1 deletion sycl/test-e2e/KernelFusion/wrapped_usm.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
// REQUIRES: aspect-usm_shared_allocations
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{run} %t.out
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s

// Test complete fusion using a wrapped USM pointer as kernel functor argument.

// The two kernels are fused, so only a single, fused kernel is launched.
// CHECK-COUNT-1: piEnqueueKernelLaunch
// CHECK-NOT: piEnqueueKernelLaunch

#include <sycl/detail/core.hpp>
#include <sycl/ext/codeplay/experimental/fusion_wrapper.hpp>
#include <sycl/properties/all_properties.hpp>
Expand Down

0 comments on commit 2af069d

Please sign in to comment.