diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 158e3978e70e7..e24b1c034275f 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -211,7 +211,9 @@ Expected createOutputFile(const Twine &Prefix, StringRef Extension) { std::scoped_lock Lock(TempFilesMutex); SmallString<128> OutputFile; if (SaveTemps) { - (Prefix + "." + Extension).toNullTerminatedStringRef(OutputFile); + // Generate a unique path name without creating a file + sys::fs::createUniquePath(Prefix + "-%%%%%%." + Extension, OutputFile, + /*MakeAbsolute=*/false); } else { if (std::error_code EC = sys::fs::createTemporaryFile(Prefix, Extension, OutputFile)) diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 12f99f4d5a2ae..f140359f729ab 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -29,7 +29,6 @@ #include "llvm/CodeGen/DwarfEHPrepare.h" #include "llvm/CodeGen/ExpandMemCmp.h" #include "llvm/CodeGen/ExpandReductions.h" -#include "llvm/CodeGen/FPBuiltinFnSelection.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GlobalMerge.h" #include "llvm/CodeGen/IndirectBrExpand.h" @@ -65,6 +64,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/CFGuard.h" #include "llvm/Transforms/Scalar/ConstantHoisting.h" +#include "llvm/Transforms/Scalar/FPBuiltinFnSelection.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Scalar/LoopStrengthReduce.h" #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" diff --git a/llvm/include/llvm/CodeGen/FPBuiltinFnSelection.h b/llvm/include/llvm/Transforms/Scalar/FPBuiltinFnSelection.h similarity index 74% rename from llvm/include/llvm/CodeGen/FPBuiltinFnSelection.h rename to llvm/include/llvm/Transforms/Scalar/FPBuiltinFnSelection.h index 9a27dbe3da38f..228137cb38a75 100644 --- a/llvm/include/llvm/CodeGen/FPBuiltinFnSelection.h +++ b/llvm/include/llvm/Transforms/Scalar/FPBuiltinFnSelection.h @@ -1,4 +1,4 @@ -//===- FPBuiltinFnSelection.h - Pre-ISel intrinsic lowering pass ----------===// +//===- FPBuiltinFnSelection.h - fpbuiltin intrinsic lowering pass ---------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -10,8 +10,8 @@ // llvm.fpbuiltin.* intrinsics. // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_FPBUILTINFNSELECTION_H -#define LLVM_CODEGEN_FPBUILTINFNSELECTION_H +#ifndef LLVM_TRANSFORMS_SCALAR_FPBUILTINFNSELECTION_H +#define LLVM_TRANSFORMS_SCALAR_FPBUILTINFNSELECTION_H #include "llvm/IR/PassManager.h" @@ -25,4 +25,4 @@ struct FPBuiltinFnSelectionPass : PassInfoMixin { } // end namespace llvm -#endif // LLVM_CODEGEN_FPBUILTINFNSELECTION_H +#endif // LLVM_TRANSFORMS_SCALAR_FPBUILTINFNSELECTION_H diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 2658b70309674..77bf1b165d0cf 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -63,7 +63,6 @@ add_llvm_component_library(LLVMCodeGen ExpandVectorPredication.cpp FaultMaps.cpp FEntryInserter.cpp - FPBuiltinFnSelection.cpp FinalizeISel.cpp FixupStatepointCallerSaved.cpp FuncletLayout.cpp diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt index ba09ebf8b04c4..abd119d43df26 100644 --- a/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -14,6 +14,7 @@ add_llvm_component_library(LLVMScalarOpts EarlyCSE.cpp FlattenCFGPass.cpp Float2Int.cpp + FPBuiltinFnSelection.cpp GuardWidening.cpp GVN.cpp GVNHoist.cpp @@ -97,4 +98,5 @@ add_llvm_component_library(LLVMScalarOpts InstCombine Support TransformUtils + TargetParser ) diff --git a/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp b/llvm/lib/Transforms/Scalar/FPBuiltinFnSelection.cpp similarity index 98% rename from llvm/lib/CodeGen/FPBuiltinFnSelection.cpp rename to llvm/lib/Transforms/Scalar/FPBuiltinFnSelection.cpp index a4b6a34a2326a..dd423b42ab0ec 100644 --- a/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp +++ b/llvm/lib/Transforms/Scalar/FPBuiltinFnSelection.cpp @@ -1,4 +1,4 @@ -//===- FPBuiltinFnSelection.cpp - Pre-ISel intrinsic lowering pass --------===// +//===- FPBuiltinFnSelection.cpp - fpbuiltin intrinsic lowering pass -------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/FPBuiltinFnSelection.h" +#include "llvm/Transforms/Scalar/FPBuiltinFnSelection.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" diff --git a/sycl/test-e2e/KernelFusion/cancel_fusion.cpp b/sycl/test-e2e/KernelFusion/cancel_fusion.cpp index 9be8d8cbbd140..8141d22534fbb 100644 --- a/sycl/test-e2e/KernelFusion/cancel_fusion.cpp +++ b/sycl/test-e2e/KernelFusion/cancel_fusion.cpp @@ -1,8 +1,12 @@ -// RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: %{build} -o %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s // Test cancel fusion +// As fusion is cancelled, this test launches two kernels. +// CHECK-COUNT-2: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include #include diff --git a/sycl/test-e2e/KernelFusion/complete_fusion.cpp b/sycl/test-e2e/KernelFusion/complete_fusion.cpp index f49bb70fc6d9b..e9c79048c77cf 100644 --- a/sycl/test-e2e/KernelFusion/complete_fusion.cpp +++ b/sycl/test-e2e/KernelFusion/complete_fusion.cpp @@ -1,8 +1,12 @@ // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s // Test complete fusion without any internalization +// The two kernels are fused, so only a single, fused kernel is launched. +// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include #include diff --git a/sycl/test-e2e/KernelFusion/cooperative_kernel.cpp b/sycl/test-e2e/KernelFusion/cooperative_kernel.cpp index 0faf05d84e2e0..ed9d4738b5c0d 100644 --- a/sycl/test-e2e/KernelFusion/cooperative_kernel.cpp +++ b/sycl/test-e2e/KernelFusion/cooperative_kernel.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=2 %{run} %t.out 2>&1 | FileCheck %s // Test cooperative kernels are not fused diff --git a/sycl/test-e2e/KernelFusion/different_nd_ranges.cpp b/sycl/test-e2e/KernelFusion/different_nd_ranges.cpp index d9c99f4a9271f..ce7cbd0432b34 100644 --- a/sycl/test-e2e/KernelFusion/different_nd_ranges.cpp +++ b/sycl/test-e2e/KernelFusion/different_nd_ranges.cpp @@ -1,11 +1,13 @@ -// RUN: %{build} -o %t.out -// RUN: env SYCL_RT_WARNING_LEVEL=1 SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS=16:32:64 \ -// RUN: %{run} %t.out 2>&1 | FileCheck %s +// RUN: %{build} %{embed-ir} -o %t.out +// RUN: env SYCL_PI_TRACE=2 env SYCL_RT_WARNING_LEVEL=1 \ +// RUN: SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS=16:32:64 %{run} %t.out 2>&1 \ +// RUN: | FileCheck %s --implicit-check-not "ERROR: JIT compilation for kernel fusion failed with message:" // Test complete fusion of kernels with different ND-ranges. // Kernels with different ND-ranges should be fused. -// CHECK-NOT: Cannot fuse kernels with different offsets or local sizes +// CHECK-COUNT-26: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch #include @@ -252,22 +254,6 @@ int main() { const auto R5 = {5ul}; test({RangeDesc{{10}, R5}, RangeDesc{{20}, R5}, RangeDesc{{30}, R5}}); - // Two 1-D kernels with different global sizes and a 2-D kernel with more - // work-items and specified (equal) local size. - test({RangeDesc{{10}, R2}, RangeDesc{{20}, R2}, RangeDesc{{10, 10}, {2, 1}}}); - - // Three 2-D kernels with different global sizes. - test({RangeDesc{{10, 15}, {2, 5}}, RangeDesc{{20, 10}, {2, 5}}, - RangeDesc{{10, 5}, {2, 5}}}); - - // Three 3-D kernels with different global sizes. - test({RangeDesc{{10, 4, 2}, {5, 2, 1}}, RangeDesc{{20, 2, 4}, {5, 2, 1}}, - RangeDesc{{10, 2, 4}, {5, 2, 1}}}); - - // 1-D, 2-D and 3-D kernels with different global sizes. - test({RangeDesc{{10}, R5}, RangeDesc{{10, 1}, {5, 1}}, - RangeDesc{{10, 1, 1}, {5, 1, 1}}}); - // Test global sizes that trigger the rounded range kernel insertion. // Note that we lower the RR threshold when running this test. test({RangeDesc{67}, RangeDesc{87}, RangeDesc{64}}); diff --git a/sycl/test-e2e/KernelFusion/event_wait_cancel.cpp b/sycl/test-e2e/KernelFusion/event_wait_cancel.cpp index 76b75d8c3a44d..1ad1b9cc5ba25 100644 --- a/sycl/test-e2e/KernelFusion/event_wait_cancel.cpp +++ b/sycl/test-e2e/KernelFusion/event_wait_cancel.cpp @@ -1,5 +1,5 @@ // REQUIRES: aspect-usm_shared_allocations -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: %{run} %t.out // Test validity of events after cancel_fusion. diff --git a/sycl/test-e2e/KernelFusion/internal_explicit_dependency.cpp b/sycl/test-e2e/KernelFusion/internal_explicit_dependency.cpp index 693abaf37f916..67285414e2a26 100644 --- a/sycl/test-e2e/KernelFusion/internal_explicit_dependency.cpp +++ b/sycl/test-e2e/KernelFusion/internal_explicit_dependency.cpp @@ -1,10 +1,14 @@ // REQUIRES: aspect-usm_shared_allocations // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s // Test complete fusion where one kernel in the fusion list specifies an // explicit dependency (via events) on another kernel in the fusion list. +// The two kernels are fused, so only a single, fused kernel is launched. +// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include "fusion_event_test_common.h" #include diff --git a/sycl/test-e2e/KernelFusion/jit_caching_multitarget.cpp b/sycl/test-e2e/KernelFusion/jit_caching_multitarget.cpp index cfb192ecb276b..57c13be6907d6 100644 --- a/sycl/test-e2e/KernelFusion/jit_caching_multitarget.cpp +++ b/sycl/test-e2e/KernelFusion/jit_caching_multitarget.cpp @@ -1,5 +1,5 @@ // REQUIRES: (gpu && (hip || cuda)), cpu -// RUN: %{build} -fsycl-embed-ir -O2 -o %t.out +// RUN: %{build} %{embed-ir} -O2 -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run-unfiltered-devices} %t.out 2>&1 | FileCheck %s --implicit-check-not "WRONG a VALUE" --implicit-check-not "WRONG b VALUE" // Test caching for JIT fused kernels when devices with different architectures diff --git a/sycl/test-e2e/KernelFusion/math_function.cpp b/sycl/test-e2e/KernelFusion/math_function.cpp index 49f2fd04c5b93..bb4a76f9fbfb7 100644 --- a/sycl/test-e2e/KernelFusion/math_function.cpp +++ b/sycl/test-e2e/KernelFusion/math_function.cpp @@ -1,8 +1,12 @@ // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s // Test fusion of a kernel using a math function. +// The two kernels are fused, so only a single, fused kernel is launched. +// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include diff --git a/sycl/test-e2e/KernelFusion/non-kernel-cg.cpp b/sycl/test-e2e/KernelFusion/non-kernel-cg.cpp index 3ae4b5be72712..60a4fea7837df 100644 --- a/sycl/test-e2e/KernelFusion/non-kernel-cg.cpp +++ b/sycl/test-e2e/KernelFusion/non-kernel-cg.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=2 %{run} %t.out 2>&1 | FileCheck %s // Test non-kernel device command groups are not fused diff --git a/sycl/test-e2e/KernelFusion/non_unit_local_size.cpp b/sycl/test-e2e/KernelFusion/non_unit_local_size.cpp index db369b8ee0f37..1cc03bc7ad0c3 100644 --- a/sycl/test-e2e/KernelFusion/non_unit_local_size.cpp +++ b/sycl/test-e2e/KernelFusion/non_unit_local_size.cpp @@ -1,9 +1,13 @@ // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s // Test complete fusion with local internalization specified on the // accessors, where each work-item processes multiple data-items. +// The two kernels are fused, so only a single, fused kernel is launched. +// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include #include diff --git a/sycl/test-e2e/KernelFusion/sync_acc_mem_op.cpp b/sycl/test-e2e/KernelFusion/sync_acc_mem_op.cpp index 825b0818eb344..80a67c7d9dc47 100644 --- a/sycl/test-e2e/KernelFusion/sync_acc_mem_op.cpp +++ b/sycl/test-e2e/KernelFusion/sync_acc_mem_op.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/KernelFusion/sync_buffer_destruction.cpp b/sycl/test-e2e/KernelFusion/sync_buffer_destruction.cpp index a221568e4e4f8..a0564b8d9adc1 100644 --- a/sycl/test-e2e/KernelFusion/sync_buffer_destruction.cpp +++ b/sycl/test-e2e/KernelFusion/sync_buffer_destruction.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/KernelFusion/sync_event_wait.cpp b/sycl/test-e2e/KernelFusion/sync_event_wait.cpp index 9f93852dbcc82..eb70f197cde3d 100644 --- a/sycl/test-e2e/KernelFusion/sync_event_wait.cpp +++ b/sycl/test-e2e/KernelFusion/sync_event_wait.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Test fusion cancellation on event::wait() happening before diff --git a/sycl/test-e2e/KernelFusion/sync_host_accessor.cpp b/sycl/test-e2e/KernelFusion/sync_host_accessor.cpp index 7aa702767d93e..4e45646e8880b 100644 --- a/sycl/test-e2e/KernelFusion/sync_host_accessor.cpp +++ b/sycl/test-e2e/KernelFusion/sync_host_accessor.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/KernelFusion/sync_host_task.cpp b/sycl/test-e2e/KernelFusion/sync_host_task.cpp index 5b1eaaac745b8..40754d4e7a8d9 100644 --- a/sycl/test-e2e/KernelFusion/sync_host_task.cpp +++ b/sycl/test-e2e/KernelFusion/sync_host_task.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/KernelFusion/sync_queue_destruction.cpp b/sycl/test-e2e/KernelFusion/sync_queue_destruction.cpp index 24d50197571c3..a4063e40de041 100644 --- a/sycl/test-e2e/KernelFusion/sync_queue_destruction.cpp +++ b/sycl/test-e2e/KernelFusion/sync_queue_destruction.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/KernelFusion/sync_queue_wait.cpp b/sycl/test-e2e/KernelFusion/sync_queue_wait.cpp index 7cf6197c7d1ec..f073e28933e4a 100644 --- a/sycl/test-e2e/KernelFusion/sync_queue_wait.cpp +++ b/sycl/test-e2e/KernelFusion/sync_queue_wait.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/KernelFusion/sync_second_queue.cpp b/sycl/test-e2e/KernelFusion/sync_second_queue.cpp index 836d9984e824f..66ad1f7561f11 100644 --- a/sycl/test-e2e/KernelFusion/sync_second_queue.cpp +++ b/sycl/test-e2e/KernelFusion/sync_second_queue.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/KernelFusion/sync_two_queues_requirement.cpp b/sycl/test-e2e/KernelFusion/sync_two_queues_requirement.cpp index 9bc209552c35a..969f45625df82 100644 --- a/sycl/test-e2e/KernelFusion/sync_two_queues_requirement.cpp +++ b/sycl/test-e2e/KernelFusion/sync_two_queues_requirement.cpp @@ -1,5 +1,5 @@ // For this test, complete_fusion must be supported. -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Test fusion cancellation for requirement between two active fusions. diff --git a/sycl/test-e2e/KernelFusion/sync_usm_mem_op.cpp b/sycl/test-e2e/KernelFusion/sync_usm_mem_op.cpp index fa0ae3f1b31d9..92fb80af18b44 100644 --- a/sycl/test-e2e/KernelFusion/sync_usm_mem_op.cpp +++ b/sycl/test-e2e/KernelFusion/sync_usm_mem_op.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/KernelFusion/usm_no_dependencies.cpp b/sycl/test-e2e/KernelFusion/usm_no_dependencies.cpp index f3152cee4c413..6ad81d579226d 100644 --- a/sycl/test-e2e/KernelFusion/usm_no_dependencies.cpp +++ b/sycl/test-e2e/KernelFusion/usm_no_dependencies.cpp @@ -1,9 +1,13 @@ // REQUIRES: aspect-usm_shared_allocations // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s // Test complete fusion using USM pointers. +// The two kernels are fused, so only a single, fused kernel is launched. +// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include #include diff --git a/sycl/test-e2e/KernelFusion/work_group_barrier.cpp b/sycl/test-e2e/KernelFusion/work_group_barrier.cpp index 39a5a4cdf57ca..fe09e4fb3050e 100644 --- a/sycl/test-e2e/KernelFusion/work_group_barrier.cpp +++ b/sycl/test-e2e/KernelFusion/work_group_barrier.cpp @@ -1,9 +1,13 @@ // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s // Test complete fusion with a combination of kernels that require a work-group // barrier to be inserted by fusion. +// The two kernels are fused, so only a single, fused kernel is launched. +// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include #include diff --git a/sycl/test-e2e/KernelFusion/wrapped_usm.cpp b/sycl/test-e2e/KernelFusion/wrapped_usm.cpp index 1254026223aa7..f011ad3b297be 100644 --- a/sycl/test-e2e/KernelFusion/wrapped_usm.cpp +++ b/sycl/test-e2e/KernelFusion/wrapped_usm.cpp @@ -1,9 +1,13 @@ // REQUIRES: aspect-usm_shared_allocations // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s // Test complete fusion using an wrapped USM pointer as kernel functor argument. +// The two kernels are fused, so only a single, fused kernel is launched. +// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include #include