From 66a019967695807ee37b80c3b0fca7b9d09bff26 Mon Sep 17 00:00:00 2001 From: Jason Li Date: Tue, 2 Jul 2024 16:31:50 -0400 Subject: [PATCH 1/4] [SYCL][ClangLinkerWrapper] Make all temp files use different names (#14243) Resolve issue with -save-temps regarding a temp file being linked to itself. --------- Co-authored-by: Marcos Maronas --- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 158e3978e70e7..e24b1c034275f 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -211,7 +211,9 @@ Expected createOutputFile(const Twine &Prefix, StringRef Extension) { std::scoped_lock Lock(TempFilesMutex); SmallString<128> OutputFile; if (SaveTemps) { - (Prefix + "." + Extension).toNullTerminatedStringRef(OutputFile); + // Generate a unique path name without creating a file + sys::fs::createUniquePath(Prefix + "-%%%%%%." + Extension, OutputFile, + /*MakeAbsolute=*/false); } else { if (std::error_code EC = sys::fs::createTemporaryFile(Prefix, Extension, OutputFile)) From 403cea0f3b48c891d9fff81ad1505b15b26e663d Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Tue, 2 Jul 2024 21:39:01 -0700 Subject: [PATCH 2/4] [NFC] Move FPBuiltinFnSelection pass to Transforms (#14339) This moves the FPBuiltinFnSelection pass to the llvm/lib/Transforms/Scalar directory. This is needed to enable future changes that will run this pass as part of the main pipeline for device compilation. With this pass in CodeGen, the slibs build would fail. 
--- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 2 +- .../{CodeGen => Transforms/Scalar}/FPBuiltinFnSelection.h | 8 ++++---- llvm/lib/CodeGen/CMakeLists.txt | 1 - llvm/lib/Transforms/Scalar/CMakeLists.txt | 1 + .../Scalar}/FPBuiltinFnSelection.cpp | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) rename llvm/include/llvm/{CodeGen => Transforms/Scalar}/FPBuiltinFnSelection.h (74%) rename llvm/lib/{CodeGen => Transforms/Scalar}/FPBuiltinFnSelection.cpp (98%) diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 12f99f4d5a2ae..f140359f729ab 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -29,7 +29,6 @@ #include "llvm/CodeGen/DwarfEHPrepare.h" #include "llvm/CodeGen/ExpandMemCmp.h" #include "llvm/CodeGen/ExpandReductions.h" -#include "llvm/CodeGen/FPBuiltinFnSelection.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GlobalMerge.h" #include "llvm/CodeGen/IndirectBrExpand.h" @@ -65,6 +64,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/CFGuard.h" #include "llvm/Transforms/Scalar/ConstantHoisting.h" +#include "llvm/Transforms/Scalar/FPBuiltinFnSelection.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Scalar/LoopStrengthReduce.h" #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" diff --git a/llvm/include/llvm/CodeGen/FPBuiltinFnSelection.h b/llvm/include/llvm/Transforms/Scalar/FPBuiltinFnSelection.h similarity index 74% rename from llvm/include/llvm/CodeGen/FPBuiltinFnSelection.h rename to llvm/include/llvm/Transforms/Scalar/FPBuiltinFnSelection.h index 9a27dbe3da38f..228137cb38a75 100644 --- a/llvm/include/llvm/CodeGen/FPBuiltinFnSelection.h +++ b/llvm/include/llvm/Transforms/Scalar/FPBuiltinFnSelection.h @@ -1,4 +1,4 @@ -//===- FPBuiltinFnSelection.h - Pre-ISel intrinsic lowering pass ----------===// +//===- FPBuiltinFnSelection.h - fpbuiltin intrinsic lowering 
pass ---------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -10,8 +10,8 @@ // llvm.fpbuiltin.* intrinsics. // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_FPBUILTINFNSELECTION_H -#define LLVM_CODEGEN_FPBUILTINFNSELECTION_H +#ifndef LLVM_TRANSFORMS_SCALAR_FPBUILTINFNSELECTION_H +#define LLVM_TRANSFORMS_SCALAR_FPBUILTINFNSELECTION_H #include "llvm/IR/PassManager.h" @@ -25,4 +25,4 @@ struct FPBuiltinFnSelectionPass : PassInfoMixin { } // end namespace llvm -#endif // LLVM_CODEGEN_FPBUILTINFNSELECTION_H +#endif // LLVM_TRANSFORMS_SCALAR_FPBUILTINFNSELECTION_H diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 2658b70309674..77bf1b165d0cf 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -63,7 +63,6 @@ add_llvm_component_library(LLVMCodeGen ExpandVectorPredication.cpp FaultMaps.cpp FEntryInserter.cpp - FPBuiltinFnSelection.cpp FinalizeISel.cpp FixupStatepointCallerSaved.cpp FuncletLayout.cpp diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt index ba09ebf8b04c4..079926a65b10c 100644 --- a/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -14,6 +14,7 @@ add_llvm_component_library(LLVMScalarOpts EarlyCSE.cpp FlattenCFGPass.cpp Float2Int.cpp + FPBuiltinFnSelection.cpp GuardWidening.cpp GVN.cpp GVNHoist.cpp diff --git a/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp b/llvm/lib/Transforms/Scalar/FPBuiltinFnSelection.cpp similarity index 98% rename from llvm/lib/CodeGen/FPBuiltinFnSelection.cpp rename to llvm/lib/Transforms/Scalar/FPBuiltinFnSelection.cpp index a4b6a34a2326a..dd423b42ab0ec 100644 --- a/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp +++ b/llvm/lib/Transforms/Scalar/FPBuiltinFnSelection.cpp @@ -1,4 +1,4 @@ -//===- FPBuiltinFnSelection.cpp - Pre-ISel 
intrinsic lowering pass --------===// +//===- FPBuiltinFnSelection.cpp - fpbuiltin intrinsic lowering pass -------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/FPBuiltinFnSelection.h" +#include "llvm/Transforms/Scalar/FPBuiltinFnSelection.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" From 2af069de5e8967e40c768f9f28171ca4dcf74491 Mon Sep 17 00:00:00 2001 From: Lukas Sommer Date: Wed, 3 Jul 2024 09:46:27 +0200 Subject: [PATCH 3/4] [SYCL][Fusion] Improve kernel fusion e2e tests (#14366) Improve the kernel fusion end-to-end tests: * Remove the flag to embed IR on CUDA and AMD backend from the tests that abort execution before requiring access to IR. * Check for the number of kernel launches to make sure fusion did not fail silently. 
--------- Signed-off-by: Lukas Sommer --- sycl/test-e2e/KernelFusion/cancel_fusion.cpp | 8 ++++-- .../test-e2e/KernelFusion/complete_fusion.cpp | 6 ++++- .../KernelFusion/cooperative_kernel.cpp | 2 +- .../KernelFusion/different_nd_ranges.cpp | 26 +++++-------------- .../KernelFusion/event_wait_cancel.cpp | 2 +- .../internal_explicit_dependency.cpp | 6 ++++- .../KernelFusion/jit_caching_multitarget.cpp | 2 +- sycl/test-e2e/KernelFusion/math_function.cpp | 6 ++++- sycl/test-e2e/KernelFusion/non-kernel-cg.cpp | 2 +- .../KernelFusion/non_unit_local_size.cpp | 6 ++++- .../test-e2e/KernelFusion/sync_acc_mem_op.cpp | 2 +- .../KernelFusion/sync_buffer_destruction.cpp | 2 +- .../test-e2e/KernelFusion/sync_event_wait.cpp | 2 +- .../KernelFusion/sync_host_accessor.cpp | 2 +- sycl/test-e2e/KernelFusion/sync_host_task.cpp | 2 +- .../KernelFusion/sync_queue_destruction.cpp | 2 +- .../test-e2e/KernelFusion/sync_queue_wait.cpp | 2 +- .../KernelFusion/sync_second_queue.cpp | 2 +- .../sync_two_queues_requirement.cpp | 2 +- .../test-e2e/KernelFusion/sync_usm_mem_op.cpp | 2 +- .../KernelFusion/usm_no_dependencies.cpp | 6 ++++- .../KernelFusion/work_group_barrier.cpp | 6 ++++- sycl/test-e2e/KernelFusion/wrapped_usm.cpp | 6 ++++- 23 files changed, 61 insertions(+), 43 deletions(-) diff --git a/sycl/test-e2e/KernelFusion/cancel_fusion.cpp b/sycl/test-e2e/KernelFusion/cancel_fusion.cpp index 9be8d8cbbd140..8141d22534fbb 100644 --- a/sycl/test-e2e/KernelFusion/cancel_fusion.cpp +++ b/sycl/test-e2e/KernelFusion/cancel_fusion.cpp @@ -1,8 +1,12 @@ -// RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: %{build} -o %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s // Test cancel fusion +// As fusion is cancelled, this test launches two kernels. 
+// CHECK-COUNT-2: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include #include diff --git a/sycl/test-e2e/KernelFusion/complete_fusion.cpp b/sycl/test-e2e/KernelFusion/complete_fusion.cpp index f49bb70fc6d9b..e9c79048c77cf 100644 --- a/sycl/test-e2e/KernelFusion/complete_fusion.cpp +++ b/sycl/test-e2e/KernelFusion/complete_fusion.cpp @@ -1,8 +1,12 @@ // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s // Test complete fusion without any internalization +// The two kernels are fused, so only a single, fused kernel is launched. +// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include #include diff --git a/sycl/test-e2e/KernelFusion/cooperative_kernel.cpp b/sycl/test-e2e/KernelFusion/cooperative_kernel.cpp index 0faf05d84e2e0..ed9d4738b5c0d 100644 --- a/sycl/test-e2e/KernelFusion/cooperative_kernel.cpp +++ b/sycl/test-e2e/KernelFusion/cooperative_kernel.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=2 %{run} %t.out 2>&1 | FileCheck %s // Test cooperative kernels are not fused diff --git a/sycl/test-e2e/KernelFusion/different_nd_ranges.cpp b/sycl/test-e2e/KernelFusion/different_nd_ranges.cpp index d9c99f4a9271f..ce7cbd0432b34 100644 --- a/sycl/test-e2e/KernelFusion/different_nd_ranges.cpp +++ b/sycl/test-e2e/KernelFusion/different_nd_ranges.cpp @@ -1,11 +1,13 @@ -// RUN: %{build} -o %t.out -// RUN: env SYCL_RT_WARNING_LEVEL=1 SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS=16:32:64 \ -// RUN: %{run} %t.out 2>&1 | FileCheck %s +// RUN: %{build} %{embed-ir} -o %t.out +// RUN: env SYCL_PI_TRACE=2 env SYCL_RT_WARNING_LEVEL=1 \ +// RUN: SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS=16:32:64 %{run} %t.out 2>&1 \ +// RUN: | FileCheck %s --implicit-check-not "ERROR: JIT compilation for kernel fusion failed with message:" // Test complete fusion of kernels with different 
ND-ranges. // Kernels with different ND-ranges should be fused. -// CHECK-NOT: Cannot fuse kernels with different offsets or local sizes +// CHECK-COUNT-26: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch #include @@ -252,22 +254,6 @@ int main() { const auto R5 = {5ul}; test({RangeDesc{{10}, R5}, RangeDesc{{20}, R5}, RangeDesc{{30}, R5}}); - // Two 1-D kernels with different global sizes and a 2-D kernel with more - // work-items and specified (equal) local size. - test({RangeDesc{{10}, R2}, RangeDesc{{20}, R2}, RangeDesc{{10, 10}, {2, 1}}}); - - // Three 2-D kernels with different global sizes. - test({RangeDesc{{10, 15}, {2, 5}}, RangeDesc{{20, 10}, {2, 5}}, - RangeDesc{{10, 5}, {2, 5}}}); - - // Three 3-D kernels with different global sizes. - test({RangeDesc{{10, 4, 2}, {5, 2, 1}}, RangeDesc{{20, 2, 4}, {5, 2, 1}}, - RangeDesc{{10, 2, 4}, {5, 2, 1}}}); - - // 1-D, 2-D and 3-D kernels with different global sizes. - test({RangeDesc{{10}, R5}, RangeDesc{{10, 1}, {5, 1}}, - RangeDesc{{10, 1, 1}, {5, 1, 1}}}); - // Test global sizes that trigger the rounded range kernel insertion. // Note that we lower the RR threshold when running this test. test({RangeDesc{67}, RangeDesc{87}, RangeDesc{64}}); diff --git a/sycl/test-e2e/KernelFusion/event_wait_cancel.cpp b/sycl/test-e2e/KernelFusion/event_wait_cancel.cpp index 76b75d8c3a44d..1ad1b9cc5ba25 100644 --- a/sycl/test-e2e/KernelFusion/event_wait_cancel.cpp +++ b/sycl/test-e2e/KernelFusion/event_wait_cancel.cpp @@ -1,5 +1,5 @@ // REQUIRES: aspect-usm_shared_allocations -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: %{run} %t.out // Test validity of events after cancel_fusion. 
diff --git a/sycl/test-e2e/KernelFusion/internal_explicit_dependency.cpp b/sycl/test-e2e/KernelFusion/internal_explicit_dependency.cpp index 693abaf37f916..67285414e2a26 100644 --- a/sycl/test-e2e/KernelFusion/internal_explicit_dependency.cpp +++ b/sycl/test-e2e/KernelFusion/internal_explicit_dependency.cpp @@ -1,10 +1,14 @@ // REQUIRES: aspect-usm_shared_allocations // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s // Test complete fusion where one kernel in the fusion list specifies an // explicit dependency (via events) on another kernel in the fusion list. +// The two kernels are fused, so only a single, fused kernel is launched. +// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include "fusion_event_test_common.h" #include diff --git a/sycl/test-e2e/KernelFusion/jit_caching_multitarget.cpp b/sycl/test-e2e/KernelFusion/jit_caching_multitarget.cpp index cfb192ecb276b..57c13be6907d6 100644 --- a/sycl/test-e2e/KernelFusion/jit_caching_multitarget.cpp +++ b/sycl/test-e2e/KernelFusion/jit_caching_multitarget.cpp @@ -1,5 +1,5 @@ // REQUIRES: (gpu && (hip || cuda)), cpu -// RUN: %{build} -fsycl-embed-ir -O2 -o %t.out +// RUN: %{build} %{embed-ir} -O2 -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run-unfiltered-devices} %t.out 2>&1 | FileCheck %s --implicit-check-not "WRONG a VALUE" --implicit-check-not "WRONG b VALUE" // Test caching for JIT fused kernels when devices with different architectures diff --git a/sycl/test-e2e/KernelFusion/math_function.cpp b/sycl/test-e2e/KernelFusion/math_function.cpp index 49f2fd04c5b93..bb4a76f9fbfb7 100644 --- a/sycl/test-e2e/KernelFusion/math_function.cpp +++ b/sycl/test-e2e/KernelFusion/math_function.cpp @@ -1,8 +1,12 @@ // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s // Test fusion of a kernel using a math function. 
+// The two kernels are fused, so only a single, fused kernel is launched. +// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include diff --git a/sycl/test-e2e/KernelFusion/non-kernel-cg.cpp b/sycl/test-e2e/KernelFusion/non-kernel-cg.cpp index 3ae4b5be72712..60a4fea7837df 100644 --- a/sycl/test-e2e/KernelFusion/non-kernel-cg.cpp +++ b/sycl/test-e2e/KernelFusion/non-kernel-cg.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=2 %{run} %t.out 2>&1 | FileCheck %s // Test non-kernel device command groups are not fused diff --git a/sycl/test-e2e/KernelFusion/non_unit_local_size.cpp b/sycl/test-e2e/KernelFusion/non_unit_local_size.cpp index db369b8ee0f37..1cc03bc7ad0c3 100644 --- a/sycl/test-e2e/KernelFusion/non_unit_local_size.cpp +++ b/sycl/test-e2e/KernelFusion/non_unit_local_size.cpp @@ -1,9 +1,13 @@ // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s // Test complete fusion with local internalization specified on the // accessors, where each work-item processes multiple data-items. +// The two kernels are fused, so only a single, fused kernel is launched. +// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include #include diff --git a/sycl/test-e2e/KernelFusion/sync_acc_mem_op.cpp b/sycl/test-e2e/KernelFusion/sync_acc_mem_op.cpp index 825b0818eb344..80a67c7d9dc47 100644 --- a/sycl/test-e2e/KernelFusion/sync_acc_mem_op.cpp +++ b/sycl/test-e2e/KernelFusion/sync_acc_mem_op.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). 
diff --git a/sycl/test-e2e/KernelFusion/sync_buffer_destruction.cpp b/sycl/test-e2e/KernelFusion/sync_buffer_destruction.cpp index a221568e4e4f8..a0564b8d9adc1 100644 --- a/sycl/test-e2e/KernelFusion/sync_buffer_destruction.cpp +++ b/sycl/test-e2e/KernelFusion/sync_buffer_destruction.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/KernelFusion/sync_event_wait.cpp b/sycl/test-e2e/KernelFusion/sync_event_wait.cpp index 9f93852dbcc82..eb70f197cde3d 100644 --- a/sycl/test-e2e/KernelFusion/sync_event_wait.cpp +++ b/sycl/test-e2e/KernelFusion/sync_event_wait.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Test fusion cancellation on event::wait() happening before diff --git a/sycl/test-e2e/KernelFusion/sync_host_accessor.cpp b/sycl/test-e2e/KernelFusion/sync_host_accessor.cpp index 7aa702767d93e..4e45646e8880b 100644 --- a/sycl/test-e2e/KernelFusion/sync_host_accessor.cpp +++ b/sycl/test-e2e/KernelFusion/sync_host_accessor.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/KernelFusion/sync_host_task.cpp b/sycl/test-e2e/KernelFusion/sync_host_task.cpp index 5b1eaaac745b8..40754d4e7a8d9 100644 --- a/sycl/test-e2e/KernelFusion/sync_host_task.cpp +++ b/sycl/test-e2e/KernelFusion/sync_host_task.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). 
diff --git a/sycl/test-e2e/KernelFusion/sync_queue_destruction.cpp b/sycl/test-e2e/KernelFusion/sync_queue_destruction.cpp index 24d50197571c3..a4063e40de041 100644 --- a/sycl/test-e2e/KernelFusion/sync_queue_destruction.cpp +++ b/sycl/test-e2e/KernelFusion/sync_queue_destruction.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/KernelFusion/sync_queue_wait.cpp b/sycl/test-e2e/KernelFusion/sync_queue_wait.cpp index 7cf6197c7d1ec..f073e28933e4a 100644 --- a/sycl/test-e2e/KernelFusion/sync_queue_wait.cpp +++ b/sycl/test-e2e/KernelFusion/sync_queue_wait.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/KernelFusion/sync_second_queue.cpp b/sycl/test-e2e/KernelFusion/sync_second_queue.cpp index 836d9984e824f..66ad1f7561f11 100644 --- a/sycl/test-e2e/KernelFusion/sync_second_queue.cpp +++ b/sycl/test-e2e/KernelFusion/sync_second_queue.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/KernelFusion/sync_two_queues_requirement.cpp b/sycl/test-e2e/KernelFusion/sync_two_queues_requirement.cpp index 9bc209552c35a..969f45625df82 100644 --- a/sycl/test-e2e/KernelFusion/sync_two_queues_requirement.cpp +++ b/sycl/test-e2e/KernelFusion/sync_two_queues_requirement.cpp @@ -1,5 +1,5 @@ // For this test, complete_fusion must be supported. -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Test fusion cancellation for requirement between two active fusions. 
diff --git a/sycl/test-e2e/KernelFusion/sync_usm_mem_op.cpp b/sycl/test-e2e/KernelFusion/sync_usm_mem_op.cpp index fa0ae3f1b31d9..92fb80af18b44 100644 --- a/sycl/test-e2e/KernelFusion/sync_usm_mem_op.cpp +++ b/sycl/test-e2e/KernelFusion/sync_usm_mem_op.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/KernelFusion/usm_no_dependencies.cpp b/sycl/test-e2e/KernelFusion/usm_no_dependencies.cpp index f3152cee4c413..6ad81d579226d 100644 --- a/sycl/test-e2e/KernelFusion/usm_no_dependencies.cpp +++ b/sycl/test-e2e/KernelFusion/usm_no_dependencies.cpp @@ -1,9 +1,13 @@ // REQUIRES: aspect-usm_shared_allocations // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s // Test complete fusion using USM pointers. +// The two kernels are fused, so only a single, fused kernel is launched. +// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include #include diff --git a/sycl/test-e2e/KernelFusion/work_group_barrier.cpp b/sycl/test-e2e/KernelFusion/work_group_barrier.cpp index 39a5a4cdf57ca..fe09e4fb3050e 100644 --- a/sycl/test-e2e/KernelFusion/work_group_barrier.cpp +++ b/sycl/test-e2e/KernelFusion/work_group_barrier.cpp @@ -1,9 +1,13 @@ // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s // Test complete fusion with a combination of kernels that require a work-group // barrier to be inserted by fusion. +// The two kernels are fused, so only a single, fused kernel is launched. 
+// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include #include diff --git a/sycl/test-e2e/KernelFusion/wrapped_usm.cpp b/sycl/test-e2e/KernelFusion/wrapped_usm.cpp index 1254026223aa7..f011ad3b297be 100644 --- a/sycl/test-e2e/KernelFusion/wrapped_usm.cpp +++ b/sycl/test-e2e/KernelFusion/wrapped_usm.cpp @@ -1,9 +1,13 @@ // REQUIRES: aspect-usm_shared_allocations // RUN: %{build} %{embed-ir} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s // Test complete fusion using an wrapped USM pointer as kernel functor argument. +// The two kernels are fused, so only a single, fused kernel is launched. +// CHECK-COUNT-1: piEnqueueKernelLaunch +// CHECK-NOT: piEnqueueKernelLaunch + #include #include #include From 139e05b2eca1f54a9981e815039a19dac10d2908 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Wed, 3 Jul 2024 11:16:45 +0200 Subject: [PATCH 4/4] [LLVM] Add missing TargetParser link in FPBuiltinFnSelection (#14416) https://github.com/intel/llvm/pull/14339 moved the FPBuiltinFnSelection pass, but post-commit testing is failing because llvm/lib/Transforms/Scalar/CMakeLists.txt does not link against TargetParser. This commit adds the missing link component. Signed-off-by: Larsen, Steffen --- llvm/lib/Transforms/Scalar/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt index 079926a65b10c..abd119d43df26 100644 --- a/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -98,4 +98,5 @@ add_llvm_component_library(LLVMScalarOpts InstCombine Support TransformUtils + TargetParser )