From 612771570cc8d629ac3aae0cb3f66bc087eaaa53 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Mon, 5 Aug 2024 15:13:51 +0200 Subject: [PATCH] [SYCL] Implement device image properties for virtual functions (#14875) Implementation design explaining those changes in a bigger picture can be found in intel/llvm#10540 Key things implemented here: - device code split to outline virtual functions into separate device images - emission of new properties for virtual functions - generation of `calls-indirectly` LLVM IR attribute for kernels that construct objects with virtual functions, but don't do calls - device image manipulations to cleanup or preserve virtual functions depending on a device image Even though those pieces are technically independent from each other, it is hard to split them apart into separate PRs, because they all have to be either present or absent for existing E2E tests for virtual functions to work. --- .../include/llvm/SYCLLowerIR/ModuleSplitter.h | 5 + llvm/include/llvm/Support/PropertySetIO.h | 1 + .../SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 67 +++++++- llvm/lib/SYCLLowerIR/ModuleSplitter.cpp | 47 ++++- .../SYCLVirtualFunctionsAnalysis.cpp | 162 ++++++++++++++++-- llvm/lib/Support/PropertySetIO.cpp | 1 + .../calls-indirectly-propagation-1.ll | 35 ++++ .../calls-indirectly-propagation-2.ll | 47 +++++ .../calls-indirectly-propagation-3.ll | 37 ++++ .../calls-indirectly-propagation-4.ll | 45 +++++ .../indirectly-callable-auto-split.ll | 60 +++++++ .../indirectly-callable-per-kernel-split.ll | 67 ++++++++ .../module-cleanup-comdat.ll | 35 ++++ .../virtual-functions/module-cleanup.ll | 44 +++++ .../virtual-functions/properties.ll | 42 +++++ 15 files changed, 676 insertions(+), 19 deletions(-) create mode 100644 llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-1.ll create mode 100644 llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-2.ll create mode 100644 llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-3.ll create mode 100644 llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-4.ll create mode 100644 llvm/test/tools/sycl-post-link/device-code-split/indirectly-callable-auto-split.ll create mode 100644 llvm/test/tools/sycl-post-link/device-code-split/indirectly-callable-per-kernel-split.ll create mode 100644 llvm/test/tools/sycl-post-link/virtual-functions/module-cleanup-comdat.ll create mode 100644 llvm/test/tools/sycl-post-link/virtual-functions/module-cleanup.ll create mode 100644 llvm/test/tools/sycl-post-link/virtual-functions/properties.ll diff --git a/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h b/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h index 5f7e5ba78ed73..0da3706ad3626 100644 --- a/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h +++ b/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h @@ -82,6 +82,11 @@ struct EntryPointGroup { // Scope remains global return Res; } + + // Indicates that this group holds definitions of virtual functions - they + // are outlined into separate device images and should be removed from all + // other modules. The flag is used in ModuleDesc::cleanup + bool HasVirtualFunctionDefinitions = false; }; std::string GroupId; diff --git a/llvm/include/llvm/Support/PropertySetIO.h b/llvm/include/llvm/Support/PropertySetIO.h index bbda6c548825f..13cb687f3b08b 100644 --- a/llvm/include/llvm/Support/PropertySetIO.h +++ b/llvm/include/llvm/Support/PropertySetIO.h @@ -209,6 +209,7 @@ class PropertySetRegistry { static constexpr char SYCL_DEVICE_GLOBALS[] = "SYCL/device globals"; static constexpr char SYCL_DEVICE_REQUIREMENTS[] = "SYCL/device requirements"; static constexpr char SYCL_HOST_PIPES[] = "SYCL/host pipes"; + static constexpr char SYCL_VIRTUAL_FUNCTIONS[] = "SYCL/virtual functions"; /// Function for bulk addition of an entire property set in the given /// \p Category . diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index b156f66c2c3c1..0c00134a2effb 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -8,6 +8,9 @@ // See comments in the header. //===----------------------------------------------------------------------===// #include "llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Demangle/Demangle.h" #include "llvm/IR/PassInstrumentation.h" #include "llvm/SYCLLowerIR/CompileTimePropertiesPass.h" @@ -188,6 +191,10 @@ PropSetRegTy computeModuleProperties(const Module &M, if (GlobProps.EmitExportedSymbols) { // extract exported functions if any and save them into property set for (const auto *F : EntryPoints) { + // Virtual functions use a different mechanism of dynamic linking, they + // should not be registered here. + if (F->hasFnAttribute("indirectly-callable")) + continue; // TODO FIXME some of SYCL/ESIMD functions maybe marked with __regcall CC, // so they won't make it into the export list. Should the check be // F->getCallingConv() != CallingConv::SPIR_KERNEL? @@ -201,11 +208,19 @@ PropSetRegTy computeModuleProperties(const Module &M, if (GlobProps.EmitImportedSymbols) { // record imported functions in the property set for (const auto &F : M) { - if ( // A function that can be imported may still be defined in one split - // image. Only add import property if this is not the image where the - // function is defined. - F.isDeclaration() && module_split::canBeImportedFunction(F)) { + // A function that can be imported may still be defined in one split + // image. Only add import property if this is not the image where the + // function is defined. + if (!F.isDeclaration()) + continue; + // Even though virtual functions are considered to be imported by the + // function below, we shouldn't list them in the property because they + // use different mechanism for dynamic linking. + if (F.hasFnAttribute("indirectly-callable")) + continue; + + if (module_split::canBeImportedFunction(F)) { // StripDeadPrototypes is called during module splitting // cleanup. At this point all function decls should have uses. assert(!F.use_empty() && "Function F has no uses"); @@ -354,6 +369,50 @@ PropSetRegTy computeModuleProperties(const Module &M, PropSet.add(PropSetRegTy::SYCL_MISC_PROP, "specConstsReplacedWithDefault", 1); + { // Properties related to virtual functions + StringSet<> UsedVFSets; + bool AddedVFSetProperty = false; + for (const Function &F : M) { + if (F.isDeclaration()) + continue; + + if (F.hasFnAttribute("indirectly-callable")) { + PropSet.add(PropSetRegTy::SYCL_VIRTUAL_FUNCTIONS, + "virtual-functions-set", + F.getFnAttribute("indirectly-callable").getValueAsString()); + AddedVFSetProperty = true; + // Device code split should ensure that virtual functions that belong + // to different sets are split into separate device images and hence + // there is no need to scan other functions. + break; + } + + if (F.hasFnAttribute("calls-indirectly")) { + SmallVector Sets; + F.getFnAttribute("calls-indirectly") + .getValueAsString() + .split(Sets, ',', /* MaxSplits */ -1, /* KeepEmpty */ false); + for (auto Set : Sets) + UsedVFSets.insert(Set); + } + } + + if (!UsedVFSets.empty()) { + assert(!AddedVFSetProperty && + "device image cannot have both virtual-functions-set and " + "uses-virtual-functions-set property"); + SmallString<128> AllSets; + for (auto &It : UsedVFSets) { + if (!AllSets.empty()) + AllSets += ','; + AllSets += It.getKey(); + } + + PropSet.add(PropSetRegTy::SYCL_VIRTUAL_FUNCTIONS, + "uses-virtual-functions-set", AllSets); + } + } + return PropSet; } std::string computeModuleSymbolTable(const Module &M, diff --git a/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp b/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp index cea7afb2ffe1a..dc16e58a7d98d 100644 --- a/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp +++ b/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp @@ -136,7 +136,10 @@ bool isEntryPoint(const Function &F, bool EmitOnlyKernelsAsEntryPoints) { !isGenericBuiltin(F.getName()); } - return false; + // Even if we are emitting only kernels as entry points, virtual functions + // should still be treated as entry points, because they are going to be + // outlined into separate device images and linked in later. + return F.hasFnAttribute("indirectly-callable"); } // Represents "dependency" or "use" graph of global objects (functions and @@ -668,6 +671,22 @@ bool mustPreserveGV(const GlobalValue &GV) { // TODO: try to move all passes (cleanup, spec consts, compile time properties) // in one place and execute MPM.run() only once. void ModuleDesc::cleanup() { + // Any definitions of virtual functions should be removed and turned into + // declarations, they are supposed to be provided by a different module. + if (!EntryPoints.Props.HasVirtualFunctionDefinitions) { + for (Function &F : *M) + if (F.hasFnAttribute("indirectly-callable")) { + F.deleteBody(); + if (F.hasComdat()) + F.setComdat(nullptr); + } + } else { + // Otherwise externalize them so they are not dropped by GlobalDCE + for (Function &F : *M) + if (F.hasFnAttribute("indirectly-callable")) + F.setLinkage(GlobalValue::LinkageTypes::ExternalLinkage); + } + ModuleAnalysisManager MAM; MAM.registerPass([&] { return PassInstrumentationAnalysis(); }); ModulePassManager MPM; @@ -1057,6 +1076,17 @@ getDeviceCodeSplitter(ModuleDesc &&MD, IRSplitMode Mode, bool IROutputOnly, Categorizer.registerSimpleStringAttributeRule( sycl::utils::ATTR_SYCL_MODULE_ID); + // This attribute marks virtual functions and effectively dictates how they + // should be groupped together. By design we won't split those groups of + // virtual functions further even if functions from the same group use + // different optional features and therefore this rule is put here. + // Strictly speaking, we don't even care about module-id splitting for + // those, but to avoid that we need to refactor the whole categorizer. + // However, this is good enough as it is for an initial version. + // TODO: for AOT use case we shouldn't be outlining those and instead should + // only select those functions which are compatible with the target device + Categorizer.registerSimpleStringAttributeRule("indirectly-callable"); + // Optional features // Note: Add more rules at the end of the list to avoid chaning orders of // output files in existing tests. @@ -1096,8 +1126,19 @@ getDeviceCodeSplitter(ModuleDesc &&MD, IRSplitMode Mode, bool IROutputOnly, Groups.reserve(EntryPointsMap.size()); // Start with properties of a source module EntryPointGroup::Properties MDProps = MD.getEntryPointGroup().Props; - for (auto &[Key, EntryPoints] : EntryPointsMap) - Groups.emplace_back(Key, std::move(EntryPoints), MDProps); + for (auto &[Key, EntryPoints] : EntryPointsMap) { + bool HasVirtualFunctions = false; + for (auto *F : EntryPoints) { + if (F->hasFnAttribute("indirectly-callable")) { + HasVirtualFunctions = true; + break; + } + } + + auto PropsCopy = MDProps; + PropsCopy.HasVirtualFunctionDefinitions = HasVirtualFunctions; + Groups.emplace_back(Key, std::move(EntryPoints), PropsCopy); + } } bool DoSplit = (Mode != SPLIT_NONE && diff --git a/llvm/lib/SYCLLowerIR/SYCLVirtualFunctionsAnalysis.cpp b/llvm/lib/SYCLLowerIR/SYCLVirtualFunctionsAnalysis.cpp index 931609a86bdff..d3c82a10529b9 100644 --- a/llvm/lib/SYCLLowerIR/SYCLVirtualFunctionsAnalysis.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLVirtualFunctionsAnalysis.cpp @@ -12,6 +12,11 @@ // - if a kernel submitted without the calls_indirectly property performs // virtual function calls, a diagnostic should be emitted. // +// Additionally, the pass sets "calls-indirectly" attribute for kernels which +// create, but don't call virtual functions. This attribute is needed to emit +// the right device image properties later which will be crucial to ensure +// proper runtime linking. +// //===----------------------------------------------------------------------===// #include "llvm/SYCLLowerIR/SYCLVirtualFunctionsAnalysis.h" @@ -19,10 +24,12 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" #include "llvm/Pass.h" using namespace llvm; @@ -30,6 +37,7 @@ using namespace llvm; namespace { using CallGraphTy = DenseMap>; +using FuncToFuncMapTy = DenseMap>; void emitDiagnostic(const SmallVector &Stack) { diagnoseSYCLIllegalVirtualFunctionCall(Stack); @@ -59,32 +67,120 @@ void checkKernel(const Function *F, const CallGraphTy &CG) { checkKernelImpl(F, CG, CallStack); } +void computeFunctionToKernelsMappingImpl(Function *Kernel, const Function *F, + const CallGraphTy &CG, + FuncToFuncMapTy &Mapping) { + CallGraphTy::const_iterator It = CG.find(F); + // It could be that the function itself is a leaf and doesn't call anything + if (It == CG.end()) + return; + + Mapping[F].insert(Kernel); + + const SmallPtrSet &Callees = It->getSecond(); + for (const Value *V : Callees) + if (auto *Callee = dyn_cast(V)) + computeFunctionToKernelsMappingImpl(Kernel, Callee, CG, Mapping); +} + +void computeFunctionToKernelsMapping(Function *Kernel, const CallGraphTy &CG, + FuncToFuncMapTy &Mapping) { + // For simplicity we also consider a kernel to be using itself + Mapping[Kernel].insert(Kernel); + + CallGraphTy::const_iterator It = CG.find(Kernel); + // It could be that the kernel doesn't call anything + if (It == CG.end()) + return; + + const SmallPtrSet &Callees = It->getSecond(); + for (const Value *V : Callees) { + auto *Callee = dyn_cast(V); + if (!Callee) + continue; + Mapping[Callee].insert(Kernel); + computeFunctionToKernelsMappingImpl(Kernel, Callee, CG, Mapping); + } +} + +void collectVTablesThatUseFunction( + const Value *V, SmallVectorImpl &VTables) { + for (const auto *U : V->users()) { + // GlobalVariable is also a constant + if (const auto *GV = dyn_cast(U)) { + // The core SYCL specification prohibits ODR use of non-const global + // variables in SYCL kernels. There are extensions like device_global that + // lift some of the limitations, but we still assume that there are no + // globals that reference function pointers other than virtual tables. + VTables.push_back(GV); + } else if (isa(U)) { + // Constant expression like + // ptr addrspace(4) addrspacecast (ptr @foo to ptr addrspace(4)) + // Could be a part of vtable initializer + collectVTablesThatUseFunction(U, VTables); + } else if (isa(U)) { + // [3 x ptr addrspace(4)] [ + // ptr addrspace(4) addrspacecast (ptr @foo to ptr addrspace(4)), ...] + collectVTablesThatUseFunction(U, VTables); + } else { + llvm_unreachable("Unhandled type of user"); + } + } +} + +// The same ConstantExpr could be used by two functions +void collectEnclosingFunctions(const Value *V, + SmallPtrSetImpl &Functions) { + if (isa(V)) { + for (const auto *U : V->users()) + collectEnclosingFunctions(U, Functions); + return; + } + + if (auto *I = dyn_cast(V)) { + Functions.insert(I->getFunction()); + return; + } + + llvm_unreachable("Unhandled type of value"); +} + } // namespace PreservedAnalyses SYCLVirtualFunctionsAnalysisPass::run(Module &M, ModuleAnalysisManager &MAM) { CallGraphTy CallGraph; - SmallVector Kernels; + SmallVector AllKernels; + SmallVector KernelsToCheck; + SmallVector IndirectlyCallableFuncs; SetVector WorkList; // Identify list of kernels that we need to check - for (const Function &F : M) { + for (Function &F : M) { + if (F.hasFnAttribute("indirectly-callable")) + IndirectlyCallableFuncs.push_back(&F); + // We only traverse call graphs of SYCL kernels if (F.getCallingConv() != CallingConv::SPIR_KERNEL) continue; - // If a kernel is annotated to use virtual functions, we skip it - if (F.hasFnAttribute("calls-indirectly")) - continue; - - // Otherwise, we build call graph for a kernel to ensure that it does not - // perform virtual function calls since that is prohibited by the core - // SYCL 2020 specification + // We record all the kernels here, because we may end up propagating + // calls-indirectly to them if they use vtables. + AllKernels.push_back(&F); WorkList.insert(&F); - Kernels.push_back(&F); + + // However, we only need to check kernel's call graph if it is not annotated + // to use virtual functions to ensure that it indeed doesn't use them. + if (!F.hasFnAttribute("calls-indirectly")) + KernelsToCheck.push_back(&F); } - // Build call graph for each of them + // If there are no virtual functions to call in a module, then we can skip + // the whole analysis + if (IndirectlyCallableFuncs.empty()) + return PreservedAnalyses::all(); + + // Build call graph for each kernel for (size_t I = 0; I < WorkList.size(); ++I) { const Function *F = WorkList[I]; for (const Instruction &I : instructions(F)) { @@ -109,8 +205,50 @@ SYCLVirtualFunctionsAnalysisPass::run(Module &M, ModuleAnalysisManager &MAM) { } // Emit a diagnostic if a kernel performs virtual function calls - for (const auto *K : Kernels) { + for (auto *K : KernelsToCheck) checkKernel(K, CallGraph); + + // Cache to know which function is used by which kernels + FuncToFuncMapTy FunctionToKernels; + for (auto *K : AllKernels) + computeFunctionToKernelsMapping(K, CallGraph, FunctionToKernels); + + for (const auto *F : IndirectlyCallableFuncs) { + StringRef Set = F->getFnAttribute("indirectly-callable").getValueAsString(); + + SmallVector VTables; + collectVTablesThatUseFunction(F, VTables); + SmallPtrSet KernelsToUpdate; + + for (const auto *GV : VTables) { + // Find functions that use those vtables + SmallPtrSet FunctionsThatUseVTables; + for (const auto *UU : GV->users()) + collectEnclosingFunctions(UU, FunctionsThatUseVTables); + // And collect kernels that use those functions + for (const Function *FF : FunctionsThatUseVTables) + for (auto *K : FunctionToKernels[FF]) + KernelsToUpdate.insert(K); + } + + // Update or attach "calls-indirectly" attribute to those kernels + // indicating that they use virtual functions set 'Set' + for (Function *K : KernelsToUpdate) { + if (!K->hasFnAttribute("calls-indirectly")) + K->addFnAttr("calls-indirectly", Set); + else { + StringRef UsedSets = + K->getFnAttribute("calls-indirectly").getValueAsString(); + if (UsedSets.contains(Set)) + continue; + + K->removeFnAttr("calls-indirectly"); + SmallString<64> NewAttr = UsedSets; + NewAttr += ","; + NewAttr += Set; + K->addFnAttr("calls-indirectly", NewAttr.str()); + } + } } return PreservedAnalyses::all(); diff --git a/llvm/lib/Support/PropertySetIO.cpp b/llvm/lib/Support/PropertySetIO.cpp index f14f8cd5b16cb..2fe7cac00fb14 100644 --- a/llvm/lib/Support/PropertySetIO.cpp +++ b/llvm/lib/Support/PropertySetIO.cpp @@ -206,6 +206,7 @@ constexpr char PropertySetRegistry::SYCL_IMPORTED_SYMBOLS[]; constexpr char PropertySetRegistry::SYCL_DEVICE_GLOBALS[]; constexpr char PropertySetRegistry::SYCL_DEVICE_REQUIREMENTS[]; constexpr char PropertySetRegistry::SYCL_HOST_PIPES[]; +constexpr char PropertySetRegistry::SYCL_VIRTUAL_FUNCTIONS[]; } // namespace util } // namespace llvm diff --git a/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-1.ll b/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-1.ll new file mode 100644 index 0000000000000..7d356fecebfbf --- /dev/null +++ b/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-1.ll @@ -0,0 +1,35 @@ +; RUN: opt -S -passes=sycl-virtual-functions-analysis %s | FileCheck %s +; +; This is a very basic test intended to check that if a kernel uses a vtable, +; then it should be annotated with an attribute "calls-indirectly" that has +; the same value as a function referenced by that vtable. + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +@vtable = linkonce_odr dso_local unnamed_addr addrspace(1) constant { [3 x ptr addrspace(4)] } { [3 x ptr addrspace(4)] [ptr addrspace(4) null, ptr addrspace(4) null, ptr addrspace(4) addrspacecast (ptr @foo to ptr addrspace(4))] }, align 8 + +define linkonce_odr spir_func void @foo() #0 { +entry: + ret void +} + +define weak_odr dso_local spir_kernel void @kernel(ptr addrspace(1) noundef align 8 %_arg_StorageAcc) #1 { +entry: + store ptr addrspace(1) getelementptr inbounds inrange(-16, 8) (i8, ptr addrspace(1) @vtable, i64 16), ptr addrspace(1) %_arg_StorageAcc, align 8 + ret void +} + +define weak_odr dso_local spir_kernel void @kernel_already_uses(ptr addrspace(1) noundef align 8 %_arg_StorageAcc) #2 { +entry: + store ptr addrspace(1) getelementptr inbounds inrange(-16, 8) (i8, ptr addrspace(1) @vtable, i64 16), ptr addrspace(1) %_arg_StorageAcc, align 8 + ret void +} + +; CHECK: @kernel({{.*}} #[[#KERNEL_ATTRS:]] +; CHECK: @kernel_already_uses({{.*}} #[[#KERNEL_ATTRS]] +; CHECK: attributes #[[#KERNEL_ATTRS]] = {{.*}}"calls-indirectly"="set-foo" + +attributes #0 = { "indirectly-callable"="set-foo" "sycl-module-id"="v.cpp" } +attributes #1 = { "sycl-module-id"="v.cpp" } +attributes #2 = { "calls-indirectly"="set-foo" "sycl-module-id"="v.cpp" } diff --git a/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-2.ll b/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-2.ll new file mode 100644 index 0000000000000..3bf68812512f8 --- /dev/null +++ b/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-2.ll @@ -0,0 +1,47 @@ +; RUN: opt -S -passes=sycl-virtual-functions-analysis %s | FileCheck %s +; +; This is a more complicated version of a test intended to check that if a +; kernel uses a vtable, then it should be annotated with an attribute +; "calls-indirectly" that has the same value as a function referenced by that +; vtable. +; Main thing which is tested here is case when a kernel ends up using more than +; one set of virtual functions. + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +@vtable_foo = linkonce_odr dso_local unnamed_addr addrspace(1) constant { [3 x ptr addrspace(4)] } { [3 x ptr addrspace(4)] [ptr addrspace(4) null, ptr addrspace(4) null, ptr addrspace(4) addrspacecast (ptr @foo to ptr addrspace(4))] }, align 8 +@vtable_bar = linkonce_odr dso_local unnamed_addr addrspace(1) constant { [3 x ptr addrspace(4)] } { [3 x ptr addrspace(4)] [ptr addrspace(4) null, ptr addrspace(4) null, ptr addrspace(4) addrspacecast (ptr @bar to ptr addrspace(4))] }, align 8 + +define linkonce_odr spir_func void @foo() #0 { +entry: + ret void +} + +define linkonce_odr spir_func void @bar() #1 { +entry: + ret void +} + +define weak_odr dso_local spir_kernel void @kernel(ptr addrspace(1) noundef align 8 %_arg_StorageAcc) #2 { +entry: + store ptr addrspace(1) getelementptr inbounds inrange(-16, 8) (i8, ptr addrspace(1) @vtable_foo, i64 16), ptr addrspace(1) %_arg_StorageAcc, align 8 + store ptr addrspace(1) getelementptr inbounds inrange(-16, 8) (i8, ptr addrspace(1) @vtable_bar, i64 16), ptr addrspace(1) %_arg_StorageAcc, align 8 + ret void +} + +define weak_odr dso_local spir_kernel void @kernel_already_uses(ptr addrspace(1) noundef align 8 %_arg_StorageAcc) #3 { +entry: + store ptr addrspace(1) getelementptr inbounds inrange(-16, 8) (i8, ptr addrspace(1) @vtable_bar, i64 16), ptr addrspace(1) %_arg_StorageAcc, align 8 + ret void +} + +; CHECK: @kernel({{.*}} #[[#KERNEL_ATTRS:]] +; CHECK: @kernel_already_uses({{.*}} #[[#KERNEL_ATTRS]] +; +; CHECK: attributes #[[#KERNEL_ATTRS]] = {{.*}}"calls-indirectly"="set-foo,set-bar" + +attributes #0 = { "indirectly-callable"="set-foo" "sycl-module-id"="v.cpp" } +attributes #1 = { "indirectly-callable"="set-bar" "sycl-module-id"="v.cpp" } +attributes #2 = { "sycl-module-id"="v.cpp" } +attributes #3 = { "calls-indirectly"="set-foo" "sycl-module-id"="v.cpp" } diff --git a/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-3.ll b/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-3.ll new file mode 100644 index 0000000000000..e0299fa672f28 --- /dev/null +++ b/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-3.ll @@ -0,0 +1,37 @@ +; RUN: opt -S -passes=sycl-virtual-functions-analysis %s | FileCheck %s +; +; This is a more complicated version of a test intended to check that if a +; kernel uses a vtable, then it should be annotated with an attribute +; "calls-indirectly" that has the same value as a function referenced by that +; vtable. +; This test case is focused on more complex vtables where a single vtable can +; bring functions from different sets. + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +@vtable = linkonce_odr dso_local unnamed_addr addrspace(1) constant { [4 x ptr addrspace(4)] } { [4 x ptr addrspace(4)] [ptr addrspace(4) null, ptr addrspace(4) null, ptr addrspace(4) addrspacecast (ptr @foo to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr @bar to ptr addrspace(4))] }, align 8 + +define linkonce_odr spir_func void @foo() #0 { +entry: + ret void +} + +define linkonce_odr spir_func void @bar() #1 { +entry: + ret void +} + +define weak_odr dso_local spir_kernel void @kernel(ptr addrspace(1) noundef align 8 %_arg_StorageAcc) #2 { +entry: + store ptr addrspace(1) getelementptr inbounds inrange(-16, 8) (i8, ptr addrspace(1) @vtable, i64 16), ptr addrspace(1) %_arg_StorageAcc, align 8 + ret void +} + +; CHECK: @kernel{{.*}} #[[#KERNEL_ATTRS:]] +; +; CHECK: attributes #[[#KERNEL_ATTRS]] = {{.*}}"calls-indirectly"="set-foo,set-bar" + +attributes #0 = { "indirectly-callable"="set-foo" "sycl-module-id"="v.cpp" } +attributes #1 = { "indirectly-callable"="set-bar" "sycl-module-id"="v.cpp" } +attributes #2 = { "sycl-module-id"="v.cpp" } diff --git a/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-4.ll b/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-4.ll new file mode 100644 index 0000000000000..21da86cb120de --- /dev/null +++ b/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-4.ll @@ -0,0 +1,45 @@ +; RUN: opt -S -passes=sycl-virtual-functions-analysis %s | FileCheck %s +; +; This is a more complicated version of a test intended to check that if a +; kernel uses a vtable, then it should be annotated with an attribute +; "calls-indirectly" that has the same value as a function referenced by that +; vtable. +; This test case is focused on more complex call graph of a kernel, where vtable +; operation is not performed directly by a kernel, but instead is done by some +; other helper function. + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +@vtable = linkonce_odr dso_local unnamed_addr addrspace(1) constant { [4 x ptr addrspace(4)] } { [4 x ptr addrspace(4)] [ptr addrspace(4) null, ptr addrspace(4) null, ptr addrspace(4) addrspacecast (ptr @foo to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr @bar to ptr addrspace(4))] }, align 8 + +define linkonce_odr spir_func void @foo() #0 { +entry: + ret void +} + +define linkonce_odr spir_func void @bar() #1 { +entry: + ret void +} + +define internal spir_func void @helper(ptr addrspace(1) noundef align 8 %arg) { +entry: + store ptr addrspace(1) getelementptr inbounds inrange(-16, 8) (i8, ptr addrspace(1) @vtable, i64 16), ptr addrspace(1) %arg, align 8 + ret void +} + +define weak_odr dso_local spir_kernel void @kernel(ptr addrspace(1) noundef align 8 %_arg_StorageAcc) #2 { +entry: + call void @helper(ptr addrspace(1) %_arg_StorageAcc) + ret void +} + +; CHECK: @kernel{{.*}} #[[#KERNEL_ATTRS:]] +; +; CHECK: attributes #[[#KERNEL_ATTRS]] = {{.*}}"calls-indirectly"="set-foo,set-bar" + +attributes #0 = { "indirectly-callable"="set-foo" "sycl-module-id"="v.cpp" } +attributes #1 = { "indirectly-callable"="set-bar" "sycl-module-id"="v.cpp" } +attributes #2 = { "sycl-module-id"="v.cpp" } + diff --git a/llvm/test/tools/sycl-post-link/device-code-split/indirectly-callable-auto-split.ll b/llvm/test/tools/sycl-post-link/device-code-split/indirectly-callable-auto-split.ll new file mode 100644 index 0000000000000..c27f23c2010c6 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/device-code-split/indirectly-callable-auto-split.ll @@ -0,0 +1,60 @@ +; RUN: sycl-post-link -split=auto -S < %s -o %t.table +; +; This test checks that functions marked with "indirectly-callable" LLVM IR +; attribute are outlined into separate device image(s) in accordance with the +; attribute value. +; +; Current device code split implementation may split those groups further if +; they use different optional kernel features for example, but we don't care +; about that subsequent split and don't test it. +; +; RUN: FileCheck %s --input-file=%t_0.ll --check-prefix CHECK-IR0 \ +; RUN: --implicit-check-not kernel --implicit-check-not foo +; RUN: FileCheck %s --input-file=%t_1.ll --check-prefix CHECK-IR1 \ +; RUN: --implicit-check-not kernel --implicit-check-not bar \ +; RUN: --implicit-check-not baz +; RUN: FileCheck %s --input-file=%t_2.ll --check-prefix CHECK-IR2 \ +; RUN: --implicit-check-not foo --implicit-check-not bar \ +; RUN: --implicit-check-not baz +; +; RUN: sycl-module-split -split=auto -S < %s -o %t2 +; RUN: FileCheck %s --input-file=%t2_0.ll --check-prefix CHECK-IR0 \ +; RUN: --implicit-check-not kernel --implicit-check-not foo +; RUN: FileCheck %s --input-file=%t2_1.ll --check-prefix CHECK-IR1 \ +; RUN: --implicit-check-not kernel --implicit-check-not bar \ +; RUN: --implicit-check-not baz +; RUN: FileCheck %s --input-file=%t2_2.ll --check-prefix CHECK-IR2 \ +; RUN: --implicit-check-not foo --implicit-check-not bar \ +; RUN: --implicit-check-not baz +; +; CHECK-IR0-DAG: define spir_func void @bar +; CHECK-IR0-DAG: define spir_func void @baz +; CHECK-IR1: define spir_func void @foo +; CHECK-IR2: define weak_odr dso_local spir_kernel void @kernel + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +define spir_func void @foo() #0 { +entry: + ret void +} + +define spir_func void @bar() #1 { +entry: + ret void +} + +define spir_func void @baz() #1 { +entry: + ret void +} + +define weak_odr dso_local spir_kernel void @kernel() #2 { +entry: + ret void +} + +attributes #0 = { "indirectly-callable"="set-1" "sycl-module-id"="v.cpp" } +attributes #1 = { "indirectly-callable"="set-2" "sycl-module-id"="v.cpp" } +attributes #2 = { "sycl-module-id"="v.cpp" } diff --git a/llvm/test/tools/sycl-post-link/device-code-split/indirectly-callable-per-kernel-split.ll b/llvm/test/tools/sycl-post-link/device-code-split/indirectly-callable-per-kernel-split.ll new file mode 100644 index 0000000000000..fd0a7e9cafb6f --- /dev/null +++ b/llvm/test/tools/sycl-post-link/device-code-split/indirectly-callable-per-kernel-split.ll @@ -0,0 +1,67 @@ +; RUN: sycl-post-link -split=kernel -S < %s -o %t.table +; +; This test checks that functions marked with "indirectly-callable" LLVM IR +; attribute are outlined into separate device image(s) in accordance with the +; attribute value. +; +; This version of the test is focused on per-kernel device code split +; +; RUN: FileCheck %s --input-file=%t_0.ll --check-prefix CHECK-IR0 \ +; RUN: --implicit-check-not foo --implicit-check-not bar \ +; RUN: --implicit-check-not baz +; RUN: FileCheck %s --input-file=%t_1.ll --check-prefix CHECK-IR1 \ +; RUN: --implicit-check-not kernel --implicit-check-not bar \ +; RUN: --implicit-check-not baz +; RUN: FileCheck %s --input-file=%t_2.ll --check-prefix CHECK-IR2 \ +; RUN: --implicit-check-not kernel --implicit-check-not foo \ +; RUN: --implicit-check-not bar +; RUN: FileCheck %s --input-file=%t_3.ll --check-prefix CHECK-IR3 \ +; RUN: --implicit-check-not kernel --implicit-check-not foo \ +; RUN: --implicit-check-not baz +; +; RUN: sycl-module-split -split=kernel -S < %s -o %t2 +; RUN: FileCheck %s --input-file=%t2_0.ll --check-prefix CHECK-IR0 \ +; RUN: --implicit-check-not foo --implicit-check-not bar \ +; RUN: --implicit-check-not baz +; RUN: FileCheck %s --input-file=%t2_1.ll --check-prefix CHECK-IR1 \ +; RUN: --implicit-check-not kernel --implicit-check-not bar \ +; RUN: --implicit-check-not baz +; RUN: FileCheck %s --input-file=%t2_2.ll --check-prefix CHECK-IR2 \ +; RUN: --implicit-check-not kernel --implicit-check-not foo \ +; RUN: --implicit-check-not bar +; RUN: FileCheck %s --input-file=%t2_3.ll --check-prefix CHECK-IR3 \ +; RUN: --implicit-check-not kernel --implicit-check-not foo \ +; RUN: --implicit-check-not baz +; +; CHECK-IR0: define weak_odr dso_local spir_kernel void @kernel +; CHECK-IR1: define spir_func void @foo +; CHECK-IR2: define spir_func void @baz +; CHECK-IR3: define spir_func void @bar + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +define spir_func void @foo() #0 { +entry: + ret void +} + +define spir_func void @bar() #1 { +entry: + ret void +} + +define spir_func void @baz() #1 { +entry: + ret void +} + +define weak_odr dso_local spir_kernel void @kernel() #2 { +entry: + ret void +} + +attributes #0 = { "indirectly-callable"="set-1" "sycl-module-id"="v.cpp" } +attributes #1 = { "indirectly-callable"="set-2" "sycl-module-id"="v.cpp" } +attributes #2 = { "sycl-module-id"="v.cpp" } + diff --git a/llvm/test/tools/sycl-post-link/virtual-functions/module-cleanup-comdat.ll b/llvm/test/tools/sycl-post-link/virtual-functions/module-cleanup-comdat.ll new file mode 100644 index 0000000000000..42ad601819ab7 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/virtual-functions/module-cleanup-comdat.ll @@ -0,0 +1,35 @@ +; RUN: sycl-post-link -split=auto -S < %s -o %t.table +; +; Virtual functions cleanup drops their bodies from some of device images +; turning them into declarations, but declarations can't have "comdat" +; attached to them, so this test ensures that we can handle "comdat" without +; crashes. + +; RUN: FileCheck %s --input-file=%t_0.ll --check-prefix=CHECK-IR0 +; RUN: FileCheck %s --input-file=%t_1.ll --check-prefix=CHECK-IR1 + +; CHECK-IR0: define spir_func void @foo +; CHECK-IR1-DAG: declare spir_func void @foo +; CHECK-IR1-DAG: define weak_odr dso_local spir_kernel void @kernel + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +$foo = comdat any + +@vtable = linkonce_odr dso_local unnamed_addr addrspace(1) constant { [3 x ptr addrspace(4)] } { [3 x ptr addrspace(4)] [ptr addrspace(4) null, ptr addrspace(4) null, ptr addrspace(4) addrspacecast (ptr @foo to ptr addrspace(4))] }, align 8 + +define linkonce_odr spir_func void @foo() #0 comdat { +entry: + ret void +} + +define weak_odr dso_local spir_kernel void @kernel(ptr addrspace(1) noundef align 8 %_arg_StorageAcc) #1 { +entry: + store ptr addrspace(1) getelementptr inbounds inrange(-16, 8) (i8, ptr addrspace(1) @vtable, i64 16), ptr addrspace(1) %_arg_StorageAcc, align 8 + ret void +} + +attributes #0 = { "indirectly-callable"="set-foo" "sycl-module-id"="v.cpp" } +attributes #1 = { "sycl-module-id"="v.cpp" } + diff --git a/llvm/test/tools/sycl-post-link/virtual-functions/module-cleanup.ll b/llvm/test/tools/sycl-post-link/virtual-functions/module-cleanup.ll new file mode 100644 index 0000000000000..54380d92cf8df --- /dev/null +++ b/llvm/test/tools/sycl-post-link/virtual-functions/module-cleanup.ll @@ -0,0 +1,44 @@ +; RUN: sycl-post-link -split=auto -properties -emit-exported-symbols \ +; RUN: -emit-imported-symbols -emit-only-kernels-as-entry-points \ +; RUN: -support-dynamic-linking \ +; RUN: -S < %s -o %t.table +; +; Virtual functions require some special handling during module cleanup: +; - they are outlined into separate device images and must not be removed from +; there by any DCE or internalization passes (they are externalized), even if +; only kernels are treated as entry points +; - device images that use them should keep declarations of virtual functions, +; but not their bodies +; - no virtual functions should be listed as exported/imported functions + +; RUN: FileCheck %s --input-file=%t_0.ll --check-prefix=CHECK-IR0 +; RUN: FileCheck %s --input-file=%t_1.ll --check-prefix=CHECK-IR1 +; RUN: FileCheck %s --input-file=%t_0.prop \ +; RUN: --implicit-check-not "SYCL/exported functions" \ +; RUN: --implicit-check-not "SYCL/imported functions" +; RUN: FileCheck %s --input-file=%t_1.prop \ +; RUN: --implicit-check-not "SYCL/exported functions" \ +; RUN: --implicit-check-not "SYCL/imported functions" + +; CHECK-IR0: define spir_func void @foo +; CHECK-IR1-DAG: declare spir_func void @foo +; CHECK-IR1-DAG: define weak_odr dso_local spir_kernel void @kernel + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +@vtable = linkonce_odr dso_local unnamed_addr addrspace(1) constant { [3 x ptr addrspace(4)] } { [3 x ptr addrspace(4)] [ptr addrspace(4) null, ptr addrspace(4) null, ptr addrspace(4) addrspacecast (ptr @foo to ptr addrspace(4))] }, align 8 + +define linkonce_odr spir_func void @foo() #0 { +entry: + ret void +} + +define weak_odr dso_local spir_kernel void @kernel(ptr addrspace(1) noundef align 8 %_arg_StorageAcc) #1 { +entry: + store ptr addrspace(1) getelementptr inbounds inrange(-16, 8) (i8, ptr addrspace(1) @vtable, i64 16), ptr addrspace(1) %_arg_StorageAcc, align 8 + ret void +} + +attributes #0 = { "indirectly-callable"="set-foo" "sycl-module-id"="v.cpp" } +attributes #1 = { "sycl-module-id"="v.cpp" } diff --git a/llvm/test/tools/sycl-post-link/virtual-functions/properties.ll b/llvm/test/tools/sycl-post-link/virtual-functions/properties.ll new file mode 100644 index 0000000000000..adebb5af611c5 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/virtual-functions/properties.ll @@ -0,0 +1,42 @@ +; RUN: sycl-post-link -split=auto -properties -S < %s -o %t.table +; +; Device images with virtual functions in them should have the +; "virtual-functions-set" property under corresponding property set. +; Device images that use virtual functions should have the +; "uses-virtual-functions-set" property under corresponding property set. +; There can't be a device image where both properties are present, they are +; mutually exclusive + +; RUN: FileCheck %s --input-file=%t_0.ll --check-prefix=CHECK-IR0 +; RUN: FileCheck %s --input-file=%t_1.ll --check-prefix=CHECK-IR1 +; RUN: FileCheck %s --input-file=%t_0.prop --check-prefix=CHECK-PROPS0 +; RUN: FileCheck %s --input-file=%t_1.prop --check-prefix=CHECK-PROPS1 + +; CHECK-IR0: define spir_func void @foo +; CHECK-IR1: define weak_odr dso_local spir_kernel void @kernel +; CHECK-PROPS0: [SYCL/virtual functions] +; CHECK-PROPS0-NEXT: virtual-functions-set=2|4AAAAAAAAAwclRXLm92b +; CHECK-PROPS0-NOT: uses-virtual-functions-set +; CHECK-PROPS1: [SYCL/virtual functions] +; CHECK-PROPS1-NEXT: uses-virtual-functions-set=2|4AAAAAAAAAwclRXLm92b +; CHECK-PROPS1-NOT: virtual-functions-set + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +@vtable = linkonce_odr dso_local unnamed_addr addrspace(1) constant { [3 x ptr addrspace(4)] } { [3 x ptr addrspace(4)] [ptr addrspace(4) null, ptr addrspace(4) null, ptr addrspace(4) addrspacecast (ptr @foo to ptr addrspace(4))] }, align 8 + +define linkonce_odr spir_func void @foo() #0 { +entry: + ret void +} + +define weak_odr dso_local spir_kernel void @kernel(ptr addrspace(1) noundef align 8 %_arg_StorageAcc) #1 { +entry: + store ptr addrspace(1) getelementptr inbounds inrange(-16, 8) (i8, ptr addrspace(1) @vtable, i64 16), ptr addrspace(1) %_arg_StorageAcc, align 8 + ret void +} + +attributes #0 = { "indirectly-callable"="set-foo" "sycl-module-id"="v.cpp" } +attributes #1 = { "sycl-module-id"="v.cpp" "calls-indirectly"="set-foo" } +