diff --git a/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h b/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h index 5f7e5ba78ed73..0da3706ad3626 100644 --- a/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h +++ b/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h @@ -82,6 +82,11 @@ struct EntryPointGroup { // Scope remains global return Res; } + + // Indicates that this group holds definitions of virtual functions - they + // are outlined into separate device images and should be removed from all + // other modules. The flag is used in ModuleDesc::cleanup + bool HasVirtualFunctionDefinitions = false; }; std::string GroupId; diff --git a/llvm/include/llvm/Support/PropertySetIO.h b/llvm/include/llvm/Support/PropertySetIO.h index bbda6c548825f..13cb687f3b08b 100644 --- a/llvm/include/llvm/Support/PropertySetIO.h +++ b/llvm/include/llvm/Support/PropertySetIO.h @@ -209,6 +209,7 @@ class PropertySetRegistry { static constexpr char SYCL_DEVICE_GLOBALS[] = "SYCL/device globals"; static constexpr char SYCL_DEVICE_REQUIREMENTS[] = "SYCL/device requirements"; static constexpr char SYCL_HOST_PIPES[] = "SYCL/host pipes"; + static constexpr char SYCL_VIRTUAL_FUNCTIONS[] = "SYCL/virtual functions"; /// Function for bulk addition of an entire property set in the given /// \p Category . diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index b156f66c2c3c1..0c00134a2effb 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -8,6 +8,9 @@ // See comments in the header. //===----------------------------------------------------------------------===// #include "llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Demangle/Demangle.h" #include "llvm/IR/PassInstrumentation.h" #include "llvm/SYCLLowerIR/CompileTimePropertiesPass.h" @@ -188,6 +191,10 @@ PropSetRegTy computeModuleProperties(const Module &M, if (GlobProps.EmitExportedSymbols) { // extract exported functions if any and save them into property set for (const auto *F : EntryPoints) { + // Virtual functions use a different mechanism of dynamic linking, they + // should not be registered here. + if (F->hasFnAttribute("indirectly-callable")) + continue; // TODO FIXME some of SYCL/ESIMD functions maybe marked with __regcall CC, // so they won't make it into the export list. Should the check be // F->getCallingConv() != CallingConv::SPIR_KERNEL? @@ -201,11 +208,19 @@ PropSetRegTy computeModuleProperties(const Module &M, if (GlobProps.EmitImportedSymbols) { // record imported functions in the property set for (const auto &F : M) { - if ( // A function that can be imported may still be defined in one split - // image. Only add import property if this is not the image where the - // function is defined. - F.isDeclaration() && module_split::canBeImportedFunction(F)) { + // A function that can be imported may still be defined in one split + // image. Only add import property if this is not the image where the + // function is defined. + if (!F.isDeclaration()) + continue; + // Even though virtual functions are considered to be imported by the + // function below, we shouldn't list them in the property because they + // use different mechanism for dynamic linking. + if (F.hasFnAttribute("indirectly-callable")) + continue; + + if (module_split::canBeImportedFunction(F)) { // StripDeadPrototypes is called during module splitting // cleanup. At this point all function decls should have uses. assert(!F.use_empty() && "Function F has no uses"); @@ -354,6 +369,50 @@ PropSetRegTy computeModuleProperties(const Module &M, PropSet.add(PropSetRegTy::SYCL_MISC_PROP, "specConstsReplacedWithDefault", 1); + { // Properties related to virtual functions + StringSet<> UsedVFSets; + bool AddedVFSetProperty = false; + for (const Function &F : M) { + if (F.isDeclaration()) + continue; + + if (F.hasFnAttribute("indirectly-callable")) { + PropSet.add(PropSetRegTy::SYCL_VIRTUAL_FUNCTIONS, + "virtual-functions-set", + F.getFnAttribute("indirectly-callable").getValueAsString()); + AddedVFSetProperty = true; + // Device code split should ensure that virtual functions that belong + // to different sets are split into separate device images and hence + // there is no need to scan other functions. + break; + } + + if (F.hasFnAttribute("calls-indirectly")) { + SmallVector Sets; + F.getFnAttribute("calls-indirectly") + .getValueAsString() + .split(Sets, ',', /* MaxSplits */ -1, /* KeepEmpty */ false); + for (auto Set : Sets) + UsedVFSets.insert(Set); + } + } + + if (!UsedVFSets.empty()) { + assert(!AddedVFSetProperty && + "device image cannot have both virtual-functions-set and " + "uses-virtual-functions-set property"); + SmallString<128> AllSets; + for (auto &It : UsedVFSets) { + if (!AllSets.empty()) + AllSets += ','; + AllSets += It.getKey(); + } + + PropSet.add(PropSetRegTy::SYCL_VIRTUAL_FUNCTIONS, + "uses-virtual-functions-set", AllSets); + } + } + return PropSet; } std::string computeModuleSymbolTable(const Module &M, diff --git a/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp b/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp index cea7afb2ffe1a..dc16e58a7d98d 100644 --- a/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp +++ b/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp @@ -136,7 +136,10 @@ bool isEntryPoint(const Function &F, bool EmitOnlyKernelsAsEntryPoints) { !isGenericBuiltin(F.getName()); } - return false; + // Even if we are emitting only kernels as entry points, virtual functions + // should still be treated as entry points, because they are going to be + // outlined into separate device images and linked in later. + return F.hasFnAttribute("indirectly-callable"); } // Represents "dependency" or "use" graph of global objects (functions and @@ -668,6 +671,22 @@ bool mustPreserveGV(const GlobalValue &GV) { // TODO: try to move all passes (cleanup, spec consts, compile time properties) // in one place and execute MPM.run() only once. void ModuleDesc::cleanup() { + // Any definitions of virtual functions should be removed and turned into + // declarations, they are supposed to be provided by a different module. + if (!EntryPoints.Props.HasVirtualFunctionDefinitions) { + for (Function &F : *M) + if (F.hasFnAttribute("indirectly-callable")) { + F.deleteBody(); + if (F.hasComdat()) + F.setComdat(nullptr); + } + } else { + // Otherwise externalize them so they are not dropped by GlobalDCE + for (Function &F : *M) + if (F.hasFnAttribute("indirectly-callable")) + F.setLinkage(GlobalValue::LinkageTypes::ExternalLinkage); + } + ModuleAnalysisManager MAM; MAM.registerPass([&] { return PassInstrumentationAnalysis(); }); ModulePassManager MPM; @@ -1057,6 +1076,17 @@ getDeviceCodeSplitter(ModuleDesc &&MD, IRSplitMode Mode, bool IROutputOnly, Categorizer.registerSimpleStringAttributeRule( sycl::utils::ATTR_SYCL_MODULE_ID); + // This attribute marks virtual functions and effectively dictates how they + // should be groupped together. By design we won't split those groups of + // virtual functions further even if functions from the same group use + // different optional features and therefore this rule is put here. + // Strictly speaking, we don't even care about module-id splitting for + // those, but to avoid that we need to refactor the whole categorizer. + // However, this is good enough as it is for an initial version. + // TODO: for AOT use case we shouldn't be outlining those and instead should + // only select those functions which are compatible with the target device + Categorizer.registerSimpleStringAttributeRule("indirectly-callable"); + // Optional features // Note: Add more rules at the end of the list to avoid chaning orders of // output files in existing tests. @@ -1096,8 +1126,19 @@ getDeviceCodeSplitter(ModuleDesc &&MD, IRSplitMode Mode, bool IROutputOnly, Groups.reserve(EntryPointsMap.size()); // Start with properties of a source module EntryPointGroup::Properties MDProps = MD.getEntryPointGroup().Props; - for (auto &[Key, EntryPoints] : EntryPointsMap) - Groups.emplace_back(Key, std::move(EntryPoints), MDProps); + for (auto &[Key, EntryPoints] : EntryPointsMap) { + bool HasVirtualFunctions = false; + for (auto *F : EntryPoints) { + if (F->hasFnAttribute("indirectly-callable")) { + HasVirtualFunctions = true; + break; + } + } + + auto PropsCopy = MDProps; + PropsCopy.HasVirtualFunctionDefinitions = HasVirtualFunctions; + Groups.emplace_back(Key, std::move(EntryPoints), PropsCopy); + } } bool DoSplit = (Mode != SPLIT_NONE && diff --git a/llvm/lib/SYCLLowerIR/SYCLVirtualFunctionsAnalysis.cpp b/llvm/lib/SYCLLowerIR/SYCLVirtualFunctionsAnalysis.cpp index 931609a86bdff..d3c82a10529b9 100644 --- a/llvm/lib/SYCLLowerIR/SYCLVirtualFunctionsAnalysis.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLVirtualFunctionsAnalysis.cpp @@ -12,6 +12,11 @@ // - if a kernel submitted without the calls_indirectly property performs // virtual function calls, a diagnostic should be emitted. // +// Additionally, the pass sets "calls-indirectly" attribute for kernels which +// create, but don't call virtual functions. This attribute is needed to emit +// the right device image properties later which will be crucial to ensure +// proper runtime linking. +// //===----------------------------------------------------------------------===// #include "llvm/SYCLLowerIR/SYCLVirtualFunctionsAnalysis.h" @@ -19,10 +24,12 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" #include "llvm/Pass.h" using namespace llvm; @@ -30,6 +37,7 @@ using namespace llvm; namespace { using CallGraphTy = DenseMap>; +using FuncToFuncMapTy = DenseMap>; void emitDiagnostic(const SmallVector &Stack) { diagnoseSYCLIllegalVirtualFunctionCall(Stack); @@ -59,32 +67,120 @@ void checkKernel(const Function *F, const CallGraphTy &CG) { checkKernelImpl(F, CG, CallStack); } +void computeFunctionToKernelsMappingImpl(Function *Kernel, const Function *F, + const CallGraphTy &CG, + FuncToFuncMapTy &Mapping) { + CallGraphTy::const_iterator It = CG.find(F); + // It could be that the function itself is a leaf and doesn't call anything + if (It == CG.end()) + return; + + Mapping[F].insert(Kernel); + + const SmallPtrSet &Callees = It->getSecond(); + for (const Value *V : Callees) + if (auto *Callee = dyn_cast(V)) + computeFunctionToKernelsMappingImpl(Kernel, Callee, CG, Mapping); +} + +void computeFunctionToKernelsMapping(Function *Kernel, const CallGraphTy &CG, + FuncToFuncMapTy &Mapping) { + // For simplicity we also consider a kernel to be using itself + Mapping[Kernel].insert(Kernel); + + CallGraphTy::const_iterator It = CG.find(Kernel); + // It could be that the kernel doesn't call anything + if (It == CG.end()) + return; + + const SmallPtrSet &Callees = It->getSecond(); + for (const Value *V : Callees) { + auto *Callee = dyn_cast(V); + if (!Callee) + continue; + Mapping[Callee].insert(Kernel); + computeFunctionToKernelsMappingImpl(Kernel, Callee, CG, Mapping); + } +} + +void collectVTablesThatUseFunction( + const Value *V, SmallVectorImpl &VTables) { + for (const auto *U : V->users()) { + // GlobalVariable is also a constant + if (const auto *GV = dyn_cast(U)) { + // The core SYCL specification prohibits ODR use of non-const global + // variables in SYCL kernels. There are extensions like device_global that + // lift some of the limitations, but we still assume that there are no + // globals that reference function pointers other than virtual tables. + VTables.push_back(GV); + } else if (isa(U)) { + // Constant expression like + // ptr addrspace(4) addrspacecast (ptr @foo to ptr addrspace(4)) + // Could be a part of vtable initializer + collectVTablesThatUseFunction(U, VTables); + } else if (isa(U)) { + // [3 x ptr addrspace(4)] [ + // ptr addrspace(4) addrspacecast (ptr @foo to ptr addrspace(4)), ...] + collectVTablesThatUseFunction(U, VTables); + } else { + llvm_unreachable("Unhandled type of user"); + } + } +} + +// The same ConstantExpr could be used by two functions +void collectEnclosingFunctions(const Value *V, + SmallPtrSetImpl &Functions) { + if (isa(V)) { + for (const auto *U : V->users()) + collectEnclosingFunctions(U, Functions); + return; + } + + if (auto *I = dyn_cast(V)) { + Functions.insert(I->getFunction()); + return; + } + + llvm_unreachable("Unhandled type of value"); +} + } // namespace PreservedAnalyses SYCLVirtualFunctionsAnalysisPass::run(Module &M, ModuleAnalysisManager &MAM) { CallGraphTy CallGraph; - SmallVector Kernels; + SmallVector AllKernels; + SmallVector KernelsToCheck; + SmallVector IndirectlyCallableFuncs; SetVector WorkList; // Identify list of kernels that we need to check - for (const Function &F : M) { + for (Function &F : M) { + if (F.hasFnAttribute("indirectly-callable")) + IndirectlyCallableFuncs.push_back(&F); + // We only traverse call graphs of SYCL kernels if (F.getCallingConv() != CallingConv::SPIR_KERNEL) continue; - // If a kernel is annotated to use virtual functions, we skip it - if (F.hasFnAttribute("calls-indirectly")) - continue; - - // Otherwise, we build call graph for a kernel to ensure that it does not - // perform virtual function calls since that is prohibited by the core - // SYCL 2020 specification + // We record all the kernels here, because we may end up propagating + // calls-indirectly to them if they use vtables. + AllKernels.push_back(&F); WorkList.insert(&F); - Kernels.push_back(&F); + + // However, we only need to check kernel's call graph if it is not annotated + // to use virtual functions to ensure that it indeed doesn't use them. + if (!F.hasFnAttribute("calls-indirectly")) + KernelsToCheck.push_back(&F); } - // Build call graph for each of them + // If there are no virtual functions to call in a module, then we can skip + // the whole analysis + if (IndirectlyCallableFuncs.empty()) + return PreservedAnalyses::all(); + + // Build call graph for each kernel for (size_t I = 0; I < WorkList.size(); ++I) { const Function *F = WorkList[I]; for (const Instruction &I : instructions(F)) { @@ -109,8 +205,50 @@ SYCLVirtualFunctionsAnalysisPass::run(Module &M, ModuleAnalysisManager &MAM) { } // Emit a diagnostic if a kernel performs virtual function calls - for (const auto *K : Kernels) { + for (auto *K : KernelsToCheck) checkKernel(K, CallGraph); + + // Cache to know which function is used by which kernels + FuncToFuncMapTy FunctionToKernels; + for (auto *K : AllKernels) + computeFunctionToKernelsMapping(K, CallGraph, FunctionToKernels); + + for (const auto *F : IndirectlyCallableFuncs) { + StringRef Set = F->getFnAttribute("indirectly-callable").getValueAsString(); + + SmallVector VTables; + collectVTablesThatUseFunction(F, VTables); + SmallPtrSet KernelsToUpdate; + + for (const auto *GV : VTables) { + // Find functions that use those vtables + SmallPtrSet FunctionsThatUseVTables; + for (const auto *UU : GV->users()) + collectEnclosingFunctions(UU, FunctionsThatUseVTables); + // And collect kernels that use those functions + for (const Function *FF : FunctionsThatUseVTables) + for (auto *K : FunctionToKernels[FF]) + KernelsToUpdate.insert(K); + } + + // Update or attach "calls-indirectly" attribute to those kernels + // indicating that they use virtual functions set 'Set' + for (Function *K : KernelsToUpdate) { + if (!K->hasFnAttribute("calls-indirectly")) + K->addFnAttr("calls-indirectly", Set); + else { + StringRef UsedSets = + K->getFnAttribute("calls-indirectly").getValueAsString(); + if (UsedSets.contains(Set)) + continue; + + K->removeFnAttr("calls-indirectly"); + SmallString<64> NewAttr = UsedSets; + NewAttr += ","; + NewAttr += Set; + K->addFnAttr("calls-indirectly", NewAttr.str()); + } + } } return PreservedAnalyses::all(); diff --git a/llvm/lib/Support/PropertySetIO.cpp b/llvm/lib/Support/PropertySetIO.cpp index f14f8cd5b16cb..2fe7cac00fb14 100644 --- a/llvm/lib/Support/PropertySetIO.cpp +++ b/llvm/lib/Support/PropertySetIO.cpp @@ -206,6 +206,7 @@ constexpr char PropertySetRegistry::SYCL_IMPORTED_SYMBOLS[]; constexpr char PropertySetRegistry::SYCL_DEVICE_GLOBALS[]; constexpr char PropertySetRegistry::SYCL_DEVICE_REQUIREMENTS[]; constexpr char PropertySetRegistry::SYCL_HOST_PIPES[]; +constexpr char PropertySetRegistry::SYCL_VIRTUAL_FUNCTIONS[]; } // namespace util } // namespace llvm diff --git a/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-1.ll b/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-1.ll new file mode 100644 index 0000000000000..7d356fecebfbf --- /dev/null +++ b/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-1.ll @@ -0,0 +1,35 @@ +; RUN: opt -S -passes=sycl-virtual-functions-analysis %s | FileCheck %s +; +; This is a very basic test intended to check that if a kernel uses a vtable, +; then it should be annotated with an attribute "calls-indirectly" that has +; the same value as a function referenced by that vtable. + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +@vtable = linkonce_odr dso_local unnamed_addr addrspace(1) constant { [3 x ptr addrspace(4)] } { [3 x ptr addrspace(4)] [ptr addrspace(4) null, ptr addrspace(4) null, ptr addrspace(4) addrspacecast (ptr @foo to ptr addrspace(4))] }, align 8 + +define linkonce_odr spir_func void @foo() #0 { +entry: + ret void +} + +define weak_odr dso_local spir_kernel void @kernel(ptr addrspace(1) noundef align 8 %_arg_StorageAcc) #1 { +entry: + store ptr addrspace(1) getelementptr inbounds inrange(-16, 8) (i8, ptr addrspace(1) @vtable, i64 16), ptr addrspace(1) %_arg_StorageAcc, align 8 + ret void +} + +define weak_odr dso_local spir_kernel void @kernel_already_uses(ptr addrspace(1) noundef align 8 %_arg_StorageAcc) #2 { +entry: + store ptr addrspace(1) getelementptr inbounds inrange(-16, 8) (i8, ptr addrspace(1) @vtable, i64 16), ptr addrspace(1) %_arg_StorageAcc, align 8 + ret void +} + +; CHECK: @kernel({{.*}} #[[#KERNEL_ATTRS:]] +; CHECK: @kernel_already_uses({{.*}} #[[#KERNEL_ATTRS]] +; CHECK: attributes #[[#KERNEL_ATTRS]] = {{.*}}"calls-indirectly"="set-foo" + +attributes #0 = { "indirectly-callable"="set-foo" "sycl-module-id"="v.cpp" } +attributes #1 = { "sycl-module-id"="v.cpp" } +attributes #2 = { "calls-indirectly"="set-foo" "sycl-module-id"="v.cpp" } diff --git a/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-2.ll b/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-2.ll new file mode 100644 index 0000000000000..3bf68812512f8 --- /dev/null +++ b/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-2.ll @@ -0,0 +1,47 @@ +; RUN: opt -S -passes=sycl-virtual-functions-analysis %s | FileCheck %s +; +; This is a more complicated version of a test intended to check that if a +; kernel uses a vtable, then it should be annotated with an attribute +; "calls-indirectly" that has the same value as a function referenced by that +; vtable. +; Main thing which is tested here is case when a kernel ends up using more than +; one set of virtual functions. + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +@vtable_foo = linkonce_odr dso_local unnamed_addr addrspace(1) constant { [3 x ptr addrspace(4)] } { [3 x ptr addrspace(4)] [ptr addrspace(4) null, ptr addrspace(4) null, ptr addrspace(4) addrspacecast (ptr @foo to ptr addrspace(4))] }, align 8 +@vtable_bar = linkonce_odr dso_local unnamed_addr addrspace(1) constant { [3 x ptr addrspace(4)] } { [3 x ptr addrspace(4)] [ptr addrspace(4) null, ptr addrspace(4) null, ptr addrspace(4) addrspacecast (ptr @bar to ptr addrspace(4))] }, align 8 + +define linkonce_odr spir_func void @foo() #0 { +entry: + ret void +} + +define linkonce_odr spir_func void @bar() #1 { +entry: + ret void +} + +define weak_odr dso_local spir_kernel void @kernel(ptr addrspace(1) noundef align 8 %_arg_StorageAcc) #2 { +entry: + store ptr addrspace(1) getelementptr inbounds inrange(-16, 8) (i8, ptr addrspace(1) @vtable_foo, i64 16), ptr addrspace(1) %_arg_StorageAcc, align 8 + store ptr addrspace(1) getelementptr inbounds inrange(-16, 8) (i8, ptr addrspace(1) @vtable_bar, i64 16), ptr addrspace(1) %_arg_StorageAcc, align 8 + ret void +} + +define weak_odr dso_local spir_kernel void @kernel_already_uses(ptr addrspace(1) noundef align 8 %_arg_StorageAcc) #3 { +entry: + store ptr addrspace(1) getelementptr inbounds inrange(-16, 8) (i8, ptr addrspace(1) @vtable_bar, i64 16), ptr addrspace(1) %_arg_StorageAcc, align 8 + ret void +} + +; CHECK: @kernel({{.*}} #[[#KERNEL_ATTRS:]] +; CHECK: @kernel_already_uses({{.*}} #[[#KERNEL_ATTRS]] +; +; CHECK: attributes #[[#KERNEL_ATTRS]] = {{.*}}"calls-indirectly"="set-foo,set-bar" + +attributes #0 = { "indirectly-callable"="set-foo" "sycl-module-id"="v.cpp" } +attributes #1 = { "indirectly-callable"="set-bar" "sycl-module-id"="v.cpp" } +attributes #2 = { "sycl-module-id"="v.cpp" } +attributes #3 = { "calls-indirectly"="set-foo" "sycl-module-id"="v.cpp" } diff --git a/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-3.ll b/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-3.ll new file mode 100644 index 0000000000000..e0299fa672f28 --- /dev/null +++ b/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-3.ll @@ -0,0 +1,37 @@ +; RUN: opt -S -passes=sycl-virtual-functions-analysis %s | FileCheck %s +; +; This is a more complicated version of a test intended to check that if a +; kernel uses a vtable, then it should be annotated with an attribute +; "calls-indirectly" that has the same value as a function referenced by that +; vtable. +; This test case is focused on more complex vtables where a single vtable can +; bring functions from different sets. + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +@vtable = linkonce_odr dso_local unnamed_addr addrspace(1) constant { [4 x ptr addrspace(4)] } { [4 x ptr addrspace(4)] [ptr addrspace(4) null, ptr addrspace(4) null, ptr addrspace(4) addrspacecast (ptr @foo to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr @bar to ptr addrspace(4))] }, align 8 + +define linkonce_odr spir_func void @foo() #0 { +entry: + ret void +} + +define linkonce_odr spir_func void @bar() #1 { +entry: + ret void +} + +define weak_odr dso_local spir_kernel void @kernel(ptr addrspace(1) noundef align 8 %_arg_StorageAcc) #2 { +entry: + store ptr addrspace(1) getelementptr inbounds inrange(-16, 8) (i8, ptr addrspace(1) @vtable, i64 16), ptr addrspace(1) %_arg_StorageAcc, align 8 + ret void +} + +; CHECK: @kernel{{.*}} #[[#KERNEL_ATTRS:]] +; +; CHECK: attributes #[[#KERNEL_ATTRS]] = {{.*}}"calls-indirectly"="set-foo,set-bar" + +attributes #0 = { "indirectly-callable"="set-foo" "sycl-module-id"="v.cpp" } +attributes #1 = { "indirectly-callable"="set-bar" "sycl-module-id"="v.cpp" } +attributes #2 = { "sycl-module-id"="v.cpp" } diff --git a/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-4.ll b/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-4.ll new file mode 100644 index 0000000000000..21da86cb120de --- /dev/null +++ b/llvm/test/SYCLLowerIR/SYCLVirtualFunctionsAnalysis/calls-indirectly-propagation-4.ll @@ -0,0 +1,45 @@ +; RUN: opt -S -passes=sycl-virtual-functions-analysis %s | FileCheck %s +; +; This is a more complicated version of a test intended to check that if a +; kernel uses a vtable, then it should be annotated with an attribute +; "calls-indirectly" that has the same value as a function referenced by that +; vtable. +; This test case is focused on more complex call graph of a kernel, where vtable +; operation is not performed directly by a kernel, but instead is done by some +; other helper function. + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +@vtable = linkonce_odr dso_local unnamed_addr addrspace(1) constant { [4 x ptr addrspace(4)] } { [4 x ptr addrspace(4)] [ptr addrspace(4) null, ptr addrspace(4) null, ptr addrspace(4) addrspacecast (ptr @foo to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr @bar to ptr addrspace(4))] }, align 8 + +define linkonce_odr spir_func void @foo() #0 { +entry: + ret void +} + +define linkonce_odr spir_func void @bar() #1 { +entry: + ret void +} + +define internal spir_func void @helper(ptr addrspace(1) noundef align 8 %arg) { +entry: + store ptr addrspace(1) getelementptr inbounds inrange(-16, 8) (i8, ptr addrspace(1) @vtable, i64 16), ptr addrspace(1) %arg, align 8 + ret void +} + +define weak_odr dso_local spir_kernel void @kernel(ptr addrspace(1) noundef align 8 %_arg_StorageAcc) #2 { +entry: + call void @helper(ptr addrspace(1) %_arg_StorageAcc) + ret void +} + +; CHECK: @kernel{{.*}} #[[#KERNEL_ATTRS:]] +; +; CHECK: attributes #[[#KERNEL_ATTRS]] = {{.*}}"calls-indirectly"="set-foo,set-bar" + +attributes #0 = { "indirectly-callable"="set-foo" "sycl-module-id"="v.cpp" } +attributes #1 = { "indirectly-callable"="set-bar" "sycl-module-id"="v.cpp" } +attributes #2 = { "sycl-module-id"="v.cpp" } + diff --git a/llvm/test/tools/sycl-post-link/device-code-split/indirectly-callable-auto-split.ll b/llvm/test/tools/sycl-post-link/device-code-split/indirectly-callable-auto-split.ll new file mode 100644 index 0000000000000..c27f23c2010c6 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/device-code-split/indirectly-callable-auto-split.ll @@ -0,0 +1,60 @@ +; RUN: sycl-post-link -split=auto -S < %s -o %t.table +; +; This test checks that functions marked with "indirectly-callable" LLVM IR +; attribute are outlined into separate device image(s) in accordance with the +; attribute value. +; +; Current device code split implementation may split those groups further if +; they use different optional kernel features for example, but we don't care +; about that subsequent split and don't test it. +; +; RUN: FileCheck %s --input-file=%t_0.ll --check-prefix CHECK-IR0 \ +; RUN: --implicit-check-not kernel --implicit-check-not foo +; RUN: FileCheck %s --input-file=%t_1.ll --check-prefix CHECK-IR1 \ +; RUN: --implicit-check-not kernel --implicit-check-not bar \ +; RUN: --implicit-check-not baz +; RUN: FileCheck %s --input-file=%t_2.ll --check-prefix CHECK-IR2 \ +; RUN: --implicit-check-not foo --implicit-check-not bar \ +; RUN: --implicit-check-not baz +; +; RUN: sycl-module-split -split=auto -S < %s -o %t2 +; RUN: FileCheck %s --input-file=%t2_0.ll --check-prefix CHECK-IR0 \ +; RUN: --implicit-check-not kernel --implicit-check-not foo +; RUN: FileCheck %s --input-file=%t2_1.ll --check-prefix CHECK-IR1 \ +; RUN: --implicit-check-not kernel --implicit-check-not bar \ +; RUN: --implicit-check-not baz +; RUN: FileCheck %s --input-file=%t2_2.ll --check-prefix CHECK-IR2 \ +; RUN: --implicit-check-not foo --implicit-check-not bar \ +; RUN: --implicit-check-not baz +; +; CHECK-IR0-DAG: define spir_func void @bar +; CHECK-IR0-DAG: define spir_func void @baz +; CHECK-IR1: define spir_func void @foo +; CHECK-IR2: define weak_odr dso_local spir_kernel void @kernel + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +define spir_func void @foo() #0 { +entry: + ret void +} + +define spir_func void @bar() #1 { +entry: + ret void +} + +define spir_func void @baz() #1 { +entry: + ret void +} + +define weak_odr dso_local spir_kernel void @kernel() #2 { +entry: + ret void +} + +attributes #0 = { "indirectly-callable"="set-1" "sycl-module-id"="v.cpp" } +attributes #1 = { "indirectly-callable"="set-2" "sycl-module-id"="v.cpp" } +attributes #2 = { "sycl-module-id"="v.cpp" } diff --git a/llvm/test/tools/sycl-post-link/device-code-split/indirectly-callable-per-kernel-split.ll b/llvm/test/tools/sycl-post-link/device-code-split/indirectly-callable-per-kernel-split.ll new file mode 100644 index 0000000000000..fd0a7e9cafb6f --- /dev/null +++ b/llvm/test/tools/sycl-post-link/device-code-split/indirectly-callable-per-kernel-split.ll @@ -0,0 +1,67 @@ +; RUN: sycl-post-link -split=kernel -S < %s -o %t.table +; +; This test checks that functions marked with "indirectly-callable" LLVM IR +; attribute are outlined into separate device image(s) in accordance with the +; attribute value. +; +; This version of the test is focused on per-kernel device code split +; +; RUN: FileCheck %s --input-file=%t_0.ll --check-prefix CHECK-IR0 \ +; RUN: --implicit-check-not foo --implicit-check-not bar \ +; RUN: --implicit-check-not baz +; RUN: FileCheck %s --input-file=%t_1.ll --check-prefix CHECK-IR1 \ +; RUN: --implicit-check-not kernel --implicit-check-not bar \ +; RUN: --implicit-check-not baz +; RUN: FileCheck %s --input-file=%t_2.ll --check-prefix CHECK-IR2 \ +; RUN: --implicit-check-not kernel --implicit-check-not foo \ +; RUN: --implicit-check-not bar +; RUN: FileCheck %s --input-file=%t_3.ll --check-prefix CHECK-IR3 \ +; RUN: --implicit-check-not kernel --implicit-check-not foo \ +; RUN: --implicit-check-not baz +; +; RUN: sycl-module-split -split=kernel -S < %s -o %t2 +; RUN: FileCheck %s --input-file=%t2_0.ll --check-prefix CHECK-IR0 \ +; RUN: --implicit-check-not foo --implicit-check-not bar \ +; RUN: --implicit-check-not baz +; RUN: FileCheck %s --input-file=%t2_1.ll --check-prefix CHECK-IR1 \ +; RUN: --implicit-check-not kernel --implicit-check-not bar \ +; RUN: --implicit-check-not baz +; RUN: FileCheck %s --input-file=%t2_2.ll --check-prefix CHECK-IR2 \ +; RUN: --implicit-check-not kernel --implicit-check-not foo \ +; RUN: --implicit-check-not bar +; RUN: FileCheck %s --input-file=%t2_3.ll --check-prefix CHECK-IR3 \ +; RUN: --implicit-check-not kernel --implicit-check-not foo \ +; RUN: --implicit-check-not baz +; +; CHECK-IR0: define weak_odr dso_local spir_kernel void @kernel +; CHECK-IR1: define spir_func void @foo +; CHECK-IR2: define spir_func void @baz +; CHECK-IR3: define spir_func void @bar + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +define spir_func void @foo() #0 { +entry: + ret void +} + +define spir_func void @bar() #1 { +entry: + ret void +} + +define spir_func void @baz() #1 { +entry: + ret void +} + +define weak_odr dso_local spir_kernel void @kernel() #2 { +entry: + ret void +} + +attributes #0 = { "indirectly-callable"="set-1" "sycl-module-id"="v.cpp" } +attributes #1 = { "indirectly-callable"="set-2" "sycl-module-id"="v.cpp" } +attributes #2 = { "sycl-module-id"="v.cpp" } + diff --git a/llvm/test/tools/sycl-post-link/virtual-functions/module-cleanup-comdat.ll b/llvm/test/tools/sycl-post-link/virtual-functions/module-cleanup-comdat.ll new file mode 100644 index 0000000000000..42ad601819ab7 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/virtual-functions/module-cleanup-comdat.ll @@ -0,0 +1,35 @@ +; RUN: sycl-post-link -split=auto -S < %s -o %t.table +; +; Virtual functions cleanup drops their bodies from some of device images +; turning them into declarations, but declarations can't have "comdat" +; attached to them, so this test ensures that we can handle "comdat" without +; crashes. + +; RUN: FileCheck %s --input-file=%t_0.ll --check-prefix=CHECK-IR0 +; RUN: FileCheck %s --input-file=%t_1.ll --check-prefix=CHECK-IR1 + +; CHECK-IR0: define spir_func void @foo +; CHECK-IR1-DAG: declare spir_func void @foo +; CHECK-IR1-DAG: define weak_odr dso_local spir_kernel void @kernel + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +$foo = comdat any + +@vtable = linkonce_odr dso_local unnamed_addr addrspace(1) constant { [3 x ptr addrspace(4)] } { [3 x ptr addrspace(4)] [ptr addrspace(4) null, ptr addrspace(4) null, ptr addrspace(4) addrspacecast (ptr @foo to ptr addrspace(4))] }, align 8 + +define linkonce_odr spir_func void @foo() #0 comdat { +entry: + ret void +} + +define weak_odr dso_local spir_kernel void @kernel(ptr addrspace(1) noundef align 8 %_arg_StorageAcc) #1 { +entry: + store ptr addrspace(1) getelementptr inbounds inrange(-16, 8) (i8, ptr addrspace(1) @vtable, i64 16), ptr addrspace(1) %_arg_StorageAcc, align 8 + ret void +} + +attributes #0 = { "indirectly-callable"="set-foo" "sycl-module-id"="v.cpp" } +attributes #1 = { "sycl-module-id"="v.cpp" } + diff --git a/llvm/test/tools/sycl-post-link/virtual-functions/module-cleanup.ll b/llvm/test/tools/sycl-post-link/virtual-functions/module-cleanup.ll new file mode 100644 index 0000000000000..54380d92cf8df --- /dev/null +++ b/llvm/test/tools/sycl-post-link/virtual-functions/module-cleanup.ll @@ -0,0 +1,44 @@ +; RUN: sycl-post-link -split=auto -properties -emit-exported-symbols \ +; RUN: -emit-imported-symbols -emit-only-kernels-as-entry-points \ +; RUN: -support-dynamic-linking \ +; RUN: -S < %s -o %t.table +; +; Virtual functions require some special handling during module cleanup: +; - they are outlined into separate device images and must not be removed from +; there by any DCE or internalization passes (they are externalized), even if +; only kernels are treated as entry points +; - device images that use them should keep declarations of virtual functions, +; but not their bodies +; - no virtual functions should be listed as exported/imported functions + +; RUN: FileCheck %s --input-file=%t_0.ll --check-prefix=CHECK-IR0 +; RUN: FileCheck %s --input-file=%t_1.ll --check-prefix=CHECK-IR1 +; RUN: FileCheck %s --input-file=%t_0.prop \ +; RUN: --implicit-check-not "SYCL/exported functions" \ +; RUN: --implicit-check-not "SYCL/imported functions" +; RUN: FileCheck %s --input-file=%t_1.prop \ +; RUN: --implicit-check-not "SYCL/exported functions" \ +; RUN: --implicit-check-not "SYCL/imported functions" + +; CHECK-IR0: define spir_func void @foo +; CHECK-IR1-DAG: declare spir_func void @foo +; CHECK-IR1-DAG: define weak_odr dso_local spir_kernel void @kernel + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +@vtable = linkonce_odr dso_local unnamed_addr addrspace(1) constant { [3 x ptr addrspace(4)] } { [3 x ptr addrspace(4)] [ptr addrspace(4) null, ptr addrspace(4) null, ptr addrspace(4) addrspacecast (ptr @foo to ptr addrspace(4))] }, align 8 + +define linkonce_odr spir_func void @foo() #0 { +entry: + ret void +} + +define weak_odr dso_local spir_kernel void @kernel(ptr addrspace(1) noundef align 8 %_arg_StorageAcc) #1 { +entry: + store ptr addrspace(1) getelementptr inbounds inrange(-16, 8) (i8, ptr addrspace(1) @vtable, i64 16), ptr addrspace(1) %_arg_StorageAcc, align 8 + ret void +} + +attributes #0 = { "indirectly-callable"="set-foo" "sycl-module-id"="v.cpp" } +attributes #1 = { "sycl-module-id"="v.cpp" } diff --git a/llvm/test/tools/sycl-post-link/virtual-functions/properties.ll b/llvm/test/tools/sycl-post-link/virtual-functions/properties.ll new file mode 100644 index 0000000000000..adebb5af611c5 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/virtual-functions/properties.ll @@ -0,0 +1,42 @@ +; RUN: sycl-post-link -split=auto -properties -S < %s -o %t.table +; +; Device images with virtual functions in them should have the +; "virtual-functions-set" property under corresponding property set. +; Device images that use virtual functions should have the +; "uses-virtual-functions-set" property under corresponding property set. +; There can't be a device image where both properties are present, they are +; mutually exclusive + +; RUN: FileCheck %s --input-file=%t_0.ll --check-prefix=CHECK-IR0 +; RUN: FileCheck %s --input-file=%t_1.ll --check-prefix=CHECK-IR1 +; RUN: FileCheck %s --input-file=%t_0.prop --check-prefix=CHECK-PROPS0 +; RUN: FileCheck %s --input-file=%t_1.prop --check-prefix=CHECK-PROPS1 + +; CHECK-IR0: define spir_func void @foo +; CHECK-IR1: define weak_odr dso_local spir_kernel void @kernel +; CHECK-PROPS0: [SYCL/virtual functions] +; CHECK-PROPS0-NEXT: virtual-functions-set=2|4AAAAAAAAAwclRXLm92b +; CHECK-PROPS0-NOT: uses-virtual-functions-set +; CHECK-PROPS1: [SYCL/virtual functions] +; CHECK-PROPS1-NEXT: uses-virtual-functions-set=2|4AAAAAAAAAwclRXLm92b +; CHECK-PROPS1-NOT: virtual-functions-set + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +@vtable = linkonce_odr dso_local unnamed_addr addrspace(1) constant { [3 x ptr addrspace(4)] } { [3 x ptr addrspace(4)] [ptr addrspace(4) null, ptr addrspace(4) null, ptr addrspace(4) addrspacecast (ptr @foo to ptr addrspace(4))] }, align 8 + +define linkonce_odr spir_func void @foo() #0 { +entry: + ret void +} + +define weak_odr dso_local spir_kernel void @kernel(ptr addrspace(1) noundef align 8 %_arg_StorageAcc) #1 { +entry: + store ptr addrspace(1) getelementptr inbounds inrange(-16, 8) (i8, ptr addrspace(1) @vtable, i64 16), ptr addrspace(1) %_arg_StorageAcc, align 8 + ret void +} + +attributes #0 = { "indirectly-callable"="set-foo" "sycl-module-id"="v.cpp" } +attributes #1 = { "sycl-module-id"="v.cpp" "calls-indirectly"="set-foo" } +