From 2a690c75924de5feadb4a582d76822b4d4d1d2cf Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 30 Aug 2024 00:09:09 -0700 Subject: [PATCH] [CGData] Global Merge Functions --- llvm/include/llvm/CGData/CodeGenData.h | 11 + llvm/include/llvm/InitializePasses.h | 1 + llvm/include/llvm/LinkAllPasses.h | 1 + llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 + llvm/include/llvm/Transforms/IPO.h | 2 + .../Transforms/IPO/GlobalMergeFunctions.h | 73 ++ llvm/lib/CodeGen/TargetPassConfig.cpp | 3 + llvm/lib/LTO/LTO.cpp | 1 + llvm/lib/Transforms/IPO/CMakeLists.txt | 2 + .../Transforms/IPO/GlobalMergeFunctions.cpp | 669 ++++++++++++++++++ .../ThinLTO/AArch64/cgdata-merge-local.ll | 62 ++ .../test/ThinLTO/AArch64/cgdata-merge-read.ll | 82 +++ .../AArch64/cgdata-merge-two-rounds.ll | 68 ++ .../ThinLTO/AArch64/cgdata-merge-write.ll | 97 +++ llvm/tools/llvm-lto2/CMakeLists.txt | 1 + llvm/tools/llvm-lto2/llvm-lto2.cpp | 6 + 16 files changed, 1080 insertions(+) create mode 100644 llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h create mode 100644 llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h index 5d7c74725ccef1..da0e412f2a0e03 100644 --- a/llvm/include/llvm/CGData/CodeGenData.h +++ b/llvm/include/llvm/CGData/CodeGenData.h @@ -145,6 +145,9 @@ class CodeGenData { const OutlinedHashTree *getOutlinedHashTree() { return PublishedHashTree.get(); } + const StableFunctionMap *getStableFunctionMap() { + return PublishedStableFunctionMap.get(); + } /// Returns true if we should write codegen data. bool emitCGData() { return EmitCGData; } @@ -169,10 +172,18 @@ inline bool hasOutlinedHashTree() { return CodeGenData::getInstance().hasOutlinedHashTree(); } +inline bool hasStableFunctionMap() { + return CodeGenData::getInstance().hasStableFunctionMap(); +} + inline const OutlinedHashTree *getOutlinedHashTree() { return CodeGenData::getInstance().getOutlinedHashTree(); } +inline const StableFunctionMap *getStableFunctionMap() { + return CodeGenData::getInstance().getStableFunctionMap(); +} + inline bool emitCGData() { return CodeGenData::getInstance().emitCGData(); } inline void diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 4352099d6dbb99..9aa36d5bb7f801 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -123,6 +123,7 @@ void initializeGCEmptyBasicBlocksPass(PassRegistry &); void initializeGCMachineCodeAnalysisPass(PassRegistry &); void initializeGCModuleInfoPass(PassRegistry &); void initializeGVNLegacyPassPass(PassRegistry &); +void initializeGlobalMergeFuncPass(PassRegistry &); void initializeGlobalMergePass(PassRegistry &); void initializeGlobalsAAWrapperPassPass(PassRegistry &); void initializeHardwareLoopsLegacyPass(PassRegistry &); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 92b59a66567c95..ea3609a2b4bc71 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -79,6 +79,7 @@ struct ForcePassLinking { (void)llvm::createDomOnlyViewerWrapperPassPass(); (void)llvm::createDomViewerWrapperPassPass(); (void)llvm::createAlwaysInlinerLegacyPass(); + (void)llvm::createGlobalMergeFuncPass(); (void)llvm::createGlobalsAAWrapperPass(); (void)llvm::createInstSimplifyLegacyPass(); (void)llvm::createInstructionCombiningPass(); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 13bc4700d87029..96b5b815132bc0 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -74,6 +74,7 @@ #include "llvm/Target/CGPassBuilderOption.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/CFGuard.h" +#include "llvm/Transforms/IPO/GlobalMergeFunctions.h" #include "llvm/Transforms/Scalar/ConstantHoisting.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Scalar/LoopStrengthReduce.h" diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h index ee0e35aa618325..86a8654f56997c 100644 --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -55,6 +55,8 @@ enum class PassSummaryAction { Export, ///< Export information to summary. }; +Pass *createGlobalMergeFuncPass(); + } // End llvm namespace #endif diff --git a/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h b/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h new file mode 100644 index 00000000000000..c6989c95751acc --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h @@ -0,0 +1,73 @@ +//===------ GlobalMergeFunctions.h - Global merge functions -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// This file defines global merge functions pass and related data structure. +/// +//===----------------------------------------------------------------------===// + +#ifndef PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H +#define PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StableHashing.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CGData/StableFunctionMap.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include +#include + +enum class HashFunctionMode { + None, + BuildingHashFuncion, + UsingHashFunction, +}; + +namespace llvm { + +// A vector of locations (the pair of (instruction, operand) indices) reachable +// from a parameter. +using ParamLocs = SmallVector; +// A vector of parameters +using ParamLocsVecTy = SmallVector; +// A map of stable hash to a vector of stable functions + +/// GlobalMergeFunc finds functions which only differ by constants in +/// certain instructions, e.g. resulting from specialized functions of layout +/// compatible types. +/// Unlike PikaMergeFunc that directly compares IRs, this uses stable function +/// hash to find the merge candidate. Similar to the global outliner, we can run +/// codegen twice to collect function merge candidate in the first round, and +/// merge functions globally in the second round. +class GlobalMergeFunc : public ModulePass { + HashFunctionMode MergerMode = HashFunctionMode::None; + + std::unique_ptr LocalFunctionMap; + +public: + static char ID; + + GlobalMergeFunc(); + + void initializeMergerMode(const Module &M); + + bool runOnModule(Module &M) override; + + /// Analyze module to create stable function into LocalFunctionMap. + void analyze(Module &M); + + /// Emit LocalFunctionMap into __llvm_merge section. + void emitFunctionMap(Module &M); + + /// Merge functions in the module using the global function map. + bool merge(Module &M, const StableFunctionMap *FunctionMap); +}; + +} // end namespace llvm +#endif // PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 11a7752ef7a381..76b582fbf32c6c 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -141,6 +141,9 @@ static cl::opt EnableMachineOutliner( "Disable all outlining"), // Sentinel value for unspecified option. clEnumValN(RunOutliner::AlwaysOutline, "", ""))); +cl::opt EnableGlobalMergeFunc( + "enable-global-merge-func", cl::Hidden, + cl::desc("Enable global merge functions that are based on hash function")); // Disable the pass to fix unwind information. Whether the pass is included in // the pipeline is controlled via the target options, this option serves as // manual override. diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 90c4e2c3cd131c..08ceae96799878 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -53,6 +53,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/GlobalMergeFunctions.h" #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" #include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index 15cb57399d2460..6a36ed64149f93 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -21,6 +21,7 @@ add_llvm_component_library(LLVMipo GlobalDCE.cpp GlobalOpt.cpp GlobalSplit.cpp + GlobalMergeFunctions.cpp HotColdSplitting.cpp IPO.cpp IROutliner.cpp @@ -60,6 +61,7 @@ add_llvm_component_library(LLVMipo Analysis BitReader BitWriter + CGData Core FrontendOpenMP InstCombine diff --git a/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp b/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp new file mode 100644 index 00000000000000..5117e310b423f6 --- /dev/null +++ b/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp @@ -0,0 +1,669 @@ +//===---- GlobalMergeFunctions.cpp - Global merge functions -------*- C++ -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// TODO: This implements a function merge using function hash while tracking +// differences in Constants. This uses stable function hash to find potential +// merge candidates. The first codegen round collects stable function hashes, +// and determines the merge candidates that match the stable function hashes. +// The set of parameters pointing to different Constants are also computed +// during the stable function merge. The second codegen round uses this global +// function info to optimistically create a merged function in each module +// context to guarantee correct transformation. Similar to the global outliner, +// the linker's deduplication (ICF) folds the identical merged functions to save +// the final binary size. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/GlobalMergeFunctions.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CGData/CodeGenData.h" +#include "llvm/CGData/StableFunctionMap.h" +#include "llvm/CodeGen/MachineStableHash.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/StructuralHash.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +#define DEBUG_TYPE "global-merge-func" + +using namespace llvm; +using namespace llvm::support; + +extern cl::opt EnableGlobalMergeFunc; + +STATISTIC(NumMismatchedFunctionHashGlobalMergeFunction, + "Number of mismatched function hash for global merge function"); +STATISTIC(NumMismatchedInstCountGlobalMergeFunction, + "Number of mismatched instruction count for global merge function"); +STATISTIC(NumMismatchedConstHashGlobalMergeFunction, + "Number of mismatched const hash for global merge function"); +STATISTIC(NumMismatchedModuleIdGlobalMergeFunction, + "Number of mismatched Module Id for global merge function"); +STATISTIC(NumGlobalMergeFunctions, + "Number of functions that are actually merged using function hash"); +STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed"); +STATISTIC(NumAnalyzedFunctions, "Number of functions that are analyzed"); +STATISTIC(NumEligibleFunctions, "Number of functions that are eligible"); + +/// Returns true if the \opIdx operand of \p CI is the callee operand. +static bool isCalleeOperand(const CallBase *CI, unsigned opIdx) { + return &CI->getCalledOperandUse() == &CI->getOperandUse(opIdx); +} + +static bool canParameterizeCallOperand(const CallBase *CI, unsigned opIdx) { + if (CI->isInlineAsm()) + return false; + Function *Callee = CI->getCalledOperand() + ? dyn_cast_or_null( + CI->getCalledOperand()->stripPointerCasts()) + : nullptr; + if (Callee) { + if (Callee->isIntrinsic()) + return false; + // objc_msgSend stubs must be called, and can't have their address taken. + if (Callee->getName().starts_with("objc_msgSend$")) + return false; + } + if (isCalleeOperand(CI, opIdx) && + CI->getOperandBundle(LLVMContext::OB_ptrauth).has_value()) { + // The operand is the callee and it has already been signed. Ignore this + // because we cannot add another ptrauth bundle to the call instruction. + return false; + } + return true; +} + +bool isEligibleInstrunctionForConstantSharing(const Instruction *I) { + switch (I->getOpcode()) { + case Instruction::Load: + case Instruction::Store: + case Instruction::Call: + case Instruction::Invoke: + return true; + default: + return false; + } +} + +bool isEligibleOperandForConstantSharing(const Instruction *I, unsigned OpIdx) { + assert(OpIdx < I->getNumOperands() && "Invalid operand index"); + + if (!isEligibleInstrunctionForConstantSharing(I)) + return false; + + auto Opnd = I->getOperand(OpIdx); + if (!isa(Opnd)) + return false; + + if (const auto *CI = dyn_cast(I)) + return canParameterizeCallOperand(CI, OpIdx); + + return true; +} + +/// Returns true if function \p F is eligible for merging. +bool isEligibleFunction(Function *F) { + if (F->isDeclaration()) + return false; + + if (F->hasFnAttribute(llvm::Attribute::NoMerge)) + return false; + + if (F->hasAvailableExternallyLinkage()) { + return false; + } + + if (F->getFunctionType()->isVarArg()) { + return false; + } + + if (F->getCallingConv() == CallingConv::SwiftTail) + return false; + + // if function contains callsites with musttail, if we merge + // it, the merged function will have the musttail callsite, but + // the number of parameters can change, thus the parameter count + // of the callsite will mismatch with the function itself. + // if (IgnoreMusttailFunction) { + for (const BasicBlock &BB : *F) { + for (const Instruction &I : BB) { + const auto *CB = dyn_cast(&I); + if (CB && CB->isMustTailCall()) + return false; + } + } + + // TODO: Add a size constraint for tuning. + + return true; +} + +static bool +isEligibleInstrunctionForConstantSharingLocal(const Instruction *I) { + switch (I->getOpcode()) { + case Instruction::Load: + case Instruction::Store: + case Instruction::Call: + case Instruction::Invoke: + return true; + default: + return false; + } +} + +static bool ignoreOp(const Instruction *I, unsigned OpIdx) { + assert(OpIdx < I->getNumOperands() && "Invalid operand index"); + + if (!isEligibleInstrunctionForConstantSharingLocal(I)) + return false; + + if (!isa(I->getOperand(OpIdx))) + return false; + + if (const auto *CI = dyn_cast(I)) + return canParameterizeCallOperand(CI, OpIdx); + + return true; +} + +// copy from merge functions.cpp +static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) { + Type *SrcTy = V->getType(); + if (SrcTy->isStructTy()) { + assert(DestTy->isStructTy()); + assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements()); + Value *Result = PoisonValue::get(DestTy); + for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) { + Value *Element = + createCast(Builder, Builder.CreateExtractValue(V, ArrayRef(I)), + DestTy->getStructElementType(I)); + + Result = Builder.CreateInsertValue(Result, Element, ArrayRef(I)); + } + return Result; + } + assert(!DestTy->isStructTy()); + if (auto *SrcAT = dyn_cast(SrcTy)) { + auto *DestAT = dyn_cast(DestTy); + assert(DestAT); + assert(SrcAT->getNumElements() == DestAT->getNumElements()); + Value *Result = UndefValue::get(DestTy); + for (unsigned int I = 0, E = SrcAT->getNumElements(); I < E; ++I) { + Value *Element = + createCast(Builder, Builder.CreateExtractValue(V, ArrayRef(I)), + DestAT->getElementType()); + + Result = Builder.CreateInsertValue(Result, Element, ArrayRef(I)); + } + return Result; + } + assert(!DestTy->isArrayTy()); + if (SrcTy->isIntegerTy() && DestTy->isPointerTy()) + return Builder.CreateIntToPtr(V, DestTy); + else if (SrcTy->isPointerTy() && DestTy->isIntegerTy()) + return Builder.CreatePtrToInt(V, DestTy); + else + return Builder.CreateBitCast(V, DestTy); +} + +void GlobalMergeFunc::analyze(Module &M) { + ++NumAnalyzedModues; + for (Function &Func : M) { + ++NumAnalyzedFunctions; + if (isEligibleFunction(&Func)) { + ++NumEligibleFunctions; + + auto FI = llvm::StructuralHashWithDifferences(Func, ignoreOp); + + // Convert the map to a vector for a serialization-friendly format. + IndexOperandHashVecType IndexOperandHashes; + for (auto &Pair : *FI.IndexOperandHashMap) + IndexOperandHashes.emplace_back(Pair); + + StableFunction SF(FI.FunctionHash, get_stable_name(Func.getName()).str(), + M.getModuleIdentifier(), FI.IndexInstruction->size(), + std::move(IndexOperandHashes)); + + LocalFunctionMap->insert(SF); + } + } +} + +/// Tuple to hold function info to process merging. +struct FuncMergeInfo { + StableFunctionEntry *SF; + Function *F; + std::unique_ptr IndexInstruction; +}; + +// Given the func info, and the parameterized locations, create and return +// a new merged function by replacing the original constants with the new +// parameters. +static Function *createMergedFunction(FuncMergeInfo &FI, + ArrayRef ConstParamTypes, + const ParamLocsVecTy &ParamLocsVec) { + // Synthesize a new merged function name by appending ".Tgm" to the root + // function's name. + auto *MergedFunc = FI.F; + auto NewFunctionName = MergedFunc->getName().str() + ".Tgm"; + auto *M = MergedFunc->getParent(); + assert(!M->getFunction(NewFunctionName)); + + FunctionType *OrigTy = MergedFunc->getFunctionType(); + // Get the original params' types. + SmallVector ParamTypes(OrigTy->param_begin(), OrigTy->param_end()); + // Append const parameter types that are passed in. + ParamTypes.append(ConstParamTypes.begin(), ConstParamTypes.end()); + FunctionType *FuncType = + FunctionType::get(OrigTy->getReturnType(), ParamTypes, false); + + // Declare a new function + Function *NewFunction = + Function::Create(FuncType, MergedFunc->getLinkage(), NewFunctionName); + if (auto *SP = MergedFunc->getSubprogram()) + NewFunction->setSubprogram(SP); + NewFunction->copyAttributesFrom(MergedFunc); + NewFunction->setDLLStorageClass(GlobalValue::DefaultStorageClass); + + NewFunction->setLinkage(GlobalValue::InternalLinkage); + NewFunction->addFnAttr(Attribute::NoInline); + + // Add the new function before the root function. + M->getFunctionList().insert(MergedFunc->getIterator(), NewFunction); + + // Move the body of MergedFunc into the NewFunction. + NewFunction->splice(NewFunction->begin(), MergedFunc); + + // Update the original args by the new args. + auto NewArgIter = NewFunction->arg_begin(); + for (Argument &OrigArg : MergedFunc->args()) { + Argument &NewArg = *NewArgIter++; + OrigArg.replaceAllUsesWith(&NewArg); + } + + // Replace the original Constants by the new args. + unsigned NumOrigArgs = MergedFunc->arg_size(); + for (unsigned ParamIdx = 0; ParamIdx < ParamLocsVec.size(); ++ParamIdx) { + Argument *NewArg = NewFunction->getArg(NumOrigArgs + ParamIdx); + for (auto [InstIndex, OpndIndex] : ParamLocsVec[ParamIdx]) { + auto *Inst = FI.IndexInstruction->lookup(InstIndex); + auto *OrigC = Inst->getOperand(OpndIndex); + if (OrigC->getType() != NewArg->getType()) { + IRBuilder<> Builder(Inst->getParent(), Inst->getIterator()); + Inst->setOperand(OpndIndex, + createCast(Builder, NewArg, OrigC->getType())); + } else + Inst->setOperand(OpndIndex, NewArg); + } + } + + return NewFunction; +} + +// Given the original function (Thunk) and the merged function (ToFunc), create +// a thunk to the merged function. + +static void createThunk(FuncMergeInfo &FI, ArrayRef Params, + Function *ToFunc) { + auto *Thunk = FI.F; + + assert(Thunk->arg_size() + Params.size() == + ToFunc->getFunctionType()->getNumParams()); + Thunk->dropAllReferences(); + + BasicBlock *BB = BasicBlock::Create(Thunk->getContext(), "", Thunk); + IRBuilder<> Builder(BB); + + SmallVector Args; + unsigned ParamIdx = 0; + FunctionType *ToFuncTy = ToFunc->getFunctionType(); + + // Add arguments which are passed through Thunk. + for (Argument &AI : Thunk->args()) { + Args.push_back(createCast(Builder, &AI, ToFuncTy->getParamType(ParamIdx))); + ++ParamIdx; + } + + // Add new arguments defined by Params. + for (auto *Param : Params) { + assert(ParamIdx < ToFuncTy->getNumParams()); + // FIXME: do not support signing + Args.push_back( + createCast(Builder, Param, ToFuncTy->getParamType(ParamIdx))); + ++ParamIdx; + } + + CallInst *CI = Builder.CreateCall(ToFunc, Args); + bool isSwiftTailCall = ToFunc->getCallingConv() == CallingConv::SwiftTail && + Thunk->getCallingConv() == CallingConv::SwiftTail; + CI->setTailCallKind(isSwiftTailCall ? llvm::CallInst::TCK_MustTail + : llvm::CallInst::TCK_Tail); + CI->setCallingConv(ToFunc->getCallingConv()); + CI->setAttributes(ToFunc->getAttributes()); + if (Thunk->getReturnType()->isVoidTy()) { + Builder.CreateRetVoid(); + } else { + Builder.CreateRet(createCast(Builder, CI, Thunk->getReturnType())); + } +} + +// Check if the old merged/optimized IndexOperandHashMap is compatible with +// the current IndexOperandHashMap. OpndHash may not be stable across +// different builds due to varying modules combined. To address this, we relax +// the hash check condition by comparing Const hash patterns instead of absolute +// hash values. For example, let's assume we have three Consts located at idx1, +// idx3, and idx6, where their corresponding hashes are hash1, hash2, and hash1 +// in the old merged map below: +// Old (Merged): [(idx1, hash1), (idx3, hash2), (idx6, hash1)] +// Current: [(idx1, hash1'), (idx3, hash2'), (idx6, hash1')] +// If the current function also has three Consts in the same locations, +// with hash sequences hash1', hash2', and hash1' where the first and third +// are the same as the old hash sequences, we consider them matched. +static bool checkConstHashCompatible( + const DenseMap &OldInstOpndIndexToConstHash, + const DenseMap &CurrInstOpndIndexToConstHash) { + + DenseMap OldHashToCurrHash; + for (const auto &[Index, OldHash] : OldInstOpndIndexToConstHash) { + auto It = CurrInstOpndIndexToConstHash.find(Index); + if (It == CurrInstOpndIndexToConstHash.end()) + return false; + + auto CurrHash = It->second; + auto J = OldHashToCurrHash.find(OldHash); + if (J == OldHashToCurrHash.end()) + OldHashToCurrHash.insert({OldHash, CurrHash}); + else if (J->second != CurrHash) + return false; + } + + return true; +} + +// Validate the locations pointed by a param has the same hash and Constant. +static bool checkConstLocationCompatible(const StableFunctionEntry &SF, + const IndexInstrMap &IndexInstruction, + const ParamLocsVecTy &ParamLocsVec) { + for (auto &ParamLocs : ParamLocsVec) { + std::optional OldHash; + std::optional OldConst; + for (auto &Loc : ParamLocs) { + assert(SF.IndexOperandHashMap->count(Loc)); + auto CurrHash = SF.IndexOperandHashMap.get()->at(Loc); + auto [InstIndex, OpndIndex] = Loc; + assert(InstIndex < IndexInstruction.size()); + const auto *Inst = IndexInstruction.lookup(InstIndex); + auto *CurrConst = cast(Inst->getOperand(OpndIndex)); + if (!OldHash) { + OldHash = CurrHash; + OldConst = CurrConst; + } else if (CurrConst != *OldConst || CurrHash != *OldHash) + return false; + } + } + return true; +} + +static ParamLocsVecTy +computeParamInfo(const SmallVector> &SFS) { + std::map, ParamLocs> HashSeqToLocs; + auto &RSF = *SFS[0]; + unsigned StableFunctionCount = SFS.size(); + + for (auto &[IndexPair, Hash] : *RSF.IndexOperandHashMap) { + // Const hash sequence across stable functions. + // We will allocate a parameter per unique hash squence. + // can't use SmallVector as key + std::vector ConstHashSeq; + ConstHashSeq.push_back(Hash); + bool Identical = true; + for (unsigned J = 1; J < StableFunctionCount; ++J) { + auto &SF = SFS[J]; + assert(SF->IndexOperandHashMap->count(IndexPair)); + auto SHash = (*SF->IndexOperandHashMap)[IndexPair]; + if (Hash != SHash) + Identical = false; + ConstHashSeq.push_back(SHash); + } + + if (Identical) + continue; + + // For each unique Const hash sequence (parameter), add the locations. + HashSeqToLocs[ConstHashSeq].push_back(IndexPair); + } + + ParamLocsVecTy ParamLocsVec; + for (auto &[HashSeq, Locs] : HashSeqToLocs) { + ParamLocsVec.push_back(std::move(Locs)); + std::sort( + ParamLocsVec.begin(), ParamLocsVec.end(), + [&](const ParamLocs &L, const ParamLocs &R) { return L[0] < R[0]; }); + } + return ParamLocsVec; +} + +bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) { + bool Changed = false; + + // Build a map from stable function name to function. + StringMap StableNameToFuncMap; + for (auto &F : M) + StableNameToFuncMap[get_stable_name(F.getName())] = &F; + // Track merged functions + DenseSet MergedFunctions; + + auto ModId = M.getModuleIdentifier(); + for (auto &[Hash, SFS] : FunctionMap->getFunctionMap()) { + // No interest if the number of candidates are less than 2. + if (SFS.size() < 2) + continue; + // Compute the parameter locations based on the unique hash sequences + // across the candidates. + auto ParamLocsVec = computeParamInfo(SFS); + LLVM_DEBUG({ + dbgs() << "[GlobalMergeFunc] Merging hash: " << Hash << " with Params " + << ParamLocsVec.size() << "\n"; + }); + + Function *MergedFunc = nullptr; + std::string MergedModId; + SmallVector FuncMergeInfos; + for (auto &SF : SFS) { + // Get the function from the stable name. + auto I = StableNameToFuncMap.find( + *FunctionMap->getNameForId(SF->FunctionNameId)); + if (I == StableNameToFuncMap.end()) + continue; + Function *F = I->second; + assert(F); + // Skip if the function has been merged before. + if (MergedFunctions.count(F)) + continue; + // Consider the function if it is eligible for merging. + if (!isEligibleFunction(F)) + continue; + + auto FI = llvm::StructuralHashWithDifferences(*F, ignoreOp); + uint64_t FuncHash = FI.FunctionHash; + if (Hash != FuncHash) { + ++NumMismatchedFunctionHashGlobalMergeFunction; + continue; + } + + if (SF->InstCount != FI.IndexInstruction->size()) { + ++NumMismatchedInstCountGlobalMergeFunction; + continue; + } + bool HasValidSharedConst = true; + for (auto &[Index, Hash] : *SF->IndexOperandHashMap) { + auto [InstIndex, OpndIndex] = Index; + assert(InstIndex < FI.IndexInstruction->size()); + auto *Inst = FI.IndexInstruction->lookup(InstIndex); + if (!isEligibleOperandForConstantSharing(Inst, OpndIndex)) { + HasValidSharedConst = false; + break; + } + } + if (!HasValidSharedConst) { + ++NumMismatchedConstHashGlobalMergeFunction; + continue; + } + if (!checkConstHashCompatible(*SF->IndexOperandHashMap, + *FI.IndexOperandHashMap)) { + ++NumMismatchedConstHashGlobalMergeFunction; + continue; + } + if (!checkConstLocationCompatible(*SF, *FI.IndexInstruction, + ParamLocsVec)) { + ++NumMismatchedConstHashGlobalMergeFunction; + continue; + } + + if (MergedFunc) { + // Check if the matched functions fall into the same (first) module. + // This module check is not strictly necessary as the functions can move + // around. We just want to avoid merging functions from different + // modules than the first one in the functon map, as they may not end up + // with not being ICFed. + if (MergedModId != *FunctionMap->getNameForId(SF->ModuleNameId)) { + ++NumMismatchedModuleIdGlobalMergeFunction; + continue; + } + } else { + MergedFunc = F; + MergedModId = *FunctionMap->getNameForId(SF->ModuleNameId); + } + + FuncMergeInfos.push_back({SF.get(), F, std::move(FI.IndexInstruction)}); + MergedFunctions.insert(F); + } + unsigned FuncMergeInfoSize = FuncMergeInfos.size(); + if (FuncMergeInfoSize == 0) + continue; + + LLVM_DEBUG({ + dbgs() << "[GlobalMergeFunc] Merging function count " << FuncMergeInfoSize + << " in " << ModId << "\n"; + }); + for (auto &FMI : FuncMergeInfos) { + Changed = true; + + // We've already validated all locations of constant operands pointed by + // the parameters. Just use the first one to bookkeep the original + // constants for each parameter + SmallVector Params; + SmallVector ParamTypes; + for (auto &ParamLocs : ParamLocsVec) { + assert(!ParamLocs.empty()); + auto &[InstIndex, OpndIndex] = ParamLocs[0]; + auto *Inst = FMI.IndexInstruction->lookup(InstIndex); + auto *Opnd = cast(Inst->getOperand(OpndIndex)); + Params.push_back(Opnd); + ParamTypes.push_back(Opnd->getType()); + } + + // Create a merged function derived from the first function in the current + // module context. + Function *MergedFunc = + createMergedFunction(FMI, ParamTypes, ParamLocsVec); + + LLVM_DEBUG({ + dbgs() << "[GlobalMergeFunc] Merged function (hash:" << FMI.SF->Hash + << ") " << MergedFunc->getName() << " generated from " + << FMI.F->getName() << ":\n"; + MergedFunc->dump(); + }); + + // Create a thunk to the merged function. + createThunk(FMI, Params, MergedFunc); + LLVM_DEBUG({ + dbgs() << "[GlobalMergeFunc] Thunk generated: \n"; + FMI.F->dump(); + }); + ++NumGlobalMergeFunctions; + } + } + + return Changed; +} + +char GlobalMergeFunc::ID = 0; +INITIALIZE_PASS_BEGIN(GlobalMergeFunc, "global-merge-func", + "Global merge function pass", false, false) +INITIALIZE_PASS_END(GlobalMergeFunc, "global-merge-func", + "Global merge function pass", false, false) + +GlobalMergeFunc::GlobalMergeFunc() : ModulePass(ID) { + initializeGlobalMergeFuncPass(*llvm::PassRegistry::getPassRegistry()); +} + +namespace llvm { +Pass *createGlobalMergeFuncPass() { return new GlobalMergeFunc(); } +} // namespace llvm + +void GlobalMergeFunc::initializeMergerMode(const Module &M) { + // When codegen data write is enabled, we want to write the local outlined + // hash tree to the custom section, `__llvm_outline`. + // When the outlined hash tree is available from the previous codegen data, + // we want to read it to optimistically create global outlining candidates. + LocalFunctionMap = std::make_unique(); + if (cgdata::emitCGData()) + MergerMode = HashFunctionMode::BuildingHashFuncion; + else if (cgdata::hasStableFunctionMap()) + MergerMode = HashFunctionMode::UsingHashFunction; +} + +void GlobalMergeFunc::emitFunctionMap(Module &M) { + LLVM_DEBUG({ + dbgs() << "Emit function map. Size: " << LocalFunctionMap->size() << "\n"; + }); + SmallVector Buf; + raw_svector_ostream OS(Buf); + + StableFunctionMapRecord SFR(std::move(LocalFunctionMap)); + SFR.serialize(OS); + + std::unique_ptr Buffer = MemoryBuffer::getMemBuffer( + OS.str(), "in-memory stable function map", false); + + Triple TT(M.getTargetTriple()); + embedBufferInModule(M, *Buffer.get(), + getCodeGenDataSectionName(CG_merge, TT.getObjectFormat()), + Align(4)); +} + +bool GlobalMergeFunc::runOnModule(Module &M) { + bool Changed = false; + initializeMergerMode(M); + + switch (MergerMode) { + case HashFunctionMode::BuildingHashFuncion: { + analyze(M); + if (!LocalFunctionMap->empty()) + emitFunctionMap(M); + break; + } + case HashFunctionMode::UsingHashFunction: { + // Merge with the global function map read from CG data. + Changed = merge(M, cgdata::getStableFunctionMap()); + break; + } + default: { + // Local merge case (within a module) in one-pass without explicitly using + // CGData. + analyze(M); + LocalFunctionMap->finalize(); + Changed = merge(M, LocalFunctionMap.get()); + } + } + + return Changed; +} diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll new file mode 100644 index 00000000000000..4f5f5c0b5b26d9 --- /dev/null +++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll @@ -0,0 +1,62 @@ +; This test checks if two similar functions, f1 and f2, can be merged locally within a single module +; while parameterizing a difference in their global variables, g1 and g2. +; To achieve this, we create two instances of the global merging function, f1.Tgm and f2.Tgm, +; which are tail-called from thunks g1 and g2 respectively. +; These identical functions, f1.Tgm and f2.Tgm, will be folded by the linker via Identical Code Folding (IFC). + +; RUN: opt -module-summary -module-hash %s -o %t + +; RUN: llvm-lto2 run -enable-global-merge-func=false %t -o %tout-nomerge \ +; RUN: -r %t,_f1,px \ +; RUN: -r %t,_f2,px \ +; RUN: -r %t,_g,l -r %t,_g1,l -r %t,_g2,l +; RUN: llvm-nm %tout-nomerge.1 | FileCheck %s --check-prefix=NOMERGE +; RUN: llvm-lto2 run -enable-global-merge-func=true %t -o %tout-merge \ +; RUN: -r %t,_f1,px \ +; RUN: -r %t,_f2,px \ +; RUN: -r %t,_g,l -r %t,_g1,l -r %t,_g2,l +; RUN: llvm-nm %tout-merge.1 | FileCheck %s --check-prefix=GLOBALMERGE +; RUN: llvm-objdump -d %tout-merge.1 | FileCheck %s --check-prefix=THUNK + +; NOMERGE-NOT: _f1.Tgm +; GLOBALMERGE: _f1.Tgm +; GLOBALMERGE: _f2.Tgm + +; THUNK: <_f1>: +; THUNK-NEXT: adrp x1, +; THUNK-NEXT: ldr x1, [x1] +; THUNK-NEXT: b + +; THUNK: <_f2>: +; THUNK-NEXT: adrp x1, +; THUNK-NEXT: ldr x1, [x1] +; THUNK-NEXT: b + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f1(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g1, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} + +define i32 @f2(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g2, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll new file mode 100644 index 00000000000000..da756e7f15e68e --- /dev/null +++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll @@ -0,0 +1,82 @@ +; This test demonstrates how similar functions are handled during global outlining. +; Currently, we do not attempt to share an merged function for identical sequences. +; Instead, each merging instance is created uniquely. + +; RUN: rm -rf %t; split-file %s %t + +; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc +; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc + +; First, run with -codegen-data-generate=true to generate the cgdata in the object files. +; Using llvm-cgdata, merge the cg data. +; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=true %t-foo.bc %t-goo.bc -o %tout-write \ +; RUN: -r %t-foo.bc,_f1,px \ +; RUN: -r %t-goo.bc,_f2,px \ +; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \ +; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l +; RUN: llvm-cgdata --merge -o %tout.cgdata %tout-write.1 %tout-write.2 + +; Now run with -codegen-data-use-path=%tout.cgdata to optimize the binary. +; Each module has its own merging instance as it is matched against the merged cgdata. +; RUN: llvm-lto2 run -enable-global-merge-func=true \ +; RUN: -codegen-data-use-path=%tout.cgdata \ +; RUN: %t-foo.bc %t-goo.bc -o %tout-read \ +; RUN: -r %t-foo.bc,_f1,px \ +; RUN: -r %t-goo.bc,_f2,px \ +; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \ +; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l +; RUN: llvm-nm %tout-read.1 | FileCheck %s --check-prefix=READ1 +; RUN: llvm-nm %tout-read.2 | FileCheck %s --check-prefix=READ2 +; RUN: llvm-objdump -d %tout-read.1 | FileCheck %s --check-prefix=THUNK1 +; RUN: llvm-objdump -d %tout-read.2 | FileCheck %s --check-prefix=THUNK2 + +; READ1: _f1.Tgm +; READ2: _f2.Tgm + +; THUNK1: <_f1>: +; THUNK1-NEXT: adrp x1, +; THUNK1-NEXT: ldr x1, [x1] +; THUNK1-NEXT: b + +; THUNK2: <_f2>: +; THUNK2-NEXT: adrp x1, +; THUNK2-NEXT: ldr x1, [x1] +; THUNK2-NEXT: b + +;--- foo.ll +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f1(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g1, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} + +;--- goo.ll +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f2(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g2, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll new file mode 100644 index 00000000000000..06880e3d268189 --- /dev/null +++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll @@ -0,0 +1,68 @@ +; TODO: This test checks if the how similar functions are handled during global outlining +; by repeating the codegen via -codegen-data-thinlto-two-rounds=true. + +; RUN: rm -rf %t; split-file %s %t + +; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc +; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc + +; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-thinlto-two-rounds=true %t-foo.bc %t-goo.bc -o %tout \ +; RUN: -r %t-foo.bc,_f1,px \ +; RUN: -r %t-goo.bc,_f2,px \ +; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \ +; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l +; RUN: llvm-nm %tout.1 | FileCheck %s --check-prefix=OUT1 +; RUN: llvm-nm %tout.2 | FileCheck %s --check-prefix=OUT2 +; RUN: llvm-objdump -d %tout.1 | FileCheck %s --check-prefix=THUNK1 +; RUN: llvm-objdump -d %tout.2 | FileCheck %s --check-prefix=THUNK2 + +; OUT1: _f1.Tgm +; OUT2: _f2.Tgm + +; THUNK1: <_f1>: +; THUNK1-NEXT: adrp x1, +; THUNK1-NEXT: ldr x1, [x1] +; THUNK1-NEXT: b + +; THUNK2: <_f2>: +; THUNK2-NEXT: adrp x1, +; THUNK2-NEXT: ldr x1, [x1] +; THUNK2-NEXT: b + +;--- foo.ll +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f1(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g1, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} + +;--- goo.ll +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f2(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g2, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll new file mode 100644 index 00000000000000..a4022eb885b43f --- /dev/null +++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll @@ -0,0 +1,97 @@ +; This test verifies whether a stable function is encoded into the __llvm_merge section +; when the -codegen-data-generate flag is used under -enable-global-merge-func=true. + +; RUN: rm -rf %t; split-file %s %t + +; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc +; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc + +; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=false %t-foo.bc %t-goo.bc -o %tout-nowrite \ +; RUN: -r %t-foo.bc,_f1,px \ +; RUN: -r %t-goo.bc,_f2,px \ +; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \ +; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l +; RUN: llvm-nm %tout-nowrite.1 | FileCheck %s --check-prefix=NOWRITE +; RUN: llvm-nm %tout-nowrite.2 | FileCheck %s --check-prefix=NOWRITE + +; No merge instance is locally created as each module has a singltone function. +; NOWRITE-NOT: _f1.Tgm +; NOWRITE-NOT: _f2.Tgm + +; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=true %t-foo.bc %t-goo.bc -o %tout-nowrite \ +; RUN: -r %t-foo.bc,_f1,px \ +; RUN: -r %t-goo.bc,_f2,px \ +; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \ +; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l +; RUN: llvm-nm %tout-nowrite.1 | FileCheck %s --check-prefix=WRITE +; RUN: llvm-nm %tout-nowrite.2 | FileCheck %s --check-prefix=WRITE +; RUN: llvm-objdump -h %tout-nowrite.1 | FileCheck %s --check-prefix=SECTNAME +; RUN: llvm-objdump -h %tout-nowrite.2 | FileCheck %s --check-prefix=SECTNAME + +; On a write mode, no merging happens yet for each module. +; We only create stable functions and publish them into __llvm_merge section for each object. +; WRITE-NOT: _f1.Tgm +; WRITE-NOT: _f2.Tgm +; SECTNAME: __llvm_merge + +; Merge the cgdata using llvm-cgdata. +; We now validate the content of the merged cgdata. +; Two functions have the same hash with only one different constnat at a same location. +; RUN: llvm-cgdata --merge -o %tout.cgdata %tout-nowrite.1 %tout-nowrite.2 +; RUN: llvm-cgdata --convert %tout.cgdata -o - | FileCheck %s + +; CHECK: - Hash: [[#%d,HASH:]] +; CHECK-NEXT: FunctionName: f1 +; CHECK-NEXT: ModuleName: {{.*}} +; CHECK-NEXT: InstCount: [[#%d,INSTCOUNT:]] +; CHECK-NEXT: IndexOperandHashes: +; CHECK-NEXT: - InstIndex: [[#%d,INSTINDEX:]] +; CHECK-NEXT: OpndIndex: [[#%d,OPNDINDEX:]] +; CHECK-NEXT: OpndHash: {{.*}} + +; CHECK: - Hash: [[#%d,HASH]] +; CHECK-NEXT: FunctionName: f2 +; CHECK-NEXT: ModuleName: {{.*}} +; CHECK-NEXT: InstCount: [[#%d,INSTCOUNT]] +; CHECK-NEXT: IndexOperandHashes: +; CHECK-NEXT: - InstIndex: [[#%d,INSTINDEX]] +; CHECK-NEXT: OpndIndex: [[#%d,OPNDINDEX]] +; CHECK-NEXT: OpndHash: {{.*}} + +;--- foo.ll +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f1(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g1, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} + +;--- goo.ll +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f2(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g2, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} diff --git a/llvm/tools/llvm-lto2/CMakeLists.txt b/llvm/tools/llvm-lto2/CMakeLists.txt index 3b4644d6e27715..6ddccc7f17442f 100644 --- a/llvm/tools/llvm-lto2/CMakeLists.txt +++ b/llvm/tools/llvm-lto2/CMakeLists.txt @@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS BitReader CodeGen Core + IPO Linker LTO MC diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp index d4f022ef021a44..0dc6623552a4bd 100644 --- a/llvm/tools/llvm-lto2/llvm-lto2.cpp +++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/PluginLoader.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/Threading.h" +#include "llvm/Transforms/IPO.h" #include using namespace llvm; @@ -195,6 +196,7 @@ static cl::opt TryUseNewDbgInfoFormat( extern cl::opt UseNewDbgInfoFormat; extern cl::opt LoadBitcodeIntoNewDbgInfoFormat; extern cl::opt PreserveInputDbgFormat; +extern cl::opt EnableGlobalMergeFunc; static void check(Error E, std::string Msg) { if (!E) @@ -374,6 +376,10 @@ static int run(int argc, char **argv) { if (DI.getSeverity() == DS_Error) HasErrors = true; }; + Conf.PreCodeGenPassesHook = [](legacy::PassManager &pm) { + if (EnableGlobalMergeFunc) + pm.add(createGlobalMergeFuncPass()); + }; LTO::LTOKind LTOMode = LTO::LTOK_Default;