Skip to content

Commit

Permalink
Address comments from tschuett
Browse files Browse the repository at this point in the history
  • Loading branch information
kyulee-com committed Nov 5, 2024
1 parent 4e2d83d commit e86d78b
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 50 deletions.
3 changes: 3 additions & 0 deletions llvm/include/llvm/Transforms/IPO.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ enum class PassSummaryAction {
Export, ///< Export information to summary.
};

/// createGlobalMergeFuncPass - This pass generates merged instances by
/// parameterizing distinct constants across similar functions, utilizing stable
/// function hash information.
Pass *createGlobalMergeFuncPass();

} // End llvm namespace
Expand Down
45 changes: 23 additions & 22 deletions llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,29 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// This file defines global merge functions pass and related data structure.
///
//
// This pass defines the implementation of a function merging mechanism
// that utilizes a stable function hash to track differences in constants and
// identify potential merge candidates. The process involves two rounds:
// 1. The first round collects stable function hashes and identifies merge
// candidates with matching hashes. It also computes the set of parameters
// that point to different constants during the stable function merge.
// 2. The second round leverages this collected global function information to
// optimistically create a merged function in each module context, ensuring
// correct transformation.
// Similar to the global outliner, this approach uses the linker's deduplication
// (ICF) to fold identical merged functions, thereby reducing the final binary
// size. The work is inspired by the concepts discussed in the following paper:
// https://dl.acm.org/doi/pdf/10.1145/3652032.3657575.
//
//===----------------------------------------------------------------------===//

#ifndef PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
#define PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
#ifndef LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H
#define LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StableHashing.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CGData/StableFunctionMap.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include <map>
#include <mutex>

enum class HashFunctionMode {
Local,
Expand All @@ -36,15 +42,10 @@ namespace llvm {
using ParamLocs = SmallVector<IndexPair, 4>;
// A vector of parameters
using ParamLocsVecTy = SmallVector<ParamLocs, 8>;
// A map of stable hash to a vector of stable functions

/// GlobalMergeFunc finds functions which only differ by constants in
/// certain instructions, e.g. resulting from specialized functions of layout
/// compatible types.
/// Unlike PikaMergeFunc that directly compares IRs, this uses stable function
/// hash to find the merge candidate. Similar to the global outliner, we can run
/// codegen twice to collect function merge candidate in the first round, and
/// merge functions globally in the second round.

/// GlobalMergeFunc is a ModulePass that implements a function merging mechanism
/// using stable function hashes. It identifies and merges functions with
/// matching hashes across modules to optimize binary size.
class GlobalMergeFunc : public ModulePass {
HashFunctionMode MergerMode = HashFunctionMode::Local;

Expand All @@ -69,9 +70,9 @@ class GlobalMergeFunc : public ModulePass {
/// Emit LocalFunctionMap into __llvm_merge section.
void emitFunctionMap(Module &M);

/// Merge functions in the module using the global function map.
/// Merge functions in the module using the given function map.
bool merge(Module &M, const StableFunctionMap *FunctionMap);
};

} // end namespace llvm
#endif // PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
#endif // LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H
53 changes: 25 additions & 28 deletions llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,26 @@
//
//===----------------------------------------------------------------------===//
//
// TODO: This implements a function merge using function hash while tracking
// differences in Constants. This uses stable function hash to find potential
// merge candidates. The first codegen round collects stable function hashes,
// and determines the merge candidates that match the stable function hashes.
// The set of parameters pointing to different Constants are also computed
// during the stable function merge. The second codegen round uses this global
// function info to optimistically create a merged function in each module
// context to guarantee correct transformation. Similar to the global outliner,
// the linker's deduplication (ICF) folds the identical merged functions to save
// the final binary size.
// This pass defines the implementation of a function merging mechanism
// that utilizes a stable function hash to track differences in constants and
// create potential merge candidates. The process involves two rounds:
// 1. The first round collects stable function hashes and identifies merge
// candidates with matching hashes. It also computes the set of parameters
// that point to different constants during the stable function merge.
// 2. The second round leverages this collected global function information to
// optimistically create a merged function in each module context, ensuring
// correct transformation.
// Similar to the global outliner, this approach uses the linker's deduplication
// (ICF) to fold identical merged functions, thereby reducing the final binary
// size. The work is inspired by the concepts discussed in the following paper:
// https://dl.acm.org/doi/pdf/10.1145/3652032.3657575.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/GlobalMergeFunctions.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/CGData/CodeGenData.h"
#include "llvm/CGData/StableFunctionMap.h"
#include "llvm/CodeGen/MachineStableHash.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/StructuralHash.h"
#include "llvm/InitializePasses.h"
Expand Down Expand Up @@ -84,7 +84,7 @@ STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed");
STATISTIC(NumAnalyzedFunctions, "Number of functions that are analyzed");
STATISTIC(NumEligibleFunctions, "Number of functions that are eligible");

/// Returns true if the \opIdx operand of \p CI is the callee operand.
/// Checks whether operand number \p OpIdx of the call \p CI is the callee
/// operand, by comparing the address of that operand's use with the address
/// of the called-operand use.
static bool isCalleeOperand(const CallBase *CI, unsigned OpIdx) {
  const auto *CalleeSlot = &CI->getCalledOperandUse();
  const auto *QueriedSlot = &CI->getOperandUse(OpIdx);
  return CalleeSlot == QueriedSlot;
}
Expand Down Expand Up @@ -148,22 +148,19 @@ bool isEligibleFunction(Function *F) {
if (F->hasFnAttribute(llvm::Attribute::NoMerge))
return false;

if (F->hasAvailableExternallyLinkage()) {
if (F->hasAvailableExternallyLinkage())
return false;
}

if (F->getFunctionType()->isVarArg()) {
if (F->getFunctionType()->isVarArg())
return false;
}

if (F->getCallingConv() == CallingConv::SwiftTail)
return false;

// if function contains callsites with musttail, if we merge
// If the function contains musttail callsites and we merge it, the merged
// function keeps those musttail callsites, but its number of parameters
// can change, so the parameter count of the callsite will no longer match
// that of the function itself.
// if (IgnoreMusttailFunction) {
for (const BasicBlock &BB : *F) {
for (const Instruction &I : BB) {
const auto *CB = dyn_cast<CallBase>(&I);
Expand Down Expand Up @@ -203,7 +200,6 @@ static bool ignoreOp(const Instruction *I, unsigned OpIdx) {
return true;
}

// copy from merge functions.cpp
static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
Type *SrcTy = V->getType();
if (SrcTy->isStructTy()) {
Expand Down Expand Up @@ -252,7 +248,8 @@ void GlobalMergeFunc::analyze(Module &M) {

auto FI = llvm::StructuralHashWithDifferences(Func, ignoreOp);

// Convert the map to a vector for a serialization-friendly format.
// Convert the operand map to a vector for a serialization-friendly
// format.
IndexOperandHashVecType IndexOperandHashes;
for (auto &Pair : *FI.IndexOperandHashMap)
IndexOperandHashes.emplace_back(Pair);
Expand Down Expand Up @@ -597,7 +594,7 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
// This module check is not strictly necessary as the functions can move
// around. We just want to avoid merging functions from different
// modules than the first one in the function map, as they may end up
// with not being ICFed.
// not being ICFed by the linker.
if (MergedModId != *FunctionMap->getNameForId(SF->ModuleNameId)) {
++NumMismatchedModuleIdGlobalMergeFunction;
continue;
Expand All @@ -618,12 +615,12 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
dbgs() << "[GlobalMergeFunc] Merging function count " << FuncMergeInfoSize
<< " in " << ModId << "\n";
});

for (auto &FMI : FuncMergeInfos) {
Changed = true;

// We've already validated all locations of constant operands pointed by
// the parameters. Just use the first one to bookkeep the original
// constants for each parameter
// the parameters. Populate parameters pointing to the original constants.
SmallVector<Constant *> Params;
SmallVector<Type *> ParamTypes;
for (auto &ParamLocs : ParamLocsVec) {
Expand All @@ -635,8 +632,7 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
ParamTypes.push_back(Opnd->getType());
}

// Create a merged function derived from the first function in the current
// module context.
// Create a merged function derived from the current function.
Function *MergedFunc =
createMergedFunction(FMI, ParamTypes, ParamLocsVec);

Expand All @@ -647,7 +643,8 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
MergedFunc->dump();
});

// Create a thunk to the merged function.
// Transform the current function into a thunk that calls the merged
// function.
createThunk(FMI, Params, MergedFunc);
LLVM_DEBUG({
dbgs() << "[GlobalMergeFunc] Thunk generated: \n";
Expand Down

0 comments on commit e86d78b

Please sign in to comment.