Skip to content

Commit

Permalink
merge main into amd-staging
Browse files Browse the repository at this point in the history
Change-Id: I7403aa36c6ed6e21ac723a2b3cf90b83680b7585
  • Loading branch information
Jenkins committed Oct 25, 2024
2 parents 4d7f7c6 + a31ce36 commit 7f173c2
Show file tree
Hide file tree
Showing 315 changed files with 8,224 additions and 1,330 deletions.
6 changes: 6 additions & 0 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,9 @@ class BinaryFunction {
/// Raw branch count for this function in the profile.
uint64_t RawBranchCount{0};

/// Dynamically executed function bytes, used for density computation.
uint64_t SampleCountInBytes{0};

/// Indicates the type of profile the function is using.
uint16_t ProfileFlags{PF_NONE};

Expand Down Expand Up @@ -1844,6 +1847,9 @@ class BinaryFunction {
/// to this function.
void setRawBranchCount(uint64_t Count) { RawBranchCount = Count; }

/// Return the number of dynamically executed bytes, from raw perf data.
uint64_t getSampleCountInBytes() const { return SampleCountInBytes; }

/// Return the execution count for functions with known profile.
/// Return 0 if the function has no profile.
uint64_t getKnownExecutionCount() const {
Expand Down
1 change: 1 addition & 0 deletions bolt/include/bolt/Utils/CommandLineOpts.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ extern llvm::cl::opt<bool> PrintSections;
enum ProfileFormatKind { PF_Fdata, PF_YAML };

extern llvm::cl::opt<ProfileFormatKind> ProfileFormat;
extern llvm::cl::opt<bool> ShowDensity;
extern llvm::cl::opt<bool> SplitEH;
extern llvm::cl::opt<bool> StrictMode;
extern llvm::cl::opt<bool> TimeOpts;
Expand Down
74 changes: 74 additions & 0 deletions bolt/lib/Passes/BinaryPasses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "bolt/Core/ParallelUtilities.h"
#include "bolt/Passes/ReorderAlgorithm.h"
#include "bolt/Passes/ReorderFunctions.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/Support/CommandLine.h"
#include <atomic>
#include <mutex>
Expand Down Expand Up @@ -223,6 +224,18 @@ static cl::opt<unsigned> TopCalledLimit(
"functions section"),
cl::init(100), cl::Hidden, cl::cat(BoltCategory));

// Profile density options, synced with llvm-profgen/ProfileGenerator.cpp
static cl::opt<int> ProfileDensityCutOffHot(
"profile-density-cutoff-hot", cl::init(990000),
cl::desc("Total samples cutoff for functions used to calculate "
"profile density."));

static cl::opt<double> ProfileDensityThreshold(
"profile-density-threshold", cl::init(60),
cl::desc("If the profile density is below the given threshold, it "
"will be suggested to increase the sampling rate."),
cl::Optional);

} // namespace opts

namespace llvm {
Expand Down Expand Up @@ -1383,6 +1396,7 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
uint64_t StaleSampleCount = 0;
uint64_t InferredSampleCount = 0;
std::vector<const BinaryFunction *> ProfiledFunctions;
std::vector<std::pair<double, uint64_t>> FuncDensityList;
const char *StaleFuncsHeader = "BOLT-INFO: Functions with stale profile:\n";
for (auto &BFI : BC.getBinaryFunctions()) {
const BinaryFunction &Function = BFI.second;
Expand Down Expand Up @@ -1441,6 +1455,22 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
StaleSampleCount += SampleCount;
++NumAllStaleFunctions;
}

if (opts::ShowDensity) {
uint64_t Size = Function.getSize();
// In case of BOLT split functions registered in BAT, executed traces are
// automatically attributed to the main fragment. Add up function sizes
// for all fragments.
if (IsHotParentOfBOLTSplitFunction)
for (const BinaryFunction *Fragment : Function.getFragments())
Size += Fragment->getSize();
double Density = (double)1.0 * Function.getSampleCountInBytes() / Size;
FuncDensityList.emplace_back(Density, SampleCount);
LLVM_DEBUG(BC.outs() << Function << ": executed bytes "
<< Function.getSampleCountInBytes() << ", size (b) "
<< Size << ", density " << Density
<< ", sample count " << SampleCount << '\n');
}
}
BC.NumProfiledFuncs = ProfiledFunctions.size();
BC.NumStaleProfileFuncs = NumStaleProfileFunctions;
Expand Down Expand Up @@ -1684,6 +1714,50 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
BC.outs() << ". Use -print-unknown to see the list.";
BC.outs() << '\n';
}

if (opts::ShowDensity) {
double Density = 0.0;
// Sorted by the density in descending order.
llvm::stable_sort(FuncDensityList,
[&](const std::pair<double, uint64_t> &A,
const std::pair<double, uint64_t> &B) {
if (A.first != B.first)
return A.first > B.first;
return A.second < B.second;
});

uint64_t AccumulatedSamples = 0;
uint32_t I = 0;
assert(opts::ProfileDensityCutOffHot <= 1000000 &&
"The cutoff value is greater than 1000000(100%)");
while (AccumulatedSamples <
TotalSampleCount *
static_cast<float>(opts::ProfileDensityCutOffHot) /
1000000 &&
I < FuncDensityList.size()) {
AccumulatedSamples += FuncDensityList[I].second;
Density = FuncDensityList[I].first;
I++;
}
if (Density == 0.0) {
BC.errs() << "BOLT-WARNING: the output profile is empty or the "
"--profile-density-cutoff-hot option is "
"set too low. Please check your command.\n";
} else if (Density < opts::ProfileDensityThreshold) {
BC.errs()
<< "BOLT-WARNING: BOLT is estimated to optimize better with "
<< format("%.1f", opts::ProfileDensityThreshold / Density)
<< "x more samples. Please consider increasing sampling rate or "
"profiling for longer duration to get more samples.\n";
}

BC.outs() << "BOLT-INFO: Functions with density >= "
<< format("%.1f", Density) << " account for "
<< format("%.2f",
static_cast<double>(opts::ProfileDensityCutOffHot) /
10000)
<< "% total sample counts.\n";
}
return Error::success();
}

Expand Down
15 changes: 12 additions & 3 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -638,8 +638,12 @@ void DataAggregator::processProfile(BinaryContext &BC) {
: BinaryFunction::PF_LBR;
for (auto &BFI : BC.getBinaryFunctions()) {
BinaryFunction &BF = BFI.second;
if (getBranchData(BF) || getFuncSampleData(BF.getNames()))
FuncBranchData *FBD = getBranchData(BF);
if (FBD || getFuncSampleData(BF.getNames())) {
BF.markProfiled(Flags);
if (FBD)
BF.RawBranchCount = FBD->getNumExecutedBranches();
}
}

for (auto &FuncBranches : NamesToBranches)
Expand Down Expand Up @@ -845,6 +849,12 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
return false;
}

// Set ParentFunc to BAT parent function or FromFunc itself.
BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
if (!ParentFunc)
ParentFunc = FromFunc;
ParentFunc->SampleCountInBytes += Count * (Second.From - First.To);

std::optional<BoltAddressTranslation::FallthroughListTy> FTs =
BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), First.To,
Second.From)
Expand All @@ -864,13 +874,12 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
<< FromFunc->getPrintName() << ":"
<< Twine::utohexstr(First.To) << " to "
<< Twine::utohexstr(Second.From) << ".\n");
BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
for (auto [From, To] : *FTs) {
if (BAT) {
From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);
To = BAT->translate(FromFunc->getAddress(), To, /*IsBranchSrc=*/false);
}
doIntraBranch(ParentFunc ? *ParentFunc : *FromFunc, From, To, Count, false);
doIntraBranch(*ParentFunc, From, To, Count, false);
}

return true;
Expand Down
4 changes: 4 additions & 0 deletions bolt/lib/Utils/CommandLineOpts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,10 @@ cl::opt<std::string> SaveProfile("w",
cl::desc("save recorded profile to a file"),
cl::cat(BoltOutputCategory));

cl::opt<bool> ShowDensity("show-density",
cl::desc("show profile density details"),
cl::Optional, cl::cat(AggregatorCategory));

cl::opt<bool> SplitEH("split-eh", cl::desc("split C++ exception handling code"),
cl::Hidden, cl::cat(BoltOptCategory));

Expand Down
14 changes: 13 additions & 1 deletion bolt/test/X86/pre-aggregated-perf.test
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,19 @@ REQUIRES: system-linux

RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \
RUN: --profile-use-dfs | FileCheck %s
RUN: --profile-density-threshold=9 --profile-density-cutoff-hot=970000 \
RUN: --profile-use-dfs | FileCheck %s --check-prefix=CHECK-P2B

CHECK-P2B: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
CHECK-P2B: BOLT-INFO: Functions with density >= 21.7 account for 97.00% total sample counts.

RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \
RUN: --profile-density-cutoff-hot=970000 \
RUN: --profile-use-dfs 2>&1 | FileCheck %s --check-prefix=CHECK-WARNING

CHECK-WARNING: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
CHECK-WARNING: BOLT-WARNING: BOLT is estimated to optimize better with 2.8x more samples.
CHECK-WARNING: BOLT-INFO: Functions with density >= 21.7 account for 97.00% total sample counts.

RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s
RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s
Expand Down
1 change: 1 addition & 0 deletions bolt/tools/driver/llvm-bolt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ void perf2boltMode(int argc, char **argv) {
exit(1);
}
opts::AggregateOnly = true;
opts::ShowDensity = true;
}

void boltDiffMode(int argc, char **argv) {
Expand Down
3 changes: 3 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,9 @@ X86 Support

- All intrinsics in tbmintrin.h can now be used in constant expressions.

- Supported intrinsics for ``MOVRS AND AVX10.2``.
* Supported intrinsics of ``_mm(256|512)_(mask(z))_loadrs_epi(8|16|32|64)``.

Arm and AArch64 Support
^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
1 change: 1 addition & 0 deletions clang/docs/analyzer/user-docs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ Contents:
user-docs/FilingBugs
user-docs/CrossTranslationUnit
user-docs/TaintAnalysisConfiguration
user-docs/FAQ
Loading

0 comments on commit 7f173c2

Please sign in to comment.