Skip to content

Commit

Permalink
[MC][NFC] Reduce Address2ProbesMap size
Browse files Browse the repository at this point in the history
Replace the map from addresses to list of probes with a flat vector
containing probe references sorted by their addresses.

Reduces pseudo probe parsing time from 9.56s to 8.59s and peak RSS from
9.66 GiB to 9.08 GiB as part of perf2bolt processing a large binary.

Test Plan:
```
bin/llvm-lit -sv test/tools/llvm-profgen
```

Reviewers: maksfb, rafaelauler, dcci, ayermolo, wlei-llvm

Reviewed By: wlei-llvm

Pull Request: llvm#102904
  • Loading branch information
aaupov authored Aug 26, 2024
1 parent 04ebd19 commit ee09f7d
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 95 deletions.
14 changes: 6 additions & 8 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2415,17 +2415,15 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
Fragments.insert(BF);
for (const BinaryFunction *F : Fragments) {
const uint64_t FuncAddr = F->getAddress();
const auto &FragmentProbes =
llvm::make_range(ProbeMap.lower_bound(FuncAddr),
ProbeMap.lower_bound(FuncAddr + F->getSize()));
for (const auto &[OutputAddress, Probes] : FragmentProbes) {
for (const MCDecodedPseudoProbe &Probe :
ProbeMap.find(FuncAddr, FuncAddr + F->getSize())) {
const uint32_t OutputAddress = Probe.getAddress();
const uint32_t InputOffset = BAT->translate(
FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
const unsigned BlockIndex = getBlock(InputOffset).second;
for (const MCDecodedPseudoProbe &Probe : Probes)
YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
Probe.getType()});
YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
Probe.getType()});
}
}
}
Expand Down
11 changes: 4 additions & 7 deletions bolt/lib/Profile/YAMLProfileWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,13 +193,10 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
const uint64_t FuncAddr = BF.getAddress();
const std::pair<uint64_t, uint64_t> &BlockRange =
BB->getInputAddressRange();
const auto &BlockProbes =
llvm::make_range(ProbeMap.lower_bound(FuncAddr + BlockRange.first),
ProbeMap.lower_bound(FuncAddr + BlockRange.second));
for (const auto &[_, Probes] : BlockProbes)
for (const MCDecodedPseudoProbe &Probe : Probes)
YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{
Probe.getGuid(), Probe.getIndex(), Probe.getType()});
for (const MCDecodedPseudoProbe &Probe : ProbeMap.find(
FuncAddr + BlockRange.first, FuncAddr + BlockRange.second))
YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{
Probe.getGuid(), Probe.getIndex(), Probe.getType()});
}

YamlBF.Blocks.emplace_back(YamlBB);
Expand Down
83 changes: 34 additions & 49 deletions bolt/lib/Rewrite/PseudoProbeRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,59 +173,50 @@ void PseudoProbeRewriter::updatePseudoProbes() {
AddressProbesMap &Address2ProbesMap = ProbeDecoder.getAddress2ProbesMap();
const GUIDProbeFunctionMap &GUID2Func = ProbeDecoder.getGUID2FuncDescMap();

for (auto &AP : Address2ProbesMap) {
BinaryFunction *F = BC.getBinaryFunctionContainingAddress(AP.first);
for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) {
uint64_t Address = Probe.getAddress();
BinaryFunction *F = BC.getBinaryFunctionContainingAddress(Address);
// If F is removed, eliminate all probes inside it from inline tree
// Setting probes' addresses as INT64_MAX means elimination
if (!F) {
for (MCDecodedPseudoProbe &Probe : AP.second)
Probe.setAddress(INT64_MAX);
Probe.setAddress(INT64_MAX);
continue;
}
// If F is not emitted, the function will remain in the same address as its
// input
if (!F->isEmitted())
continue;

uint64_t Offset = AP.first - F->getAddress();
uint64_t Offset = Address - F->getAddress();
const BinaryBasicBlock *BB = F->getBasicBlockContainingOffset(Offset);
uint64_t BlkOutputAddress = BB->getOutputAddressRange().first;
// Check if block output address is defined.
// If not, such block is removed from binary. Then remove the probes from
// inline tree
if (BlkOutputAddress == 0) {
for (MCDecodedPseudoProbe &Probe : AP.second)
Probe.setAddress(INT64_MAX);
Probe.setAddress(INT64_MAX);
continue;
}

unsigned ProbeTrack = AP.second.size();
auto Probe = llvm::map_iterator(
AP.second.begin(),
[](auto RW) -> MCDecodedPseudoProbe & { return RW.get(); });
while (ProbeTrack != 0) {
if (Probe->isBlock()) {
Probe->setAddress(BlkOutputAddress);
} else if (Probe->isCall()) {
// A call probe may be duplicated due to ICP
// Go through output of InputOffsetToAddressMap to collect all related
// probes
auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(AP.first);
auto CallOutputAddress = CallOutputAddresses.first;
if (CallOutputAddress == CallOutputAddresses.second) {
Probe->setAddress(INT64_MAX);
} else {
Probe->setAddress(CallOutputAddress->second);
CallOutputAddress = std::next(CallOutputAddress);
}

while (CallOutputAddress != CallOutputAddresses.second) {
ProbeDecoder.addInjectedProbe(*Probe, CallOutputAddress->second);
CallOutputAddress = std::next(CallOutputAddress);
}
if (Probe.isBlock()) {
Probe.setAddress(BlkOutputAddress);
} else if (Probe.isCall()) {
// A call probe may be duplicated due to ICP
// Go through output of InputOffsetToAddressMap to collect all related
// probes
auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(Address);
auto CallOutputAddress = CallOutputAddresses.first;
if (CallOutputAddress == CallOutputAddresses.second) {
Probe.setAddress(INT64_MAX);
} else {
Probe.setAddress(CallOutputAddress->second);
CallOutputAddress = std::next(CallOutputAddress);
}

while (CallOutputAddress != CallOutputAddresses.second) {
ProbeDecoder.addInjectedProbe(Probe, CallOutputAddress->second);
CallOutputAddress = std::next(CallOutputAddress);
}
Probe = std::next(Probe);
ProbeTrack--;
}
}

Expand All @@ -241,22 +232,16 @@ void PseudoProbeRewriter::updatePseudoProbes() {
BinaryBlock.getName();

// scan all addresses -> correlate probe to block when print out
std::vector<uint64_t> Addresses;
for (auto &Entry : Address2ProbesMap)
Addresses.push_back(Entry.first);
llvm::sort(Addresses);
for (uint64_t Key : Addresses) {
for (MCDecodedPseudoProbe &Probe : Address2ProbesMap[Key]) {
if (Probe.getAddress() == INT64_MAX)
outs() << "Deleted Probe: ";
else
outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " ";
Probe.print(outs(), GUID2Func, true);
// print block name only if the probe is block type and undeleted.
if (Probe.isBlock() && Probe.getAddress() != INT64_MAX)
outs() << format_hex(Probe.getAddress(), 8) << " Probe is in "
<< Addr2BlockNames[Probe.getAddress()] << "\n";
}
for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) {
if (Probe.getAddress() == INT64_MAX)
outs() << "Deleted Probe: ";
else
outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " ";
Probe.print(outs(), GUID2Func, true);
// print block name only if the probe is block type and undeleted.
if (Probe.isBlock() && Probe.getAddress() != INT64_MAX)
outs() << format_hex(Probe.getAddress(), 8) << " Probe is in "
<< Addr2BlockNames[Probe.getAddress()] << "\n";
}
outs() << "=======================================\n";
}
Expand Down
30 changes: 25 additions & 5 deletions llvm/include/llvm/MC/MCPseudoProbe.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@
#include "llvm/IR/PseudoProbe.h"
#include "llvm/Support/ErrorOr.h"
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <tuple>
Expand Down Expand Up @@ -103,10 +102,6 @@ using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
// GUID to PseudoProbeFuncDesc map
using GUIDProbeFunctionMap =
std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>;
// Address to pseudo probes map.
using AddressProbesMap =
std::map<uint64_t,
std::vector<std::reference_wrapper<MCDecodedPseudoProbe>>>;

class MCDecodedPseudoProbeInlineTree;

Expand Down Expand Up @@ -213,6 +208,31 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase {
bool ShowName) const;
};

// Address to pseudo probes map.
class AddressProbesMap
: public std::vector<std::reference_wrapper<MCDecodedPseudoProbe>> {
auto getIt(uint64_t Addr) const {
auto CompareProbe = [](const MCDecodedPseudoProbe &Probe, uint64_t Addr) {
return Probe.getAddress() < Addr;
};
return llvm::lower_bound(*this, Addr, CompareProbe);
}

public:
// Returns range of probes within [\p From, \p To) address range.
auto find(uint64_t From, uint64_t To) const {
return llvm::make_range(getIt(From), getIt(To));
}
// Returns range of probes with given \p Address.
auto find(uint64_t Address) const {
auto FromIt = getIt(Address);
if (FromIt == end() || FromIt->get().getAddress() != Address)
return llvm::make_range(end(), end());
auto ToIt = getIt(Address + 1);
return llvm::make_range(FromIt, ToIt);
}
};

template <typename ProbesType, typename DerivedProbeInlineTreeType,
typename InlinedProbeTreeMap>
class MCPseudoProbeInlineTreeBase {
Expand Down
43 changes: 22 additions & 21 deletions llvm/lib/MC/MCPseudoProbe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,6 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
if (Cur && !isSentinelProbe(Attr)) {
PseudoProbeVec.emplace_back(Addr, Index, PseudoProbeType(Kind), Attr,
Discriminator, Cur);
Address2ProbesMap[Addr].emplace_back(PseudoProbeVec.back());
++CurrentProbeCount;
}
LastAddr = Addr;
Expand Down Expand Up @@ -635,6 +634,15 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
"Mismatching probe count pre- and post-parsing");
assert(InlineTreeVec.size() == InlinedCount &&
"Mismatching function records count pre- and post-parsing");

std::vector<std::pair<uint64_t, uint32_t>> SortedA2P(ProbeCount);
for (const auto &[I, Probe] : llvm::enumerate(PseudoProbeVec))
SortedA2P[I] = {Probe.getAddress(), I};
llvm::sort(SortedA2P);
Address2ProbesMap.reserve(ProbeCount);
for (const uint32_t I : llvm::make_second_range(SortedA2P))
Address2ProbesMap.emplace_back(PseudoProbeVec[I]);
SortedA2P.clear();
return true;
}

Expand All @@ -650,36 +658,29 @@ void MCPseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) {

void MCPseudoProbeDecoder::printProbeForAddress(raw_ostream &OS,
uint64_t Address) {
auto It = Address2ProbesMap.find(Address);
if (It != Address2ProbesMap.end()) {
for (const MCDecodedPseudoProbe &Probe : It->second) {
OS << " [Probe]:\t";
Probe.print(OS, GUID2FuncDescMap, true);
}
for (const MCDecodedPseudoProbe &Probe : Address2ProbesMap.find(Address)) {
OS << " [Probe]:\t";
Probe.print(OS, GUID2FuncDescMap, true);
}
}

void MCPseudoProbeDecoder::printProbesForAllAddresses(raw_ostream &OS) {
auto Entries = make_first_range(Address2ProbesMap);
SmallVector<uint64_t, 0> Addresses(Entries.begin(), Entries.end());
llvm::sort(Addresses);
for (auto K : Addresses) {
OS << "Address:\t";
OS << K;
OS << "\n";
printProbeForAddress(OS, K);
uint64_t PrevAddress = INT64_MAX;
for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) {
uint64_t Address = Probe.getAddress();
if (Address != PrevAddress) {
PrevAddress = Address;
OS << "Address:\t" << Address << '\n';
}
OS << " [Probe]:\t";
Probe.print(OS, GUID2FuncDescMap, true);
}
}

const MCDecodedPseudoProbe *
MCPseudoProbeDecoder::getCallProbeForAddr(uint64_t Address) const {
auto It = Address2ProbesMap.find(Address);
if (It == Address2ProbesMap.end())
return nullptr;
const auto &Probes = It->second;

const MCDecodedPseudoProbe *CallProbe = nullptr;
for (const MCDecodedPseudoProbe &Probe : Probes) {
for (const MCDecodedPseudoProbe &Probe : Address2ProbesMap.find(Address)) {
if (Probe.isCall()) {
// Disabling the assert and returning first call probe seen so far.
// Subsequent call probes, if any, are ignored. Due to the the way
Expand Down
8 changes: 3 additions & 5 deletions llvm/tools/llvm-profgen/ProfileGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1183,11 +1183,9 @@ void ProfileGeneratorBase::extractProbesFromRange(
do {
const AddressProbesMap &Address2ProbesMap =
Binary->getAddress2ProbesMap();
auto It = Address2ProbesMap.find(IP.Address);
if (It != Address2ProbesMap.end()) {
for (const MCDecodedPseudoProbe &Probe : It->second) {
ProbeCounter[&Probe] += Count;
}
for (const MCDecodedPseudoProbe &Probe :
Address2ProbesMap.find(IP.Address)) {
ProbeCounter[&Probe] += Count;
}
} while (IP.advance() && IP.Address <= RangeEnd);
}
Expand Down

0 comments on commit ee09f7d

Please sign in to comment.