Skip to content

Commit

Permalink
[MachineSink] Fix missing sinks along critical edges (#97618)
Browse files Browse the repository at this point in the history
4e0bd3f improved early MachineLICM's capabilities to hoist COPY from
physical registers out of a loop. However, it accidentally broke one of
MachineSink's preconditions on sinking cheap instructions (in this case,
COPY) which considered those instructions being profitable to sink only
when there are at least two of them in the same def-use chain in the
same basic block. So if early MachineLICM hoisted one of them out,
MachineSink no longer sink rest of the cheap instructions. This results
in redundant load immediate instructions from the motivating example
we've seen on RISC-V.

This patch fixes this by teaching MachineSink that if there is more than
one demand to sink a register into the same block from different
critical edges, it should be considered profitable as it increases the
CSE opportunities.
This change also improves two of the AArch64's cases.
  • Loading branch information
mshockwave authored Jul 9, 2024
1 parent 90d79e2 commit 7e2f961
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 57 deletions.
81 changes: 66 additions & 15 deletions llvm/lib/CodeGen/MachineSink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,14 @@ namespace {
// Remember which edges have been considered for breaking.
SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8>
CEBCandidates;
// Memorize the register that also wanted to sink into the same block along
// a different critical edge.
// {register to sink, sink-to block} -> the first sink-from block.
// We're recording the first sink-from block because that (critical) edge
// was deferred until we see another register that's going to sink into the
// same block.
DenseMap<std::pair<Register, MachineBasicBlock *>, MachineBasicBlock *>
CEMergeCandidates;
// Remember which edges we are about to split.
// This is different from CEBCandidates since those edges
// will be split.
Expand Down Expand Up @@ -197,14 +205,17 @@ namespace {

void releaseMemory() override {
CEBCandidates.clear();
CEMergeCandidates.clear();
}

private:
bool ProcessBlock(MachineBasicBlock &MBB);
void ProcessDbgInst(MachineInstr &MI);
bool isWorthBreakingCriticalEdge(MachineInstr &MI,
MachineBasicBlock *From,
MachineBasicBlock *To);
bool isLegalToBreakCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
MachineBasicBlock *To, bool BreakPHIEdge);
bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
MachineBasicBlock *To,
MachineBasicBlock *&DeferredFromBlock);

bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
MachineInstr &MI);
Expand Down Expand Up @@ -725,6 +736,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {

// Process all basic blocks.
CEBCandidates.clear();
CEMergeCandidates.clear();
ToSplit.clear();
for (auto &MBB: MF)
MadeChange |= ProcessBlock(MBB);
Expand Down Expand Up @@ -873,9 +885,9 @@ void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
SeenDbgVars.insert(Var);
}

bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
MachineBasicBlock *From,
MachineBasicBlock *To) {
bool MachineSinking::isWorthBreakingCriticalEdge(
MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To,
MachineBasicBlock *&DeferredFromBlock) {
// FIXME: Need much better heuristics.

// If the pass has already considered breaking this edge (during this pass
Expand All @@ -887,6 +899,27 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
return true;

// Check and record the register and the destination block we want to sink
// into. Note that we want to do the following before the next check on branch
// probability. Because we want to record the initial candidate even if it's
// on hot edge, so that other candidates that might not on hot edges can be
// sinked as well.
for (const auto &MO : MI.all_defs()) {
Register Reg = MO.getReg();
if (!Reg)
continue;
Register SrcReg = Reg.isVirtual() ? TRI->lookThruCopyLike(Reg, MRI) : Reg;
auto Key = std::make_pair(SrcReg, To);
auto Res = CEMergeCandidates.try_emplace(Key, From);
// We wanted to sink the same register into the same block, consider it to
// be profitable.
if (!Res.second) {
// Return the source block that was previously held off.
DeferredFromBlock = Res.first->second;
return true;
}
}

if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <=
BranchProbability(SplitEdgeProbabilityThreshold, 100))
return true;
Expand Down Expand Up @@ -921,13 +954,10 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
return false;
}

bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
MachineBasicBlock *FromBB,
MachineBasicBlock *ToBB,
bool BreakPHIEdge) {
if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
return false;

bool MachineSinking::isLegalToBreakCriticalEdge(MachineInstr &MI,
MachineBasicBlock *FromBB,
MachineBasicBlock *ToBB,
bool BreakPHIEdge) {
// Avoid breaking back edge. From == To means backedge for single BB cycle.
if (!SplitEdges || FromBB == ToBB)
return false;
Expand Down Expand Up @@ -985,11 +1015,32 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
return false;
}

ToSplit.insert(std::make_pair(FromBB, ToBB));

return true;
}

bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
MachineBasicBlock *FromBB,
MachineBasicBlock *ToBB,
bool BreakPHIEdge) {
bool Status = false;
MachineBasicBlock *DeferredFromBB = nullptr;
if (isWorthBreakingCriticalEdge(MI, FromBB, ToBB, DeferredFromBB)) {
// If there is a DeferredFromBB, we consider FromBB only if _both_
// of them are legal to split.
if ((!DeferredFromBB ||
ToSplit.count(std::make_pair(DeferredFromBB, ToBB)) ||
isLegalToBreakCriticalEdge(MI, DeferredFromBB, ToBB, BreakPHIEdge)) &&
isLegalToBreakCriticalEdge(MI, FromBB, ToBB, BreakPHIEdge)) {
ToSplit.insert(std::make_pair(FromBB, ToBB));
if (DeferredFromBB)
ToSplit.insert(std::make_pair(DeferredFromBB, ToBB));
Status = true;
}
}

return Status;
}

std::vector<unsigned> &
MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
// Currently to save compiling time, MBB's register pressure will not change
Expand Down
13 changes: 5 additions & 8 deletions llvm/test/CodeGen/AArch64/and-sink.ll
Original file line number Diff line number Diff line change
Expand Up @@ -46,27 +46,24 @@ bb2:
define dso_local i32 @and_sink2(i32 %a, i1 %c, i1 %c2) {
; CHECK-LABEL: and_sink2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: adrp x9, A
; CHECK-NEXT: str wzr, [x9, :lo12:A]
; CHECK-NEXT: adrp x8, A
; CHECK-NEXT: str wzr, [x8, :lo12:A]
; CHECK-NEXT: tbz w1, #0, .LBB1_5
; CHECK-NEXT: // %bb.1: // %bb0.preheader
; CHECK-NEXT: adrp x8, B
; CHECK-NEXT: adrp x9, C
; CHECK-NEXT: .LBB1_2: // %bb0
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str wzr, [x8, :lo12:B]
; CHECK-NEXT: tbz w2, #0, .LBB1_6
; CHECK-NEXT: tbz w2, #0, .LBB1_5
; CHECK-NEXT: // %bb.3: // %bb1
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: str wzr, [x9, :lo12:C]
; CHECK-NEXT: tbnz w0, #2, .LBB1_2
; CHECK-NEXT: // %bb.4:
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: .LBB1_5: // %common.ret
; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB1_6:
; CHECK-NEXT: .LBB1_5:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret

Expand Down
24 changes: 8 additions & 16 deletions llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@
define i64 @test_or(i32 %a, i32 %b) {
; CHECK-LABEL: test_or:
; CHECK: ; %bb.0: ; %bb1
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: cbnz w0, LBB0_2
; CHECK-NEXT: LBB0_1:
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cbnz w8, LBB0_2
; CHECK-NEXT: LBB0_1: ; %common.ret
; CHECK-NEXT: ret
; CHECK-NEXT: LBB0_2: ; %bb1.cond.split
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cbz w1, LBB0_1
; CHECK-NEXT: ; %bb.3: ; %bb4
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
Expand All @@ -37,13 +35,11 @@ bb4:
define i64 @test_or_select(i32 %a, i32 %b) {
; CHECK-LABEL: test_or_select:
; CHECK: ; %bb.0: ; %bb1
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: cbnz w0, LBB1_2
; CHECK-NEXT: LBB1_1:
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cbnz w8, LBB1_2
; CHECK-NEXT: LBB1_1: ; %common.ret
; CHECK-NEXT: ret
; CHECK-NEXT: LBB1_2: ; %bb1.cond.split
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cbz w1, LBB1_1
; CHECK-NEXT: ; %bb.3: ; %bb4
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
Expand All @@ -70,13 +66,11 @@ bb4:
define i64 @test_and(i32 %a, i32 %b) {
; CHECK-LABEL: test_and:
; CHECK: ; %bb.0: ; %bb1
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: cbnz w0, LBB2_2
; CHECK-NEXT: LBB2_1:
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cbnz w8, LBB2_2
; CHECK-NEXT: LBB2_1: ; %common.ret
; CHECK-NEXT: ret
; CHECK-NEXT: LBB2_2: ; %bb1.cond.split
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cbz w1, LBB2_1
; CHECK-NEXT: ; %bb.3: ; %bb4
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
Expand All @@ -103,13 +97,11 @@ bb4:
define i64 @test_and_select(i32 %a, i32 %b) {
; CHECK-LABEL: test_and_select:
; CHECK: ; %bb.0: ; %bb1
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: cbnz w0, LBB3_2
; CHECK-NEXT: LBB3_1:
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cbnz w8, LBB3_2
; CHECK-NEXT: LBB3_1: ; %common.ret
; CHECK-NEXT: ret
; CHECK-NEXT: LBB3_2: ; %bb1.cond.split
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cbz w1, LBB3_1
; CHECK-NEXT: ; %bb.3: ; %bb4
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
Expand Down
34 changes: 16 additions & 18 deletions llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ define i1 @sink_li(ptr %text, ptr %text.addr.0) nounwind {
; CHECK-NEXT: mv s0, a0
; CHECK-NEXT: call toupper
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: beqz s0, .LBB0_25
; CHECK-NEXT: beqz s0, .LBB0_26
; CHECK-NEXT: # %bb.1: # %while.body.preheader
; CHECK-NEXT: li a2, 1
; CHECK-NEXT: li a3, 9
Expand Down Expand Up @@ -55,36 +55,34 @@ define i1 @sink_li(ptr %text, ptr %text.addr.0) nounwind {
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: beq a2, a3, .LBB0_2
; CHECK-NEXT: # %bb.14: # %while.body.6
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: beqz a2, .LBB0_25
; CHECK-NEXT: beqz a2, .LBB0_24
; CHECK-NEXT: # %bb.15: # %strdup.exit.split.loop.exit126
; CHECK-NEXT: addi s0, s1, 7
; CHECK-NEXT: j .LBB0_24
; CHECK-NEXT: .LBB0_16: # %while.body
; CHECK-NEXT: bnez a2, .LBB0_18
; CHECK-NEXT: j .LBB0_25
; CHECK-NEXT: .LBB0_16: # %while.body
; CHECK-NEXT: beqz a2, .LBB0_26
; CHECK-NEXT: j .LBB0_18
; CHECK-NEXT: .LBB0_17: # %while.body.1
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: beqz a2, .LBB0_25
; CHECK-NEXT: beqz a2, .LBB0_24
; CHECK-NEXT: .LBB0_18: # %strdup.exit.loopexit
; CHECK-NEXT: li s0, 0
; CHECK-NEXT: j .LBB0_24
; CHECK-NEXT: j .LBB0_25
; CHECK-NEXT: .LBB0_19: # %while.body.3
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: beqz a2, .LBB0_25
; CHECK-NEXT: beqz a2, .LBB0_24
; CHECK-NEXT: # %bb.20: # %strdup.exit.split.loop.exit120
; CHECK-NEXT: addi s0, s1, 4
; CHECK-NEXT: j .LBB0_24
; CHECK-NEXT: j .LBB0_25
; CHECK-NEXT: .LBB0_21: # %while.body.4
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: beqz a2, .LBB0_25
; CHECK-NEXT: beqz a2, .LBB0_24
; CHECK-NEXT: # %bb.22: # %strdup.exit.split.loop.exit122
; CHECK-NEXT: addi s0, s1, 5
; CHECK-NEXT: j .LBB0_24
; CHECK-NEXT: j .LBB0_25
; CHECK-NEXT: .LBB0_23: # %while.body.5
; CHECK-NEXT: bnez a2, .LBB0_25
; CHECK-NEXT: .LBB0_24:
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: beqz a2, .LBB0_25
; CHECK-NEXT: .LBB0_24: # %strdup.exit
; CHECK-NEXT: j .LBB0_26
; CHECK-NEXT: .LBB0_25: # %strdup.exit
; CHECK-NEXT: li s1, 0
; CHECK-NEXT: mv s2, a0
; CHECK-NEXT: li a0, 0
Expand All @@ -95,7 +93,7 @@ define i1 @sink_li(ptr %text, ptr %text.addr.0) nounwind {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: jalr s1
; CHECK-NEXT: li a1, 1
; CHECK-NEXT: .LBB0_25: # %return
; CHECK-NEXT: .LBB0_26: # %return
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
Expand Down

0 comments on commit 7e2f961

Please sign in to comment.