Skip to content

Commit

Permalink
Recommit "[X86] Don't always separate conditions in `(br (and/or cond…
Browse files Browse the repository at this point in the history
…0, cond1))` into separate branches" (2nd Try)

Changes in Recommit:
    1) Fix non-determanism by using `SmallMapVector` instead of
       `SmallPtrSet`.
    2) Fix bug in dependency pruning where we discounted the actual
       `and/or` combining the two conditions. This lead to over pruning.

Closes llvm#81689
  • Loading branch information
goldsteinn committed Mar 4, 2024
1 parent a81a7b9 commit a4951ec
Show file tree
Hide file tree
Showing 25 changed files with 793 additions and 622 deletions.
36 changes: 36 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,42 @@ class TargetLoweringBase {
/// avoided.
bool isJumpExpensive() const { return JumpIsExpensive; }

// Costs parameters used by
// SelectionDAGBuilder::shouldKeepJumpConditionsTogether.
// shouldKeepJumpConditionsTogether will use these parameter value to
// determine if two conditions in the form `br (and/or cond1, cond2)` should
// be split into two branches or left as one.
//
// BaseCost is the cost threshold (in latency). If the estimated latency of
// computing both `cond1` and `cond2` is below the cost of just computing
// `cond1` + BaseCost, the two conditions will be kept together. Otherwise
// they will be split.
//
// LikelyBias increases BaseCost if branch probability info indicates that it
// is likely that both `cond1` and `cond2` will be computed.
//
// UnlikelyBias decreases BaseCost if branch probability info indicates that
// it is likely that both `cond1` and `cond2` will be computed.
//
// Set any field to -1 to make it ignored (setting BaseCost to -1 results in
// `shouldKeepJumpConditionsTogether` always returning false).
struct CondMergingParams {
int BaseCost;
int LikelyBias;
int UnlikelyBias;
};
// Return params for deciding if we should keep two branch conditions merged
// or split them into two separate branches.
// Arg0: The binary op joining the two conditions (and/or).
// Arg1: The first condition (cond1)
// Arg2: The second condition (cond2)
virtual CondMergingParams
getJumpConditionMergingParams(Instruction::BinaryOps, const Value *,
const Value *) const {
// -1 will always result in splitting.
return {-1, -1, -1};
}

/// Return true if selects are only cheaper than branches if the branch is
/// unlikely to be predicted right.
bool isPredictableSelectExpensive() const {
Expand Down
157 changes: 155 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
Expand Down Expand Up @@ -93,6 +94,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
Expand Down Expand Up @@ -2446,6 +2448,152 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
SL->SwitchCases.push_back(CB);
}

// Collect dependencies on V recursively. This is used for the cost analysis in
// `shouldKeepJumpConditionsTogether`.
static bool collectInstructionDeps(
SmallMapVector<const Instruction *, bool, 8> *Deps, const Value *V,
SmallMapVector<const Instruction *, bool, 8> *Necessary = nullptr,
unsigned Depth = 0) {
// Return false if we have an incomplete count.
if (Depth >= SelectionDAG::MaxRecursionDepth)
return false;

auto *I = dyn_cast<Instruction>(V);
if (I == nullptr)
return true;

if (Necessary != nullptr) {
// This instruction is necessary for the other side of the condition so
// don't count it.
if (Necessary->contains(I))
return true;
}

// Already added this dep.
if (!Deps->try_emplace(I, false).second)
return true;

for (unsigned OpIdx = 0, E = I->getNumOperands(); OpIdx < E; ++OpIdx)
if (!collectInstructionDeps(Deps, I->getOperand(OpIdx), Necessary,
Depth + 1))
return false;
return true;
}

bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether(
const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
TargetLoweringBase::CondMergingParams Params) const {
if (I.getNumSuccessors() != 2)
return false;

if (!I.isConditional())
return false;

if (Params.BaseCost < 0)
return false;

// Baseline cost.
InstructionCost CostThresh = Params.BaseCost;

BranchProbabilityInfo *BPI = nullptr;
if (Params.LikelyBias || Params.UnlikelyBias)
BPI = FuncInfo.BPI;
if (BPI != nullptr) {
// See if we are either likely to get an early out or compute both lhs/rhs
// of the condition.
BasicBlock *IfFalse = I.getSuccessor(0);
BasicBlock *IfTrue = I.getSuccessor(1);

std::optional<bool> Likely;
if (BPI->isEdgeHot(I.getParent(), IfTrue))
Likely = true;
else if (BPI->isEdgeHot(I.getParent(), IfFalse))
Likely = false;

if (Likely) {
if (Opc == (*Likely ? Instruction::And : Instruction::Or))
// Its likely we will have to compute both lhs and rhs of condition
CostThresh += Params.LikelyBias;
else {
if (Params.UnlikelyBias < 0)
return false;
// Its likely we will get an early out.
CostThresh -= Params.UnlikelyBias;
}
}
}

if (CostThresh <= 0)
return false;

// Collect "all" instructions that lhs condition is dependent on.
// Use map for stable iteration (to avoid non-determanism of iteration of
// SmallPtrSet). The `bool` value is just a dummy.
SmallMapVector<const Instruction *, bool, 8> LhsDeps, RhsDeps;
collectInstructionDeps(&LhsDeps, Lhs);
// Collect "all" instructions that rhs condition is dependent on AND are
// dependencies of lhs. This gives us an estimate on which instructions we
// stand to save by splitting the condition.
if (!collectInstructionDeps(&RhsDeps, Rhs, &LhsDeps))
return false;
// Add the compare instruction itself unless its a dependency on the LHS.
if (const auto *RhsI = dyn_cast<Instruction>(Rhs))
if (!LhsDeps.contains(RhsI))
RhsDeps.try_emplace(RhsI, false);

const auto &TLI = DAG.getTargetLoweringInfo();
const auto &TTI =
TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction());

InstructionCost CostOfIncluding = 0;
// See if this instruction will need to computed independently of whether RHS
// is.
Value *BrCond = I.getCondition();
auto ShouldCountInsn = [&RhsDeps, &BrCond](const Instruction *Ins) {
for (const auto *U : Ins->users()) {
// If user is independent of RHS calculation we don't need to count it.
if (auto *UIns = dyn_cast<Instruction>(U))
if (UIns != BrCond && !RhsDeps.contains(UIns))
return false;
}
return true;
};

// Prune instructions from RHS Deps that are dependencies of unrelated
// instructions. The value (SelectionDAG::MaxRecursionDepth) is fairly
// arbitrary and just meant to cap the how much time we spend in the pruning
// loop. Its highly unlikely to come into affect.
const unsigned MaxPruneIters = SelectionDAG::MaxRecursionDepth;
// Stop after a certain point. No incorrectness from including too many
// instructions.
for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) {
const Instruction *ToDrop = nullptr;
for (const auto &InsPair : RhsDeps) {
if (!ShouldCountInsn(InsPair.first)) {
ToDrop = InsPair.first;
break;
}
}
if (ToDrop == nullptr)
break;
RhsDeps.erase(ToDrop);
}

for (const auto &InsPair : RhsDeps) {
// Finally accumulate latency that we can only attribute to computing the
// RHS condition. Use latency because we are essentially trying to calculate
// the cost of the dependency chain.
// Possible TODO: We could try to estimate ILP and make this more precise.
CostOfIncluding +=
TTI.getInstructionCost(InsPair.first, TargetTransformInfo::TCK_Latency);

if (CostOfIncluding > CostThresh)
return false;
}
return true;
}

void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
Expand Down Expand Up @@ -2660,8 +2808,13 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
Opcode = Instruction::Or;

if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
if (Opcode &&
!(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value()))) &&
!shouldKeepJumpConditionsTogether(
FuncInfo, I, Opcode, BOp0, BOp1,
DAG.getTargetLoweringInfo().getJumpConditionMergingParams(
Opcode, BOp0, BOp1))) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode,
getEdgeProbability(BrMBB, Succ0MBB),
getEdgeProbability(BrMBB, Succ1MBB),
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,11 @@ class SelectionDAGBuilder {
N = NewN;
}

bool shouldKeepJumpConditionsTogether(
const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
TargetLoweringBase::CondMergingParams Params) const;

void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
Expand Down
52 changes: 52 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,40 @@ static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
"alignment set by x86-experimental-pref-loop-alignment."),
cl::Hidden);

static cl::opt<int> BrMergingBaseCostThresh(
"x86-br-merging-base-cost", cl::init(2),
cl::desc(
"Sets the cost threshold for when multiple conditionals will be merged "
"into one branch versus be split in multiple branches. Merging "
"conditionals saves branches at the cost of additional instructions. "
"This value sets the instruction cost limit, below which conditionals "
"will be merged, and above which conditionals will be split. Set to -1 "
"to never merge branches."),
cl::Hidden);

static cl::opt<int> BrMergingLikelyBias(
"x86-br-merging-likely-bias", cl::init(0),
cl::desc("Increases 'x86-br-merging-base-cost' in cases that it is likely "
"that all conditionals will be executed. For example for merging "
"the conditionals (a == b && c > d), if its known that a == b is "
"likely, then it is likely that if the conditionals are split "
"both sides will be executed, so it may be desirable to increase "
"the instruction cost threshold. Set to -1 to never merge likely "
"branches."),
cl::Hidden);

static cl::opt<int> BrMergingUnlikelyBias(
"x86-br-merging-unlikely-bias", cl::init(-1),
cl::desc(
"Decreases 'x86-br-merging-base-cost' in cases that it is unlikely "
"that all conditionals will be executed. For example for merging "
"the conditionals (a == b && c > d), if its known that a == b is "
"unlikely, then it is unlikely that if the conditionals are split "
"both sides will be executed, so it may be desirable to decrease "
"the instruction cost threshold. Set to -1 to never merge unlikely "
"branches."),
cl::Hidden);

static cl::opt<bool> MulConstantOptimization(
"mul-constant-optimization", cl::init(true),
cl::desc("Replace 'mul x, Const' with more effective instructions like "
Expand Down Expand Up @@ -3338,6 +3372,24 @@ unsigned X86TargetLowering::preferedOpcodeForCmpEqPiecesOfOperand(
return ISD::SRL;
}

TargetLoweringBase::CondMergingParams
X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
const Value *Lhs,
const Value *Rhs) const {
using namespace llvm::PatternMatch;
int BaseCost = BrMergingBaseCostThresh.getValue();
// a == b && a == c is a fast pattern on x86.
ICmpInst::Predicate Pred;
if (BaseCost >= 0 && Opc == Instruction::And &&
match(Lhs, m_ICmp(Pred, m_Value(), m_Value())) &&
Pred == ICmpInst::ICMP_EQ &&
match(Rhs, m_ICmp(Pred, m_Value(), m_Value())) &&
Pred == ICmpInst::ICMP_EQ)
BaseCost += 1;
return {BaseCost, BrMergingLikelyBias.getValue(),
BrMergingUnlikelyBias.getValue()};
}

bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const {
return N->getOpcode() != ISD::FP_EXTEND;
}
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1150,6 +1150,10 @@ namespace llvm {

bool preferScalarizeSplat(SDNode *N) const override;

CondMergingParams
getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
const Value *Rhs) const override;

bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;

Expand Down
11 changes: 6 additions & 5 deletions llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,16 @@ define i1 @loadAndRLEsource_no_exit_2E_1_label_2E_0(i32 %tmp.21.reload, i32 %tmp
; CHECK-NEXT: movl _block, %esi
; CHECK-NEXT: movb %al, 1(%esi,%edx)
; CHECK-NEXT: cmpl %ecx, _last
; CHECK-NEXT: jge LBB0_3
; CHECK-NEXT: ## %bb.1: ## %label.0
; CHECK-NEXT: setl %cl
; CHECK-NEXT: cmpl $257, %eax ## imm = 0x101
; CHECK-NEXT: je LBB0_3
; CHECK-NEXT: ## %bb.2: ## %label.0.no_exit.1_crit_edge.exitStub
; CHECK-NEXT: setne %al
; CHECK-NEXT: testb %al, %cl
; CHECK-NEXT: je LBB0_2
; CHECK-NEXT: ## %bb.1: ## %label.0.no_exit.1_crit_edge.exitStub
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
; CHECK-NEXT: LBB0_3: ## %codeRepl5.exitStub
; CHECK-NEXT: LBB0_2: ## %codeRepl5.exitStub
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
Expand Down
Loading

0 comments on commit a4951ec

Please sign in to comment.