Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: Implement greedy RPO-based block layout #101473

Merged
merged 17 commits into from
May 2, 2024
Merged
50 changes: 47 additions & 3 deletions src/coreclr/jit/block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,11 @@ void FlowEdge::addLikelihood(weight_t addedLikelihood)
// AllSuccessorEnumerator: Construct an instance of the enumerator.
//
// Arguments:
// comp - Compiler instance
// block - The block whose successors are to be iterated
// comp - Compiler instance
// block - The block whose successors are to be iterated
// useProfile - Unused; this is needed to match RegularSuccessorEnumerator's parameter list
//
AllSuccessorEnumerator::AllSuccessorEnumerator(Compiler* comp, BasicBlock* block)
AllSuccessorEnumerator::AllSuccessorEnumerator(Compiler* comp, BasicBlock* block, const bool useProfile /* = false */)
: m_block(block)
{
m_numSuccs = 0;
Expand Down Expand Up @@ -169,6 +170,49 @@ AllSuccessorEnumerator::AllSuccessorEnumerator(Compiler* comp, BasicBlock* block
}
}

//------------------------------------------------------------------------
// RegularSuccessorEnumerator: Construct an instance of the enumerator.
//
// Arguments:
// comp - Compiler instance
// block - The block whose successors are to be iterated
// useProfile - If true, determines the order of successors visited using profile data
//
// Notes:
// Counts the block's regular (non-EH) successors and caches up to
// ArrLen(m_successors) of them inline; larger successor sets fall back to a
// heap-allocated array filled by a second visitation pass.
//
RegularSuccessorEnumerator::RegularSuccessorEnumerator(Compiler* comp, BasicBlock* block, const bool useProfile)
: m_block(block)
{
// First pass: count every regular successor, caching as many as fit in the
// inline m_successors storage. We keep counting past the inline capacity so
// we know whether the fallback path below is needed.
m_numSuccs = 0;
block->VisitRegularSuccs(
comp,
[this](BasicBlock* succ) {
if (m_numSuccs < ArrLen(m_successors))
{
m_successors[m_numSuccs] = succ;
}

m_numSuccs++;
return BasicBlockVisit::Continue;
},
useProfile);

// Rare slow path: too many successors for inline storage. Allocate an exact-size
// array and re-visit to populate it. Note: m_pSuccessors aliases m_successors in
// a union, so this overwrites the partially-filled inline cache.
// Assumes the second visit yields the same successors (asserted below).
if (m_numSuccs > ArrLen(m_successors))
{
m_pSuccessors = new (comp, CMK_BasicBlock) BasicBlock*[m_numSuccs];

unsigned numSuccs = 0;
block->VisitRegularSuccs(
comp,
[this, &numSuccs](BasicBlock* succ) {
assert(numSuccs < m_numSuccs);
m_pSuccessors[numSuccs++] = succ;
return BasicBlockVisit::Continue;
},
useProfile);

assert(numSuccs == m_numSuccs);
}
}

//------------------------------------------------------------------------
// BlockPredsWithEH:
// Return list of predecessors, including due to EH flow. This is logically
Expand Down
47 changes: 45 additions & 2 deletions src/coreclr/jit/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -1826,7 +1826,7 @@ struct BasicBlock : private LIR::Range
BasicBlockVisit VisitEHSuccs(Compiler* comp, TFunc func);

template <typename TFunc>
BasicBlockVisit VisitRegularSuccs(Compiler* comp, TFunc func);
BasicBlockVisit VisitRegularSuccs(Compiler* comp, TFunc func, const bool useProfile = false);

bool HasPotentialEHSuccs(Compiler* comp);

Expand Down Expand Up @@ -2518,7 +2518,50 @@ class AllSuccessorEnumerator

public:
// Constructs an enumerator of all `block`'s successors.
AllSuccessorEnumerator(Compiler* comp, BasicBlock* block);
AllSuccessorEnumerator(Compiler* comp, BasicBlock* block, const bool useProfile = false);

// Gets the block whose successors are enumerated.
BasicBlock* Block()
{
return m_block;
}

// Returns the next available successor or `nullptr` if there are no more successors.
BasicBlock* NextSuccessor()
{
// m_curSucc starts at UINT_MAX, so the first increment wraps to index 0.
const unsigned index = ++m_curSucc;

if (index >= m_numSuccs)
{
return nullptr;
}

// Small successor counts are cached inline; larger sets live in a
// separately allocated array (the two share storage via a union).
const bool inlineStorage = (m_numSuccs <= ArrLen(m_successors));
return inlineStorage ? m_successors[index] : m_pSuccessors[index];
}
};

// An enumerator of a block's non-EH successors. Used for RPO traversals that exclude EH regions.
class RegularSuccessorEnumerator
{
BasicBlock* m_block;
union
{
// We store up to 4 successors inline in the enumerator. For ASP.NET
// and libraries.pmi this is enough in 99.7% of cases.
BasicBlock* m_successors[4];
BasicBlock** m_pSuccessors;
};

unsigned m_numSuccs;
unsigned m_curSucc = UINT_MAX;

public:
// Constructs an enumerator of `block`'s regular successors.
RegularSuccessorEnumerator(Compiler* comp, BasicBlock* block, const bool useProfile);

// Gets the block whose successors are enumerated.
BasicBlock* Block()
Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -6054,6 +6054,7 @@ class Compiler
bool fgComputeCalledCount(weight_t returnWeight);

bool fgReorderBlocks(bool useProfile);
void fgDoReversePostOrderLayout();

bool fgFuncletsAreCold();

Expand All @@ -6074,9 +6075,10 @@ class Compiler
PhaseStatus fgSetBlockOrder();
bool fgHasCycleWithoutGCSafePoint();

template<typename VisitPreorder, typename VisitPostorder, typename VisitEdge>
template <typename VisitPreorder, typename VisitPostorder, typename VisitEdge, typename SuccessorEnumerator = AllSuccessorEnumerator, const bool useProfile = false>
unsigned fgRunDfs(VisitPreorder assignPreorder, VisitPostorder assignPostorder, VisitEdge visitEdge);

template <const bool skipEH = false, const bool useProfile = false>
FlowGraphDfsTree* fgComputeDfs();
void fgInvalidateDfsTree();

Expand Down
49 changes: 36 additions & 13 deletions src/coreclr/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -696,14 +696,15 @@ BasicBlockVisit BasicBlock::VisitAllSuccs(Compiler* comp, TFunc func)
// VisitRegularSuccs: Visit regular successors of this block.
//
// Arguments:
// comp - Compiler instance
// func - Callback
// comp - Compiler instance
// func - Callback
// useProfile - If true, determines the order of successors visited using profile data
//
// Returns:
// Whether or not the visiting was aborted.
//
template <typename TFunc>
BasicBlockVisit BasicBlock::VisitRegularSuccs(Compiler* comp, TFunc func)
BasicBlockVisit BasicBlock::VisitRegularSuccs(Compiler* comp, TFunc func, const bool useProfile /* = false */)
{
switch (bbKind)
{
Expand All @@ -721,6 +722,14 @@ BasicBlockVisit BasicBlock::VisitRegularSuccs(Compiler* comp, TFunc func)
return BasicBlockVisit::Continue;

case BBJ_CALLFINALLY:
if (useProfile)
{
// Don't visit EH successor if using profile data for block reordering.
return isBBCallFinallyPair() ? func(Next()) : BasicBlockVisit::Continue;
}
jakobbotsch marked this conversation as resolved.
Show resolved Hide resolved

return func(GetTarget());

case BBJ_CALLFINALLYRET:
case BBJ_EHCATCHRET:
case BBJ_EHFILTERRET:
Expand All @@ -729,10 +738,18 @@ BasicBlockVisit BasicBlock::VisitRegularSuccs(Compiler* comp, TFunc func)
return func(GetTarget());

case BBJ_COND:
RETURN_ON_ABORT(func(GetFalseTarget()));

if (!TrueEdgeIs(GetFalseEdge()))
if (TrueEdgeIs(GetFalseEdge()))
{
RETURN_ON_ABORT(func(GetFalseTarget()));
}
else if (useProfile && (GetTrueEdge()->getLikelihood() < GetFalseEdge()->getLikelihood()))
{
RETURN_ON_ABORT(func(GetTrueTarget()));
RETURN_ON_ABORT(func(GetFalseTarget()));
}
else
{
RETURN_ON_ABORT(func(GetFalseTarget()));
RETURN_ON_ABORT(func(GetTrueTarget()));
}

Expand Down Expand Up @@ -4742,9 +4759,11 @@ inline bool Compiler::compCanHavePatchpoints(const char** reason)
// fgRunDfs: Run DFS over the flow graph.
//
// Type parameters:
// VisitPreorder - Functor type that takes a BasicBlock* and its preorder number
// VisitPostorder - Functor type that takes a BasicBlock* and its postorder number
// VisitEdge - Functor type that takes two BasicBlock*.
// VisitPreorder - Functor type that takes a BasicBlock* and its preorder number
// VisitPostorder - Functor type that takes a BasicBlock* and its postorder number
// VisitEdge - Functor type that takes two BasicBlock*.
// SuccessorEnumerator - Enumerator type for controlling which successors are visited
// useProfile - If true, determines order of successors visited using profile data
//
// Parameters:
// visitPreorder - Functor to visit block in its preorder
Expand All @@ -4755,7 +4774,11 @@ inline bool Compiler::compCanHavePatchpoints(const char** reason)
// Returns:
// Number of blocks visited.
//
template <typename VisitPreorder, typename VisitPostorder, typename VisitEdge>
template <typename VisitPreorder,
typename VisitPostorder,
typename VisitEdge,
typename SuccessorEnumerator /* = AllSuccessorEnumerator */,
const bool useProfile /* = false */>
unsigned Compiler::fgRunDfs(VisitPreorder visitPreorder, VisitPostorder visitPostorder, VisitEdge visitEdge)
{
BitVecTraits traits(fgBBNumMax + 1, this);
Expand All @@ -4764,11 +4787,11 @@ unsigned Compiler::fgRunDfs(VisitPreorder visitPreorder, VisitPostorder visitPos
unsigned preOrderIndex = 0;
unsigned postOrderIndex = 0;

ArrayStack<AllSuccessorEnumerator> blocks(getAllocator(CMK_DepthFirstSearch));
ArrayStack<SuccessorEnumerator> blocks(getAllocator(CMK_DepthFirstSearch));

auto dfsFrom = [&](BasicBlock* firstBB) {
BitVecOps::AddElemD(&traits, visited, firstBB->bbNum);
blocks.Emplace(this, firstBB);
blocks.Emplace(this, firstBB, useProfile);
visitPreorder(firstBB, preOrderIndex++);

while (!blocks.Empty())
Expand All @@ -4780,7 +4803,7 @@ unsigned Compiler::fgRunDfs(VisitPreorder visitPreorder, VisitPostorder visitPos
{
if (BitVecOps::TryAddElemD(&traits, visited, succ->bbNum))
{
blocks.Emplace(this, succ);
blocks.Emplace(this, succ, useProfile);
visitPreorder(succ, preOrderIndex++);
}

Expand Down
131 changes: 130 additions & 1 deletion src/coreclr/jit/fgopt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3412,9 +3412,23 @@ bool Compiler::fgReorderBlocks(bool useProfile)
}
}

// If we will be reordering blocks, ensure the false target of a BBJ_COND block is its next block
if (useProfile)
{
if (JitConfig.JitDoReversePostOrderLayout())
{
fgDoReversePostOrderLayout();

#ifdef DEBUG
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can just remove this whole #ifdef DEBUG bit, we will check the BB list at end of phase, which is imminent.

if (expensiveDebugCheckLevel >= 2)
{
fgDebugCheckBBlist();
}
#endif // DEBUG

return true;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume it's unlikely that we won't move blocks here? The other variant tries to keep track, but I'm not sure it is worth the extra bookkeeping. All we are doing by sometimes returning false is speeding up the checked jit a tiny bit.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That was my reasoning for hard-coding the return value.

}

// We will be reordering blocks, so ensure the false target of a BBJ_COND block is its next block
for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->Next())
{
if (block->KindIs(BBJ_COND) && !block->NextIs(block->GetFalseTarget()))
Expand Down Expand Up @@ -4522,6 +4536,121 @@ bool Compiler::fgReorderBlocks(bool useProfile)
#pragma warning(pop)
#endif

//-----------------------------------------------------------------------------
// fgDoReversePostOrderLayout: Reorder blocks using a greedy RPO of the method's
// main body (i.e. non-EH) successors.
//
// Notes:
// This won't reorder blocks in the funclet section, if there are any.
// In the main method body, this will reorder blocks in the same EH region
// to avoid making any regions non-contiguous.
//
void Compiler::fgDoReversePostOrderLayout()
{
#ifdef DEBUG
if (verbose)
{
printf("*************** In fgDoReversePostOrderLayout()\n");

printf("\nInitial BasicBlocks");
fgDispBasicBlocks(verboseTrees);
printf("\n");
}
#endif // DEBUG

// Compute DFS of main function body's blocks, using profile data to determine the order successors are visited in
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is now a DFS of everything, right? Not just the main function?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, updated.

//
FlowGraphDfsTree* const dfsTree = fgComputeDfs</* skipEH */ true, /* useProfile */ true>();

// Fast path: We don't have any EH regions, so just reorder the blocks
//
if (compHndBBtabCount == 0)
{
for (unsigned i = dfsTree->GetPostOrderCount() - 1; i != 0; i--)
{
BasicBlock* const block = dfsTree->GetPostOrder(i);
BasicBlock* const blockToMove = dfsTree->GetPostOrder(i - 1);
fgUnlinkBlock(blockToMove);
fgInsertBBafter(block, blockToMove);
}

return;
}

// We have EH regions, so make sure the RPO doesn't make any regions non-contiguous
// by only reordering blocks within the same region
//
for (unsigned i = dfsTree->GetPostOrderCount() - 1; i != 0; i--)
{
BasicBlock* const block = dfsTree->GetPostOrder(i);
BasicBlock* const blockToMove = dfsTree->GetPostOrder(i - 1);

if (BasicBlock::sameEHRegion(block, blockToMove))
{
fgUnlinkBlock(blockToMove);
fgInsertBBafter(block, blockToMove);
}
}

// The RPO won't change the entry blocks of any try regions, but reordering can change the last block in a region
// (for example, by pushing throw blocks unreachable via normal flow to the end of the region).
// First, determine the new try region ends.
//
BasicBlock** const tryRegionEnds = new (this, CMK_Generic) BasicBlock* [compHndBBtabCount] {};

// If we aren't using EH funclets, fgFirstFuncletBB is nullptr, and we'll traverse the entire blocklist.
// Else if we do have funclets, don't extend the end of a try region to include its funclet blocks.
//
for (BasicBlock* block = fgFirstBB; block != fgFirstFuncletBB; block = block->Next())
{
if (block->hasTryIndex())
{
tryRegionEnds[block->getTryIndex()] = block;
}
}

// Now, update the EH descriptors
//
unsigned XTnum;
EHblkDsc* HBtab;
for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
{
BasicBlock* const tryEnd = tryRegionEnds[XTnum];

// We can have multiple EH descriptors map to the same try region,
// but we will only update the try region's last block pointer at the index given by BasicBlock::getTryIndex,
// so the duplicate EH descriptors' last block pointers can be null.
// Tolerate this.
//
if (tryEnd == nullptr)
{
continue;
}

// Update the end pointer of this try region to the new last block
//
HBtab->ebdTryLast = tryEnd;
const unsigned enclosingTryIndex = HBtab->ebdEnclosingTryIndex;

// If this try region is nested in another one, we might need to update its enclosing region's end block
//
if (enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX)
{
BasicBlock* const enclosingTryEnd = tryRegionEnds[enclosingTryIndex];

// If multiple EH descriptors map to the same try region,
// then the enclosing region's last block might be null in the table, so set it here.
// Similarly, if the enclosing region ends right before the nested region begins,
// extend the enclosing region's last block to the end of the nested region.
//
if ((enclosingTryEnd == nullptr) || enclosingTryEnd->NextIs(HBtab->ebdTryBeg))
{
tryRegionEnds[enclosingTryIndex] = tryEnd;
}
}
}
}

//-------------------------------------------------------------
// fgUpdateFlowGraphPhase: run flow graph optimization as a
// phase, with no tail duplication
Expand Down
Loading
Loading