Skip to content

Commit

Permalink
[Autobackout][FuncReg]Revert of change: 1687b34
Browse files Browse the repository at this point in the history
 Do Memcpy lowering early to allow constants promotion

Do MemCpy lowering early in the optimization phase.
It must be before SROA and InstCombine to ensure the constants are promoted
to the immediates before they are lowered to loads from constantBase in the
beginning of CG phase.
  • Loading branch information
admitric authored and igcbot committed Jun 21, 2023
1 parent d38dc66 commit cd9b719
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 30 deletions.
2 changes: 0 additions & 2 deletions IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1502,8 +1502,6 @@ void OptimizeIR(CodeGenContext* const pContext)
// run custom safe opts to potentially get rid of indirect
// addressing of private arrays, see visitLoadInst
mpm.add(new CustomSafeOptPass());
// lower memcpy to be able to promote the constants to immediates with SROA and InstCombine (later)
mpm.add(createReplaceUnsupportedIntrinsicsPass(true));
mpm.add(createSROAPass());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ using namespace llvm;
using namespace IGC;
using IGCLLVM::getAlign;

namespace IGC
namespace
{
/// ReplaceIntrinsics pass lowers calls to unsupported intrinsics functions.
// Two llvm intrinsics are replaced: llvm.memcpy and llvm.memset. Both appear in SPIR spec.
Expand All @@ -45,7 +45,7 @@ namespace IGC
typedef void (ReplaceUnsupportedIntrinsics::* MemFuncPtr_t)(IntrinsicInst*);
static char ID;

ReplaceUnsupportedIntrinsics(bool replaceOnlyMemCpyWithoutLoop=false);
ReplaceUnsupportedIntrinsics();

~ReplaceUnsupportedIntrinsics() {}

Expand All @@ -66,7 +66,6 @@ namespace IGC
private:
CodeGenContext* m_Ctx;
std::vector<llvm::IntrinsicInst*> m_instsToReplace;
bool m_replaceOnlyMemCpyWithoutLoops;

/// Helper
///
Expand Down Expand Up @@ -141,8 +140,7 @@ const std::map< Intrinsic::ID, ReplaceUnsupportedIntrinsics::MemFuncPtr_t > Repl
{ Intrinsic::ctlz, &ReplaceUnsupportedIntrinsics::replaceCountTheLeadingZeros }
};

ReplaceUnsupportedIntrinsics::ReplaceUnsupportedIntrinsics(bool replaceOnlyMemCpyWithoutLoops) :
FunctionPass(ID), m_replaceOnlyMemCpyWithoutLoops(replaceOnlyMemCpyWithoutLoops)
ReplaceUnsupportedIntrinsics::ReplaceUnsupportedIntrinsics() : FunctionPass(ID)
{
initializeReplaceUnsupportedIntrinsicsPass(*PassRegistry::getPassRegistry());
}
Expand Down Expand Up @@ -519,10 +517,6 @@ void ReplaceUnsupportedIntrinsics::replaceMemcpy(IntrinsicInst* I)
// without generating the loop.
if (NewCount > 0)
{
bool noLoop = NewCount < IGC_GET_FLAG_VALUE(MemCpyLoweringUnrollThreshold);
if (m_replaceOnlyMemCpyWithoutLoops && !noLoop) {
return;
}
vSrc = Builder.CreateBitCast(SkipBitCast(Src), PointerType::get(VecTys[0], SrcAS), "memcpy_vsrc");
vDst = Builder.CreateBitCast(SkipBitCast(Dst), PointerType::get(VecTys[0], DstAS), "memcpy_vdst");

Expand All @@ -536,7 +530,7 @@ void ReplaceUnsupportedIntrinsics::replaceMemcpy(IntrinsicInst* I)
SrcAlign = adjust_align < SrcAlign ? adjust_align : SrcAlign;

// If NewCount is less than the threshold, don't generate loop.
if (noLoop)
if (NewCount < IGC_GET_FLAG_VALUE(MemCpyLoweringUnrollThreshold))
{
for (unsigned i = 0; i < NewCount; ++i)
{
Expand Down Expand Up @@ -588,9 +582,6 @@ void ReplaceUnsupportedIntrinsics::replaceMemcpy(IntrinsicInst* I)
}
else
{
if (m_replaceOnlyMemCpyWithoutLoops) {
return;
}
Src = Builder.CreateBitCast(SkipBitCast(Src), TySrcPtrI8, "memcpy_src");
Dst = Builder.CreateBitCast(SkipBitCast(Dst), TyDstPtrI8, "memcpy_dst");
// Fall back to i8 copy
Expand Down Expand Up @@ -824,11 +815,6 @@ void ReplaceUnsupportedIntrinsics::replaceMemset(IntrinsicInst* I)
// First, insert main loop before MC.
if (NewCount > 0)
{
bool noLoop = NewCount < IGC_GET_FLAG_VALUE(MemCpyLoweringUnrollThreshold);
if (m_replaceOnlyMemCpyWithoutLoops && !noLoop) {
return;
}

PointerType* PTy = PointerType::get(VecTys[0], AS);
vSrc = replicateScalar(Src, VecTys[0], MS);
vDst = Builder.CreateBitCast(SkipBitCast(Dst), PTy, "memset_vdst");
Expand All @@ -842,7 +828,7 @@ void ReplaceUnsupportedIntrinsics::replaceMemset(IntrinsicInst* I)
Align = adjust_align < Align ? adjust_align : Align;

// If NewCount is less than the threshold, don't generate loop.
if (noLoop)
if (NewCount < IGC_GET_FLAG_VALUE(MemCpyLoweringUnrollThreshold))
{
for (unsigned i = 0; i < NewCount; ++i)
{
Expand Down Expand Up @@ -888,9 +874,6 @@ void ReplaceUnsupportedIntrinsics::replaceMemset(IntrinsicInst* I)
}
else
{
if (m_replaceOnlyMemCpyWithoutLoops) {
return;
}
Dst = Builder.CreateBitCast(SkipBitCast(Dst), TyPtrI8, "memset_dst");
// Fall back to i8 copy
Instruction* IV = insertLoop(MS, LPCount, "memset");
Expand Down Expand Up @@ -1142,9 +1125,7 @@ Value* ReplaceUnsupportedIntrinsics::evaluateCtlz64bit(IGCLLVM::IRBuilder<>* Bui

void ReplaceUnsupportedIntrinsics::visitIntrinsicInst(IntrinsicInst& I) {
if (m_intrinsicToFunc.find(I.getIntrinsicID()) != m_intrinsicToFunc.end()) {
if (!m_replaceOnlyMemCpyWithoutLoops || isa<MemCpyInst>(&I) || isa<MemSetInst>(&I)) {
m_instsToReplace.push_back(&I);
}
m_instsToReplace.push_back(&I);
}
}

Expand All @@ -1159,7 +1140,7 @@ bool ReplaceUnsupportedIntrinsics::runOnFunction(Function& F)
return !m_instsToReplace.empty();
}

FunctionPass* IGC::createReplaceUnsupportedIntrinsicsPass(bool replaceOnlyMemCpyWithoutLoops)
FunctionPass* IGC::createReplaceUnsupportedIntrinsicsPass()
{
return new IGC::ReplaceUnsupportedIntrinsics(replaceOnlyMemCpyWithoutLoops);
return new ReplaceUnsupportedIntrinsics();
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ namespace llvm

namespace IGC
{
llvm::FunctionPass* createReplaceUnsupportedIntrinsicsPass(bool replaceOnlyMemCpyWithoutLoops=false);
llvm::FunctionPass* createReplaceUnsupportedIntrinsicsPass();
} // namespace IGC

0 comments on commit cd9b719

Please sign in to comment.