Skip to content

Commit

Permalink
avoid redundant moves for SIMD shuffle down intrinsic
Browse files Browse the repository at this point in the history
The SIMD shuffle down intrinsic takes "current" and "next" values and
combines them into a 2N-element variable (where N is the SIMD size) to deal
with out-of-bounds (OOB) lanes when shuffling. The moves that initialize this
temporary variable are materialized in the emit vISA pass; thus, when multiple
shuffle intrinsic calls have identical source operands, we end
up with multiple temp variables and redundant moves.

This change adds per-basic-block caching of the temporary variable, so
multiple shuffles in the same basic block (BB) can share a common source.
  • Loading branch information
pkwasnie-intel authored and igcbot committed Jun 20, 2023
1 parent ed2657a commit 097633c
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 27 deletions.
66 changes: 39 additions & 27 deletions IGC/Compiler/CISACodeGen/EmitVISAPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5477,40 +5477,52 @@ void EmitPass::emitSimdShuffleDown(llvm::Instruction* inst)
uint16_t nbElements = numLanes(m_SimdMode) * 2;

// Join current and Next Data
CVariable* pCombinedData = m_currShader->GetNewVariable(
nbElements,
m_destination->GetType(),
m_destination->GetAlign(),
"ShuffleTmp");

auto CopyData = [this](CVariable* pDestinationData, CVariable* pSourceData, uint32_t offset)
SBasicBlock::SimdShuffleDownSrcTy srcPair(inst->getOperand(0), inst->getOperand(1));
SBasicBlock* pCurrBlk = getCurrentBlock();
CVariable* pCombinedData = pCurrBlk ? pCurrBlk->m_simdShuffleDownSrc[srcPair] : nullptr;
if (pCombinedData == nullptr)
{
for (uint32_t i = 0; i < m_currShader->m_numberInstance; i++)
{
IGC_ASSERT_MESSAGE(!m_encoder->IsSecondHalf(), "This emitter must be called only once for simd32!");
uint32_t currentOffset = offset + numLanes(m_encoder->GetSimdSize()) * i;
bool isSecondHalf = i == 1;
pCombinedData = m_currShader->GetNewVariable(
nbElements,
m_destination->GetType(),
m_destination->GetAlign(),
"ShuffleTmp");

if (isSecondHalf)
auto CopyData = [this](CVariable* pDestinationData, CVariable* pSourceData, uint32_t offset)
{
for (uint32_t i = 0; i < m_currShader->m_numberInstance; i++)
{
m_encoder->SetSecondHalf(true);
}
IGC_ASSERT_MESSAGE(!m_encoder->IsSecondHalf(), "This emitter must be called only once for simd32!");
uint32_t currentOffset = offset + numLanes(m_encoder->GetSimdSize()) * i;
bool isSecondHalf = i == 1;

m_encoder->SetSimdSize(m_encoder->GetSimdSize());
m_encoder->SetDstSubReg(currentOffset);
m_encoder->SetNoMask();
m_encoder->Copy(pDestinationData, pSourceData);
m_encoder->Push();
if (isSecondHalf)
{
m_encoder->SetSecondHalf(true);
}

if (isSecondHalf)
{
m_encoder->SetSecondHalf(false);
m_encoder->SetSimdSize(m_encoder->GetSimdSize());
m_encoder->SetDstSubReg(currentOffset);
m_encoder->SetNoMask();
m_encoder->Copy(pDestinationData, pSourceData);
m_encoder->Push();

if (isSecondHalf)
{
m_encoder->SetSecondHalf(false);
}
}
}
};
};

CopyData(pCombinedData, pCurrentData, 0);
CopyData(pCombinedData, pNextData, numLanes(m_encoder->GetSimdSize()) * m_currShader->m_numberInstance);

CopyData(pCombinedData, pCurrentData, 0);
CopyData(pCombinedData, pNextData, numLanes(m_encoder->GetSimdSize()) * m_currShader->m_numberInstance);
// save it for possible re-use later.
if (pCurrBlk)
{
pCurrBlk->m_simdShuffleDownSrc[srcPair] = pCombinedData;
}
}

// Emit mov with direct addressing when delta is a compile-time constant.
const bool useDirectAddressing = pDelta->IsImmediate()
Expand Down
6 changes: 6 additions & 0 deletions IGC/Compiler/CISACodeGen/PatternMatchPass.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,15 @@ namespace IGC
// this is currently set only when we enable the A64 WA
CVariable* m_activeMask = nullptr; // flag var
CVariable* m_numActiveLanes = nullptr; // general var, #lanes for the entire dispatch size.

// caches combined current+next sources for SimdShuffleDown intrinsic
typedef std::pair<llvm::Value*, llvm::Value*> SimdShuffleDownSrcTy;
std::map<SimdShuffleDownSrcTy, CVariable*> m_simdShuffleDownSrc;

// Resets the cached members of this block: empties m_simdShuffleDownSrc
// and nulls both m_numActiveLanes and m_activeMask.
void clearCaching()
{
    m_simdShuffleDownSrc.clear();
    m_activeMask = m_numActiveLanes = nullptr;
}
};

Expand Down

0 comments on commit 097633c

Please sign in to comment.