diff --git a/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp b/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp index 9e2e4fdcf716..cf9494b54752 100644 --- a/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp +++ b/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp @@ -5477,40 +5477,52 @@ void EmitPass::emitSimdShuffleDown(llvm::Instruction* inst) uint16_t nbElements = numLanes(m_SimdMode) * 2; // Join current and Next Data - CVariable* pCombinedData = m_currShader->GetNewVariable( - nbElements, - m_destination->GetType(), - m_destination->GetAlign(), - "ShuffleTmp"); - - auto CopyData = [this](CVariable* pDestinationData, CVariable* pSourceData, uint32_t offset) + SBasicBlock::SimdShuffleDownSrcTy srcPair(inst->getOperand(0), inst->getOperand(1)); + SBasicBlock* pCurrBlk = getCurrentBlock(); + CVariable* pCombinedData = pCurrBlk ? pCurrBlk->m_simdShuffleDownSrc[srcPair] : nullptr; + if (pCombinedData == nullptr) { - for (uint32_t i = 0; i < m_currShader->m_numberInstance; i++) - { - IGC_ASSERT_MESSAGE(!m_encoder->IsSecondHalf(), "This emitter must be called only once for simd32!"); - uint32_t currentOffset = offset + numLanes(m_encoder->GetSimdSize()) * i; - bool isSecondHalf = i == 1; + pCombinedData = m_currShader->GetNewVariable( + nbElements, + m_destination->GetType(), + m_destination->GetAlign(), + "ShuffleTmp"); - if (isSecondHalf) + auto CopyData = [this](CVariable* pDestinationData, CVariable* pSourceData, uint32_t offset) + { + for (uint32_t i = 0; i < m_currShader->m_numberInstance; i++) { - m_encoder->SetSecondHalf(true); - } + IGC_ASSERT_MESSAGE(!m_encoder->IsSecondHalf(), "This emitter must be called only once for simd32!"); + uint32_t currentOffset = offset + numLanes(m_encoder->GetSimdSize()) * i; + bool isSecondHalf = i == 1; - m_encoder->SetSimdSize(m_encoder->GetSimdSize()); - m_encoder->SetDstSubReg(currentOffset); - m_encoder->SetNoMask(); - m_encoder->Copy(pDestinationData, pSourceData); - m_encoder->Push(); + if (isSecondHalf) + { + m_encoder->SetSecondHalf(true); + } - 
if (isSecondHalf) - { - m_encoder->SetSecondHalf(false); + m_encoder->SetSimdSize(m_encoder->GetSimdSize()); + m_encoder->SetDstSubReg(currentOffset); + m_encoder->SetNoMask(); + m_encoder->Copy(pDestinationData, pSourceData); + m_encoder->Push(); + + if (isSecondHalf) + { + m_encoder->SetSecondHalf(false); + } } - } - }; + }; + + CopyData(pCombinedData, pCurrentData, 0); + CopyData(pCombinedData, pNextData, numLanes(m_encoder->GetSimdSize()) * m_currShader->m_numberInstance); - CopyData(pCombinedData, pCurrentData, 0); - CopyData(pCombinedData, pNextData, numLanes(m_encoder->GetSimdSize()) * m_currShader->m_numberInstance); + // save it for possible re-use later. + if (pCurrBlk) + { + pCurrBlk->m_simdShuffleDownSrc[srcPair] = pCombinedData; + } + } // Emit mov with direct addressing when delta is a compile-time constant. const bool useDirectAddressing = pDelta->IsImmediate() diff --git a/IGC/Compiler/CISACodeGen/PatternMatchPass.hpp b/IGC/Compiler/CISACodeGen/PatternMatchPass.hpp index c24a13b00763..16bcdd49ea24 100644 --- a/IGC/Compiler/CISACodeGen/PatternMatchPass.hpp +++ b/IGC/Compiler/CISACodeGen/PatternMatchPass.hpp @@ -111,9 +111,15 @@ namespace IGC // this is currently set only when we enable the A64 WA CVariable* m_activeMask = nullptr; // flag var CVariable* m_numActiveLanes = nullptr; // general var, #lanes for the entire dispatch size. + + // caches combined current+next sources for SimdShuffleDown intrinsic + typedef std::pair<llvm::Value*, llvm::Value*> SimdShuffleDownSrcTy; + std::map<SimdShuffleDownSrcTy, CVariable*> m_simdShuffleDownSrc; + void clearCaching() { m_activeMask = nullptr; m_numActiveLanes = nullptr; + m_simdShuffleDownSrc.clear(); } };