Skip to content

Commit

Permalink
Optimize vector oword block load
Browse files Browse the repository at this point in the history
Use a vector copy instead of an oword block load on a specific device.
  • Loading branch information
ichenkai authored and igcbot committed Aug 30, 2024
1 parent dbad589 commit a7ed19e
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 1 deletion.
12 changes: 11 additions & 1 deletion IGC/Compiler/CISACodeGen/EmitVISAPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17762,7 +17762,6 @@ void EmitPass::emitVectorLoad(LoadInst* inst, Value* offset, ConstantInt* immOff
return;
}


bool bEmulateDWAligned = false;

// generate oword-load if possible
Expand All @@ -17783,6 +17782,17 @@ void EmitPass::emitVectorLoad(LoadInst* inst, Value* offset, ConstantInt* immOff
bool needTemp = (!destUniform || !IsGRFAligned(m_destination, EALIGN_GRF));
CVariable * loadDest = m_destination;

// needTemp == false implies that the destination is uniform and GRF aligned
if (!useDWAligned && useOWAligned && !needTemp &&
srcUniform && destUniform &&
destType == VISA_Type::ISA_TYPE_F &&
eOffset->GetType() == VISA_Type::ISA_TYPE_UD &&
m_currShader->m_Platform->allowFastestSIMDVectorLoad4Perf())
{
emitVectorCopy(loadDest, eOffset, elts);
return;
}

if (useOWAligned)
{
// Offset needs to be in OW!
Expand Down
6 changes: 6 additions & 0 deletions IGC/Compiler/CISACodeGen/Platform.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1787,5 +1787,11 @@ bool allowDivergentControlFlowRayQueryCheckRelease() const
return m_WaTable.Wa_22019804511 != 0;
}

// Platform gate for the vector-copy fast path that replaces the oword block
// load in vector-load emission.
// Returns true only when the product family is IGFX_ALDERLAKE_P and the
// device ID is exactly 0x4626; every other platform/device gets false.
bool allowFastestSIMDVectorLoad4Perf() const
{
    // Reject any product family other than Alder Lake-P up front.
    if (m_platformInfo.eProductFamily != IGFX_ALDERLAKE_P)
    {
        return false;
    }

    // Within that family, only the single device ID 0x4626 is enabled.
    return m_platformInfo.usDeviceID == 0x4626;
}

};
}//namespace IGC

0 comments on commit a7ed19e

Please sign in to comment.