Skip to content

Commit

Permalink
Scratch space separation control
Browse files Browse the repository at this point in the history
Add the 'EnableSeparateScratchWA' regkey to control whether the workaround
is applied when calculating the slot0 and slot1 scratch space sizes
  • Loading branch information
iwwu authored and igcbot committed Sep 13, 2023
1 parent 4a82e68 commit ea92b75
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 10 deletions.
3 changes: 2 additions & 1 deletion IGC/Compiler/CISACodeGen/CISABuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5323,7 +5323,8 @@ namespace IGC
// slot1 is used for spilling only when SeparatingSpillAndPrivateScratchMemorySpace is on
// and Slot0 is used for IGC private memory
if (m_program->m_Platform->hasScratchSurface() &&
m_program->m_DriverInfo->supportsSeparatingSpillAndPrivateScratchMemorySpace())
(m_program->m_DriverInfo->supportsSeparatingSpillAndPrivateScratchMemorySpace() ||
m_program->GetContext()->getModuleMetaData()->compOpt.SeparateSpillPvtScratchSpace))
{
V(vKernel->AddKernelAttribute("SepSpillPvtSS", 0, nullptr));
}
Expand Down
11 changes: 8 additions & 3 deletions IGC/Compiler/CISACodeGen/CShader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,15 @@ CShader::CShader(Function* pFunc, CShaderProgram* pProgram)
m_SavedFP = nullptr;

bool SepSpillPvtSS = m_ctx->platform.hasScratchSurface() &&
m_ctx->m_DriverInfo.supportsSeparatingSpillAndPrivateScratchMemorySpace();
(m_ctx->m_DriverInfo.supportsSeparatingSpillAndPrivateScratchMemorySpace() ||
m_ctx->getModuleMetaData()->compOpt.SeparateSpillPvtScratchSpace);
bool SeparateScratchWA =
IGC_IS_FLAG_ENABLED(EnableSeparateScratchWA) &&
!m_ctx->getModuleMetaData()->compOpt.DisableSeparateScratchWA;
m_simdProgram.init(!m_ctx->platform.hasScratchSurface(),
m_ctx->platform.maxPerThreadScratchSpace(),
GetContext()->getModuleMetaData()->compOpt.UseScratchSpacePrivateMemory,
SepSpillPvtSS);
SepSpillPvtSS, SeparateScratchWA);
}

void CShader::InitEncoder(SIMDMode simdSize, bool canAbortOnSpill, ShaderDispatchMode shaderMode)
Expand Down Expand Up @@ -3860,7 +3864,8 @@ bool CShader::CompileSIMDSizeInCommon(SIMDMode simdMode)
m_simdProgram.setScratchSpaceUsedByShader(m_ScratchSpaceSize);

if (m_ctx->platform.hasScratchSurface() &&
m_ctx->m_DriverInfo.supportsSeparatingSpillAndPrivateScratchMemorySpace())
(m_ctx->m_DriverInfo.supportsSeparatingSpillAndPrivateScratchMemorySpace() ||
m_ctx->getModuleMetaData()->compOpt.SeparateSpillPvtScratchSpace))
{
ret = ((m_simdProgram.getScratchSpaceUsageInSlot0() <= m_ctx->platform.maxPerThreadScratchSpace()) &&
(m_simdProgram.getScratchSpaceUsageInSlot1() <= m_ctx->platform.maxPerThreadScratchSpace()));
Expand Down
20 changes: 15 additions & 5 deletions IGC/Compiler/CodeGenPublic.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ namespace IGC
bool m_roundPower2KBytes = false;
bool m_UseScratchSpacePrivateMemory = true;
bool m_SeparatingSpillAndPrivateScratchMemorySpace = false;
bool m_EnableSeparateScratchWA = true;
unsigned int m_scratchSpaceSizeLimit = 0;
unsigned int m_numGRFTotal = 128;
unsigned int m_numGRFSpillFill = 0;
Expand Down Expand Up @@ -195,12 +196,14 @@ namespace IGC
}
}

void init(bool roundPower2KBytes, unsigned int scratchSpaceSizeLimitT, bool useScratchSpacePrivateMemory, bool SepSpillPvtSS)
void init(bool roundPower2KBytes, unsigned int scratchSpaceSizeLimitT, bool useScratchSpacePrivateMemory, bool SepSpillPvtSS,
bool SeparateScratchWA)
{
m_roundPower2KBytes = roundPower2KBytes;
m_scratchSpaceSizeLimit = scratchSpaceSizeLimitT;
m_UseScratchSpacePrivateMemory = useScratchSpacePrivateMemory;
m_SeparatingSpillAndPrivateScratchMemorySpace = SepSpillPvtSS;
m_EnableSeparateScratchWA = SeparateScratchWA;
}

// if IGC needs scratch for private memory, we use slot0 for private
Expand All @@ -218,7 +221,7 @@ namespace IGC
{
result += (m_scratchSpaceUsedBySpills + m_scratchSpaceUsedByGtpin);
}
else
else if (m_EnableSeparateScratchWA)
{
// \TODO: doubts about driver-compiler interface, conservatively set the size
// to the max of two slots
Expand All @@ -236,9 +239,16 @@ namespace IGC
unsigned int result = 0;
if (m_SeparatingSpillAndPrivateScratchMemorySpace && slot0_offset > 0)
{
// \TODO: doubts about driver-compiler interface, conservatively set the size
// to the max of two slots
result = std::max(slot0_offset, m_scratchSpaceUsedBySpills + m_scratchSpaceUsedByGtpin);
if (m_EnableSeparateScratchWA)
{
// \TODO: doubts about driver-compiler interface, conservatively set the size
// to the max of two slots
result = std::max(slot0_offset, m_scratchSpaceUsedBySpills + m_scratchSpaceUsedByGtpin);
}
else
{
result = m_scratchSpaceUsedBySpills + m_scratchSpaceUsedByGtpin;
}
}
result = roundSize(result);
IGC_ASSERT(result <= m_scratchSpaceSizeLimit);
Expand Down
5 changes: 4 additions & 1 deletion IGC/Compiler/ModuleAllocaAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,10 @@ bool ModuleAllocaAnalysis::safeToUseScratchSpace() const

// if one API doesn't support stateless, we should try to use smallest dispatch mode
// which can hold more pvt_data to avoid error out.
if (Ctx.platform.hasScratchSurface() && Ctx.m_DriverInfo.supportsSeparatingSpillAndPrivateScratchMemorySpace() && !supportsStatelessSpacePrivateMemory)
if (Ctx.platform.hasScratchSurface() &&
(Ctx.m_DriverInfo.supportsSeparatingSpillAndPrivateScratchMemorySpace() ||
Ctx.getModuleMetaData()->compOpt.SeparateSpillPvtScratchSpace) &&
!supportsStatelessSpacePrivateMemory)
simd_size = numLanes(Ctx.platform.getMinDispatchMode());

unsigned maxScratchSpaceBytes = Ctx.platform.maxPerThreadScratchSpace();
Expand Down
2 changes: 2 additions & 0 deletions IGC/common/MDFrameWork.h
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,8 @@ namespace IGC
bool DashGSpecified = false;
bool FastCompilation = false;
bool UseScratchSpacePrivateMemory = true;
bool SeparateSpillPvtScratchSpace = false;
bool DisableSeparateScratchWA = false;
bool RelaxedBuiltins = false;
bool SubgroupIndependentForwardProgressRequired = true;
bool GreaterThan2GBBufferRequired = true;
Expand Down
1 change: 1 addition & 0 deletions IGC/common/igc_flags.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ DECLARE_IGC_REGKEY(bool, DPASTokenReduction, false, "optimization to
DECLARE_IGC_REGKEY(bool, EnableAdd3, true, "Enable Add3. XeHP+ only", true)
DECLARE_IGC_REGKEY(bool, EnableBfn, true, "Enable Bfn. XeHP+ only", true)
DECLARE_IGC_REGKEY(bool, SeparateSpillPvtScratchSpace, false, "Separate scratch spaces for spillfill and privatememory. XeHP and above only. Test only. Remove it once stabalized.", true)
DECLARE_IGC_REGKEY(bool, EnableSeparateScratchWA, true, "Apply the workaround in slot0 and slot1 sizes when separating scratch spacesSeparate scratch space.", true)
DECLARE_IGC_REGKEY(bool, DisableThreeALUPipes, false, "Disable three ALU Pipelines. XeHP only", true)
DECLARE_IGC_REGKEY(bool, Enable16DWURBWrite, false, "Enable 16 Dword URB Write messages", true)
DECLARE_IGC_REGKEY(bool, Enable16OWSLMBlockRW, true, "Enable 16 OWord (8 GRF) SLM block read/write message", true)
Expand Down

0 comments on commit ea92b75

Please sign in to comment.