Skip to content

Commit

Permalink
Report error if `intel_reqd_sub_group_size` cannot be satisfied
Browse files Browse the repository at this point in the history
Without this change, IGC silently switches to SIMD16 for kernels with
nested stackcalls even though the kernel has the `intel_reqd_sub_group_size(32)`
attribute attached.
  • Loading branch information
aratajew authored and igcbot committed Sep 4, 2024
1 parent 6c9722c commit d8e59a1
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 35 deletions.
90 changes: 55 additions & 35 deletions IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,39 @@ namespace IGC
return result;
}

// Returns the sub-group (SIMD) size recorded in metadata for F, i.e. the value
// requested through the intel_reqd_sub_group_size attribute.
//
//   F       - function currently being compiled.
//   MDUtils - metadata accessor used to look up the per-function info item.
//
// When function group analysis (m_FGA) is available and F belongs to a group,
// the value stored on the head of that group (the kernel) is returned instead
// of the one stored on F itself.
uint32_t COpenCLKernel::getReqdSubGroupSize(llvm::Function& F, MetaDataUtils* MDUtils) const
{
    // Start from F's own metadata; it is the answer when no group head is found.
    FunctionInfoMetaDataHandle funcInfoMD = MDUtils->getFunctionsInfoItem(&F);

    if (m_FGA)
    {
        // Guard the group lookup before dereferencing it: other code in this
        // file tests the function group pointer before use, so do the same
        // here and fall back to F's metadata when there is no group.
        if (auto FG = m_FGA->getGroup(&F))
        {
            funcInfoMD = MDUtils->getFunctionsInfoItem(FG->getHead());
        }
    }

    // Make the signedness conversion to the declared return type explicit.
    return static_cast<uint32_t>(funcInfoMD->getSubGroupSize()->getSIMDSize());
}

// Returns the maximum register pressure recorded in metadata for F.
//
//   F       - function currently being compiled.
//   MDUtils - metadata accessor used to look up the per-function info item.
//
// When function group analysis (m_FGA) is available and F belongs to a group,
// the value stored on the head of that group (the kernel) is returned instead
// of the one stored on F itself.
uint32_t COpenCLKernel::getMaxPressure(llvm::Function& F, MetaDataUtils* MDUtils) const
{
    // Start from F's own metadata; it is the answer when no group head is found.
    FunctionInfoMetaDataHandle funcInfoMD = MDUtils->getFunctionsInfoItem(&F);

    if (m_FGA)
    {
        // Guard the group lookup before dereferencing it: other code in this
        // file tests the function group pointer before use, so do the same
        // here and fall back to F's metadata when there is no group.
        if (auto FG = m_FGA->getGroup(&F))
        {
            funcInfoMD = MDUtils->getFunctionsInfoItem(FG->getHead());
        }
    }

    return funcInfoMD->getMaxRegPressure()->getMaxPressure();
}

void COpenCLKernel::CreateKernelArgInfo()
{
auto funcMDIt = m_Context->getModuleMetaData()->FuncMD.find(entry);
Expand Down Expand Up @@ -3659,7 +3692,25 @@ namespace IGC

// Func and Perf checks pass, compile this SIMD
if (simdStatus == SIMDStatus::SIMD_PASS)
{
return true;
}
// Report an error if intel_reqd_sub_group_size cannot be satisfied
else
{
MetaDataUtils* pMdUtils = EP.getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
CodeGenContext* ctx = GetContext();
auto reqdSubGroupSize = getReqdSubGroupSize(F, pMdUtils);
if (reqdSubGroupSize == numLanes(simdMode))
{
ctx->EmitError(
(std::string("Cannot compile a kernel in the SIMD mode specified by intel_reqd_sub_group_size(") +
std::to_string(reqdSubGroupSize) +
std::string(")")).c_str(),
&F);
return false;
}
}

// Functional failure, skip compiling this SIMD
if (simdStatus == SIMDStatus::SIMD_FUNC_FAIL)
Expand All @@ -3682,28 +3733,16 @@ namespace IGC
CodeGenContext* pCtx = GetContext();
MetaDataUtils* pMdUtils = EP.getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
FunctionInfoMetaDataHandle funcInfoMD = pMdUtils->getFunctionsInfoItem(&F);
int simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize();
uint32_t simd_size = getReqdSubGroupSize(F, pMdUtils);
bool hasSubGroupForce = hasSubGroupIntrinsicPVC(F);
unsigned int maxPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure();

// Finds the kernel and get the group simd size from the kernel
if (m_FGA)
{
llvm::Function* Kernel = &F;
auto FG = m_FGA->getGroup(&F);
Kernel = FG->getHead();
funcInfoMD = pMdUtils->getFunctionsInfoItem(Kernel);
simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize();
maxPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure();
}
uint32_t maxPressure = getMaxPressure(F, pMdUtils);

auto FG = m_FGA ? m_FGA->getGroup(&F) : nullptr;
bool hasStackCall = FG && FG->hasStackCall();
bool isIndirectGroup = FG && m_FGA->isIndirectCallGroup(FG);
bool hasSubroutine = FG && !FG->isSingleIgnoringStackOverflowDetection() && !hasStackCall && !isIndirectGroup;
bool forceLowestSIMDForStackCalls = IGC_IS_FLAG_ENABLED(ForceLowestSIMDForStackCalls) && (hasStackCall || isIndirectGroup);


if (simd_size == 0)
{
if (maxPressure >= IGC_GET_FLAG_VALUE(ForceSIMDRPELimit) &&
Expand Down Expand Up @@ -3849,19 +3888,8 @@ namespace IGC
MetaDataUtils* pMdUtils = EP.getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
ModuleMetaData* modMD = pCtx->getModuleMetaData();
FunctionInfoMetaDataHandle funcInfoMD = pMdUtils->getFunctionsInfoItem(&F);
int simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize();
unsigned int maxPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure();

// Finds the kernel and get the group simd size from the kernel
if (m_FGA)
{
llvm::Function* Kernel = &F;
auto FG = m_FGA->getGroup(&F);
Kernel = FG->getHead();
funcInfoMD = pMdUtils->getFunctionsInfoItem(Kernel);
simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize();
maxPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure();
}
uint32_t simd_size = getReqdSubGroupSize(F, pMdUtils);
uint32_t maxPressure = getMaxPressure(F, pMdUtils);

// For simd variant functions, detect which SIMD sizes are needed
if (compileFunctionVariants && F.hasFnAttribute("variant-function-def"))
Expand Down Expand Up @@ -3896,14 +3924,6 @@ namespace IGC
bool hasIndirectCall = FG && FG->hasIndirectCall();
if (hasNestedCall || hasIndirectCall || isIndirectGroup)
{
// If sub_group_size is set to 32, resize it to 16 so SIMD16 compilation will still succeed
if (simd_size == 32)
{
pCtx->EmitWarning("Detected 'reqd_sub_group_size=32', but compiling to SIMD16 due to enabling CallWA, which does not support SIMD32 when nested/indirect calls are present.");
llvm::Function* Kernel = FG->getHead();
funcInfoMD = pMdUtils->getFunctionsInfoItem(Kernel);
funcInfoMD->getSubGroupSize()->setSIMDSize(16);
}
pCtx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, ShaderDispatchMode::NOT_APPLICABLE);
return SIMDStatus::SIMD_FUNC_FAIL;
}
Expand Down
3 changes: 3 additions & 0 deletions IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,9 @@ namespace IGC
std::string getKernelArgTypeQualifier(const FunctionMetaData& funcMD, uint argIndex) const;
std::string getKernelArgAddressQualifier(const FunctionMetaData& funcMD, uint argIndex) const;
std::string getKernelArgAccessQualifier(const FunctionMetaData& funcMD, uint argIndex) const;
// Helper function to get the SIMD size specified in the intel_reqd_sub_group_size attribute.
// The value is read from the sub-group-size metadata of F; when function group analysis
// is available, it is read from the head of the function group containing F instead.
uint32_t getReqdSubGroupSize(llvm::Function& F, IGC::IGCMD::MetaDataUtils* MDUtils) const;
// Helper function to get the maximum register pressure recorded in metadata. Uses the same
// lookup as getReqdSubGroupSize: F's own metadata, or that of the head of F's function group
// when function group analysis is available.
uint32_t getMaxPressure(llvm::Function& F, IGC::IGCMD::MetaDataUtils* MDUtils) const;
};

void CodeGen(OpenCLProgramContext* ctx);
Expand Down
27 changes: 27 additions & 0 deletions IGC/ocloc_tests/features/reqd_sub_group_size/report_error.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*========================== begin_copyright_notice ============================
Copyright (C) 2024 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/

// REQUIRES: dg2-supported
// RUN: not ocloc compile -file %s -device dg2 | FileCheck %s

// IGC enables the EUFusion CallWA for DG2 when nested stackcalls or indirect calls
// are present in a module. The workaround is not supported in SIMD32. This test
// verifies that a proper error message is printed when CallWA is required and the
// intel_reqd_sub_group_size kernel attribute is set to 32.

// CHECK: error: Cannot compile a kernel in the SIMD mode specified by intel_reqd_sub_group_size(32)
// CHECK-NEXT: in kernel: 'test_simple'

// Recursive factorial: yields 1 for any n below 2, otherwise n * fact(n - 1).
// The self-call is deliberate - this test needs a nested call in the module
// (see the test description above), so do not rewrite this as a loop.
int fact(int n) {
    if (n < 2) {
        return 1;
    }
    return n * fact(n - 1);
}

// Kernel under test: requests a sub-group size of 32 and calls the recursive
// helper, storing its result in the first element of `out`.
__attribute__((intel_reqd_sub_group_size(32)))
kernel void test_simple(global int* out, int n) {
    const int result = fact(n);
    *out = result;
}

0 comments on commit d8e59a1

Please sign in to comment.