From d8e59a19a75130fc254d04b33750a1563da28634 Mon Sep 17 00:00:00 2001 From: Andrzej Ratajewski Date: Mon, 2 Sep 2024 08:17:25 +0000 Subject: [PATCH] Report error if `intel_reqd_sub_group_size` cannot be satisfied Without this change, IGC silently switches to SIMD16 for kernels with nested stackcalls even though a kernel has `intel_reqd_sub_group_size(32)` attribute attached --- .../CISACodeGen/OpenCLKernelCodeGen.cpp | 90 +++++++++++-------- .../CISACodeGen/OpenCLKernelCodeGen.hpp | 3 + .../reqd_sub_group_size/report_error.cl | 27 ++++++ 3 files changed, 85 insertions(+), 35 deletions(-) create mode 100644 IGC/ocloc_tests/features/reqd_sub_group_size/report_error.cl diff --git a/IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.cpp b/IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.cpp index dab58e79d537..07c32510734a 100644 --- a/IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.cpp +++ b/IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.cpp @@ -643,6 +643,39 @@ namespace IGC return result; } + uint32_t COpenCLKernel::getReqdSubGroupSize(llvm::Function& F, MetaDataUtils* MDUtils) const + { + FunctionInfoMetaDataHandle funcInfoMD = MDUtils->getFunctionsInfoItem(&F); + int simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize(); + + // Finds the kernel and get the group simd size from the kernel + if (m_FGA) + { + llvm::Function* Kernel = &F; + auto FG = m_FGA->getGroup(&F); + Kernel = FG->getHead(); + funcInfoMD = MDUtils->getFunctionsInfoItem(Kernel); + simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize(); + } + return simd_size; + } + + uint32_t COpenCLKernel::getMaxPressure(llvm::Function& F, MetaDataUtils* MDUtils) const + { + FunctionInfoMetaDataHandle funcInfoMD = MDUtils->getFunctionsInfoItem(&F); + unsigned int maxPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure(); + + if (m_FGA) + { + llvm::Function* Kernel = &F; + auto FG = m_FGA->getGroup(&F); + Kernel = FG->getHead(); + funcInfoMD = MDUtils->getFunctionsInfoItem(Kernel); + maxPressure = 
funcInfoMD->getMaxRegPressure()->getMaxPressure(); + } + return maxPressure; + } + void COpenCLKernel::CreateKernelArgInfo() { auto funcMDIt = m_Context->getModuleMetaData()->FuncMD.find(entry); @@ -3659,7 +3692,25 @@ namespace IGC // Func and Perf checks pass, compile this SIMD if (simdStatus == SIMDStatus::SIMD_PASS) + { return true; + } + // Report an error if intel_reqd_sub_group_size cannot be satisfied + else + { + MetaDataUtils* pMdUtils = EP.getAnalysis().getMetaDataUtils(); + CodeGenContext* ctx = GetContext(); + auto reqdSubGroupSize = getReqdSubGroupSize(F, pMdUtils); + if (reqdSubGroupSize == numLanes(simdMode)) + { + ctx->EmitError( + (std::string("Cannot compile a kernel in the SIMD mode specified by intel_reqd_sub_group_size(") + + std::to_string(reqdSubGroupSize) + + std::string(")")).c_str(), + &F); + return false; + } + } // Functional failure, skip compiling this SIMD if (simdStatus == SIMDStatus::SIMD_FUNC_FAIL) @@ -3682,20 +3733,9 @@ namespace IGC CodeGenContext* pCtx = GetContext(); MetaDataUtils* pMdUtils = EP.getAnalysis().getMetaDataUtils(); FunctionInfoMetaDataHandle funcInfoMD = pMdUtils->getFunctionsInfoItem(&F); - int simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize(); + uint32_t simd_size = getReqdSubGroupSize(F, pMdUtils); bool hasSubGroupForce = hasSubGroupIntrinsicPVC(F); - unsigned int maxPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure(); - - // Finds the kernel and get the group simd size from the kernel - if (m_FGA) - { - llvm::Function* Kernel = &F; - auto FG = m_FGA->getGroup(&F); - Kernel = FG->getHead(); - funcInfoMD = pMdUtils->getFunctionsInfoItem(Kernel); - simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize(); - maxPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure(); - } + uint32_t maxPressure = getMaxPressure(F, pMdUtils); auto FG = m_FGA ? 
m_FGA->getGroup(&F) : nullptr; bool hasStackCall = FG && FG->hasStackCall(); @@ -3703,7 +3743,6 @@ namespace IGC bool hasSubroutine = FG && !FG->isSingleIgnoringStackOverflowDetection() && !hasStackCall && !isIndirectGroup; bool forceLowestSIMDForStackCalls = IGC_IS_FLAG_ENABLED(ForceLowestSIMDForStackCalls) && (hasStackCall || isIndirectGroup); - if (simd_size == 0) { if (maxPressure >= IGC_GET_FLAG_VALUE(ForceSIMDRPELimit) && @@ -3849,19 +3888,8 @@ namespace IGC MetaDataUtils* pMdUtils = EP.getAnalysis().getMetaDataUtils(); ModuleMetaData* modMD = pCtx->getModuleMetaData(); FunctionInfoMetaDataHandle funcInfoMD = pMdUtils->getFunctionsInfoItem(&F); - int simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize(); - unsigned int maxPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure(); - - // Finds the kernel and get the group simd size from the kernel - if (m_FGA) - { - llvm::Function* Kernel = &F; - auto FG = m_FGA->getGroup(&F); - Kernel = FG->getHead(); - funcInfoMD = pMdUtils->getFunctionsInfoItem(Kernel); - simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize(); - maxPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure(); - } + uint32_t simd_size = getReqdSubGroupSize(F, pMdUtils); + uint32_t maxPressure = getMaxPressure(F, pMdUtils); // For simd variant functions, detect which SIMD sizes are needed if (compileFunctionVariants && F.hasFnAttribute("variant-function-def")) @@ -3896,14 +3924,6 @@ namespace IGC bool hasIndirectCall = FG && FG->hasIndirectCall(); if (hasNestedCall || hasIndirectCall || isIndirectGroup) { - // If sub_group_size is set to 32, resize it to 16 so SIMD16 compilation will still succeed - if (simd_size == 32) - { - pCtx->EmitWarning("Detected 'reqd_sub_group_size=32', but compiling to SIMD16 due to enabling CallWA, which does not support SIMD32 when nested/indirect calls are present."); - llvm::Function* Kernel = FG->getHead(); - funcInfoMD = pMdUtils->getFunctionsInfoItem(Kernel); - 
funcInfoMD->getSubGroupSize()->setSIMDSize(16); - } pCtx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, ShaderDispatchMode::NOT_APPLICABLE); return SIMDStatus::SIMD_FUNC_FAIL; } diff --git a/IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp b/IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp index 277d2c0eeb89..c763a736b080 100644 --- a/IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp +++ b/IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp @@ -263,6 +263,9 @@ namespace IGC std::string getKernelArgTypeQualifier(const FunctionMetaData& funcMD, uint argIndex) const; std::string getKernelArgAddressQualifier(const FunctionMetaData& funcMD, uint argIndex) const; std::string getKernelArgAccessQualifier(const FunctionMetaData& funcMD, uint argIndex) const; + // Helper function to get SIMD size specified in intel_reqd_sub_group_size attribute + uint32_t getReqdSubGroupSize(llvm::Function& F, IGC::IGCMD::MetaDataUtils* MDUtils) const; + uint32_t getMaxPressure(llvm::Function& F, IGC::IGCMD::MetaDataUtils* MDUtils) const; }; void CodeGen(OpenCLProgramContext* ctx); diff --git a/IGC/ocloc_tests/features/reqd_sub_group_size/report_error.cl b/IGC/ocloc_tests/features/reqd_sub_group_size/report_error.cl new file mode 100644 index 000000000000..c10ae98d7217 --- /dev/null +++ b/IGC/ocloc_tests/features/reqd_sub_group_size/report_error.cl @@ -0,0 +1,27 @@ +/*========================== begin_copyright_notice ============================ + +Copyright (C) 2024 Intel Corporation + +SPDX-License-Identifier: MIT + +============================= end_copyright_notice ===========================*/ + +// REQUIRES: dg2-supported +// RUN: not ocloc compile -file %s -device dg2 | FileCheck %s + +// IGC enables EUFusion CallWA for DG2 when nested stackcalls or indirect calls +// are present in a module. The workaround is not supported in SIMD32. This test +// verifies if a proper error message is printed if CallWA is required and +// intel_reqd_sub_group_size kernel attribute is set to 32. 
+ +// CHECK: error: Cannot compile a kernel in the SIMD mode specified by intel_reqd_sub_group_size(32) +// CHECK-NEXT: in kernel: 'test_simple' + +int fact(int n) { + return n < 2 ? 1 : n * fact(n - 1); +} + +__attribute__((intel_reqd_sub_group_size(32))) +kernel void test_simple(global int* out, int n) { + out[0] = fact(n); +}