diff --git a/IGC/common/StateSaveAreaHeader.h b/IGC/common/StateSaveAreaHeader.h index 1fa4ba534965..87f683a673e8 100644 --- a/IGC/common/StateSaveAreaHeader.h +++ b/IGC/common/StateSaveAreaHeader.h @@ -143,4 +143,11 @@ struct StateSaveAreaHeaderV3 struct intelgt_state_save_area_V3 regHeader; }; +// versionHeader.version.major = 4 +struct StateSaveAreaHeaderV4 +{ + struct StateSaveArea versionHeader; + uint64_t total_wmtp_data_size; +}; + } // namespace SIP diff --git a/IGC/common/SystemThread.cpp b/IGC/common/SystemThread.cpp index b01e67ce69f0..0e294f6dafe7 100644 --- a/IGC/common/SystemThread.cpp +++ b/IGC/common/SystemThread.cpp @@ -247,6 +247,14 @@ struct StateSaveAreaHeader Xe2SIPCSRDebugBindlessDebugHeader = } }; + +// wmtp LNL SIP +struct StateSaveAreaHeaderV4 Xe2SIP_WMTP_CSRDebugBindlessDebugHeader = +{ + {"tssarea", 0, {4, 0, 0}, sizeof(StateSaveAreaHeaderV4) / 8, {0, 0, 0}}, // versionHeader + 0 // total_wmtp_data_size +}; + // Debug surface area for all XeHPC+ architectures struct XeHPCDebugSurfaceLayout { @@ -868,7 +876,7 @@ bool CSystemThread::CreateSystemThreadKernel( if( pKernelProgram ) { - pKernelProgram->Create( platform, mode, bindlessMode, SLM_ANY); + pKernelProgram->Create( platform, mode, bindlessMode); pSystemThreadKernelOutput->m_KernelProgramSize = pKernelProgram->GetProgramSize(); pSystemThreadKernelOutput->m_StateSaveAreaHeaderSize = pKernelProgram->GetStateSaveHeaderSize(); @@ -958,108 +966,6 @@ void CSystemThread::DeleteSystemThreadKernel( pSystemThreadKernelOutput = nullptr; } -bool -CSystemThread::CreateSystemThreadKernel_v2(const PLATFORM &platform, const IGC::SCompilerHwCaps &Caps, - const GT_SYSTEM_INFO &sysInfo, const SYSTEM_THREAD_MODE mode, - USC::SSystemThreadKernelOutput *&pSystemThreadKernelOutput, - SLM_SIZE_SUPPORTED slmSize) -{ - if (!SIPSuppoertedOnPlatformFamily(platform.eRenderCoreFamily)) - { - return false; - } - - bool success = true; - - // Check if the System Thread mode in the correct range. - // SYSTEM_THREAD_MODE_CSR_64B is not yet supported. - if (!(mode & SYSTEM_THREAD_MODE_CSR)) - { - success = false; - } - - // Create System Thread kernel program. - if (success) - { - CGenSystemInstructionKernelProgram *pKernelProgram = new CGenSystemInstructionKernelProgram(mode); - success = pKernelProgram ? true : false; - // Allocate memory for SSystemThreadKernelOutput. - if (success) - { - pSystemThreadKernelOutput = new SSystemThreadKernelOutput; - - success = (pSystemThreadKernelOutput != nullptr); - - if (success) - { - memset(pSystemThreadKernelOutput, 0, sizeof(SSystemThreadKernelOutput)); - } - } - - const unsigned int DQWORD_SIZE = 2 * sizeof(unsigned long long); - - if (pKernelProgram) - { - pKernelProgram->Create(platform, mode, true, slmSize); - - pSystemThreadKernelOutput->m_KernelProgramSize = pKernelProgram->GetProgramSize(); - pSystemThreadKernelOutput->m_StateSaveAreaHeaderSize = pKernelProgram->GetStateSaveHeaderSize(); - - if (mode & SYSTEM_THREAD_MODE_CSR) - pSystemThreadKernelOutput->m_SystemThreadScratchSpace = - Caps.KernelHwCaps.CsrSizeInMb * sizeof(MEGABYTE); - - if (mode & (SYSTEM_THREAD_MODE_DEBUG | SYSTEM_THREAD_MODE_DEBUG_LOCAL)) - pSystemThreadKernelOutput->m_SystemThreadResourceSize = - Caps.KernelHwCaps.CsrSizeInMb * 2 * sizeof(MEGABYTE); - - pSystemThreadKernelOutput->m_pKernelProgram = - IGC::aligned_malloc(pSystemThreadKernelOutput->m_KernelProgramSize, DQWORD_SIZE); - - if (pSystemThreadKernelOutput->m_StateSaveAreaHeaderSize) - pSystemThreadKernelOutput->m_pStateSaveAreaHeader = - IGC::aligned_malloc(pSystemThreadKernelOutput->m_StateSaveAreaHeaderSize, DQWORD_SIZE); - - success = (pSystemThreadKernelOutput->m_pKernelProgram != nullptr); - if (pSystemThreadKernelOutput->m_StateSaveAreaHeaderSize) - success &= (pSystemThreadKernelOutput->m_pStateSaveAreaHeader != nullptr); - } - - if (success) - { - const void *pStartAddress = pKernelProgram->GetLinearAddress(); - const void *pStateSaveAddress = pKernelProgram->GetStateSaveHeaderAddress(); - - if (!pStartAddress || (pSystemThreadKernelOutput->m_StateSaveAreaHeaderSize && !pStateSaveAddress)) - { - success = false; - } - - if (success) - { - memcpy_s(pSystemThreadKernelOutput->m_pKernelProgram, pSystemThreadKernelOutput->m_KernelProgramSize, - pStartAddress, pSystemThreadKernelOutput->m_KernelProgramSize); - - if (pSystemThreadKernelOutput->m_StateSaveAreaHeaderSize) - memcpy_s(pSystemThreadKernelOutput->m_pStateSaveAreaHeader, - pSystemThreadKernelOutput->m_StateSaveAreaHeaderSize, pStateSaveAddress, - pSystemThreadKernelOutput->m_StateSaveAreaHeaderSize); - } - } - else - { - success = false; - } - - if (pKernelProgram) - { - pKernelProgram->Delete(pKernelProgram); - delete pKernelProgram; - } - } - return success; -} - //populate the SIPKernelInfo map with starting address and size of every SIP kernels void populateSIPKernelInfo(const IGC::CPlatform &platform, std::map< unsigned char, std::tuple > &SIPKernelInfo) @@ -1197,12 +1103,12 @@ void populateSIPKernelInfo(const IGC::CPlatform &platform, // LNL 1x4 128 SIPKernelInfo[XE2_CSR_DEBUG_BINDLESS_LNL_1x4_128] = std::make_tuple( (void *)&XE2_LNL_1x4_128, (int)sizeof(XE2_LNL_1x4_128), - (void *)&XE2_LNL_1x4_128, XE2_CSR_DEBUG_BINDLESS_LNL_1x4_128_WMTP_DATA_SIZE); + (void *)&Xe2SIP_WMTP_CSRDebugBindlessDebugHeader, (int)sizeof(Xe2SIP_WMTP_CSRDebugBindlessDebugHeader)); // LNL 1x4 160 SIPKernelInfo[XE2_CSR_DEBUG_BINDLESS_LNL_1x4_160] = std::make_tuple( (void *)&XE2_LNL_1x4_160, (int)sizeof(XE2_LNL_1x4_160), - (void *)&XE2_LNL_1x4_160, XE2_CSR_DEBUG_BINDLESS_LNL_1x4_160_WMTP_DATA_SIZE); + (void *)&Xe2SIP_WMTP_CSRDebugBindlessDebugHeader, (int)sizeof(Xe2SIP_WMTP_CSRDebugBindlessDebugHeader)); @@ -1210,12 +1116,12 @@ void populateSIPKernelInfo(const IGC::CPlatform &platform, SIPKernelInfo[XE2_CSR_DEBUG_BINDLESS_LNL_2x4_128] = std::make_tuple((void *)&XE2_LNL_2x4_128, (int)sizeof(XE2_LNL_2x4_128), - (void *)&XE2_LNL_2x4_128, XE2_CSR_DEBUG_BINDLESS_LNL_2x4_128_WMTP_DATA_SIZE); + (void *)&Xe2SIP_WMTP_CSRDebugBindlessDebugHeader, (int)sizeof(Xe2SIP_WMTP_CSRDebugBindlessDebugHeader)); // LNL 2x4 160 SIPKernelInfo[XE2_CSR_DEBUG_BINDLESS_LNL_2x4_160] = std::make_tuple( (void *)&XE2_LNL_2x4_160, (int)sizeof(XE2_LNL_2x4_160), - (void *)&XE2_LNL_2x4_160, XE2_CSR_DEBUG_BINDLESS_LNL_2x4_160_WMTP_DATA_SIZE); + (void *)&Xe2SIP_WMTP_CSRDebugBindlessDebugHeader, (int)sizeof(Xe2SIP_WMTP_CSRDebugBindlessDebugHeader)); } @@ -1224,10 +1130,9 @@ void populateSIPKernelInfo(const IGC::CPlatform &platform, CGenSystemInstructionKernelProgram* CGenSystemInstructionKernelProgram::Create( const IGC::CPlatform &platform, const SYSTEM_THREAD_MODE mode, - const bool bindlessMode, - SLM_SIZE_SUPPORTED slmSize) + const bool bindlessMode) { - unsigned char SIPIndex = 0; + unsigned char SIPIndex = GEN_SIP_MAX_INDEX; std::map< unsigned char, std::tuple > SIPKernelInfo; populateSIPKernelInfo(platform, SIPKernelInfo); @@ -1357,6 +1262,8 @@ CGenSystemInstructionKernelProgram* CGenSystemInstructionKernelProgram::Create( } else if (mode == SYSTEM_THREAD_MODE_CSR) { + uint32_t slices = sysInfo.MaxSlicesSupported; + uint32_t subslices_per_slice =(sysInfo.MaxSlicesSupported > 0 ? (sysInfo.MaxSubSlicesSupported / sysInfo.MaxSlicesSupported) : sysInfo.MaxSubSlicesSupported); switch (platform.getPlatformInfo().eProductFamily) { case IGFX_TIGERLAKE_LP: @@ -1375,27 +1282,43 @@ CGenSystemInstructionKernelProgram* CGenSystemInstructionKernelProgram::Create( case IGFX_METEORLAKE: case IGFX_ARROWLAKE: case IGFX_LUNARLAKE: - if (sysInfo.SliceCount == 1 && sysInfo.SubSliceCount == 4) + if (slices == 1 && subslices_per_slice == 4) { - if (slmSize == SLM_128) + if (sysInfo.SLMSizeInKb == SLM_128) { SIPIndex = XE2_CSR_DEBUG_BINDLESS_LNL_1x4_128; + Xe2SIP_WMTP_CSRDebugBindlessDebugHeader.total_wmtp_data_size = XE2_CSR_DEBUG_BINDLESS_LNL_1x4_128_WMTP_DATA_SIZE; } - else if (slmSize == SLM_160) + else if (sysInfo.SLMSizeInKb == SLM_160) { SIPIndex = XE2_CSR_DEBUG_BINDLESS_LNL_1x4_160; + Xe2SIP_WMTP_CSRDebugBindlessDebugHeader.total_wmtp_data_size = XE2_CSR_DEBUG_BINDLESS_LNL_1x4_160_WMTP_DATA_SIZE; + } + else + { + IGC_ASSERT(false); } } - else if (sysInfo.SliceCount == 2 && sysInfo.SubSliceCount == 4) + else if (slices == 2 && subslices_per_slice == 4) { - if (slmSize == SLM_128) + if (sysInfo.SLMSizeInKb == SLM_128) { SIPIndex = XE2_CSR_DEBUG_BINDLESS_LNL_2x4_128; + Xe2SIP_WMTP_CSRDebugBindlessDebugHeader.total_wmtp_data_size = XE2_CSR_DEBUG_BINDLESS_LNL_2x4_128_WMTP_DATA_SIZE; } - else if (slmSize == SLM_160) + else if (sysInfo.SLMSizeInKb == SLM_160) { SIPIndex = XE2_CSR_DEBUG_BINDLESS_LNL_2x4_160; + Xe2SIP_WMTP_CSRDebugBindlessDebugHeader.total_wmtp_data_size = XE2_CSR_DEBUG_BINDLESS_LNL_2x4_160_WMTP_DATA_SIZE; } + else + { + IGC_ASSERT(false); + } + } + else + { + IGC_ASSERT(false); } break; diff --git a/IGC/common/SystemThread.h b/IGC/common/SystemThread.h index 325e043cf1ee..48c6f1c427b8 100644 --- a/IGC/common/SystemThread.h +++ b/IGC/common/SystemThread.h @@ -83,8 +83,8 @@ namespace SIP enum SLM_SIZE_SUPPORTED { SLM_ANY, - SLM_128, - SLM_160 + SLM_128 = 128, + SLM_160 = 160 }; class CSystemThread @@ -96,13 +96,6 @@ namespace SIP USC::SSystemThreadKernelOutput* &pSystemThread, bool bindlessmode = false); - static bool CreateSystemThreadKernel_v2(const PLATFORM &platform, - const IGC::SCompilerHwCaps &Caps, - const GT_SYSTEM_INFO &sysInfo, - const USC::SYSTEM_THREAD_MODE mode, - USC::SSystemThreadKernelOutput *&pSystemThread, - SLM_SIZE_SUPPORTED slmSize); - static void DeleteSystemThreadKernel( USC::SSystemThreadKernelOutput* &pSystemThread ); @@ -117,8 +110,7 @@ namespace SIP CGenSystemInstructionKernelProgram* Create( const IGC::CPlatform &platform, const USC::SYSTEM_THREAD_MODE mode, - const bool bindlessMode, - SLM_SIZE_SUPPORTED slmSize); + const bool bindlessMode); void Delete(CGenSystemInstructionKernelProgram* &pKernelProgram );