From 17d88ed72da4b70821159169499a805b87739ee6 Mon Sep 17 00:00:00 2001 From: SwapnilGaikwad Date: Fri, 7 Jun 2024 01:48:35 +0100 Subject: [PATCH] Add option to change SVE vector length for current and children processes (#101295) * Add option to change SVE vector length for current and children processes. * Use maxVectorTBitWidth to get desired SVE length * Use CNTB to determine current vector length * Use ACLE to detect vector length on Linux and hardcode on Windows * Use rdvl instead of cntb to count vector length * Add inline assembly suitable for MSVC * Add MSVC compatible assembly helper instead of inline assembly * Declare GetSveLengthFromOS on Arm64 * Remove definition of GetSveLengthFromOS from .S file * Move non-Windows definition of GetSveLengthFromOS to .S file * Apply suggestions from code review * Move declaration of GetSveLengthFromOS to .cpp file * Disable temporarily for Windows on Arm machines * Hardcode GetSveLengthFromOS() to return SVE length as 128 --------- Co-authored-by: Jan Kotas --- src/coreclr/vm/arm64/asmhelpers.S | 7 +++++++ src/coreclr/vm/arm64/asmhelpers.asm | 8 ++++++++ src/coreclr/vm/codeman.cpp | 16 ++++++++++++---- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 0edbb3fdf92fc..55a275bf5cfd7 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -23,6 +23,13 @@ LEAF_ENTRY GetDataCacheZeroIDReg, _TEXT ret lr LEAF_END GetDataCacheZeroIDReg, _TEXT +// uint64_t GetSveLengthFromOS(void); +.arch_extension sve + LEAF_ENTRY GetSveLengthFromOS, _TEXT + rdvl x0, 1 + ret lr + LEAF_END GetSveLengthFromOS, _TEXT + //----------------------------------------------------------------------------- // This routine captures the machine state. 
It is used by helper method frame //----------------------------------------------------------------------------- diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index ec28879607187..e27d2e178c3f8 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -78,6 +78,14 @@ ret lr LEAF_END +;; uint64_t GetSveLengthFromOS(void); + LEAF_ENTRY GetSveLengthFromOS + ;; TODO-SVE: Remove the hardcoded value 128 and uncomment once CI machines are updated to use MASM 14.4 or later + ;; rdvl x0, 1 + mov x0, #128 + ret lr + LEAF_END + ;;----------------------------------------------------------------------------- ;; This routine captures the machine state. It is used by helper method frame ;;----------------------------------------------------------------------------- diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index d115a22850a74..7d296977d26fe 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1243,6 +1243,7 @@ EEJitManager::EEJitManager() #ifdef TARGET_ARM64 extern "C" DWORD64 __stdcall GetDataCacheZeroIDReg(); +extern "C" uint64_t GetSveLengthFromOS(); #endif void EEJitManager::SetCpuInfo() @@ -1257,12 +1258,12 @@ void EEJitManager::SetCpuInfo() int cpuFeatures = minipal_getcpufeatures(); -#if defined(TARGET_X86) || defined(TARGET_AMD64) - CPUCompileFlags.Set(InstructionSet_VectorT128); - // Get the maximum bitwidth of Vector, rounding down to the nearest multiple of 128-bits uint32_t maxVectorTBitWidth = (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_MaxVectorTBitWidth) / 128) * 128; +#if defined(TARGET_X86) || defined(TARGET_AMD64) + CPUCompileFlags.Set(InstructionSet_VectorT128); + if (((cpuFeatures & XArchIntrinsicConstants_Avx2) != 0) && ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 256))) { // We allow 256-bit Vector by default @@ -1514,7 +1515,14 @@ void EEJitManager::SetCpuInfo() if (((cpuFeatures & ARM64IntrinsicConstants_Sve) != 0) && 
CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Sve)) { - CPUCompileFlags.Set(InstructionSet_Sve); + uint32_t maxVectorTLength = (maxVectorTBitWidth / 8); + uint64_t sveLengthFromOS = GetSveLengthFromOS(); + + // Do not enable SVE when the user specified vector length is smaller than the one offered by underlying OS. + if ((maxVectorTLength >= sveLengthFromOS) || (maxVectorTBitWidth == 0)) + { + CPUCompileFlags.Set(InstructionSet_Sve); + } } // DCZID_EL0<4> (DZP) indicates whether use of DC ZVA instructions is permitted (0) or prohibited (1).