Skip to content

Commit

Permalink
Add option to change SVE vector length for current and children proce…
Browse files Browse the repository at this point in the history
…sses (#101295)

* Add option to change SVE vector length for current and children processes.

* Use maxVectorTBitWidth to get desired SVE length

* Use CNTB to determine current vector length

* Use ACLE to detect vector length on Linux and hardcode on Windows

* Use rdvl instead of cntb to count vector length

* Add inline assembly suitable for MSVC

* Add MSVC-compatible assembly helper instead of inline assembly

* Declare GetSveLengthFromOS on Arm64

* Remove definition of GetSveLengthFromOS from .S file

* Move non-windows definition of GetSveLengthFromOS to .S file

* Apply suggestions from code review

* Move declaration of GetSveLengthFromOS to .cpp file

* Disable temporarily for Windows on Arm machines

* Hardcode GetSveLengthFromOS() to return SVE length as 128

---------

Co-authored-by: Jan Kotas <jkotas@microsoft.com>
  • Loading branch information
SwapnilGaikwad and jkotas authored Jun 7, 2024
1 parent f6a7ebb commit 17d88ed
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 4 deletions.
7 changes: 7 additions & 0 deletions src/coreclr/vm/arm64/asmhelpers.S
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ LEAF_ENTRY GetDataCacheZeroIDReg, _TEXT
ret lr
LEAF_END GetDataCacheZeroIDReg, _TEXT

// uint64_t GetSveLengthFromOS(void);
//
// Returns, in x0, the SVE vector length currently in effect, measured in bytes
// (RDVL with a multiplier of 1). The caller in codeman.cpp compares this value
// against MaxVectorTBitWidth / 8, i.e. it also treats the result as a byte count.
//
// RDVL is an SVE instruction: the visible caller only invokes this helper after
// checking the ARM64IntrinsicConstants_Sve CPU feature bit.
.arch_extension sve // allow the assembler to accept SVE instructions below
LEAF_ENTRY GetSveLengthFromOS, _TEXT
rdvl x0, 1 // x0 = 1 * (vector length in bytes)
ret lr
LEAF_END GetSveLengthFromOS, _TEXT

//-----------------------------------------------------------------------------
// This routine captures the machine state. It is used by helper method frame
//-----------------------------------------------------------------------------
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/vm/arm64/asmhelpers.asm
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@
ret lr
LEAF_END

;; uint64_t GetSveLengthFromOS(void);
;;
;; Returns, in x0, the SVE vector length in BYTES. The caller in codeman.cpp
;; compares the result against MaxVectorTBitWidth / 8, and the real
;; implementation (rdvl x0, 1) also yields a byte count.
LEAF_ENTRY GetSveLengthFromOS
;; TODO-SVE: Remove the hardcoded value and uncomment once CI machines are updated to use MASM 14.4 or later
;; rdvl x0, 1
;; Placeholder for a 128-bit vector length: 128 bits == 16 bytes. Returning 128
;; here would claim a 1024-bit vector and make the caller reject SVE for any
;; non-zero MaxVectorTBitWidth below 1024.
mov x0, #16
ret lr
LEAF_END

;;-----------------------------------------------------------------------------
;; This routine captures the machine state. It is used by helper method frame
;;-----------------------------------------------------------------------------
Expand Down
16 changes: 12 additions & 4 deletions src/coreclr/vm/codeman.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1243,6 +1243,7 @@ EEJitManager::EEJitManager()

#ifdef TARGET_ARM64
// Assembly helpers implemented in vm/arm64/asmhelpers.S (Unix) and
// vm/arm64/asmhelpers.asm (Windows).

// NOTE(review): presumably returns the raw DCZID_EL0 register value used for the
// DC ZVA check in SetCpuInfo - confirm against the assembly helper.
extern "C" DWORD64 __stdcall GetDataCacheZeroIDReg();

// Returns the SVE vector length. SetCpuInfo compares the result against
// MaxVectorTBitWidth / 8, so the value is expected to be a byte count.
extern "C" uint64_t GetSveLengthFromOS();
#endif

void EEJitManager::SetCpuInfo()
Expand All @@ -1257,12 +1258,12 @@ void EEJitManager::SetCpuInfo()

int cpuFeatures = minipal_getcpufeatures();

#if defined(TARGET_X86) || defined(TARGET_AMD64)
CPUCompileFlags.Set(InstructionSet_VectorT128);

// Get the maximum bitwidth of Vector<T>, rounding down to the nearest multiple of 128-bits
uint32_t maxVectorTBitWidth = (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_MaxVectorTBitWidth) / 128) * 128;

#if defined(TARGET_X86) || defined(TARGET_AMD64)
CPUCompileFlags.Set(InstructionSet_VectorT128);

if (((cpuFeatures & XArchIntrinsicConstants_Avx2) != 0) && ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 256)))
{
// We allow 256-bit Vector<T> by default
Expand Down Expand Up @@ -1514,7 +1515,14 @@ void EEJitManager::SetCpuInfo()

if (((cpuFeatures & ARM64IntrinsicConstants_Sve) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Sve))
{
CPUCompileFlags.Set(InstructionSet_Sve);
uint32_t maxVectorTLength = (maxVectorTBitWidth / 8);
uint64_t sveLengthFromOS = GetSveLengthFromOS();

// Do not enable SVE when the user specified vector length is smaller than the one offered by underlying OS.
if ((maxVectorTLength >= sveLengthFromOS) || (maxVectorTBitWidth == 0))
{
CPUCompileFlags.Set(InstructionSet_Sve);
}
}

// DCZID_EL0<4> (DZP) indicates whether use of DC ZVA instructions is permitted (0) or prohibited (1).
Expand Down

0 comments on commit 17d88ed

Please sign in to comment.