Skip to content

Commit

Permalink
Ensure the scalar Reciprocal*Estimate APIs use AVX512 where possible (d…
Browse files Browse the repository at this point in the history
…otnet#101800)

* Ensure the scalar Reciprocal*Estimate APIs use AVX512 where possible

* Ensure that TensorPrimitives.Reciprocal APIs consistently use AVX512

* Move the implementation of ReciprocalEstimate and ReciprocalSqrtEstimate to the JIT so R2R works

* Ensure the relevant math intrinsics are marked betterToExpand

* Apply formatting patch

* Fixing the method header for impEstimateIntrinsic

* Don't use the AVX512 paths for TensorPrimitives.Reciprocal*Estimate APIs on .NET 8

* Workaround an issue where the TensorPrimitives net8 tests are not running on net8
  • Loading branch information
tannergooding authored and Ruihan-Yin committed May 30, 2024
1 parent 4da5551 commit 7108d59
Show file tree
Hide file tree
Showing 10 changed files with 217 additions and 53 deletions.
7 changes: 6 additions & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -1594,7 +1594,7 @@ enum class ProfileChecks : unsigned int
{
CHECK_NONE = 0,
CHECK_HASLIKELIHOOD = 1 << 0, // check all FlowEdges for hasLikelihood
CHECK_LIKELIHOODSUM = 1 << 1, // check block succesor likelihoods sum to 1
CHECK_LIKELIHOODSUM = 1 << 1, // check block succesor likelihoods sum to 1
CHECK_LIKELY = 1 << 2, // fully check likelihood based weights
RAISE_ASSERT = 1 << 3, // assert on check failure
CHECK_ALL_BLOCKS = 1 << 4, // check blocks even if bbHasProfileWeight is false
Expand Down Expand Up @@ -4528,6 +4528,11 @@ class Compiler
CORINFO_THIS_TRANSFORM constraintCallThisTransform,
NamedIntrinsic* pIntrinsicName,
bool* isSpecialIntrinsic = nullptr);
GenTree* impEstimateIntrinsic(CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
CorInfoType callJitType,
NamedIntrinsic intrinsicName,
bool tailCall);
GenTree* impMathIntrinsic(CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
var_types callType,
Expand Down
163 changes: 155 additions & 8 deletions src/coreclr/jit/importercalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3125,7 +3125,15 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
// To be fixed in https://github.com/dotnet/runtime/pull/77465
const bool tier0opts = !opts.compDbgCode && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT);

if (!mustExpand && tier0opts)
if (tier0opts)
{
// The *Estimate APIs are allowed to differ in behavior across hardware
// so ensure we treat them as "betterToExpand" to get deterministic behavior

betterToExpand |= (ni == NI_System_Math_ReciprocalEstimate);
betterToExpand |= (ni == NI_System_Math_ReciprocalSqrtEstimate);
}
else if (!mustExpand)
{
switch (ni)
{
Expand Down Expand Up @@ -3189,9 +3197,9 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
break;

default:
// Unsafe.* are all small enough to prefer expansions.
// Various intrinsics are all small enough to prefer expansions.
betterToExpand |= ni >= NI_SYSTEM_MATH_START && ni <= NI_SYSTEM_MATH_END;
betterToExpand |= ni >= NI_SRCS_UNSAFE_START && ni <= NI_SRCS_UNSAFE_END;
// Same for these
betterToExpand |= ni >= NI_PRIMITIVE_START && ni <= NI_PRIMITIVE_END;
break;
}
Expand Down Expand Up @@ -4146,6 +4154,13 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
break;
}

case NI_System_Math_ReciprocalEstimate:
case NI_System_Math_ReciprocalSqrtEstimate:
{
retNode = impEstimateIntrinsic(method, sig, callJitType, ni, tailCall);
break;
}

case NI_System_Array_Clone:
case NI_System_Collections_Generic_Comparer_get_Default:
case NI_System_Collections_Generic_EqualityComparer_get_Default:
Expand Down Expand Up @@ -7413,13 +7428,15 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName)
// instructions to directly compute round/ceiling/floor/truncate.

case NI_System_Math_Abs:
case NI_System_Math_ReciprocalEstimate:
case NI_System_Math_ReciprocalSqrtEstimate:
case NI_System_Math_Sqrt:
return true;

case NI_System_Math_Ceiling:
case NI_System_Math_Floor:
case NI_System_Math_Truncate:
case NI_System_Math_Round:
case NI_System_Math_Truncate:
return compOpportunisticallyDependsOn(InstructionSet_SSE41);

case NI_System_Math_FusedMultiplyAdd:
Expand All @@ -7434,11 +7451,13 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName)
case NI_System_Math_Abs:
case NI_System_Math_Ceiling:
case NI_System_Math_Floor:
case NI_System_Math_Truncate:
case NI_System_Math_Round:
case NI_System_Math_Sqrt:
case NI_System_Math_Max:
case NI_System_Math_Min:
case NI_System_Math_ReciprocalEstimate:
case NI_System_Math_ReciprocalSqrtEstimate:
case NI_System_Math_Round:
case NI_System_Math_Sqrt:
case NI_System_Math_Truncate:
return true;

case NI_System_Math_FusedMultiplyAdd:
Expand Down Expand Up @@ -7513,6 +7532,8 @@ bool Compiler::IsMathIntrinsic(NamedIntrinsic intrinsicName)
case NI_System_Math_MinMagnitudeNumber:
case NI_System_Math_MinNumber:
case NI_System_Math_Pow:
case NI_System_Math_ReciprocalEstimate:
case NI_System_Math_ReciprocalSqrtEstimate:
case NI_System_Math_Round:
case NI_System_Math_Sin:
case NI_System_Math_Sinh:
Expand Down Expand Up @@ -8728,6 +8749,119 @@ void Compiler::impCheckCanInline(GenTreeCall* call,
}
}

//------------------------------------------------------------------------
// impEstimateIntrinsic: Imports one of the *Estimate intrinsics which are
// explicitly allowed to differ in result based on the hardware they're running
// against
//
// Arguments:
// method - The handle of the method being imported
// callType - The underlying type for the call
// intrinsicName - The intrinsic being imported
// tailCall - true if the method is a tail call; otherwise false
//
GenTree* Compiler::impEstimateIntrinsic(CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
CorInfoType callJitType,
NamedIntrinsic intrinsicName,
bool tailCall)
{
var_types callType = JITtype2varType(callJitType);

assert(varTypeIsFloating(callType));
assert(sig->numArgs == 1);

#if defined(FEATURE_HW_INTRINSICS)
// We use compExactlyDependsOn since these are estimate APIs where
// the behavior is explicitly allowed to differ across machines and
// we want to ensure that it gets marked as such in R2R.

var_types simdType = TYP_UNKNOWN;
NamedIntrinsic intrinsicId = NI_Illegal;

switch (intrinsicName)
{
case NI_System_Math_ReciprocalEstimate:
{
#if defined(TARGET_XARCH)
if (compExactlyDependsOn(InstructionSet_AVX512F))
{
simdType = TYP_SIMD16;
intrinsicId = NI_AVX512F_Reciprocal14Scalar;
}
else if ((callType == TYP_FLOAT) && compExactlyDependsOn(InstructionSet_SSE))
{
simdType = TYP_SIMD16;
intrinsicId = NI_SSE_ReciprocalScalar;
}
#elif defined(TARGET_ARM64)
if (compExactlyDependsOn(InstructionSet_AdvSimd_Arm64))
{
simdType = TYP_SIMD8;
intrinsicId = NI_AdvSimd_Arm64_ReciprocalEstimateScalar;
}
#endif // TARGET_ARM64
break;
}

case NI_System_Math_ReciprocalSqrtEstimate:
{
#if defined(TARGET_XARCH)
if (compExactlyDependsOn(InstructionSet_AVX512F))
{
simdType = TYP_SIMD16;
intrinsicId = NI_AVX512F_ReciprocalSqrt14Scalar;
}
else if ((callType == TYP_FLOAT) && compExactlyDependsOn(InstructionSet_SSE))
{
simdType = TYP_SIMD16;
intrinsicId = NI_SSE_ReciprocalSqrtScalar;
}
#elif defined(TARGET_ARM64)
if (compExactlyDependsOn(InstructionSet_AdvSimd_Arm64))
{
simdType = TYP_SIMD8;
intrinsicId = NI_AdvSimd_Arm64_ReciprocalSquareRootEstimateScalar;
}
#endif // TARGET_ARM64
break;
}

default:
{
unreached();
}
}

if (intrinsicId != NI_Illegal)
{
unsigned simdSize = 0;

if (simdType == TYP_SIMD8)
{
simdSize = 8;
}
else
{
assert(simdType == TYP_SIMD16);
simdSize = 16;
}

GenTree* op1 = impPopStack().val;

op1 = gtNewSimdCreateScalarUnsafeNode(simdType, op1, callJitType, simdSize);
op1 = gtNewSimdHWIntrinsicNode(simdType, op1, intrinsicId, callJitType, simdSize);

return gtNewSimdToScalarNode(callType, op1, callJitType, simdSize);
}
#endif // FEATURE_HW_INTRINSICS

// TODO-CQ: Returning this as an intrinsic blocks inlining and is undesirable
// return impMathIntrinsic(method, sig, callType, intrinsicName, tailCall);

return nullptr;
}

GenTree* Compiler::impMathIntrinsic(CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
var_types callType,
Expand Down Expand Up @@ -10337,7 +10471,20 @@ NamedIntrinsic Compiler::lookupPrimitiveFloatNamedIntrinsic(CORINFO_METHOD_HANDL

case 'R':
{
if (strcmp(methodName, "Round") == 0)
if (strncmp(methodName, "Reciprocal", 10) == 0)
{
methodName += 10;

if (strcmp(methodName, "Estimate") == 0)
{
result = NI_System_Math_ReciprocalEstimate;
}
else if (strcmp(methodName, "SqrtEstimate") == 0)
{
result = NI_System_Math_ReciprocalSqrtEstimate;
}
}
else if (strcmp(methodName, "Round") == 0)
{
result = NI_System_Math_Round;
}
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/namedintrinsiclist.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ enum NamedIntrinsic : unsigned short
NI_System_Math_MinMagnitudeNumber,
NI_System_Math_MinNumber,
NI_System_Math_Pow,
NI_System_Math_ReciprocalEstimate,
NI_System_Math_ReciprocalSqrtEstimate,
NI_System_Math_Round,
NI_System_Math_Sin,
NI_System_Math_Sinh,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,14 @@ public static void ReciprocalSqrtEstimate<T>(ReadOnlySpan<T> x, Span<T> destinat

public static Vector128<T> Invoke(Vector128<T> x)
{
#if NET9_0_OR_GREATER
if (Avx512F.VL.IsSupported)
{
if (typeof(T) == typeof(float)) return Avx512F.VL.Reciprocal14(x.AsSingle()).As<float, T>();
if (typeof(T) == typeof(double)) return Avx512F.VL.Reciprocal14(x.AsDouble()).As<double, T>();
}
#endif

if (Sse.IsSupported)
{
if (typeof(T) == typeof(float)) return Sse.Reciprocal(x.AsSingle()).As<float, T>();
Expand All @@ -115,6 +123,14 @@ public static Vector128<T> Invoke(Vector128<T> x)

public static Vector256<T> Invoke(Vector256<T> x)
{
#if NET9_0_OR_GREATER
if (Avx512F.VL.IsSupported)
{
if (typeof(T) == typeof(float)) return Avx512F.VL.Reciprocal14(x.AsSingle()).As<float, T>();
if (typeof(T) == typeof(double)) return Avx512F.VL.Reciprocal14(x.AsDouble()).As<double, T>();
}
#endif

if (Avx.IsSupported)
{
if (typeof(T) == typeof(float)) return Avx.Reciprocal(x.AsSingle()).As<float, T>();
Expand All @@ -125,11 +141,13 @@ public static Vector256<T> Invoke(Vector256<T> x)

public static Vector512<T> Invoke(Vector512<T> x)
{
#if NET9_0_OR_GREATER
if (Avx512F.IsSupported)
{
if (typeof(T) == typeof(float)) return Avx512F.Reciprocal14(x.AsSingle()).As<float, T>();
if (typeof(T) == typeof(double)) return Avx512F.Reciprocal14(x.AsDouble()).As<double, T>();
}
#endif

return Vector512<T>.One / x;
}
Expand All @@ -143,6 +161,14 @@ public static Vector512<T> Invoke(Vector512<T> x)

public static Vector128<T> Invoke(Vector128<T> x)
{
#if NET9_0_OR_GREATER
if (Avx512F.VL.IsSupported)
{
if (typeof(T) == typeof(float)) return Avx512F.VL.ReciprocalSqrt14(x.AsSingle()).As<float, T>();
if (typeof(T) == typeof(double)) return Avx512F.VL.ReciprocalSqrt14(x.AsDouble()).As<double, T>();
}
#endif

if (Sse.IsSupported)
{
if (typeof(T) == typeof(float)) return Sse.ReciprocalSqrt(x.AsSingle()).As<float, T>();
Expand All @@ -163,6 +189,14 @@ public static Vector128<T> Invoke(Vector128<T> x)

public static Vector256<T> Invoke(Vector256<T> x)
{
#if NET9_0_OR_GREATER
if (Avx512F.VL.IsSupported)
{
if (typeof(T) == typeof(float)) return Avx512F.VL.ReciprocalSqrt14(x.AsSingle()).As<float, T>();
if (typeof(T) == typeof(double)) return Avx512F.VL.ReciprocalSqrt14(x.AsDouble()).As<double, T>();
}
#endif

if (Avx.IsSupported)
{
if (typeof(T) == typeof(float)) return Avx.ReciprocalSqrt(x.AsSingle()).As<float, T>();
Expand All @@ -173,11 +207,13 @@ public static Vector256<T> Invoke(Vector256<T> x)

public static Vector512<T> Invoke(Vector512<T> x)
{
#if NET9_0_OR_GREATER
if (Avx512F.IsSupported)
{
if (typeof(T) == typeof(float)) return Avx512F.ReciprocalSqrt14(x.AsSingle()).As<float, T>();
if (typeof(T) == typeof(double)) return Avx512F.ReciprocalSqrt14(x.AsDouble()).As<double, T>();
}
#endif

return Vector512<T>.One / Vector512.Sqrt(x);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
<PropertyGroup>
<TargetFramework>$(NetCoreAppCurrent)</TargetFramework>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<DefineConstants>$(DefineConstants);SNT_NET8_TESTS</DefineConstants>
</PropertyGroup>

<ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,12 @@ public static IEnumerable<object[]> SpanDestinationFunctionsToTest()
yield return Create(TensorPrimitives.Reciprocal, f => T.One / f);
yield return Create(TensorPrimitives.ReciprocalEstimate, T.ReciprocalEstimate, T.CreateTruncating(Helpers.DefaultToleranceForEstimates));
yield return Create(TensorPrimitives.ReciprocalSqrt, f => T.One / T.Sqrt(f));

#if !SNT_NET8_TESTS
// Avoid running with the net8 tests due to: https://github.com/dotnet/runtime/issues/101846
yield return Create(TensorPrimitives.ReciprocalSqrtEstimate, T.ReciprocalSqrtEstimate, T.CreateTruncating(Helpers.DefaultToleranceForEstimates));
#endif

yield return Create(TensorPrimitives.Round, T.Round);
yield return Create(TensorPrimitives.Sin, T.Sin, trigTolerance);
yield return Create(TensorPrimitives.Sinh, T.Sinh, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-14));
Expand Down
2 changes: 2 additions & 0 deletions src/libraries/System.Private.CoreLib/src/System/Double.cs
Original file line number Diff line number Diff line change
Expand Up @@ -865,9 +865,11 @@ bool IFloatingPoint<double>.TryWriteSignificandLittleEndian(Span<byte> destinati
public static double Lerp(double value1, double value2, double amount) => (value1 * (1.0 - amount)) + (value2 * amount);

/// <inheritdoc cref="IFloatingPointIeee754{TSelf}.ReciprocalEstimate(TSelf)" />
[Intrinsic]
public static double ReciprocalEstimate(double x) => Math.ReciprocalEstimate(x);

/// <inheritdoc cref="IFloatingPointIeee754{TSelf}.ReciprocalSqrtEstimate(TSelf)" />
[Intrinsic]
public static double ReciprocalSqrtEstimate(double x) => Math.ReciprocalSqrtEstimate(x);

/// <inheritdoc cref="IFloatingPointIeee754{TSelf}.ScaleB(TSelf, int)" />
Expand Down
Loading

0 comments on commit 7108d59

Please sign in to comment.