From ccd9d169b0189d0503ec885bf86e9f026cb621a8 Mon Sep 17 00:00:00 2001 From: Will Smith Date: Wed, 19 Oct 2022 18:16:10 -0700 Subject: [PATCH] [JIT] X64 - Three instruction replacement sequence for multiply in certain cases (#76981) * Using 3 instruction sequence for x64 multiply * Do not do this in morph. Do it in codegen now. * Fixing codegen * Only allow values under 127 and do not skip mov - correctness testing * Try to fix tests * cleanup * Moving to Lowering * Quick fix * Fully works in lowering now * Account for all ints * Take into account codegen opts * Minor cleanup * Minor cleanup * Fixed test * Added int multiply disasm checks. Fixed SuperFileCheck namespace bug. Made SuperFileCheck anchors more likely to match. * Update comments * Update comments * Update comments * Update comments * Formatting * Fixing build * Fixing build again * minor rename * Feedback. Removed use of FULL-LINE as it is more readable not strictly necessary. Forgot to add an additional instruction to a disasm test. 
* Formatting
---
 Review note: the genLog2 cast has its template arguments again, the x86 guard uses
 #ifdef, and cnsVal +/- 1 is computed without signed overflow. Added-line counts are
 unchanged; the lowerxarch.cpp blob id on the index line predates these edits.

 src/coreclr/jit/lower.h                       |   1 +
 src/coreclr/jit/lowerxarch.cpp                | 100 ++++++++++
 src/coreclr/tools/SuperFileCheck/Program.cs   |   6 +-
 src/tests/JIT/opt/Multiply/IntMultiply.cs     | 182 ++++++++++++++++++
 src/tests/JIT/opt/Multiply/IntMultiply.csproj |  17 ++
 5 files changed, 303 insertions(+), 3 deletions(-)
 create mode 100644 src/tests/JIT/opt/Multiply/IntMultiply.cs
 create mode 100644 src/tests/JIT/opt/Multiply/IntMultiply.csproj

diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h
index 84ba314adc514..30b12a38729e6 100644
--- a/src/coreclr/jit/lower.h
+++ b/src/coreclr/jit/lower.h
@@ -319,6 +319,7 @@ class Lowering final : public Phase
     void LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode);
 #ifdef TARGET_XARCH
     void LowerPutArgStk(GenTreePutArgStk* putArgStk);
+    GenTree* TryLowerMulToLshSubOrLshAdd(GenTreeOp* node);
 #endif // TARGET_XARCH
 
     bool TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* parent);
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index 048faf2e1843a..72d9720c5ec3d 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -105,6 +105,97 @@ void Lowering::LowerStoreIndir(GenTreeStoreInd* node)
     ContainCheckStoreIndir(node);
 }
 
+//----------------------------------------------------------------------------------------------
+// Lowering::TryLowerMulToLshSubOrLshAdd:
+//    Lowers a tree MUL(X, CNS) to SUB(LSH(X, CNS_SHIFT), X) when CNS + 1 is a power of two
+//    or
+//    Lowers a tree MUL(X, CNS) to ADD(LSH(X, CNS_SHIFT), X) when CNS - 1 is a power of two
+//
+// Arguments:
+//    node - GT_MUL node; it is only rewritten when it is integral and not overflow-checked
+//
+// Return Value:
+//    Returns node itself, rewritten in place to GT_SUB/GT_ADD, or nullptr if it was left untouched
+//
+// Notes:
+//    X must be an uncontained GT_LCL_VAR and CNS an uncontained integer constant. Containment checks are redone.
+GenTree* Lowering::TryLowerMulToLshSubOrLshAdd(GenTreeOp* node)
+{
+    assert(node->OperIs(GT_MUL));
+
+// We do not do this optimization in X86 as it is not recommended.
+#ifdef TARGET_X86
+    return nullptr;
+#endif // TARGET_X86
+    if (!varTypeIsIntegral(node))
+        return nullptr;
+
+    if (node->gtOverflow())
+        return nullptr;
+
+    GenTree* op1 = node->gtGetOp1();
+    GenTree* op2 = node->gtGetOp2();
+
+    if (op1->isContained() || op2->isContained())
+        return nullptr;
+
+    if (!op1->OperIs(GT_LCL_VAR))
+        return nullptr;
+
+    if (!op2->IsCnsIntOrI())
+        return nullptr;
+
+    GenTreeIntConCommon* cns    = op2->AsIntConCommon();
+    ssize_t              cnsVal = cns->IconValue();
+
+    // Use GT_LSH if cnsVal is a power of two; this is handled in codegen.
+    if (isPow2(cnsVal))
+        return nullptr;
+
+    // Use GT_LEA if cnsVal is 3, 5, or 9; this is handled in codegen.
+    if (cnsVal == 3 || cnsVal == 5 || cnsVal == 9)
+        return nullptr;
+
+    // Compute the neighbours in unsigned arithmetic: cnsVal may be the minimum or maximum
+    // ssize_t value, and signed overflow is undefined behaviour.
+    ssize_t cnsValPlusOne  = static_cast<ssize_t>(static_cast<size_t>(cnsVal) + 1);
+    ssize_t cnsValMinusOne = static_cast<ssize_t>(static_cast<size_t>(cnsVal) - 1);
+
+    bool useSub = isPow2(cnsValPlusOne);
+
+    if (!useSub && !isPow2(cnsValMinusOne))
+        return nullptr;
+
+    if (useSub)
+    {
+        cnsVal = cnsValPlusOne;
+        node->ChangeOper(GT_SUB);
+    }
+    else
+    {
+        cnsVal = cnsValMinusOne;
+        node->ChangeOper(GT_ADD);
+    }
+
+    unsigned int shiftAmount = genLog2(static_cast<uint64_t>(static_cast<size_t>(cnsVal)));
+    cns->SetIconValue(shiftAmount);
+
+    node->gtOp1 = comp->gtNewOperNode(GT_LSH, node->gtType, op1, cns);
+    node->gtOp2 = comp->gtClone(op1);
+
+    BlockRange().Remove(op1);
+    BlockRange().Remove(cns);
+    BlockRange().InsertBefore(node, node->gtGetOp2());
+    BlockRange().InsertBefore(node, cns);
+    BlockRange().InsertBefore(node, op1);
+    BlockRange().InsertBefore(node, node->gtGetOp1());
+
+    ContainCheckBinary(node);
+    ContainCheckShiftRotate(node->gtGetOp1()->AsOp());
+
+    return node;
+}
+
 //------------------------------------------------------------------------
 // LowerMul: Lower a GT_MUL/GT_MULHI/GT_MUL_LONG node.
// @@ -120,6 +211,15 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul) { assert(mul->OperIsMul()); + if (mul->OperIs(GT_MUL)) + { + GenTree* replacementNode = TryLowerMulToLshSubOrLshAdd(mul); + if (replacementNode != nullptr) + { + return replacementNode->gtNext; + } + } + ContainCheckMul(mul); return mul->gtNext; diff --git a/src/coreclr/tools/SuperFileCheck/Program.cs b/src/coreclr/tools/SuperFileCheck/Program.cs index 79b5d1548b101..6d9e8d4f5aab9 100644 --- a/src/coreclr/tools/SuperFileCheck/Program.cs +++ b/src/coreclr/tools/SuperFileCheck/Program.cs @@ -224,7 +224,7 @@ static string GetFullyQualifiedEnclosingTypeName(MethodDeclarationSyntax methodD if (namespaceDecl != null) { var identifiers = - namespaceDecl.DescendantTokens().Where(x => x.IsKind(SyntaxKind.IdentifierToken)).Select(x => x.ValueText); + namespaceDecl.Name.DescendantTokens().Where(x => x.IsKind(SyntaxKind.IdentifierToken)).Select(x => x.ValueText); return $"{String.Join(".", identifiers)}.{qualifiedTypeName}"; } @@ -373,8 +373,8 @@ static string PreProcessMethod(MethodDeclarationInfo methodDeclInfo, string[] ch var methodName = methodDeclInfo.FullyQualifiedName.Replace("*", "{{.*}}"); // Change wild-card to FileCheck wild-card syntax. // Create anchors from the first prefix. - var startAnchorText = $"// {checkPrefixes[0]}-LABEL: {methodName}"; - var endAnchorText = $"// {checkPrefixes[0]}: {methodName}"; + var startAnchorText = $"// {checkPrefixes[0]}-LABEL: for method {methodName}"; + var endAnchorText = $"// {checkPrefixes[0]}: for method {methodName}"; // Create temp source file based on the source text of the method. 
// Newlines are added to pad the text so FileCheck's error messages will correspond
diff --git a/src/tests/JIT/opt/Multiply/IntMultiply.cs b/src/tests/JIT/opt/Multiply/IntMultiply.cs
new file mode 100644
index 0000000000000..1bb274f0df976
--- /dev/null
+++ b/src/tests/JIT/opt/Multiply/IntMultiply.cs
@@ -0,0 +1,182 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+
+namespace CodeGenTests
+{
+    static class IntMultiply
+    {
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static uint UInt32_MultiplyWithUInt32MaxValue(uint value)
+        {
+            // X64: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
+            // X64-NEXT: neg [[REG0]]
+            return value * UInt32.MaxValue;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static ulong UInt64_MultiplyWithUInt32MaxValue(ulong value)
+        {
+            // X64: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
+            // X64-NEXT: shl [[REG0]], 32
+            // X64-NEXT: sub [[REG0]], [[REG1]]
+            return value * (ulong)UInt32.MaxValue;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static ulong UInt64_MultiplyWithUInt32MaxValuePlusOne(ulong value)
+        {
+            // X64: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
+            // X64-NEXT: shl [[REG0]], 32
+            return value * ((ulong)UInt32.MaxValue + 1);
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static ulong UInt64_MultiplyWithUInt32MaxValuePlusTwo(ulong value)
+        {
+            // X64: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
+            // X64-NEXT: shl [[REG0]], 32
+            // X64-NEXT: add [[REG0]], [[REG1]]
+            return value * ((ulong)UInt32.MaxValue + 2);
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static ulong UInt64_MultiplyWith2(ulong value)
+        {
+            // X64: lea [[REG0:[a-z]+]], {{\[}}[[REG1:[a-z]+]]+[[REG1]]{{\]}}
+            return value * 2;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static ulong UInt64_MultiplyWith3(ulong value)
+        {
+            // X64: lea [[REG0:[a-z]+]], {{\[}}[[REG1:[a-z]+]]+2*[[REG1]]{{\]}}
+            return value * 3;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static ulong UInt64_MultiplyWith4(ulong value)
+        {
+            // X64: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
+            // X64-NEXT: shl [[REG0]], 2
+            return value * 4;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static ulong UInt64_MultiplyWith5(ulong value)
+        {
+            // X64: lea [[REG0:[a-z]+]], {{\[}}[[REG1:[a-z]+]]+4*[[REG1]]{{\]}}
+            return value * 5;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static ulong UInt64_MultiplyWith6(ulong value)
+        {
+            // X64: lea [[REG0:[a-z]+]], {{\[}}[[REG1:[a-z]+]]+2*[[REG1]]{{\]}}
+            // X64-NEXT: add [[REG0]], [[REG0]]
+            return value * 6;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static ulong UInt64_MultiplyWith7(ulong value)
+        {
+            // X64: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
+            // X64-NEXT: shl [[REG0]], 3
+            // X64-NEXT: sub [[REG0]], [[REG1]]
+            return value * 7;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static ulong UInt64_MultiplyWith8(ulong value)
+        {
+            // X64: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
+            // X64-NEXT: shl [[REG0]], 3
+            return value * 8;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static ulong UInt64_MultiplyWith9(ulong value)
+        {
+            // X64: lea [[REG0:[a-z]+]], {{\[}}[[REG1:[a-z]+]]+8*[[REG1]]{{\]}}
+            return value * 9;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static ulong UInt64_MultiplyWith15(ulong value)
+        {
+            // X64: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
+            // X64-NEXT: shl [[REG0]], 4
+            // X64-NEXT: sub [[REG0]], [[REG1]]
+            return value * 15;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static ulong UInt64_MultiplyWith16(ulong value)
+        {
+            // X64: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
+            // X64-NEXT: shl [[REG0]], 4
+            return value * 16;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static ulong UInt64_MultiplyWith17(ulong value)
+        {
+            // X64: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
+            // X64-NEXT: shl [[REG0]], 4
+            // X64-NEXT: add [[REG0]], [[REG1]]
+            return value * 17;
+        }
+
+        static int Main()
+        {
+            // 3 is passed instead of 1 so a result that ignores or misuses 'value' cannot equal the expected product; 100 = pass, 0 = fail.
+            if (UInt32_MultiplyWithUInt32MaxValue(3) != UInt32.MaxValue - 2)
+                return 0;
+
+            if (UInt64_MultiplyWithUInt32MaxValue(3) != 3 * (ulong)UInt32.MaxValue)
+                return 0;
+
+            if (UInt64_MultiplyWithUInt32MaxValuePlusOne(3) != 3 * ((ulong)UInt32.MaxValue + 1))
+                return 0;
+
+            if (UInt64_MultiplyWithUInt32MaxValuePlusTwo(3) != 3 * ((ulong)UInt32.MaxValue + 2))
+                return 0;
+
+            if (UInt64_MultiplyWith2(3) != 6)
+                return 0;
+
+            if (UInt64_MultiplyWith3(3) != 9)
+                return 0;
+
+            if (UInt64_MultiplyWith4(3) != 12)
+                return 0;
+
+            if (UInt64_MultiplyWith5(3) != 15)
+                return 0;
+
+            if (UInt64_MultiplyWith6(3) != 18)
+                return 0;
+
+            if (UInt64_MultiplyWith7(3) != 21)
+                return 0;
+
+            if (UInt64_MultiplyWith8(3) != 24)
+                return 0;
+
+            if (UInt64_MultiplyWith9(3) != 27)
+                return 0;
+
+            if (UInt64_MultiplyWith15(3) != 45)
+                return 0;
+
+            if (UInt64_MultiplyWith16(3) != 48)
+                return 0;
+
+            if (UInt64_MultiplyWith17(3) != 51)
+                return 0;
+            return 100;
+        }
+    }
+}
diff --git a/src/tests/JIT/opt/Multiply/IntMultiply.csproj b/src/tests/JIT/opt/Multiply/IntMultiply.csproj
new file mode 100644
index 0000000000000..43448e90aa468
--- /dev/null
+++ b/src/tests/JIT/opt/Multiply/IntMultiply.csproj
@@ -0,0 +1,17 @@
+ + + Exe + + + None + True + + + + true + + + + + +