Skip to content

Commit

Permalink
[JIT] X64 - Three instruction replacement sequence for multiply in ce…
Browse files Browse the repository at this point in the history
…rtain cases (#76981)

* Using 3 instruction sequence for x64 multiply

* Do not do this in morph. Do it in codegen now.

* Fixing codegen

* Only allow values under 127 and do not skip mov - correctness testing

* Try to fix tests

* cleanup

* Moving to Lowering

* Quick fix

* Fully works in lowering now

* Account for all ints

* Take into account codegen opts

* Minor cleanup

* Minor cleanup

* Fixed test

* Added int multiply disasm checks. Fixed SuperFileCheck namespace bug. Made SuperFileCheck anchors more likely to match.

* Update comments

* Update comments

* Update comments

* Update comments

* Formatting

* Fixing build

* Fixing build again

* minor rename

* Feedback. Removed use of FULL-LINE, as omitting it is more readable and it is not strictly necessary. Added an instruction that had been forgotten in a disasm test.

* Formatting
  • Loading branch information
TIHan authored Oct 20, 2022
1 parent 2b61381 commit ccd9d16
Show file tree
Hide file tree
Showing 5 changed files with 303 additions and 3 deletions.
1 change: 1 addition & 0 deletions src/coreclr/jit/lower.h
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ class Lowering final : public Phase
void LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode);
#ifdef TARGET_XARCH
void LowerPutArgStk(GenTreePutArgStk* putArgStk);
GenTree* TryLowerMulToLshSubOrLshAdd(GenTreeOp* node);
#endif // TARGET_XARCH

bool TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* parent);
Expand Down
100 changes: 100 additions & 0 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,97 @@ void Lowering::LowerStoreIndir(GenTreeStoreInd* node)
ContainCheckStoreIndir(node);
}

//----------------------------------------------------------------------------------------------
// Lowering::TryLowerMulToLshSubOrLshAdd:
//    Rewrites a multiply of a local by a constant that is one away from a power of two
//    into a shift followed by a subtract or an add:
//       MUL(X, 2^N - 1)  =>  SUB(LSH(X, N), X)
//       MUL(X, 2^N + 1)  =>  ADD(LSH(X, N), X)
//
// Arguments:
//    node - the GT_MUL node to examine
//
// Return Value:
//    'node' itself, changed in place into the GT_SUB/GT_ADD, when the rewrite is performed;
//    nullptr when the multiply is left untouched.
//
// Notes:
//    The rewrite is not attempted when:
//      - the target is x86;
//      - the node is not of integral type, or it is overflow-checked;
//      - either operand is already contained;
//      - the first operand is not a GT_LCL_VAR, or the second is not an integer constant;
//      - the constant is a power of two, or is 3, 5 or 9 (left to codegen);
//      - neither (constant + 1) nor (constant - 1) is a power of two.
//    Containment checks are run on the rewritten node and on the new GT_LSH before returning.
//
GenTree* Lowering::TryLowerMulToLshSubOrLshAdd(GenTreeOp* node)
{
    assert(node->OperIs(GT_MUL));

// We do not do this optimization in X86 as it is not recommended.
#if TARGET_X86
    return nullptr;
#endif // TARGET_X86

    if (!varTypeIsIntegral(node) || node->gtOverflow())
    {
        return nullptr;
    }

    GenTree* const lclOperand = node->gtGetOp1();
    GenTree* const cnsOperand = node->gtGetOp2();

    if (lclOperand->isContained() || cnsOperand->isContained())
    {
        return nullptr;
    }

    if (!lclOperand->OperIs(GT_LCL_VAR) || !cnsOperand->IsCnsIntOrI())
    {
        return nullptr;
    }

    GenTreeIntConCommon* const icon       = cnsOperand->AsIntConCommon();
    const ssize_t              multiplier = icon->IconValue();

    // Powers of two become a GT_LSH, and 3, 5 and 9 become a GT_LEA; both are handled in codegen.
    if (isPow2(multiplier) || (multiplier == 3) || (multiplier == 5) || (multiplier == 9))
    {
        return nullptr;
    }

    const ssize_t oneAbove = multiplier + 1;
    const ssize_t oneBelow = multiplier - 1;

    // (2^N - 1) * X == (X << N) - X, so a power of two just above the constant selects the SUB form.
    const bool subtractForm = isPow2(oneAbove);

    if (!subtractForm && !isPow2(oneBelow))
    {
        return nullptr;
    }

    const ssize_t pow2Value = subtractForm ? oneAbove : oneBelow;
    node->ChangeOper(subtractForm ? GT_SUB : GT_ADD);

    // Reuse the existing constant node as the shift count.
    const unsigned int shiftCount = genLog2(static_cast<uint64_t>(static_cast<size_t>(pow2Value)));
    icon->SetIconValue(shiftCount);

    GenTree* const shiftNode = comp->gtNewOperNode(GT_LSH, node->gtType, lclOperand, icon);
    node->gtOp1              = shiftNode;

    GenTree* const lclCopy = comp->gtClone(lclOperand);
    node->gtOp2            = lclCopy;

    // Re-thread the operands so the range reads: lclCopy, icon, lclOperand, shiftNode, node.
    BlockRange().Remove(lclOperand);
    BlockRange().Remove(icon);
    BlockRange().InsertBefore(node, lclCopy);
    BlockRange().InsertBefore(node, icon);
    BlockRange().InsertBefore(node, lclOperand);
    BlockRange().InsertBefore(node, shiftNode);

    ContainCheckBinary(node);
    ContainCheckShiftRotate(node->gtGetOp1()->AsOp());

    return node;
}

//------------------------------------------------------------------------
// LowerMul: Lower a GT_MUL/GT_MULHI/GT_MUL_LONG node.
//
Expand All @@ -120,6 +211,15 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul)
{
assert(mul->OperIsMul());

if (mul->OperIs(GT_MUL))
{
GenTree* replacementNode = TryLowerMulToLshSubOrLshAdd(mul);
if (replacementNode != nullptr)
{
return replacementNode->gtNext;
}
}

ContainCheckMul(mul);

return mul->gtNext;
Expand Down
6 changes: 3 additions & 3 deletions src/coreclr/tools/SuperFileCheck/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ static string GetFullyQualifiedEnclosingTypeName(MethodDeclarationSyntax methodD
if (namespaceDecl != null)
{
var identifiers =
namespaceDecl.DescendantTokens().Where(x => x.IsKind(SyntaxKind.IdentifierToken)).Select(x => x.ValueText);
namespaceDecl.Name.DescendantTokens().Where(x => x.IsKind(SyntaxKind.IdentifierToken)).Select(x => x.ValueText);
return $"{String.Join(".", identifiers)}.{qualifiedTypeName}";
}

Expand Down Expand Up @@ -373,8 +373,8 @@ static string PreProcessMethod(MethodDeclarationInfo methodDeclInfo, string[] ch
var methodName = methodDeclInfo.FullyQualifiedName.Replace("*", "{{.*}}"); // Change wild-card to FileCheck wild-card syntax.

// Create anchors from the first prefix.
var startAnchorText = $"// {checkPrefixes[0]}-LABEL: {methodName}";
var endAnchorText = $"// {checkPrefixes[0]}: {methodName}";
var startAnchorText = $"// {checkPrefixes[0]}-LABEL: for method {methodName}";
var endAnchorText = $"// {checkPrefixes[0]}: for method {methodName}";

// Create temp source file based on the source text of the method.
// Newlines are added to pad the text so FileCheck's error messages will correspond
Expand Down
182 changes: 182 additions & 0 deletions src/tests/JIT/opt/Multiply/IntMultiply.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Runtime.CompilerServices;

namespace CodeGenTests
{
// Disassembly-check tests for unsigned integer multiplication by a constant.
// Each helper multiplies its argument by one fixed constant. The specially prefixed
// comments inside each helper are the expected x64 instruction patterns for that method
// (presumably consumed by the disasm-check tooling; they are not ordinary comments, so
// they must be kept exactly as written).
// Every helper is marked NoInlining.
static class IntMultiply
{
// 32-bit multiply by UInt32.MaxValue; expected code is a register move followed by a negate.
[MethodImpl(MethodImplOptions.NoInlining)]
static uint UInt32_MultiplyWithUInt32MaxValue(uint value)
{
// X64: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
// X64-NEXT: neg [[REG0]]
return value * UInt32.MaxValue;
}

// 64-bit multiply by 2^32 - 1; expected code is move, shift left by 32, then subtract the original.
[MethodImpl(MethodImplOptions.NoInlining)]
static ulong UInt64_MultiplyWithUInt32MaxValue(ulong value)
{
// X64: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
// X64-NEXT: shl [[REG0]], 32
// X64-NEXT: sub [[REG0]], [[REG1]]
return value * (ulong)UInt32.MaxValue;
}

// 64-bit multiply by 2^32; expected code is move then shift left by 32.
[MethodImpl(MethodImplOptions.NoInlining)]
static ulong UInt64_MultiplyWithUInt32MaxValuePlusOne(ulong value)
{
// X64: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
// X64-NEXT: shl [[REG0]], 32
return value * ((ulong)UInt32.MaxValue + 1);
}

// 64-bit multiply by 2^32 + 1; expected code is move, shift left by 32, then add the original.
[MethodImpl(MethodImplOptions.NoInlining)]
static ulong UInt64_MultiplyWithUInt32MaxValuePlusTwo(ulong value)
{
// X64: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
// X64-NEXT: shl [[REG0]], 32
// X64-NEXT: add [[REG0]], [[REG1]]
return value * ((ulong)UInt32.MaxValue + 2);
}

// Multiply by 2; expected code is a single lea of the form [reg+reg].
[MethodImpl(MethodImplOptions.NoInlining)]
static ulong UInt64_MultiplyWith2(ulong value)
{
// X64: lea [[REG0:[a-z]+]], {{\[}}[[REG1:[a-z]+]]+[[REG1]]{{\]}}
return value * 2;
}

// Multiply by 3; expected code is a single lea of the form [reg+2*reg].
[MethodImpl(MethodImplOptions.NoInlining)]
static ulong UInt64_MultiplyWith3(ulong value)
{
// X64: lea [[REG0:[a-z]+]], {{\[}}[[REG1:[a-z]+]]+2*[[REG1]]{{\]}}
return value * 3;
}

// Multiply by 4; expected code is move then shift left by 2.
[MethodImpl(MethodImplOptions.NoInlining)]
static ulong UInt64_MultiplyWith4(ulong value)
{
// X64: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
// X64-NEXT: shl [[REG0]], 2
return value * 4;
}

// Multiply by 5; expected code is a single lea of the form [reg+4*reg].
[MethodImpl(MethodImplOptions.NoInlining)]
static ulong UInt64_MultiplyWith5(ulong value)
{
// X64: lea [[REG0:[a-z]+]], {{\[}}[[REG1:[a-z]+]]+4*[[REG1]]{{\]}}
return value * 5;
}

// Multiply by 6; expected code is lea [reg+2*reg] (times 3) followed by adding the result to itself.
[MethodImpl(MethodImplOptions.NoInlining)]
static ulong UInt64_MultiplyWith6(ulong value)
{
// X64: lea [[REG0:[a-z]+]], {{\[}}[[REG1:[a-z]+]]+2*[[REG1]]{{\]}}
// X64-NEXT: add [[REG0]], [[REG0]]
return value * 6;
}

// Multiply by 7 (2^3 - 1); expected code is move, shift left by 3, then subtract the original.
[MethodImpl(MethodImplOptions.NoInlining)]
static ulong UInt64_MultiplyWith7(ulong value)
{
// X64: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
// X64-NEXT: shl [[REG0]], 3
// X64-NEXT: sub [[REG0]], [[REG1]]
return value * 7;
}

// Multiply by 8; expected code is move then shift left by 3.
[MethodImpl(MethodImplOptions.NoInlining)]
static ulong UInt64_MultiplyWith8(ulong value)
{
// X64: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
// X64-NEXT: shl [[REG0]], 3
return value * 8;
}

// Multiply by 9; expected code is a single lea of the form [reg+8*reg].
[MethodImpl(MethodImplOptions.NoInlining)]
static ulong UInt64_MultiplyWith9(ulong value)
{
// X64: lea [[REG0:[a-z]+]], {{\[}}[[REG1:[a-z]+]]+8*[[REG1]]{{\]}}
return value * 9;
}

// Multiply by 15 (2^4 - 1); expected code is move, shift left by 4, then subtract the original.
[MethodImpl(MethodImplOptions.NoInlining)]
static ulong UInt64_MultiplyWith15(ulong value)
{
// X64: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
// X64-NEXT: shl [[REG0]], 4
// X64-NEXT: sub [[REG0]], [[REG1]]
return value * 15;
}

// Multiply by 16; expected code is move then shift left by 4.
[MethodImpl(MethodImplOptions.NoInlining)]
static ulong UInt64_MultiplyWith16(ulong value)
{
// X64: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
// X64-NEXT: shl [[REG0]], 4
return value * 16;
}

// Multiply by 17 (2^4 + 1); expected code is move, shift left by 4, then add the original.
[MethodImpl(MethodImplOptions.NoInlining)]
static ulong UInt64_MultiplyWith17(ulong value)
{
// X64: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
// X64-NEXT: shl [[REG0]], 4
// X64-NEXT: add [[REG0]], [[REG1]]
return value * 17;
}

// Functional sanity check: calls every helper with the argument 1, so the expected
// result is the multiplier itself. Returns 0 as soon as any helper returns a wrong
// value, and 100 when all of them return the expected value.
static int Main()
{
if (UInt32_MultiplyWithUInt32MaxValue(1) != UInt32.MaxValue)
return 0;

if (UInt64_MultiplyWithUInt32MaxValue(1) != (ulong)UInt32.MaxValue)
return 0;

if (UInt64_MultiplyWithUInt32MaxValuePlusOne(1) != ((ulong)UInt32.MaxValue + 1))
return 0;

if (UInt64_MultiplyWithUInt32MaxValuePlusTwo(1) != ((ulong)UInt32.MaxValue + 2))
return 0;

if (UInt64_MultiplyWith2(1) != 2)
return 0;

if (UInt64_MultiplyWith3(1) != 3)
return 0;

if (UInt64_MultiplyWith4(1) != 4)
return 0;

if (UInt64_MultiplyWith5(1) != 5)
return 0;

if (UInt64_MultiplyWith6(1) != 6)
return 0;

if (UInt64_MultiplyWith7(1) != 7)
return 0;

if (UInt64_MultiplyWith8(1) != 8)
return 0;

if (UInt64_MultiplyWith9(1) != 9)
return 0;

if (UInt64_MultiplyWith15(1) != 15)
return 0;

if (UInt64_MultiplyWith16(1) != 16)
return 0;

if (UInt64_MultiplyWith17(1) != 17)
return 0;

// Every helper returned the expected product.
return 100;
}
}
}
17 changes: 17 additions & 0 deletions src/tests/JIT/opt/Multiply/IntMultiply.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Test project for the integer-multiply disassembly checks; builds an executable. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
</PropertyGroup>
<!-- Compile the test with optimizations on and with DebugType set to None. -->
<PropertyGroup>
<DebugType>None</DebugType>
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<!-- The source file shares this project's name; HasDisasmCheck flags it as carrying disassembly check comments. -->
<Compile Include="$(MSBuildProjectName).cs">
<HasDisasmCheck>true</HasDisasmCheck>
</Compile>

<!-- Environment variables set for the test run. NOTE(review): presumably these turn off tiered
     compilation and the JIT's minimal-optimization mode so the methods are compiled fully
     optimized; confirm against the runtime configuration documentation. -->
<CLRTestEnvironmentVariable Include="COMPlus_TieredCompilation" Value="0" />
<CLRTestEnvironmentVariable Include="COMPlus_JITMinOpts" Value="0" />
</ItemGroup>
</Project>

0 comments on commit ccd9d16

Please sign in to comment.