Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Start vectorizing TensorPrimitives #91596

Merged
merged 5 commits into from
Sep 7, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@
<Compile Include="System\ThrowHelper.cs" />
</ItemGroup>

<ItemGroup Condition="'$(TargetFrameworkIdentifier)' != '.NETCoreApp'">
<Compile Include="System\Numerics\Tensors\TensorPrimitives.netstandard.cs" />
</ItemGroup>

<ItemGroup Condition="'$(TargetFrameworkIdentifier)' == '.NETCoreApp'">
<Compile Include="System\Numerics\Tensors\TensorPrimitives.netcore.cs" />
</ItemGroup>

<ItemGroup>
<InternalsVisibleTo Include="System.Numerics.Tensors.Tests" Key="00240000048000009400000006020000002400005253413100040000010001004b86c4cb78549b34bab61a3b1800e23bfeb5b3ec390074041536a7e3cbd97f5f04cf0f857155a8928eaa29ebfd11cfbbad3ba70efea7bda3226c6a8d370a4cd303f714486b6ebc225985a638471e6ef571cc92a4613c00b8fa65d61ccee0cbe5f36330c9a01f4183559f1bef24cc2917c6d913e3a541333a1d05d9bed22b38cb" />
</ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,52 +4,8 @@
namespace System.Numerics.Tensors
{
/// <summary>Performs primitive tensor operations over spans of memory.</summary>
public static class TensorPrimitives
public static partial class TensorPrimitives
{
/// <summary>Adds two tensors element by element: <c><paramref name="x" /> + <paramref name="y" /></c>.</summary>
/// <param name="x">The first tensor, represented as a span.</param>
/// <param name="y">The second tensor, represented as a span.</param>
/// <param name="destination">The destination tensor, represented as a span.</param>
/// <exception cref="ArgumentException">Length of '<paramref name="x" />' must be same as length of '<paramref name="y" />'.</exception>
/// <exception cref="ArgumentException">Destination is too short.</exception>
/// <remarks>
/// This method effectively does <c><paramref name="destination" />[i] = <paramref name="x" />[i] + <paramref name="y" />[i]</c>
/// for every index of <paramref name="x" />; any extra elements of <paramref name="destination" /> are not written.
/// </remarks>
public static void Add(ReadOnlySpan<float> x, ReadOnlySpan<float> y, Span<float> destination)
{
    int length = x.Length;

    // Both inputs must describe the same number of elements.
    if (length != y.Length)
    {
        ThrowHelper.ThrowArgument_SpansMustHaveSameLength(nameof(x), nameof(y));
    }

    // The destination may be longer than the inputs, but never shorter.
    if (destination.Length < length)
    {
        ThrowHelper.ThrowArgument_DestinationTooShort();
    }

    int index = 0;
    while (index < length)
    {
        destination[index] = x[index] + y[index];
        index++;
    }
}

/// <summary>Adds a scalar to every element of a tensor: <c><paramref name="x" /> + <paramref name="y" /></c>.</summary>
/// <param name="x">The first tensor, represented as a span.</param>
/// <param name="y">The second tensor, represented as a scalar.</param>
/// <param name="destination">The destination tensor, represented as a span.</param>
/// <exception cref="ArgumentException">Destination is too short.</exception>
/// <remarks>
/// This method effectively does <c><paramref name="destination" />[i] = <paramref name="x" />[i] + <paramref name="y" /></c>
/// for every index of <paramref name="x" />; any extra elements of <paramref name="destination" /> are not written.
/// </remarks>
public static void Add(ReadOnlySpan<float> x, float y, Span<float> destination)
{
    int length = x.Length;

    // The destination may be longer than the input, but never shorter.
    if (destination.Length < length)
    {
        ThrowHelper.ThrowArgument_DestinationTooShort();
    }

    int index = 0;
    while (index < length)
    {
        destination[index] = x[index] + y;
        index++;
    }
}

/// <summary>Computes the element-wise result of: <c><paramref name="x" /> - <paramref name="y" /></c>.</summary>
/// <param name="x">The first tensor, represented as a span.</param>
/// <param name="y">The second tensor, represented as a scalar.</param>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;

namespace System.Numerics.Tensors
{
/// <summary>Performs primitive tensor operations over spans of memory.</summary>
public static partial class TensorPrimitives
{
/// <summary>Computes the element-wise result of: <c><paramref name="x" /> + <paramref name="y" /></c>.</summary>
/// <param name="x">The first tensor, represented as a span.</param>
/// <param name="y">The second tensor, represented as a span.</param>
/// <param name="destination">The destination tensor, represented as a span.</param>
/// <exception cref="ArgumentException">Length of '<paramref name="x" />' must be same as length of '<paramref name="y" />'.</exception>
/// <exception cref="ArgumentException">Destination is too short.</exception>
/// <remarks>
/// This method effectively does <c><paramref name="destination" />[i] = <paramref name="x" />[i] + <paramref name="y" />[i]</c>.
/// Only the first <c>x.Length</c> elements of <paramref name="destination" /> are written.
/// </remarks>
public static void Add(ReadOnlySpan<float> x, ReadOnlySpan<float> y, Span<float> destination)
{
    if (x.Length != y.Length)
    {
        ThrowHelper.ThrowArgument_SpansMustHaveSameLength(nameof(x), nameof(y));
    }

    if (x.Length > destination.Length)
    {
        ThrowHelper.ThrowArgument_DestinationTooShort();
    }

    // The lengths were validated above, so the loops below index through raw refs
    // rather than through the bounds-checked span indexers. No pointers are involved,
    // which is why this method does not need an unsafe context.
    ref float xRef = ref MemoryMarshal.GetReference(x);
    ref float yRef = ref MemoryMarshal.GetReference(y);
    ref float dRef = ref MemoryMarshal.GetReference(destination);
    int i = 0, oneVectorFromEnd;

    // NOTE(review): each vector iteration loads a full vector from x and y before storing,
    // so if destination partially overlaps an input the result may differ from a strictly
    // element-by-element loop - confirm whether callers may pass overlapping spans.

    // Widest vector width first. The index 'i' carries over between the sections, so each
    // narrower width only processes whatever the previous section left behind.
#if NET8_0_OR_GREATER
    if (Vector512.IsHardwareAccelerated)
    {
        // Largest starting index at which a whole Vector512<float> still fits in x.
        oneVectorFromEnd = x.Length - Vector512<float>.Count;
        while (i <= oneVectorFromEnd)
        {
            Vector512<float> sum = Vector512.LoadUnsafe(ref xRef, (uint)i) + Vector512.LoadUnsafe(ref yRef, (uint)i);
            Vector512.StoreUnsafe(sum, ref dRef, (uint)i);

            i += Vector512<float>.Count;
        }
    }
#endif

    if (Vector256.IsHardwareAccelerated)
    {
        // Largest starting index at which a whole Vector256<float> still fits in x.
        oneVectorFromEnd = x.Length - Vector256<float>.Count;
        while (i <= oneVectorFromEnd)
        {
            Vector256<float> sum = Vector256.LoadUnsafe(ref xRef, (uint)i) + Vector256.LoadUnsafe(ref yRef, (uint)i);
            Vector256.StoreUnsafe(sum, ref dRef, (uint)i);

            i += Vector256<float>.Count;
        }
    }

    if (Vector128.IsHardwareAccelerated)
    {
        // Largest starting index at which a whole Vector128<float> still fits in x.
        oneVectorFromEnd = x.Length - Vector128<float>.Count;
        while (i <= oneVectorFromEnd)
        {
            Vector128<float> sum = Vector128.LoadUnsafe(ref xRef, (uint)i) + Vector128.LoadUnsafe(ref yRef, (uint)i);
            Vector128.StoreUnsafe(sum, ref dRef, (uint)i);

            i += Vector128<float>.Count;
        }
    }

    // Scalar tail: handles the elements that did not fill a whole vector, or the entire
    // input when no vector width is hardware accelerated.
    while (i < x.Length)
    {
        Unsafe.Add(ref dRef, i) = Unsafe.Add(ref xRef, i) + Unsafe.Add(ref yRef, i);

        i++;
    }
}

/// <summary>Adds a scalar to every element of a tensor: <c><paramref name="x" /> + <paramref name="y" /></c>.</summary>
/// <param name="x">The first tensor, represented as a span.</param>
/// <param name="y">The second tensor, represented as a scalar.</param>
/// <param name="destination">The destination tensor, represented as a span.</param>
/// <exception cref="ArgumentException">Destination is too short.</exception>
/// <remarks>
/// This method effectively does <c><paramref name="destination" />[i] = <paramref name="x" />[i] + <paramref name="y" /></c>.
/// Only the first <c>x.Length</c> elements of <paramref name="destination" /> are written.
/// </remarks>
public static void Add(ReadOnlySpan<float> x, float y, Span<float> destination)
{
    int length = x.Length;

    if (destination.Length < length)
    {
        ThrowHelper.ThrowArgument_DestinationTooShort();
    }

    // Length was validated above, so elements are addressed through raw refs from here on.
    ref float source = ref MemoryMarshal.GetReference(x);
    ref float target = ref MemoryMarshal.GetReference(destination);
    int index = 0;

    // Each section below runs only when at least one whole vector of that width still fits,
    // so the broadcast of y is created only when it will actually be used. 'index' carries
    // over, letting narrower widths pick up what the wider ones left behind.
#if NET8_0_OR_GREATER
    if (Vector512.IsHardwareAccelerated && length - index >= Vector512<float>.Count)
    {
        Vector512<float> addend = Vector512.Create(y);
        int lastStart = length - Vector512<float>.Count;
        for (; index <= lastStart; index += Vector512<float>.Count)
        {
            Vector512.StoreUnsafe(Vector512.LoadUnsafe(ref source, (uint)index) + addend, ref target, (uint)index);
        }
    }
#endif

    if (Vector256.IsHardwareAccelerated && length - index >= Vector256<float>.Count)
    {
        Vector256<float> addend = Vector256.Create(y);
        int lastStart = length - Vector256<float>.Count;
        for (; index <= lastStart; index += Vector256<float>.Count)
        {
            Vector256.StoreUnsafe(Vector256.LoadUnsafe(ref source, (uint)index) + addend, ref target, (uint)index);
        }
    }

    if (Vector128.IsHardwareAccelerated && length - index >= Vector128<float>.Count)
    {
        Vector128<float> addend = Vector128.Create(y);
        int lastStart = length - Vector128<float>.Count;
        for (; index <= lastStart; index += Vector128<float>.Count)
        {
            Vector128.StoreUnsafe(Vector128.LoadUnsafe(ref source, (uint)index) + addend, ref target, (uint)index);
        }
    }

    // Scalar tail for whatever did not fill a whole vector.
    for (; index < length; index++)
    {
        Unsafe.Add(ref target, index) = Unsafe.Add(ref source, index) + y;
    }
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Runtime.InteropServices;

namespace System.Numerics.Tensors
{
/// <summary>Performs primitive tensor operations over spans of memory.</summary>
public static unsafe partial class TensorPrimitives
{
/// <summary>Adds two tensors element by element: <c><paramref name="x" /> + <paramref name="y" /></c>.</summary>
/// <param name="x">The first tensor, represented as a span.</param>
/// <param name="y">The second tensor, represented as a span.</param>
/// <param name="destination">The destination tensor, represented as a span.</param>
/// <exception cref="ArgumentException">Length of '<paramref name="x" />' must be same as length of '<paramref name="y" />'.</exception>
/// <exception cref="ArgumentException">Destination is too short.</exception>
/// <remarks>
/// This method effectively does <c><paramref name="destination" />[i] = <paramref name="x" />[i] + <paramref name="y" />[i]</c>.
/// Only the first <c>x.Length</c> elements of <paramref name="destination" /> are written.
/// </remarks>
public static void Add(ReadOnlySpan<float> x, ReadOnlySpan<float> y, Span<float> destination)
{
    int length = x.Length;

    if (length != y.Length)
    {
        ThrowHelper.ThrowArgument_SpansMustHaveSameLength(nameof(x), nameof(y));
    }

    if (destination.Length < length)
    {
        ThrowHelper.ThrowArgument_DestinationTooShort();
    }

    // Pin all three buffers so they can be addressed through raw pointers below.
    fixed (float* left = &MemoryMarshal.GetReference(x))
    fixed (float* right = &MemoryMarshal.GetReference(y))
    fixed (float* result = &MemoryMarshal.GetReference(destination))
    {
        int index = 0;

        if (Vector.IsHardwareAccelerated)
        {
            // Largest starting index at which a whole Vector<float> still fits in x;
            // negative when the input is shorter than one vector, which skips the loop.
            int lastVectorStart = length - Vector<float>.Count;
            for (; index <= lastVectorStart; index += Vector<float>.Count)
            {
                *(Vector<float>*)(result + index) = *(Vector<float>*)(left + index) + *(Vector<float>*)(right + index);
            }
        }

        // Scalar tail: the leftover elements, or everything when vectors are not accelerated.
        for (; index < length; index++)
        {
            result[index] = left[index] + right[index];
        }
    }
}

/// <summary>Adds a scalar to every element of a tensor: <c><paramref name="x" /> + <paramref name="y" /></c>.</summary>
/// <param name="x">The first tensor, represented as a span.</param>
/// <param name="y">The second tensor, represented as a scalar.</param>
/// <param name="destination">The destination tensor, represented as a span.</param>
/// <exception cref="ArgumentException">Destination is too short.</exception>
/// <remarks>
/// This method effectively does <c><paramref name="destination" />[i] = <paramref name="x" />[i] + <paramref name="y" /></c>.
/// Only the first <c>x.Length</c> elements of <paramref name="destination" /> are written.
/// </remarks>
public static void Add(ReadOnlySpan<float> x, float y, Span<float> destination)
{
    int length = x.Length;

    if (destination.Length < length)
    {
        ThrowHelper.ThrowArgument_DestinationTooShort();
    }

    // Pin both buffers so they can be addressed through raw pointers below.
    fixed (float* source = &MemoryMarshal.GetReference(x))
    fixed (float* result = &MemoryMarshal.GetReference(destination))
    {
        int index = 0;

        // Only build the broadcast vector when at least one whole vector fits in the input.
        if (Vector.IsHardwareAccelerated && length >= Vector<float>.Count)
        {
            Vector<float> addend = new Vector<float>(y);
            int lastVectorStart = length - Vector<float>.Count;
            for (; index <= lastVectorStart; index += Vector<float>.Count)
            {
                *(Vector<float>*)(result + index) = *(Vector<float>*)(source + index) + addend;
            }
        }

        // Scalar tail: the leftover elements, or everything when vectors are not accelerated.
        for (; index < length; index++)
        {
            result[index] = source[index] + y;
        }
    }
}
}
}
Loading