Skip to content

Commit

Permalink
Arm64/Sve: Add SignExtendWidening* and ZeroExtendWidening* math APIs (d…
Browse files Browse the repository at this point in the history
…otnet#101743)

* Add SignExtendWidening* and ZeroExtendWidening* APIs

* Map API to instruction

* Add missing SignExtendWidening* APIs

* Add test cases

* fix bugs

* Forgot to push the template

* jit format

* Fix the LargeVectorSize
  • Loading branch information
kunalspathak authored and Ruihan-Yin committed May 30, 2024
1 parent 8121b7a commit 5ee8489
Show file tree
Hide file tree
Showing 6 changed files with 229 additions and 29 deletions.
16 changes: 11 additions & 5 deletions src/coreclr/jit/emitarm64sve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2185,7 +2185,7 @@ void emitter::emitInsSve_R_R(instruction ins,
case INS_sve_uunpkhi:
case INS_sve_uunpklo:
assert(insScalableOptsNone(sopt));
assert(insOptsScalableAtLeastHalf(opt));
assert(insOptsScalableWide(opt));
assert(isVectorRegister(reg1));
assert(isVectorRegister(reg2));
assert(isScalableVectorSize(size));
Expand Down Expand Up @@ -11805,7 +11805,6 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id)

case IF_SVE_BJ_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point exponential accelerator
case IF_SVE_CG_2A: // ........xx...... ......nnnnnddddd -- SVE reverse vector elements
case IF_SVE_CH_2A: // ........xx...... ......nnnnnddddd -- SVE unpack vector elements
case IF_SVE_HF_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point reciprocal estimate (unpredicated)
code = emitInsCodeSve(ins, fmt);
code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd
Expand All @@ -11814,6 +11813,13 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id)
dst += emitOutput_Instr(dst, code);
break;

case IF_SVE_CH_2A: // ........xx...... ......nnnnnddddd -- SVE unpack vector elements
// Builds the encoding from the base opcode, the two vector registers and the element-size field.
code = emitInsCodeSve(ins, fmt);
code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd
code |= insEncodeReg_V<9, 5>(id->idReg2()); // nnnnn
// The size field is derived from idInsOpt() + 1, not from idInsOpt() itself.
// NOTE(review): presumably idInsOpt() holds the source (narrow) arrangement and the encoding
// expects the destination (widened) one -- confirm against the insOpts enum ordering.
code |= insEncodeSveElemsize(optGetSveElemsize((insOpts)(id->idInsOpt() + 1))); // xx
dst += emitOutput_Instr(dst, code);
break;
case IF_SVE_BF_2A: // ........xx.xxiii ......nnnnnddddd -- SVE bitwise shift by immediate (unpredicated)
case IF_SVE_FT_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift and insert
case IF_SVE_FU_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift right and accumulate
Expand Down Expand Up @@ -14389,7 +14395,7 @@ void emitter::emitInsSveSanityCheck(instrDesc* id)
case IF_SVE_BJ_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point exponential accelerator
case IF_SVE_CH_2A: // ........xx...... ......nnnnnddddd -- SVE unpack vector elements
case IF_SVE_HF_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point reciprocal estimate (unpredicated)
assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
assert(insOptsScalableWide(id->idInsOpt()));
assert(isVectorRegister(id->idReg1()));
assert(isVectorRegister(id->idReg2()));
break;
Expand Down Expand Up @@ -16231,8 +16237,8 @@ void emitter::emitDispInsSveHelp(instrDesc* id)
case IF_SVE_HH_2A: // ................ ......nnnnnddddd -- SVE2 FP8 upconverts
// <Zd>.<T>, <Zn>.<Tb>
case IF_SVE_CH_2A: // ........xx...... ......nnnnnddddd -- SVE unpack vector elements
emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
emitDispSveReg(id->idReg2(), (insOpts)((unsigned)id->idInsOpt() - 1), false);
emitDispSveReg(id->idReg1(), (insOpts)(id->idInsOpt() + 1), true);
emitDispSveReg(id->idReg2(), id->idInsOpt(), false);
break;

// <Zd>.<T>, <Zn>.<T>
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,16 @@ HARDWARE_INTRINSIC(Sve, Multiply,
HARDWARE_INTRINSIC(Sve, SignExtend16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxth, INS_invalid, INS_sve_sxth, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SignExtend32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxtw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SignExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
// Widening sign-extension maps to SUNPKLO/SUNPKHI; only the 1st, 3rd and 5th instruction slots are populated.
// NOTE(review): with HW_Flag_BaseTypeFromFirstArg these slots presumably select on the source element type
// (sbyte, short, int overloads) rather than the widened return type -- confirm against the table's slot order.
HARDWARE_INTRINSIC(Sve, SignExtendWideningLower, -1, 1, true, {INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, SignExtendWideningUpper, -1, 1, true, {INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, Subtract, -1, 2, true, {INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_fsub, INS_sve_fsub}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, true, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, UnzipOdd, -1, 2, true, {INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, ZeroExtend16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxth, INS_invalid, INS_sve_uxth, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, ZeroExtend32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, ZeroExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtb, INS_invalid, INS_sve_uxtb, INS_invalid, INS_sve_uxtb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
// Widening zero-extension maps to UUNPKLO/UUNPKHI; only the 2nd, 4th and 6th instruction slots are populated.
// NOTE(review): with HW_Flag_BaseTypeFromFirstArg these slots presumably select on the source element type
// (byte, ushort, uint overloads) rather than the widened return type -- confirm against the table's slot order.
HARDWARE_INTRINSIC(Sve, ZeroExtendWideningLower, -1, 1, true, {INS_invalid, INS_sve_uunpklo, INS_invalid, INS_sve_uunpklo, INS_invalid, INS_sve_uunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, ZeroExtendWideningUpper, -1, 1, true, {INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, ZipHigh, -1, 2, true, {INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, ZipLow, -1, 2, true, {INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1276,6 +1276,48 @@ internal Arm64() { }
/// </summary>
public static unsafe Vector<double> Subtract(Vector<double> left, Vector<double> right) { throw new PlatformNotSupportedException(); }

/// SignExtendWideningLower : Unpack and extend low half

/// <summary>
/// svint16_t svunpklo[_s16](svint8_t op)
/// SUNPKLO Zresult.H, Zop.B
/// </summary>
public static unsafe Vector<short> SignExtendWideningLower(Vector<sbyte> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svunpklo[_s32](svint16_t op)
/// SUNPKLO Zresult.S, Zop.H
/// </summary>
public static unsafe Vector<int> SignExtendWideningLower(Vector<short> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svunpklo[_s64](svint32_t op)
/// SUNPKLO Zresult.D, Zop.S
/// </summary>
public static unsafe Vector<long> SignExtendWideningLower(Vector<int> value) { throw new PlatformNotSupportedException(); }


/// SignExtendWideningUpper : Unpack and extend high half

/// <summary>
/// svint16_t svunpkhi[_s16](svint8_t op)
/// SUNPKHI Zresult.H, Zop.B
/// </summary>
public static unsafe Vector<short> SignExtendWideningUpper(Vector<sbyte> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svunpkhi[_s32](svint16_t op)
/// SUNPKHI Zresult.S, Zop.H
/// </summary>
public static unsafe Vector<int> SignExtendWideningUpper(Vector<short> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svunpkhi[_s64](svint32_t op)
/// SUNPKHI Zresult.D, Zop.S
/// </summary>
public static unsafe Vector<long> SignExtendWideningUpper(Vector<int> value) { throw new PlatformNotSupportedException(); }


/// UnzipEven : Concatenate even elements from two inputs

/// <summary>
Expand Down Expand Up @@ -1469,13 +1511,52 @@ internal Arm64() { }
/// </summary>
public static unsafe Vector<ulong> ZeroExtend8(Vector<ulong> value) { throw new PlatformNotSupportedException(); }

/// ZeroExtendWideningLower : Unpack and extend low half

/// <summary>
/// svuint16_t svunpklo[_u16](svuint8_t op)
/// UUNPKLO Zresult.H, Zop.B
/// </summary>
public static unsafe Vector<ushort> ZeroExtendWideningLower(Vector<byte> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svunpklo[_u32](svuint16_t op)
/// UUNPKLO Zresult.S, Zop.H
/// </summary>
public static unsafe Vector<uint> ZeroExtendWideningLower(Vector<ushort> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svunpklo[_u64](svuint32_t op)
/// UUNPKLO Zresult.D, Zop.S
/// </summary>
public static unsafe Vector<ulong> ZeroExtendWideningLower(Vector<uint> value) { throw new PlatformNotSupportedException(); }


/// ZeroExtendWideningUpper : Unpack and extend high half

/// <summary>
/// svuint16_t svunpkhi[_u16](svuint8_t op)
/// UUNPKHI Zresult.H, Zop.B
/// </summary>
public static unsafe Vector<ushort> ZeroExtendWideningUpper(Vector<byte> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svunpkhi[_u32](svuint16_t op)
/// UUNPKHI Zresult.S, Zop.H
/// </summary>
public static unsafe Vector<uint> ZeroExtendWideningUpper(Vector<ushort> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svunpkhi[_u64](svuint32_t op)
/// UUNPKHI Zresult.D, Zop.S
/// </summary>
public static unsafe Vector<ulong> ZeroExtendWideningUpper(Vector<uint> value) { throw new PlatformNotSupportedException(); }

/// ZipHigh : Interleave elements from high halves of two inputs

/// <summary>
/// svuint8_t svzip2[_u8](svuint8_t op1, svuint8_t op2)
/// ZIP2 Zresult.B, Zop1.B, Zop2.B
/// svbool_t svzip2_b8(svbool_t op1, svbool_t op2)
/// ZIP2 Presult.B, Pop1.B, Pop2.B
/// </summary>
public static unsafe Vector<byte> ZipHigh(Vector<byte> left, Vector<byte> right) { throw new PlatformNotSupportedException(); }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1207,6 +1207,48 @@ internal Arm64() { }
/// </summary>
public static unsafe Vector<long> SignExtend8(Vector<long> value) => SignExtend8(value);


/// SignExtendWideningLower : Unpack and extend low half

// NOTE(review): the body calls the same overload it defines. Presumably the JIT recognizes this
// method as a hardware intrinsic and emits SUNPKLO instead of executing the call -- confirm.
/// <summary>
/// svint16_t svunpklo[_s16](svint8_t op)
/// SUNPKLO Zresult.H, Zop.B
/// </summary>
public static unsafe Vector<short> SignExtendWideningLower(Vector<sbyte> value) => SignExtendWideningLower(value);

/// <summary>
/// svint32_t svunpklo[_s32](svint16_t op)
/// SUNPKLO Zresult.S, Zop.H
/// </summary>
public static unsafe Vector<int> SignExtendWideningLower(Vector<short> value) => SignExtendWideningLower(value);

/// <summary>
/// svint64_t svunpklo[_s64](svint32_t op)
/// SUNPKLO Zresult.D, Zop.S
/// </summary>
public static unsafe Vector<long> SignExtendWideningLower(Vector<int> value) => SignExtendWideningLower(value);


/// SignExtendWideningUpper : Unpack and extend high half

/// <summary>
/// svint16_t svunpkhi[_s16](svint8_t op)
/// SUNPKHI Zresult.H, Zop.B
/// </summary>
public static unsafe Vector<short> SignExtendWideningUpper(Vector<sbyte> value) => SignExtendWideningUpper(value);

/// <summary>
/// svint32_t svunpkhi[_s32](svint16_t op)
/// SUNPKHI Zresult.S, Zop.H
/// </summary>
public static unsafe Vector<int> SignExtendWideningUpper(Vector<short> value) => SignExtendWideningUpper(value);

/// <summary>
/// svint64_t svunpkhi[_s64](svint32_t op)
/// SUNPKHI Zresult.D, Zop.S
/// </summary>
public static unsafe Vector<long> SignExtendWideningUpper(Vector<int> value) => SignExtendWideningUpper(value);

/// Subtract : Subtract

/// <summary>
Expand Down Expand Up @@ -1552,16 +1594,55 @@ internal Arm64() { }
/// </summary>
public static unsafe Vector<ulong> ZeroExtend8(Vector<ulong> value) => ZeroExtend8(value);

/// ZeroExtendWideningLower : Unpack and extend low half

/// <summary>
/// svuint16_t svunpklo[_u16](svuint8_t op)
/// UUNPKLO Zresult.H, Zop.B
/// </summary>
public static unsafe Vector<ushort> ZeroExtendWideningLower(Vector<byte> value) => ZeroExtendWideningLower(value);

/// <summary>
/// svuint32_t svunpklo[_u32](svuint16_t op)
/// UUNPKLO Zresult.S, Zop.H
/// </summary>
public static unsafe Vector<uint> ZeroExtendWideningLower(Vector<ushort> value) => ZeroExtendWideningLower(value);

/// <summary>
/// svuint64_t svunpklo[_u64](svuint32_t op)
/// UUNPKLO Zresult.D, Zop.S
/// </summary>
public static unsafe Vector<ulong> ZeroExtendWideningLower(Vector<uint> value) => ZeroExtendWideningLower(value);


/// ZeroExtendWideningUpper : Unpack and extend high half

/// <summary>
/// svuint16_t svunpkhi[_u16](svuint8_t op)
/// UUNPKHI Zresult.H, Zop.B
/// </summary>
public static unsafe Vector<ushort> ZeroExtendWideningUpper(Vector<byte> value) => ZeroExtendWideningUpper(value);

/// <summary>
/// svuint32_t svunpkhi[_u32](svuint16_t op)
/// UUNPKHI Zresult.S, Zop.H
/// </summary>
public static unsafe Vector<uint> ZeroExtendWideningUpper(Vector<ushort> value) => ZeroExtendWideningUpper(value);

/// <summary>
/// svuint64_t svunpkhi[_u64](svuint32_t op)
/// UUNPKHI Zresult.D, Zop.S
/// </summary>
public static unsafe Vector<ulong> ZeroExtendWideningUpper(Vector<uint> value) => ZeroExtendWideningUpper(value);

/// ZipHigh : Interleave elements from high halves of two inputs

/// <summary>
/// svuint8_t svzip2[_u8](svuint8_t op1, svuint8_t op2)
/// ZIP2 Zresult.B, Zop1.B, Zop2.B
/// svbool_t svzip2_b8(svbool_t op1, svbool_t op2)
/// ZIP2 Presult.B, Pop1.B, Pop2.B
/// </summary>


public static unsafe Vector<byte> ZipHigh(Vector<byte> left, Vector<byte> right) => ZipHigh(left, right);

/// <summary>
Expand Down
Loading

0 comments on commit 5ee8489

Please sign in to comment.