From 83ad644afaac23577e3563d3ec1fac1b1fde37f4 Mon Sep 17 00:00:00 2001 From: Freddy Ye Date: Wed, 4 Sep 2024 08:13:24 +0800 Subject: [PATCH] [X86][AVX10.2] Support AVX10.2-BF16 new instructions. (#101603) Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965 --- clang/include/clang/Basic/BuiltinsX86.def | 62 + clang/lib/Basic/Targets/X86.cpp | 1 + clang/lib/CodeGen/CGBuiltin.cpp | 23 + clang/lib/Headers/CMakeLists.txt | 2 + clang/lib/Headers/avx10_2_512bf16intrin.h | 565 +++ clang/lib/Headers/avx10_2bf16intrin.h | 1091 ++++++ clang/lib/Headers/immintrin.h | 2 + clang/lib/Sema/SemaX86.cpp | 9 + .../CodeGen/X86/avx10_2_512bf16-builtins.c | 1085 ++++++ clang/test/CodeGen/X86/avx10_2bf16-builtins.c | 2082 ++++++++++++ llvm/include/llvm/IR/IntrinsicsX86.td | 253 ++ .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 8 +- .../X86/MCTargetDesc/X86ATTInstPrinter.cpp | 12 +- .../X86/MCTargetDesc/X86InstPrinterCommon.cpp | 11 + .../X86/MCTargetDesc/X86IntelInstPrinter.cpp | 9 + llvm/lib/Target/X86/X86ISelLowering.cpp | 37 +- llvm/lib/Target/X86/X86InstrAVX10.td | 315 ++ llvm/lib/Target/X86/X86InstrAVX512.td | 4 +- llvm/lib/Target/X86/X86InstrFMA3Info.cpp | 35 +- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 5 + llvm/lib/Target/X86/X86InstrUtils.td | 6 +- llvm/lib/Target/X86/X86IntrinsicsInfo.h | 54 + llvm/test/CodeGen/X86/avx10.2-fma-commute.ll | 1244 +++++++ .../test/CodeGen/X86/avx10_2_512bf16-arith.ll | 587 ++++ .../CodeGen/X86/avx10_2_512bf16-intrinsics.ll | 230 ++ llvm/test/CodeGen/X86/avx10_2bf16-arith.ll | 1168 +++++++ .../CodeGen/X86/avx10_2bf16-intrinsics.ll | 602 ++++ .../MC/Disassembler/X86/avx10.2-bf16-32.txt | 3015 +++++++++++++++++ .../MC/Disassembler/X86/avx10.2-bf16-64.txt | 3015 +++++++++++++++++ llvm/test/MC/X86/avx10.2-bf16-32-att.s | 3014 ++++++++++++++++ llvm/test/MC/X86/avx10.2-bf16-32-intel.s | 3014 ++++++++++++++++ llvm/test/MC/X86/avx10.2-bf16-64-att.s | 3014 ++++++++++++++++ llvm/test/MC/X86/avx10.2-bf16-64-intel.s | 3014 ++++++++++++++++ 
llvm/test/TableGen/x86-fold-tables.inc | 494 +++ 34 files changed, 28058 insertions(+), 24 deletions(-) create mode 100644 clang/lib/Headers/avx10_2_512bf16intrin.h create mode 100644 clang/lib/Headers/avx10_2bf16intrin.h create mode 100644 clang/test/CodeGen/X86/avx10_2_512bf16-builtins.c create mode 100644 clang/test/CodeGen/X86/avx10_2bf16-builtins.c create mode 100644 llvm/test/CodeGen/X86/avx10.2-fma-commute.ll create mode 100644 llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll create mode 100644 llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll create mode 100644 llvm/test/CodeGen/X86/avx10_2bf16-arith.ll create mode 100644 llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll create mode 100644 llvm/test/MC/Disassembler/X86/avx10.2-bf16-32.txt create mode 100644 llvm/test/MC/Disassembler/X86/avx10.2-bf16-64.txt create mode 100644 llvm/test/MC/X86/avx10.2-bf16-32-att.s create mode 100644 llvm/test/MC/X86/avx10.2-bf16-32-intel.s create mode 100644 llvm/test/MC/X86/avx10.2-bf16-64-att.s create mode 100644 llvm/test/MC/X86/avx10.2-bf16-64-intel.s diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index e4aa8661b9a806..48376ee0527980 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -2261,6 +2261,68 @@ TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8_512_mask, "V32cV32xV32cUi", "nV:512:" TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8s_128_mask, "V16cV8xV16cUc", "nV:128:", "avx10.2-256") TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8s_256_mask, "V16cV16xV16cUs", "nV:256:", "avx10.2-256") TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8s_512_mask, "V32cV32xV32cUi", "nV:512:", "avx10.2-512") + +// AVX10.2 BF16 +TARGET_BUILTIN(__builtin_ia32_loadsbf16128_mask, "V8yV8yC*V8yUc", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_storesbf16128_mask, "vV8y*V8yUc", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vaddnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256") 
+TARGET_BUILTIN(__builtin_ia32_vaddnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vaddnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vdivnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vdivnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vdivnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vmaxpbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vmaxpbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vmaxpbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vminpbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vminpbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vminpbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vmulnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vmulnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vmulnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vsubnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vsubnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vsubnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcomsbf16eq, "iV8yV8y", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcomsbf16lt, "iV8yV8y", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcomsbf16neq, "iV8yV8y", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcomsbf16ge, "iV8yV8y", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcomsbf16gt, "iV8yV8y", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcomsbf16le, "iV8yV8y", "ncV:128:", "avx10.2-256") 
+TARGET_BUILTIN(__builtin_ia32_vcmppbf16512_mask,"UiV32yV32yIiUi", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcmppbf16256_mask,"UsV16yV16yIiUs", "ncV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcmppbf16128_mask,"UcV8yV8yIiUc", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vfpclasspbf16128_mask, "UcV8yIiUc", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vfpclasspbf16256_mask, "UsV16yIiUs", "ncV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vfpclasspbf16512_mask, "UiV32yIiUi", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vscalefpbf16128_mask, "V8yV8yV8yV8yUc", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vscalefpbf16256_mask, "V16yV16yV16yV16yUs", "ncV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vscalefpbf16512_mask, "V32yV32yV32yV32yUi", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vrcppbf16128_mask, "V8yV8yV8yUc", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vrcppbf16256_mask, "V16yV16yV16yUs", "ncV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vrcppbf16512_mask, "V32yV32yV32yUi", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vgetexppbf16128_mask, "V8yV8yV8yUc", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vgetexppbf16256_mask, "V16yV16yV16yUs", "ncV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vgetexppbf16512_mask, "V32yV32yV32yUi", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vrsqrtpbf16128_mask, "V8yV8yV8yUc", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vrsqrtpbf16256_mask, "V16yV16yV16yUs", "ncV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vrsqrtpbf16512_mask, "V32yV32yV32yUi", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vreducenepbf16128_mask, "V8yV8yIiV8yUc", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vreducenepbf16256_mask, "V16yV16yIiV16yUs", "ncV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vreducenepbf16512_mask, 
"V32yV32yIiV32yUi", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vrndscalenepbf16_128_mask, "V8yV8yIiV8yUc", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vrndscalenepbf16_256_mask, "V16yV16yIiV16yUs", "ncV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vrndscalenepbf16_mask, "V32yV32yIiV32yUi", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vgetmantpbf16128_mask, "V8yV8yIiV8yUc", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vgetmantpbf16256_mask, "V16yV16yIiV16yUs", "ncV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vgetmantpbf16512_mask, "V32yV32yIiV32yUi", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vsqrtnepbf16, "V8yV8y", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vsqrtnepbf16256, "V16yV16y", "ncV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vsqrtnepbf16512, "V32yV32y", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh512, "V32yV32yV32yV32y", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh256, "V16yV16yV16yV16y", "ncV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh128, "V8yV8yV8yV8y", "ncV:128:", "avx10.2-256") + #undef BUILTIN #undef TARGET_BUILTIN #undef TARGET_HEADER_BUILTIN diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index a9cbdb7b10dff8..62c382b67ad14a 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -306,6 +306,7 @@ bool X86TargetInfo::handleTargetFeatures(std::vector &Features, HasAVX10_1_512 = true; } else if (Feature == "+avx10.2-256") { HasAVX10_2 = true; + HasFullBFloat16 = true; } else if (Feature == "+avx10.2-512") { HasAVX10_2_512 = true; } else if (Feature == "+avx512cd") { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e4d169d2ad6030..786c2c224b3496 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14728,6 +14728,7 @@ Value 
*CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_storeups512_mask: return EmitX86MaskedStore(*this, Ops, Align(1)); + case X86::BI__builtin_ia32_storesbf16128_mask: case X86::BI__builtin_ia32_storesh128_mask: case X86::BI__builtin_ia32_storess128_mask: case X86::BI__builtin_ia32_storesd128_mask: @@ -14836,6 +14837,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vfmaddph512_mask: case X86::BI__builtin_ia32_vfmaddph512_maskz: case X86::BI__builtin_ia32_vfmaddph512_mask3: + case X86::BI__builtin_ia32_vfmaddnepbh128: + case X86::BI__builtin_ia32_vfmaddnepbh256: + case X86::BI__builtin_ia32_vfmaddnepbh512: case X86::BI__builtin_ia32_vfmaddps512_mask: case X86::BI__builtin_ia32_vfmaddps512_maskz: case X86::BI__builtin_ia32_vfmaddps512_mask3: @@ -14920,6 +14924,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_loaddqudi512_mask: return EmitX86MaskedLoad(*this, Ops, Align(1)); + case X86::BI__builtin_ia32_loadsbf16128_mask: case X86::BI__builtin_ia32_loadsh128_mask: case X86::BI__builtin_ia32_loadss128_mask: case X86::BI__builtin_ia32_loadsd128_mask: @@ -16074,6 +16079,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_sqrtph256: case X86::BI__builtin_ia32_sqrtph: case X86::BI__builtin_ia32_sqrtph512: + case X86::BI__builtin_ia32_vsqrtnepbf16256: + case X86::BI__builtin_ia32_vsqrtnepbf16: + case X86::BI__builtin_ia32_vsqrtnepbf16512: case X86::BI__builtin_ia32_sqrtps512: case X86::BI__builtin_ia32_sqrtpd512: { if (Ops.size() == 2) { @@ -16293,6 +16301,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_fpclassps128_mask: case X86::BI__builtin_ia32_fpclassps256_mask: case X86::BI__builtin_ia32_fpclassps512_mask: + case X86::BI__builtin_ia32_vfpclasspbf16128_mask: + case X86::BI__builtin_ia32_vfpclasspbf16256_mask: + case 
X86::BI__builtin_ia32_vfpclasspbf16512_mask: case X86::BI__builtin_ia32_fpclassph128_mask: case X86::BI__builtin_ia32_fpclassph256_mask: case X86::BI__builtin_ia32_fpclassph512_mask: @@ -16307,6 +16318,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Intrinsic::ID ID; switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_vfpclasspbf16128_mask: + ID = Intrinsic::x86_avx10_fpclass_nepbf16_128; + break; + case X86::BI__builtin_ia32_vfpclasspbf16256_mask: + ID = Intrinsic::x86_avx10_fpclass_nepbf16_256; + break; + case X86::BI__builtin_ia32_vfpclasspbf16512_mask: + ID = Intrinsic::x86_avx10_fpclass_nepbf16_512; + break; case X86::BI__builtin_ia32_fpclassph128_mask: ID = Intrinsic::x86_avx512fp16_fpclass_ph_128; break; @@ -16465,6 +16485,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vcmppd256_round_mask: case X86::BI__builtin_ia32_vcmpps256_round_mask: case X86::BI__builtin_ia32_vcmpph256_round_mask: + case X86::BI__builtin_ia32_vcmppbf16512_mask: + case X86::BI__builtin_ia32_vcmppbf16256_mask: + case X86::BI__builtin_ia32_vcmppbf16128_mask: IsMaskFCmp = true; [[fallthrough]]; case X86::BI__builtin_ia32_cmpps: diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 5a62538792f301..e928b5b142827b 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -147,10 +147,12 @@ set(x86_files amxcomplexintrin.h amxfp16intrin.h amxintrin.h + avx10_2_512bf16intrin.h avx10_2_512convertintrin.h avx10_2_512minmaxintrin.h avx10_2_512niintrin.h avx10_2_512satcvtintrin.h + avx10_2bf16intrin.h avx10_2convertintrin.h avx10_2minmaxintrin.h avx10_2niintrin.h diff --git a/clang/lib/Headers/avx10_2_512bf16intrin.h b/clang/lib/Headers/avx10_2_512bf16intrin.h new file mode 100644 index 00000000000000..392b7ae770c5b5 --- /dev/null +++ b/clang/lib/Headers/avx10_2_512bf16intrin.h @@ -0,0 +1,565 @@ +/*===----------- 
avx10_2_512bf16intrin.h - AVX10-BF16 intrinsics ---------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use <avx10_2_512bf16intrin.h> directly; include <immintrin.h> instead." +#endif + +#ifdef __SSE2__ + +#ifndef __AVX10_2_512BF16INTRIN_H +#define __AVX10_2_512BF16INTRIN_H + +/* Unaligned 512-bit bf16 vector type, used by the loadu/storeu intrinsics. */ +typedef __bf16 __m512bh_u __attribute__((__vector_size__(64), __aligned__(1))); + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS512 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \ + __min_vector_width__(512))) + +static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_setzero_pbh(void) { + return __builtin_bit_cast(__m512bh, _mm512_setzero_ps()); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_undefined_pbh(void) { + return (__m512bh)__builtin_ia32_undef512(); +} + +static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_set1_pbh(__bf16 bf) { + return (__m512bh)(__v32bf){bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, + bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, + bf, bf, bf, bf, bf, bf, bf, bf, bf, bf}; +} + +static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_set_pbh( + __bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, __bf16 bf5, __bf16 bf6, + __bf16 bf7, __bf16 bf8, __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12, + __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16, __bf16 bf17, + __bf16 bf18, __bf16 bf19, __bf16 bf20, __bf16 bf21, __bf16 bf22, + __bf16 bf23, __bf16 bf24, __bf16 bf25, __bf16 bf26, __bf16 bf27, + __bf16 bf28, __bf16 bf29, __bf16 bf30, __bf16 bf31, __bf16 bf32) { + return (__m512bh)(__v32bf){bf32, bf31, bf30, bf29, bf28, bf27, bf26, bf25, + bf24, bf23, bf22, bf21,
bf20, bf19, bf18, bf17, + bf16, bf15, bf14, bf13, bf12, bf11, bf10, bf9, + bf8, bf7, bf6, bf5, bf4, bf3, bf2, bf1}; +} + +#define _mm512_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, bf9, bf10, \ + bf11, bf12, bf13, bf14, bf15, bf16, bf17, bf18, bf19, \ + bf20, bf21, bf22, bf23, bf24, bf25, bf26, bf27, bf28, \ + bf29, bf30, bf31, bf32) \ + _mm512_set_pbh((bf32), (bf31), (bf30), (bf29), (bf28), (bf27), (bf26), \ + (bf25), (bf24), (bf23), (bf22), (bf21), (bf20), (bf19), \ + (bf18), (bf17), (bf16), (bf15), (bf14), (bf13), (bf12), \ + (bf11), (bf10), (bf9), (bf8), (bf7), (bf6), (bf5), (bf4), \ + (bf3), (bf2), (bf1)) + +static __inline__ __m512 __DEFAULT_FN_ATTRS512 +_mm512_castpbf16_ps(__m512bh __a) { + return (__m512)__a; +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS512 +_mm512_castpbf16_pd(__m512bh __a) { + return (__m512d)__a; +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_castpbf16_si512(__m512bh __a) { + return (__m512i)__a; +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_castps_pbh(__m512 __a) { + return (__m512bh)__a; +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_castpd_pbh(__m512d __a) { + return (__m512bh)__a; +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_castsi512_pbh(__m512i __a) { + return (__m512bh)__a; +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS512 +_mm512_castpbf16512_pbh128(__m512bh __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS512 +_mm512_castpbf16512_pbh256(__m512bh __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_castpbf16128_pbh512(__m128bh __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 
+_mm512_castpbf16256_pbh512(__m256bh __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_zextpbf16128_pbh512(__m128bh __a) { + return __builtin_shufflevector( + __a, (__v8bf)_mm_setzero_pbh(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_zextpbf16256_pbh512(__m256bh __a) { + return __builtin_shufflevector(__a, (__v16bf)_mm256_setzero_pbh(), 0, 1, 2, 3, + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + 29, 30, 31); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_abs_pbh(__m512bh __A) { + return (__m512bh)_mm512_and_epi32(_mm512_set1_epi32(0x7FFF7FFF), + (__m512i)__A); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_load_pbh(void const *__p) { + return *(const __m512bh *)__p; +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_loadu_pbh(void const *__p) { + struct __loadu_pbh { + __m512bh_u __v; + } __attribute__((__packed__, __may_alias__)); + return ((const struct __loadu_pbh *)__p)->__v; +} + +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_store_pbh(void *__P, + __m512bh __A) { + *(__m512bh *)__P = __A; +} + +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_storeu_pbh(void *__P, + __m512bh __A) { + struct __storeu_pbh { + __m512bh_u __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_pbh *)__P)->__v = __A; +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_blend_pbh(__mmask32 __U, __m512bh __A, __m512bh __W) { + return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, (__v32bf)__W, + (__v32bf)__A); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh 
__B) { + return (__m512bh)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I, + (__v32hi)__B); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_permutexvar_pbh(__m512i __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_addne_pbh(__m512bh __A, __m512bh __B) { + return (__m512bh)((__v32bf)__A + (__v32bf)__B); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_addne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_addne_pbh(__A, __B), (__v32bf)__W); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_addne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_addne_pbh(__A, __B), + (__v32bf)_mm512_setzero_pbh()); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_subne_pbh(__m512bh __A, __m512bh __B) { + return (__m512bh)((__v32bf)__A - (__v32bf)__B); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_subne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_subne_pbh(__A, __B), (__v32bf)__W); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_subne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_subne_pbh(__A, __B), + (__v32bf)_mm512_setzero_pbh()); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mulne_pbh(__m512bh __A, __m512bh __B) { + return (__m512bh)((__v32bf)__A * (__v32bf)__B); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_mulne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, 
(__v32bf)_mm512_mulne_pbh(__A, __B), (__v32bf)__W); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_mulne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_mulne_pbh(__A, __B), + (__v32bf)_mm512_setzero_pbh()); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_divne_pbh(__m512bh __A, __m512bh __B) { + return (__m512bh)((__v32bf)__A / (__v32bf)__B); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_divne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_divne_pbh(__A, __B), (__v32bf)__W); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_divne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_divne_pbh(__A, __B), + (__v32bf)_mm512_setzero_pbh()); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_max_pbh(__m512bh __A, + __m512bh __B) { + return (__m512bh)__builtin_ia32_vmaxpbf16512((__v32bf)__A, (__v32bf)__B); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_max_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_max_pbh(__A, __B), (__v32bf)__W); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_max_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_max_pbh(__A, __B), + (__v32bf)_mm512_setzero_pbh()); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_min_pbh(__m512bh __A, + __m512bh __B) { + return (__m512bh)__builtin_ia32_vminpbf16512((__v32bf)__A, (__v32bf)__B); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_min_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + return 
(__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_min_pbh(__A, __B), (__v32bf)__W); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_min_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_min_pbh(__A, __B), + (__v32bf)_mm512_setzero_pbh()); +} + +#define _mm512_cmp_pbh_mask(__A, __B, __P) \ + ((__mmask32)__builtin_ia32_vcmppbf16512_mask((__v32bf)(__m512bh)(__A), \ + (__v32bf)(__m512bh)(__B), \ + (int)(__P), (__mmask32) - 1)) + +#define _mm512_mask_cmp_pbh_mask(__U, __A, __B, __P) \ + ((__mmask32)__builtin_ia32_vcmppbf16512_mask((__v32bf)(__m512bh)(__A), \ + (__v32bf)(__m512bh)(__B), \ + (int)(__P), (__mmask32)(__U))) + +#define _mm512_mask_fpclass_pbh_mask(__U, __A, imm) \ + ((__mmask32)__builtin_ia32_vfpclasspbf16512_mask( \ + (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32)(__U))) + +#define _mm512_fpclass_pbh_mask(__A, imm) \ + ((__mmask32)__builtin_ia32_vfpclasspbf16512_mask( \ + (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32) - 1)) + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_scalef_pbh(__m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_vscalefpbf16512_mask( + (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_undefined_pbh(), + (__mmask32)-1); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pbh( + __m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_vscalefpbf16512_mask( + (__v32bf)__A, (__v32bf)__B, (__v32bf)__W, (__mmask32)__U); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_scalef_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_vscalefpbf16512_mask( + (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_setzero_pbh(), + (__mmask32)__U); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_rcp_pbh(__m512bh __A) { + return (__m512bh)__builtin_ia32_vrcppbf16512_mask( + (__v32bf)__A, 
(__v32bf)_mm512_undefined_pbh(), (__mmask32)-1); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_rcp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { + return (__m512bh)__builtin_ia32_vrcppbf16512_mask((__v32bf)__A, (__v32bf)__W, + (__mmask32)__U); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_rcp_pbh(__mmask32 __U, __m512bh __A) { + return (__m512bh)__builtin_ia32_vrcppbf16512_mask( + (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_getexp_pbh(__m512bh __A) { + return (__m512bh)__builtin_ia32_vgetexppbf16512_mask( + (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_getexp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { + return (__m512bh)__builtin_ia32_vgetexppbf16512_mask( + (__v32bf)__A, (__v32bf)__W, (__mmask32)__U); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_getexp_pbh(__mmask32 __U, __m512bh __A) { + return (__m512bh)__builtin_ia32_vgetexppbf16512_mask( + (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_rsqrt_pbh(__m512bh __A) { + return (__m512bh)__builtin_ia32_vrsqrtpbf16512_mask( + (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_rsqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { + return (__m512bh)__builtin_ia32_vrsqrtpbf16512_mask( + (__v32bf)__A, (__v32bf)__W, (__mmask32)__U); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_rsqrt_pbh(__mmask32 __U, __m512bh __A) { + return (__m512bh)__builtin_ia32_vrsqrtpbf16512_mask( + (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U); +} + +#define _mm512_reducene_pbh(__A, imm) \ + ((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \ + (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_undefined_pbh(), 
\ + (__mmask32) - 1)) + +#define _mm512_mask_reducene_pbh(__W, __U, __A, imm) \ + ((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \ + (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)(__m512bh)(__W), \ + (__mmask32)(__U))) + +#define _mm512_maskz_reducene_pbh(__U, __A, imm) \ + ((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \ + (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \ + (__mmask32)(__U))) + +#define _mm512_roundscalene_pbh(__A, imm) \ + ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \ + (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \ + (__mmask32) - 1)) + +#define _mm512_mask_roundscalene_pbh(__W, __U, __A, imm) \ + ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \ + (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)(__m512bh)(__W), \ + (__mmask32)(__U))) + +#define _mm512_maskz_roundscalene_pbh(__U, __A, imm) \ + ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \ + (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \ + (__mmask32)(__U))) + +#define _mm512_getmant_pbh(__A, __B, __C) \ + ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \ + (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \ + (__v32bf)_mm512_undefined_pbh(), (__mmask32) - 1)) + +#define _mm512_mask_getmant_pbh(__W, __U, __A, __B, __C) \ + ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \ + (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \ + (__v32bf)(__m512bh)(__W), (__mmask32)(__U))) + +#define _mm512_maskz_getmant_pbh(__U, __A, __B, __C) \ + ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \ + (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \ + (__v32bf)_mm512_setzero_pbh(), (__mmask32)(__U))) + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sqrt_pbh(__m512bh __A) { + return (__m512bh)__builtin_ia32_vsqrtnepbf16512((__v32bf)__A); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_sqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { + return 
(__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_sqrt_pbh(__A), (__v32bf)__W); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) { + return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, + (__v32bf)_mm512_sqrt_pbh(__A), + (__v32bf)_mm512_setzero_pbh()); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_fmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B, + (__v32bf)__C); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddne_pbh( + __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + (__v32bf)__A); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddne_pbh( + __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + (__v32bf)__C); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddne_pbh( + __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + (__v32bf)_mm512_setzero_pbh()); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_fmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B, + -(__v32bf)__C); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubne_pbh( + __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + (__v32bf)__A); +} + +static __inline__ __m512bh 
__DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubne_pbh( + __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + (__v32bf)__C); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubne_pbh( + __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + (__v32bf)_mm512_setzero_pbh()); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_fnmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B, + (__v32bf)__C); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmaddne_pbh( + __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fnmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + (__v32bf)__A); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmaddne_pbh( + __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fnmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + (__v32bf)__C); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmaddne_pbh( + __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fnmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + (__v32bf)_mm512_setzero_pbh()); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_fnmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B, + -(__v32bf)__C); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsubne_pbh( + __m512bh __A, __mmask32 __U, 
__m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fnmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + (__v32bf)__A); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsubne_pbh( + __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fnmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + (__v32bf)__C); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsubne_pbh( + __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fnmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + (__v32bf)_mm512_setzero_pbh()); +} + +#undef __DEFAULT_FN_ATTRS512 + +#endif +#endif diff --git a/clang/lib/Headers/avx10_2bf16intrin.h b/clang/lib/Headers/avx10_2bf16intrin.h new file mode 100644 index 00000000000000..0a427b9b7418b9 --- /dev/null +++ b/clang/lib/Headers/avx10_2bf16intrin.h @@ -0,0 +1,1091 @@ +/*===-------------- avx10_2bf16intrin.h - AVX10-BF16 intrinsics ------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use <avx10_2bf16intrin.h> directly; include <immintrin.h> instead." +#endif + +#ifdef __SSE2__ + +#ifndef __AVX10_2BF16INTRIN_H +#define __AVX10_2BF16INTRIN_H + +typedef __bf16 __m128bh_u __attribute__((__vector_size__(16), __aligned__(1))); +typedef __bf16 __m256bh_u __attribute__((__vector_size__(32), __aligned__(1))); + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __min_vector_width__(128))) + +static __inline __m256bh __DEFAULT_FN_ATTRS256 _mm256_setzero_pbh(void) { + return __builtin_bit_cast(__m256bh, _mm256_setzero_ps()); +} + +static __inline __m128bh __DEFAULT_FN_ATTRS128 _mm_setzero_pbh(void) { + return __builtin_bit_cast(__m128bh, _mm_setzero_ps()); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_castpbf16_ps(__m128bh __a) { + return (__m128)__a; +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_castpbf16_ps(__m256bh __a) { + return (__m256)__a; +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS256 +_mm256_castpbf16_pd(__m256bh __a) { + return (__m256d)__a; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_castpbf16_pd(__m128bh __a) { + return (__m128d)__a; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_castpbf16_si128(__m128bh __a) { + return (__m128i)__a; +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_castpbf16_si256(__m256bh __a) { + return (__m256i)__a; +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_castps_pbh(__m128 __a) { + return (__m128bh)__a; +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_castps_pbh(__m256 __a) { + return (__m256bh)__a; +} + +static __inline__ __bf16 __DEFAULT_FN_ATTRS128 _mm_cvtsbh_bf16(__m128bh __a) { + return __a[0]; +} + +static __inline__ __bf16 __DEFAULT_FN_ATTRS256 +_mm256_cvtsbh_bf16(__m256bh __a) { + return __a[0]; +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_castpd_pbh(__m128d __a) { + return (__m128bh)__a; +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_castpd_pbh(__m256d __a) { + return (__m256bh)__a; +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_castsi128_pbh(__m128i __a) { + return (__m128bh)__a; +} + 
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_castsi256_pbh(__m256i __a) { /* Reinterprets __a as __m256bh with a plain vector cast. */ + return (__m256bh)__a; +} + /* Returns elements 0-7 of __a as a __m128bh. */ +static __inline__ __m128bh __DEFAULT_FN_ATTRS256 +_mm256_castpbf16256_pbh128(__m256bh __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7); +} + /* Widens __a to 16 elements: 0-7 are copied from __a; 8-15 use shuffle index -1, which Clang documents as a don't-care element, so their contents are unspecified. */ +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_castpbf16128_pbh256(__m128bh __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, + -1, -1, -1, -1, -1); +} + /* Widens __a to 16 elements: 0-7 are copied from __a; 8-15 are taken from the _mm_setzero_pbh() operand (shuffle indices 8-15). */ +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_zextpbf16128_pbh256(__m128bh __a) { + return __builtin_shufflevector(__a, (__v8bf)_mm_setzero_pbh(), 0, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); +} + /* Returns whatever __builtin_ia32_undef256() produces, cast to __m256bh. */ +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_undefined_pbh(void) { + return (__m256bh)__builtin_ia32_undef256(); +} + /* Calls the loadsbf16128 mask builtin on __dp with an all-zero src vector and a constant mask of 1. */ +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_load_sbh(void const *__dp) { + __m128bh src = (__v8bf)_mm_setzero_pbh(); + return (__m128bh)__builtin_ia32_loadsbf16128_mask((const __v8bf *)__dp, src, + 1); +} + /* src keeps element 0 of __W and takes elements 1-7 from the zero vector (shuffle index 8); only bit 0 of __U is forwarded as the mask. */ +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_load_sbh(__m128bh __W, __mmask8 __U, const void *__A) { + __m128bh src = (__v8bf)__builtin_shufflevector( + (__v8bf)__W, (__v8bf)_mm_setzero_pbh(), 0, 8, 8, 8, 8, 8, 8, 8); + + return (__m128bh)__builtin_ia32_loadsbf16128_mask((const __v8bf *)__A, src, + __U & 1); +} + /* Calls the loadsbf16128 mask builtin on __A with an all-zero src vector; only bit 0 of __U is forwarded as the mask. */ +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_load_sbh(__mmask8 __U, const void *__A) { + return (__m128bh)__builtin_ia32_loadsbf16128_mask( + (const __v8bf *)__A, (__v8bf)_mm_setzero_pbh(), __U & 1); +} + /* Plain dereference of __p as const __m256bh *. NOTE(review): presumably __p must meet the alignment of __m256bh - confirm. */ +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_load_pbh(void const *__p) { + return *(const __m256bh *)__p; +} + /* Plain dereference of __p as const __m128bh *. NOTE(review): presumably __p must meet the alignment of __m128bh - confirm. */ +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_load_pbh(void const *__p) { + return *(const __m128bh *)__p; +} + /* Reads 256 bits from __p through a packed, may_alias struct wrapping __m256bh_u (the __aligned__(1) vector typedef), i.e. an unaligned-tolerant access type. */ +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_loadu_pbh(void const *__p) { + struct __loadu_pbh { + __m256bh_u __v; + } 
__attribute__((__packed__, __may_alias__)); + return ((const struct __loadu_pbh *)__p)->__v; +} + /* Reads 128 bits from __p through a packed, may_alias struct wrapping __m128bh_u (the __aligned__(1) vector typedef). */ +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_loadu_pbh(void const *__p) { + struct __loadu_pbh { + __m128bh_u __v; + } __attribute__((__packed__, __may_alias__)); + return ((const struct __loadu_pbh *)__p)->__v; +} + /* Writes only element 0 of __a to __dp, through a packed, may_alias struct holding a single __bf16. */ +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_store_sbh(void *__dp, + __m128bh __a) { + struct __mm_store_sbh_struct { + __bf16 __u; + } __attribute__((__packed__, __may_alias__)); + ((struct __mm_store_sbh_struct *)__dp)->__u = __a[0]; +} + /* Calls the storesbf16128 mask builtin on __W with __A; only bit 0 of __U is forwarded as the mask. */ +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sbh(void *__W, + __mmask8 __U, + __m128bh __A) { + __builtin_ia32_storesbf16128_mask((__v8bf *)__W, __A, __U & 1); +} + /* Plain store of __A through __P cast to __m256bh *. NOTE(review): presumably __P must meet the alignment of __m256bh - confirm. */ +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_store_pbh(void *__P, + __m256bh __A) { + *(__m256bh *)__P = __A; +} + /* Plain store of __A through __P cast to __m128bh *. NOTE(review): presumably __P must meet the alignment of __m128bh - confirm. */ +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_store_pbh(void *__P, + __m128bh __A) { + *(__m128bh *)__P = __A; +} + /* Writes __A to __P through a packed, may_alias struct wrapping __m256bh_u (the __aligned__(1) vector typedef). */ +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_storeu_pbh(void *__P, + __m256bh __A) { + struct __storeu_pbh { + __m256bh_u __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_pbh *)__P)->__v = __A; +} + /* Writes __A to __P through a packed, may_alias struct wrapping __m128bh_u (the __aligned__(1) vector typedef). */ +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_storeu_pbh(void *__P, + __m128bh __A) { + struct __storeu_pbh { + __m128bh_u __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_pbh *)__P)->__v = __A; +} + /* Returns __a with element 0 overwritten by element 0 of __b; elements 1-7 of __a are untouched. */ +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_move_sbh(__m128bh __a, + __m128bh __b) { + __a[0] = __b[0]; + return __a; +} + /* Passes __U, _mm_move_sbh(__A, __B) and __W to the selectsbf_128 builtin. NOTE(review): presumably bit 0 of __U chooses element 0 from the move result, otherwise from __W - confirm against the builtin. */ +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_move_sbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { + return __builtin_ia32_selectsbf_128(__U, _mm_move_sbh(__A, __B), __W); +} + /* Same select as the mask form, but with _mm_setzero_pbh() in place of a caller-supplied fallback vector. */ +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_move_sbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + return __builtin_ia32_selectsbf_128(__U, _mm_move_sbh(__A, __B), + _mm_setzero_pbh()); +} + 
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_undefined_pbh(void) { + return (__m128bh)__builtin_ia32_undef128(); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_set_sbh(__bf16 bf) { + return (__v8bf)__builtin_shufflevector( + (__v8bf){bf, bf, bf, bf, bf, bf, bf, bf}, (__v8bf)_mm_setzero_pbh(), 0, 8, + 8, 8, 8, 8, 8, 8); +} + +static __inline __m128bh __DEFAULT_FN_ATTRS128 _mm_set1_pbh(__bf16 bf) { + return (__m128bh)(__v8bf){bf, bf, bf, bf, bf, bf, bf, bf}; +} + +static __inline __m256bh __DEFAULT_FN_ATTRS256 _mm256_set1_pbh(__bf16 bf) { + return (__m256bh)(__v16bf){bf, bf, bf, bf, bf, bf, bf, bf, + bf, bf, bf, bf, bf, bf, bf, bf}; +} + +static __inline __m128bh __DEFAULT_FN_ATTRS128 +_mm_set_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, __bf16 bf5, + __bf16 bf6, __bf16 bf7, __bf16 bf8) { + return (__m128bh)(__v8bf){bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8}; +} + +static __inline __m256bh __DEFAULT_FN_ATTRS256 _mm256_set_pbh( + __bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, __bf16 bf5, __bf16 bf6, + __bf16 bf7, __bf16 bf8, __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12, + __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16) { + return (__m256bh)(__v16bf){bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, + bf9, bf10, bf11, bf12, bf13, bf14, bf15, bf16}; +} + +#define _mm_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8) \ + _mm_set_pbh((bf8), (bf7), (bf6), (bf5), (bf4), (bf3), (bf2), (bf1)) + +#define _mm256_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, bf9, bf10, \ + bf11, bf12, bf13, bf14, bf15, bf16) \ + _mm256_set_pbh((bf16), (bf15), (bf14), (bf13), (bf12), (bf11), (bf10), \ + (bf9), (bf8), (bf7), (bf6), (bf5), (bf4), (bf3), (bf2), \ + (bf1)) + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_abs_pbh(__m256bh __A) { + return (__m256bh)_mm256_and_epi32(_mm256_set1_epi32(0x7FFF7FFF), + (__m256i)__A); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_abs_pbh(__m128bh __A) { + return 
(__m128bh)_mm_and_epi32(_mm_set1_epi32(0x7FFF7FFF), (__m128i)__A); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_blend_pbh(__mmask8 __U, __m128bh __A, __m128bh __W) { + return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, (__v8bf)__W, + (__v8bf)__A); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_blend_pbh(__mmask16 __U, __m256bh __A, __m256bh __W) { + return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, (__v16bf)__W, + (__v16bf)__A); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_permutex2var_pbh(__m128bh __A, __m128i __I, __m128bh __B) { + return (__m128bh)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I, + (__v8hi)__B); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_permutex2var_pbh(__m256bh __A, __m256i __I, __m256bh __B) { + return (__m256bh)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I, + (__v16hi)__B); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_permutexvar_pbh(__m128i __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_permutexvar_pbh(__m256i __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_addne_pbh(__m256bh __A, __m256bh __B) { + return (__m256bh)((__v16bf)__A + (__v16bf)__B); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_addne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_addne_pbh(__A, __B), (__v16bf)__W); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_addne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_addne_pbh(__A, __B), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m128bh 
__DEFAULT_FN_ATTRS128 _mm_addne_pbh(__m128bh __A, + __m128bh __B) { + return (__m128bh)((__v8bf)__A + (__v8bf)__B); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_addne_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_addne_pbh(__A, __B), (__v8bf)__W); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_addne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, + (__v8bf)_mm_addne_pbh(__A, __B), + (__v8bf)_mm_setzero_pbh()); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_subne_pbh(__m256bh __A, __m256bh __B) { + return (__m256bh)((__v16bf)__A - (__v16bf)__B); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_subne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_subne_pbh(__A, __B), (__v16bf)__W); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_subne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_subne_pbh(__A, __B), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_subne_pbh(__m128bh __A, + __m128bh __B) { + return (__m128bh)((__v8bf)__A - (__v8bf)__B); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_subne_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_subne_pbh(__A, __B), (__v8bf)__W); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_subne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, + (__v8bf)_mm_subne_pbh(__A, __B), + (__v8bf)_mm_setzero_pbh()); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 
+_mm256_mulne_pbh(__m256bh __A, __m256bh __B) { + return (__m256bh)((__v16bf)__A * (__v16bf)__B); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_mulne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_mulne_pbh(__A, __B), (__v16bf)__W); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_mulne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_mulne_pbh(__A, __B), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_mulne_pbh(__m128bh __A, + __m128bh __B) { + return (__m128bh)((__v8bf)__A * (__v8bf)__B); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_mulne_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_mulne_pbh(__A, __B), (__v8bf)__W); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_mulne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, + (__v8bf)_mm_mulne_pbh(__A, __B), + (__v8bf)_mm_setzero_pbh()); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_divne_pbh(__m256bh __A, __m256bh __B) { + return (__m256bh)((__v16bf)__A / (__v16bf)__B); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_divne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_divne_pbh(__A, __B), (__v16bf)__W); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_divne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_divne_pbh(__A, __B), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 
_mm_divne_pbh(__m128bh __A, + __m128bh __B) { + return (__m128bh)((__v8bf)__A / (__v8bf)__B); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_divne_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_divne_pbh(__A, __B), (__v8bf)__W); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_divne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, + (__v8bf)_mm_divne_pbh(__A, __B), + (__v8bf)_mm_setzero_pbh()); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_max_pbh(__m256bh __A, + __m256bh __B) { + return (__m256bh)__builtin_ia32_vmaxpbf16256((__v16bf)__A, (__v16bf)__B); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_max_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_max_pbh(__A, __B), (__v16bf)__W); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_max_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_max_pbh(__A, __B), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_max_pbh(__m128bh __A, + __m128bh __B) { + return (__m128bh)__builtin_ia32_vmaxpbf16128((__v8bf)__A, (__v8bf)__B); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_max_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_max_pbh(__A, __B), (__v8bf)__W); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_max_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_max_pbh(__A, __B), (__v8bf)_mm_setzero_pbh()); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 
_mm256_min_pbh(__m256bh __A, + __m256bh __B) { + return (__m256bh)__builtin_ia32_vminpbf16256((__v16bf)__A, (__v16bf)__B); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_min_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_min_pbh(__A, __B), (__v16bf)__W); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_min_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_min_pbh(__A, __B), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_min_pbh(__m128bh __A, + __m128bh __B) { + return (__m128bh)__builtin_ia32_vminpbf16128((__v8bf)__A, (__v8bf)__B); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_min_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_min_pbh(__A, __B), (__v8bf)__W); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_min_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_min_pbh(__A, __B), (__v8bf)_mm_setzero_pbh()); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comeqsbh(__m128bh A, + __m128bh B) { + return __builtin_ia32_vcomsbf16eq((__v8bf)A, (__v8bf)B); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comltsbh(__m128bh A, + __m128bh B) { + return __builtin_ia32_vcomsbf16lt((__v8bf)A, (__v8bf)B); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comlesbh(__m128bh A, + __m128bh B) { + return __builtin_ia32_vcomsbf16le((__v8bf)A, (__v8bf)B); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comgtsbh(__m128bh A, + __m128bh B) { + return __builtin_ia32_vcomsbf16gt((__v8bf)A, (__v8bf)B); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comgesbh(__m128bh A, + __m128bh B) { + return 
__builtin_ia32_vcomsbf16ge((__v8bf)A, (__v8bf)B); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comneqsbh(__m128bh A, + __m128bh B) { + return __builtin_ia32_vcomsbf16neq((__v8bf)A, (__v8bf)B); +} + +#define _mm256_cmp_pbh_mask(__A, __B, __P) \ + ((__mmask16)__builtin_ia32_vcmppbf16256_mask((__v16bf)(__m256bh)(__A), \ + (__v16bf)(__m256bh)(__B), \ + (int)(__P), (__mmask16) - 1)) + +#define _mm256_mask_cmp_pbh_mask(__U, __A, __B, __P) \ + ((__mmask16)__builtin_ia32_vcmppbf16256_mask((__v16bf)(__m256bh)(__A), \ + (__v16bf)(__m256bh)(__B), \ + (int)(__P), (__mmask16)(__U))) + +#define _mm_cmp_pbh_mask(__A, __B, __P) \ + ((__mmask8)__builtin_ia32_vcmppbf16128_mask((__v8bf)(__m128bh)(__A), \ + (__v8bf)(__m128bh)(__B), \ + (int)(__P), (__mmask8) - 1)) + +#define _mm_mask_cmp_pbh_mask(__U, __A, __B, __P) \ + ((__mmask8)__builtin_ia32_vcmppbf16128_mask((__v8bf)(__m128bh)(__A), \ + (__v8bf)(__m128bh)(__B), \ + (int)(__P), (__mmask8)(__U))) + +#define _mm256_mask_fpclass_pbh_mask(__U, __A, imm) \ + ((__mmask16)__builtin_ia32_vfpclasspbf16256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(imm), (__mmask16)(__U))) + +#define _mm256_fpclass_pbh_mask(__A, imm) \ + ((__mmask16)__builtin_ia32_vfpclasspbf16256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(imm), (__mmask16) - 1)) + +#define _mm_mask_fpclass_pbh_mask(__U, __A, imm) \ + ((__mmask8)__builtin_ia32_vfpclasspbf16128_mask( \ + (__v8bf)(__m128bh)(__A), (int)(imm), (__mmask8)(__U))) + +#define _mm_fpclass_pbh_mask(__A, imm) \ + ((__mmask8)__builtin_ia32_vfpclasspbf16128_mask((__v8bf)(__m128bh)(__A), \ + (int)(imm), (__mmask8) - 1)) + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_scalef_pbh(__m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_vscalefpbf16256_mask( + (__v16bf)__A, (__v16bf)__B, (__v16bf)_mm256_undefined_pbh(), + (__mmask16)-1); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_pbh( + __m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + return 
(__m256bh)__builtin_ia32_vscalefpbf16256_mask( + (__v16bf)__A, (__v16bf)__B, (__v16bf)__W, (__mmask16)__U); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_scalef_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_vscalefpbf16256_mask( + (__v16bf)__A, (__v16bf)__B, (__v16bf)_mm256_setzero_pbh(), + (__mmask16)__U); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_scalef_pbh(__m128bh __A, + __m128bh __B) { + return (__m128bh)__builtin_ia32_vscalefpbf16128_mask( + (__v8bf)__A, (__v8bf)__B, (__v8bf)_mm_undefined_pbh(), (__mmask8)-1); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_scalef_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_vscalefpbf16128_mask( + (__v8bf)__A, (__v8bf)__B, (__v8bf)__W, (__mmask8)__U); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_scalef_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_vscalefpbf16128_mask( + (__v8bf)__A, (__v8bf)__B, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_rcp_pbh(__m256bh __A) { + return (__m256bh)__builtin_ia32_vrcppbf16256_mask( + (__v16bf)__A, (__v16bf)_mm256_undefined_pbh(), (__mmask16)-1); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_rcp_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { + return (__m256bh)__builtin_ia32_vrcppbf16256_mask((__v16bf)__A, (__v16bf)__W, + (__mmask16)__U); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_rcp_pbh(__mmask16 __U, __m256bh __A) { + return (__m256bh)__builtin_ia32_vrcppbf16256_mask( + (__v16bf)__A, (__v16bf)_mm256_setzero_pbh(), (__mmask16)__U); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_rcp_pbh(__m128bh __A) { + return (__m128bh)__builtin_ia32_vrcppbf16128_mask( + (__v8bf)__A, (__v8bf)_mm_undefined_pbh(), (__mmask8)-1); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 
+_mm_mask_rcp_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { + return (__m128bh)__builtin_ia32_vrcppbf16128_mask((__v8bf)__A, (__v8bf)__W, + (__mmask8)__U); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_rcp_pbh(__mmask8 __U, __m128bh __A) { + return (__m128bh)__builtin_ia32_vrcppbf16128_mask( + (__v8bf)__A, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_getexp_pbh(__m256bh __A) { + return (__m256bh)__builtin_ia32_vgetexppbf16256_mask( + (__v16bf)__A, (__v16bf)_mm256_undefined_pbh(), (__mmask16)-1); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_getexp_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { + return (__m256bh)__builtin_ia32_vgetexppbf16256_mask( + (__v16bf)__A, (__v16bf)__W, (__mmask16)__U); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_getexp_pbh(__mmask16 __U, __m256bh __A) { + return (__m256bh)__builtin_ia32_vgetexppbf16256_mask( + (__v16bf)__A, (__v16bf)_mm256_setzero_pbh(), (__mmask16)__U); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_getexp_pbh(__m128bh __A) { + return (__m128bh)__builtin_ia32_vgetexppbf16128_mask( + (__v8bf)__A, (__v8bf)_mm_undefined_pbh(), (__mmask8)-1); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_getexp_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { + return (__m128bh)__builtin_ia32_vgetexppbf16128_mask((__v8bf)__A, (__v8bf)__W, + (__mmask8)__U); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_getexp_pbh(__mmask8 __U, __m128bh __A) { + return (__m128bh)__builtin_ia32_vgetexppbf16128_mask( + (__v8bf)__A, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_rsqrt_pbh(__m256bh __A) { + return (__m256bh)__builtin_ia32_vrsqrtpbf16256_mask( + (__v16bf)__A, (__v16bf)_mm256_undefined_pbh(), (__mmask16)-1); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_rsqrt_pbh(__m256bh __W, __mmask16 __U, 
__m256bh __A) { + return (__m256bh)__builtin_ia32_vrsqrtpbf16256_mask( + (__v16bf)__A, (__v16bf)__W, (__mmask16)__U); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_rsqrt_pbh(__mmask16 __U, __m256bh __A) { + return (__m256bh)__builtin_ia32_vrsqrtpbf16256_mask( + (__v16bf)__A, (__v16bf)_mm256_setzero_pbh(), (__mmask16)__U); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_rsqrt_pbh(__m128bh __A) { + return (__m128bh)__builtin_ia32_vrsqrtpbf16128_mask( + (__v8bf)__A, (__v8bf)_mm_undefined_pbh(), (__mmask8)-1); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_rsqrt_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { + return (__m128bh)__builtin_ia32_vrsqrtpbf16128_mask((__v8bf)__A, (__v8bf)__W, + (__mmask8)__U); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_rsqrt_pbh(__mmask8 __U, __m128bh __A) { + return (__m128bh)__builtin_ia32_vrsqrtpbf16128_mask( + (__v8bf)__A, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U); +} + +#define _mm256_reducene_pbh(__A, imm) \ + ((__m256bh)__builtin_ia32_vreducenepbf16256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)_mm256_undefined_pbh(), \ + (__mmask16) - 1)) + +#define _mm256_mask_reducene_pbh(__W, __U, __A, imm) \ + ((__m256bh)__builtin_ia32_vreducenepbf16256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)(__m256bh)(__W), \ + (__mmask16)(__U))) + +#define _mm256_maskz_reducene_pbh(__U, __A, imm) \ + ((__m256bh)__builtin_ia32_vreducenepbf16256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)_mm256_setzero_pbh(), \ + (__mmask16)(__U))) + +#define _mm_reducene_pbh(__A, imm) \ + ((__m128bh)__builtin_ia32_vreducenepbf16128_mask( \ + (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)_mm_undefined_pbh(), \ + (__mmask8) - 1)) + +#define _mm_mask_reducene_pbh(__W, __U, __A, imm) \ + ((__m128bh)__builtin_ia32_vreducenepbf16128_mask( \ + (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)(__m128bh)(__W), \ + (__mmask8)(__U))) + +#define _mm_maskz_reducene_pbh(__U, 
__A, imm) \ + ((__m128bh)__builtin_ia32_vreducenepbf16128_mask( \ + (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)_mm_setzero_pbh(), \ + (__mmask8)(__U))) + +#define _mm256_roundscalene_pbh(__A, imm) \ + ((__m256bh)__builtin_ia32_vrndscalenepbf16_256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)_mm256_setzero_pbh(), \ + (__mmask16) - 1)) + +#define _mm256_mask_roundscalene_pbh(__W, __U, __A, imm) \ + ((__m256bh)__builtin_ia32_vrndscalenepbf16_256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)(__m256bh)(__W), \ + (__mmask16)(__U))) + +#define _mm256_maskz_roundscalene_pbh(__U, __A, imm) \ + ((__m256bh)__builtin_ia32_vrndscalenepbf16_256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)_mm256_setzero_pbh(), \ + (__mmask16)(__U))) + +#define _mm_roundscalene_pbh(__A, imm) \ + ((__m128bh)__builtin_ia32_vrndscalenepbf16_128_mask( \ + (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)_mm_setzero_pbh(), \ + (__mmask8) - 1)) + +#define _mm_mask_roundscalene_pbh(__W, __U, __A, imm) \ + ((__m128bh)__builtin_ia32_vrndscalenepbf16_128_mask( \ + (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)(__m128bh)(__W), \ + (__mmask8)(__U))) + +#define _mm_maskz_roundscalene_pbh(__U, __A, imm) \ + ((__m128bh)__builtin_ia32_vrndscalenepbf16_128_mask( \ + (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)_mm_setzero_pbh(), \ + (__mmask8)(__U))) + +#define _mm256_getmant_pbh(__A, __B, __C) \ + ((__m256bh)__builtin_ia32_vgetmantpbf16256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(((__C) << 2) | (__B)), \ + (__v16bf)_mm256_undefined_pbh(), (__mmask16) - 1)) + +#define _mm256_mask_getmant_pbh(__W, __U, __A, __B, __C) \ + ((__m256bh)__builtin_ia32_vgetmantpbf16256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(((__C) << 2) | (__B)), \ + (__v16bf)(__m256bh)(__W), (__mmask16)(__U))) + +#define _mm256_maskz_getmant_pbh(__U, __A, __B, __C) \ + ((__m256bh)__builtin_ia32_vgetmantpbf16256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(((__C) << 2) | (__B)), \ + (__v16bf)_mm256_setzero_pbh(), 
(__mmask16)(__U))) + +#define _mm_getmant_pbh(__A, __B, __C) \ + ((__m128bh)__builtin_ia32_vgetmantpbf16128_mask( \ + (__v8bf)(__m128bh)(__A), (int)(((__C) << 2) | (__B)), \ + (__v8bf)_mm_undefined_pbh(), (__mmask8) - 1)) + +#define _mm_mask_getmant_pbh(__W, __U, __A, __B, __C) \ + ((__m128bh)__builtin_ia32_vgetmantpbf16128_mask( \ + (__v8bf)(__m128bh)(__A), (int)(((__C) << 2) | (__B)), \ + (__v8bf)(__m128bh)(__W), (__mmask8)(__U))) + +#define _mm_maskz_getmant_pbh(__U, __A, __B, __C) \ + ((__m128bh)__builtin_ia32_vgetmantpbf16128_mask( \ + (__v8bf)(__m128bh)(__A), (int)(((__C) << 2) | (__B)), \ + (__v8bf)_mm_setzero_pbh(), (__mmask8)(__U))) + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_sqrt_pbh(__m256bh __A) { + return (__m256bh)__builtin_ia32_vsqrtnepbf16256((__v16bf)__A); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_sqrt_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_sqrt_pbh(__A), (__v16bf)__W); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_sqrt_pbh(__mmask16 __U, __m256bh __A) { + return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, + (__v16bf)_mm256_sqrt_pbh(__A), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_sqrt_pbh(__m128bh __A) { + return (__m128bh)__builtin_ia32_vsqrtnepbf16((__v8bf)__A); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_sqrt_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_sqrt_pbh(__A), (__v8bf)__W); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_sqrt_pbh(__mmask8 __U, __m128bh __A) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_sqrt_pbh(__A), (__v8bf)_mm_setzero_pbh()); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_fmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { + return 
(__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, (__v16bf)__B, + (__v16bf)__C); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddne_pbh( + __m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fmaddne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + (__v16bf)__A); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddne_pbh( + __m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fmaddne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + (__v16bf)__C); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddne_pbh( + __mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fmaddne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_fmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, (__v16bf)__B, + -(__v16bf)__C); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubne_pbh( + __m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fmsubne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + (__v16bf)__A); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubne_pbh( + __m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fmsubne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + (__v16bf)__C); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubne_pbh( + __mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + 
_mm256_fmsubne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_fnmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, -(__v16bf)__B, + (__v16bf)__C); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmaddne_pbh( + __m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fnmaddne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + (__v16bf)__A); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmaddne_pbh( + __m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fnmaddne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + (__v16bf)__C); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmaddne_pbh( + __mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fnmaddne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_fnmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, -(__v16bf)__B, + -(__v16bf)__C); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsubne_pbh( + __m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fnmsubne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + (__v16bf)__A); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsubne_pbh( + __m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fnmsubne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + 
(__v16bf)__C); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsubne_pbh( + __mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fnmsubne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmaddne_pbh(__m128bh __A, + __m128bh __B, + __m128bh __C) { + return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, (__v8bf)__B, + (__v8bf)__C); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_fmaddne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fmaddne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)__A); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask3_fmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fmaddne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)__C); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_fmaddne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fmaddne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)_mm_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmsubne_pbh(__m128bh __A, + __m128bh __B, + __m128bh __C) { + return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, (__v8bf)__B, + -(__v8bf)__C); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_fmsubne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fmsubne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)__A); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask3_fmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) { + return 
(__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fmsubne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)__C); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_fmsubne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fmsubne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)_mm_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_fnmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { + return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, -(__v8bf)__B, + (__v8bf)__C); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_fnmaddne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fnmaddne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)__A); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask3_fnmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fnmaddne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)__C); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_fnmaddne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fnmaddne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)_mm_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_fnmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { + return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, -(__v8bf)__B, + -(__v8bf)__C); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_fnmsubne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fnmsubne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)__A); +} + +static __inline__ __m128bh 
__DEFAULT_FN_ATTRS128 +_mm_mask3_fnmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fnmsubne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)__C); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_fnmsubne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fnmsubne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)_mm_setzero_pbh()); +} + +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 + +#endif +#endif diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index a922056622e79f..30fcc028958f33 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -649,6 +649,7 @@ _storebe_i64(void * __P, long long __D) { #endif #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__) +#include #include #include #include @@ -656,6 +657,7 @@ _storebe_i64(void * __P, long long __D) { #endif #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2_512__) +#include #include #include #include diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp index 311e574537059d..233a068c8574ce 100644 --- a/clang/lib/Sema/SemaX86.cpp +++ b/clang/lib/Sema/SemaX86.cpp @@ -875,6 +875,9 @@ bool SemaX86::CheckBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case X86::BI__builtin_ia32_rndscaleps_mask: case X86::BI__builtin_ia32_rndscalepd_mask: case X86::BI__builtin_ia32_rndscaleph_mask: + case X86::BI__builtin_ia32_vrndscalenepbf16_128_mask: + case X86::BI__builtin_ia32_vrndscalenepbf16_256_mask: + case X86::BI__builtin_ia32_vrndscalenepbf16_mask: case X86::BI__builtin_ia32_reducepd128_mask: case X86::BI__builtin_ia32_reducepd256_mask: case X86::BI__builtin_ia32_reducepd512_mask: @@ -884,6 +887,9 @@ bool SemaX86::CheckBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case 
X86::BI__builtin_ia32_reduceph128_mask: case X86::BI__builtin_ia32_reduceph256_mask: case X86::BI__builtin_ia32_reduceph512_mask: + case X86::BI__builtin_ia32_vreducenepbf16128_mask: + case X86::BI__builtin_ia32_vreducenepbf16256_mask: + case X86::BI__builtin_ia32_vreducenepbf16512_mask: case X86::BI__builtin_ia32_vreducepd256_round_mask: case X86::BI__builtin_ia32_vreduceps256_round_mask: case X86::BI__builtin_ia32_vreduceph256_round_mask: @@ -911,6 +917,9 @@ bool SemaX86::CheckBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case X86::BI__builtin_ia32_fpclassph128_mask: case X86::BI__builtin_ia32_fpclassph256_mask: case X86::BI__builtin_ia32_fpclassph512_mask: + case X86::BI__builtin_ia32_vfpclasspbf16128_mask: + case X86::BI__builtin_ia32_vfpclasspbf16256_mask: + case X86::BI__builtin_ia32_vfpclasspbf16512_mask: case X86::BI__builtin_ia32_fpclasssd_mask: case X86::BI__builtin_ia32_fpclassss_mask: case X86::BI__builtin_ia32_fpclasssh_mask: diff --git a/clang/test/CodeGen/X86/avx10_2_512bf16-builtins.c b/clang/test/CodeGen/X86/avx10_2_512bf16-builtins.c new file mode 100644 index 00000000000000..b00859c174fbab --- /dev/null +++ b/clang/test/CodeGen/X86/avx10_2_512bf16-builtins.c @@ -0,0 +1,1085 @@ +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2-512 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2-512 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s + +#include + +__m512bh test_mm512_setzero_pbh() { + // CHECK-LABEL: @test_mm512_setzero_pbh + // CHECK: zeroinitializer + return _mm512_setzero_pbh(); +} + +__m512bh test_mm512_undefined_pbh(void) { + // CHECK-LABEL: @test_mm512_undefined_pbh + // CHECK: ret <32 x bfloat> zeroinitializer + return _mm512_undefined_pbh(); +} + +__m512bh test_mm512_set1_pbh(__bf16 h) { + // CHECK-LABEL: 
@test_mm512_set1_pbh + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 0 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 1 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 2 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 3 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 4 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 5 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 6 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 7 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 8 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 9 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 10 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 11 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 12 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 13 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 14 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 15 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 16 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 17 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 18 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 19 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 20 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 21 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 22 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 23 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 24 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 25 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 26 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 27 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 28 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 29 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 30 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 31 + return _mm512_set1_pbh(h); +} + +__m512bh test_mm512_set_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, + __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8, + __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12, + __bf16 bf13, 
__bf16 bf14, __bf16 bf15, __bf16 bf16, + __bf16 bf17, __bf16 bf18, __bf16 bf19, __bf16 bf20, + __bf16 bf21, __bf16 bf22, __bf16 bf23, __bf16 bf24, + __bf16 bf25, __bf16 bf26, __bf16 bf27, __bf16 bf28, + __bf16 bf29, __bf16 bf30, __bf16 bf31, __bf16 bf32) { + // CHECK-LABEL: @test_mm512_set_pbh + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 0 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 1 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 2 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 3 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 4 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 5 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 6 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 7 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 8 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 9 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 10 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 11 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 12 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 13 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 14 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 15 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 16 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 17 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 18 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 19 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 20 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 21 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 22 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 23 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 24 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 25 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 26 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 27 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 28 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 29 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 30 + // 
CHECK: insertelement <32 x bfloat> {{.*}}, i32 31 + return _mm512_set_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, + bf9, bf10, bf11, bf12, bf13, bf14, bf15, bf16, + bf17, bf18, bf19, bf20, bf21, bf22, bf23, bf24, + bf25, bf26, bf27, bf28, bf29, bf30, bf31, bf32); +} + +__m512bh test_mm512_setr_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, + __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8, + __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12, + __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16, + __bf16 bf17, __bf16 bf18, __bf16 bf19, __bf16 bf20, + __bf16 bf21, __bf16 bf22, __bf16 bf23, __bf16 bf24, + __bf16 bf25, __bf16 bf26, __bf16 bf27, __bf16 bf28, + __bf16 bf29, __bf16 bf30, __bf16 bf31, __bf16 bf32) { + // CHECK-LABEL: @test_mm512_setr_pbh + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 0 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 1 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 2 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 3 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 4 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 5 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 6 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 7 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 8 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 9 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 10 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 11 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 12 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 13 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 14 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 15 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 16 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 17 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 18 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 19 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 20 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 21 + // CHECK: insertelement <32 x 
bfloat> {{.*}}, i32 22 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 23 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 24 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 25 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 26 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 27 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 28 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 29 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 30 + // CHECK: insertelement <32 x bfloat> {{.*}}, i32 31 + return _mm512_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, + bf9, bf10, bf11, bf12, bf13, bf14, bf15, bf16, + bf17, bf18, bf19, bf20, bf21, bf22, bf23, bf24, + bf25, bf26, bf27, bf28, bf29, bf30, bf31, bf32); +} + +__m512 test_mm512_castpbf16_ps(__m512bh A) { + // CHECK-LABEL: test_mm512_castpbf16_ps + // CHECK: bitcast <32 x bfloat> %{{.*}} to <16 x float> + return _mm512_castpbf16_ps(A); +} + +__m512d test_mm512_castpbf16_pd(__m512bh A) { + // CHECK-LABEL: test_mm512_castpbf16_pd + // CHECK: bitcast <32 x bfloat> %{{.*}} to <8 x double> + return _mm512_castpbf16_pd(A); +} + +__m512i test_mm512_castpbf16_si512(__m512bh A) { + // CHECK-LABEL: test_mm512_castpbf16_si512 + // CHECK: bitcast <32 x bfloat> %{{.*}} to <8 x i64> + return _mm512_castpbf16_si512(A); +} + +__m512bh test_mm512_castps_pbh(__m512 A) { + // CHECK-LABEL: test_mm512_castps_pbh + // CHECK: bitcast <16 x float> %{{.*}} to <32 x bfloat> + return _mm512_castps_pbh(A); +} + +__m512bh test_mm512_castpd_pbh(__m512d A) { + // CHECK-LABEL: test_mm512_castpd_pbh + // CHECK: bitcast <8 x double> %{{.*}} to <32 x bfloat> + return _mm512_castpd_pbh(A); +} + +__m512bh test_mm512_castsi512_pbh(__m512i A) { + // CHECK-LABEL: test_mm512_castsi512_pbh + // CHECK: bitcast <8 x i64> %{{.*}} to <32 x bfloat> + return _mm512_castsi512_pbh(A); +} + +__m128bh test_mm512_castpbf16512_pbh128(__m512bh __a) { + // CHECK-LABEL: test_mm512_castpbf16512_pbh128 + // CHECK: shufflevector <32 x bfloat> %{{.*}}, <32 
x bfloat> %{{.*}}, <8 x i32> + return _mm512_castpbf16512_pbh128(__a); +} + +__m256bh test_mm512_castpbf16512_pbh256(__m512bh __a) { + // CHECK-LABEL: test_mm512_castpbf16512_pbh256 + // CHECK: shufflevector <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <16 x i32> + return _mm512_castpbf16512_pbh256(__a); +} + +__m512bh test_mm512_castpbf16128_pbh512(__m128bh __a) { + // CHECK-LABEL: test_mm512_castpbf16128_pbh512 + // CHECK: shufflevector <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <32 x i32> + return _mm512_castpbf16128_pbh512(__a); +} + +__m512bh test_mm512_castpbf16256_pbh512(__m256bh __a) { + // CHECK-LABEL: test_mm512_castpbf16256_pbh512 + // CHECK: shufflevector <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <32 x i32> + return _mm512_castpbf16256_pbh512(__a); +} + +__m512bh test_mm512_zextpbf16128_pbh512(__m128bh __a) { + // CHECK-LABEL: test_mm512_zextpbf16128_pbh512 + // CHECK: shufflevector <8 x bfloat> %{{.*}}, <8 x bfloat> {{.*}}, <32 x i32> + return _mm512_zextpbf16128_pbh512(__a); +} + +__m512bh test_mm512_zextpbf16256_pbh512(__m256bh __a) { + // CHECK-LABEL: test_mm512_zextpbf16256_pbh512 + // CHECK: shufflevector <16 x bfloat> %{{.*}}, <16 x bfloat> {{.*}}, <32 x i32> + return _mm512_zextpbf16256_pbh512(__a); +} + +__m512bh test_mm512_abs_pbh(__m512bh a) { + // CHECK-LABEL: @test_mm512_abs_pbh + // CHECK: and <16 x i32> + return _mm512_abs_pbh(a); +} + +// VMOVSH + +__m512bh test_mm512_load_pbh(void *p) { + // CHECK-LABEL: @test_mm512_load_pbh + // CHECK: load <32 x bfloat>, ptr %{{.*}}, align 64 + return _mm512_load_pbh(p); +} + +__m512bh test_mm512_loadu_pbh(void *p) { + // CHECK-LABEL: @test_mm512_loadu_pbh + // CHECK: load <32 x bfloat>, ptr {{.*}}, align 1{{$}} + return _mm512_loadu_pbh(p); +} + +void test_mm512_store_pbh(void *p, __m512bh a) { + // CHECK-LABEL: @test_mm512_store_pbh + // CHECK: store <32 x bfloat> %{{.*}}, ptr %{{.*}}, align 64 + _mm512_store_pbh(p, a); +} + +void test_mm512_storeu_pbh(void *p, __m512bh a) { + // CHECK-LABEL: 
@test_mm512_storeu_pbh + // CHECK: store <32 x bfloat> %{{.*}}, ptr %{{.*}}, align 1{{$}} + // CHECK-NEXT: ret void + _mm512_storeu_pbh(p, a); +} + +__m512bh test_mm512_mask_blend_pbh(__mmask32 __U, __m512bh __A, __m512bh __W) { + // CHECK-LABEL: @test_mm512_mask_blend_pbh + // CHECK: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1> + // CHECK: %{{.*}} = select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_mask_blend_pbh(__U, __A, __W); +} + +__m512bh test_mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) { + // CHECK-LABEL: @test_mm512_permutex2var_pbh + // CHECK: %{{.*}} = bitcast <32 x bfloat> %{{.*}} to <32 x i16> + // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <32 x i16> + // CHECK: %{{.*}} = bitcast <32 x bfloat> %{{.*}} to <32 x i16> + // CHECK: %{{.*}} = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}) + // CHECK: %{{.*}} = bitcast <32 x i16> %{{.*}} to <32 x bfloat> + return _mm512_permutex2var_pbh(__A, __I, __B); +} + +__m512bh test_mm512_permutexvar_epi16(__m512i __A, __m512bh __B) { + // CHECK-LABEL: @test_mm512_permutexvar_epi16 + // CHECK: %{{.*}} = bitcast <32 x bfloat> %{{.*}} to <32 x i16> + // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <32 x i16> + // CHECK: %{{.*}} = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) + // CHECK: %{{.*}} = bitcast <32 x i16> %{{.*}} to <32 x bfloat> + return _mm512_permutexvar_pbh(__A, __B); +} + +__m512bh test_mm512_addne_pbh(__m512bh __A, __m512bh __B) { + // CHECK-LABEL: @test_mm512_addne_pbh + // CHECK: %{{.*}} = fadd <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_addne_pbh(__A, __B); +} + +__m512bh test_mm512_mask_addne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + // CHECK: %{{.*}} = fadd <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_mask_addne_pbh(__W, __U, __A, 
__B); +} + +__m512bh test_mm512_maskz_addne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + // CHECK: %{{.*}} = fadd <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_maskz_addne_pbh(__U, __A, __B); +} + +__m512bh test_mm512_subne_pbh(__m512bh __A, __m512bh __B) { + // CHECK-LABEL: @test_mm512_subne_pbh + // CHECK: %{{.*}} = fsub <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_subne_pbh(__A, __B); +} + +__m512bh test_mm512_mask_subne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + // CHECK: %{{.*}} = fsub <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_mask_subne_pbh(__W, __U, __A, __B); +} + +__m512bh test_mm512_maskz_subne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + // CHECK: %{{.*}} = fsub <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_maskz_subne_pbh(__U, __A, __B); +} + +__m512bh test_mm512_mulne_pbh(__m512bh __A, __m512bh __B) { + // CHECK-LABEL: @test_mm512_mulne_pbh + // CHECK: %{{.*}} = fmul <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_mulne_pbh(__A, __B); +} + +__m512bh test_mm512_mask_mulne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + // CHECK: %{{.*}} = fmul <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_mask_mulne_pbh(__W, __U, __A, __B); +} + +__m512bh test_mm512_maskz_mulne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + // CHECK: %{{.*}} = fmul <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_maskz_mulne_pbh(__U, __A, __B); +} + +__m512bh test_mm512_divne_pbh(__m512bh __A, __m512bh __B) { + // CHECK-LABEL: @test_mm512_divne_pbh + // CHECK: %{{.*}} = fdiv <32 x bfloat> %{{.*}}, %{{.*}} + return 
_mm512_divne_pbh(__A, __B); +} + +__m512bh test_mm512_mask_divne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + // CHECK: %{{.*}} = fdiv <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_mask_divne_pbh(__W, __U, __A, __B); +} + +__m512bh test_mm512_maskz_divne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + // CHECK: %{{.*}} = fdiv <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_maskz_divne_pbh(__U, __A, __B); +} + +__m512bh test_mm512_max_pbh(__m512bh __A, __m512bh __B) { + // CHECK-LABEL: @test_mm512_max_pbh + // CHECK: @llvm.x86.avx10.vmaxpbf16512( + return _mm512_max_pbh(__A, __B); +} + +__m512bh test_mm512_mask_max_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + // CHECK: @llvm.x86.avx10.vmaxpbf16512 + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_mask_max_pbh(__W, __U, __A, __B); +} + +__m512bh test_mm512_maskz_max_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + // CHECK: @llvm.x86.avx10.vmaxpbf16512 + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_maskz_max_pbh(__U, __A, __B); +} + +__m512bh test_mm512_min_pbh(__m512bh __A, __m512bh __B) { + // CHECK-LABEL: @test_mm512_min_pbh + // CHECK: @llvm.x86.avx10.vminpbf16512( + return _mm512_min_pbh(__A, __B); +} + +__m512bh test_mm512_mask_min_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + // CHECK: @llvm.x86.avx10.vminpbf16512 + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_mask_min_pbh(__W, __U, __A, __B); +} + +__m512bh test_mm512_maskz_min_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + // CHECK: @llvm.x86.avx10.vminpbf16512 + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_maskz_min_pbh(__U, 
__A, __B); +} + +__mmask32 test_mm512_cmp_pbh_mask_eq_oq(__m512bh a, __m512bh b) { + // CHECK-LABEL: @test_mm512_cmp_pbh_mask_eq_oq + // CHECK: fcmp oeq <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_EQ_OQ); +} + +__mmask32 test_mm512_cmp_pbh_mask_lt_os(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_lt_os + // CHECK: fcmp olt <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_LT_OS); +} + +__mmask32 test_mm512_cmp_pbh_mask_le_os(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_le_os + // CHECK: fcmp ole <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_LE_OS); +} + +__mmask32 test_mm512_cmp_pbh_mask_unord_q(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_unord_q + // CHECK: fcmp uno <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_UNORD_Q); +} + +__mmask32 test_mm512_cmp_pbh_mask_neq_uq(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_neq_uq + // CHECK: fcmp une <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_NEQ_UQ); +} + +__mmask32 test_mm512_cmp_pbh_mask_nlt_us(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_nlt_us + // CHECK: fcmp uge <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_NLT_US); +} + +__mmask32 test_mm512_cmp_pbh_mask_nle_us(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_nle_us + // CHECK: fcmp ugt <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_NLE_US); +} + +__mmask32 test_mm512_cmp_pbh_mask_ord_q(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_ord_q + // CHECK: fcmp ord <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_ORD_Q); +} + +__mmask32 test_mm512_cmp_pbh_mask_eq_uq(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_eq_uq + // CHECK: fcmp ueq <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, 
_CMP_EQ_UQ); +} + +__mmask32 test_mm512_cmp_pbh_mask_nge_us(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_nge_us + // CHECK: fcmp ult <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_NGE_US); +} + +__mmask32 test_mm512_cmp_pbh_mask_ngt_us(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_ngt_us + // CHECK: fcmp ule <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_NGT_US); +} + +__mmask32 test_mm512_cmp_pbh_mask_false_oq(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_false_oq + // CHECK: fcmp false <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_FALSE_OQ); +} + +__mmask32 test_mm512_cmp_pbh_mask_neq_oq(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_neq_oq + // CHECK: fcmp one <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_NEQ_OQ); +} + +__mmask32 test_mm512_cmp_pbh_mask_ge_os(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_ge_os + // CHECK: fcmp oge <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_GE_OS); +} + +__mmask32 test_mm512_cmp_pbh_mask_gt_os(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_gt_os + // CHECK: fcmp ogt <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_GT_OS); +} + +__mmask32 test_mm512_cmp_pbh_mask_true_uq(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_true_uq + // CHECK: fcmp true <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_TRUE_UQ); +} + +__mmask32 test_mm512_cmp_pbh_mask_eq_os(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_eq_os + // CHECK: fcmp oeq <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_EQ_OS); +} + +__mmask32 test_mm512_cmp_pbh_mask_lt_oq(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_lt_oq + // CHECK: fcmp olt <32 x bfloat> %{{.*}}, %{{.*}} + return 
_mm512_cmp_pbh_mask(a, b, _CMP_LT_OQ); +} + +__mmask32 test_mm512_cmp_pbh_mask_le_oq(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_le_oq + // CHECK: fcmp ole <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_LE_OQ); +} + +__mmask32 test_mm512_cmp_pbh_mask_unord_s(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_unord_s + // CHECK: fcmp uno <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_UNORD_S); +} + +__mmask32 test_mm512_cmp_pbh_mask_neq_us(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_neq_us + // CHECK: fcmp une <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_NEQ_US); +} + +__mmask32 test_mm512_cmp_pbh_mask_nlt_uq(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_nlt_uq + // CHECK: fcmp uge <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_NLT_UQ); +} + +__mmask32 test_mm512_cmp_pbh_mask_nle_uq(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_nle_uq + // CHECK: fcmp ugt <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_NLE_UQ); +} + +__mmask32 test_mm512_cmp_pbh_mask_ord_s(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_ord_s + // CHECK: fcmp ord <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_ORD_S); +} + +__mmask32 test_mm512_cmp_pbh_mask_eq_us(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_eq_us + // CHECK: fcmp ueq <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_EQ_US); +} + +__mmask32 test_mm512_cmp_pbh_mask_nge_uq(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_nge_uq + // CHECK: fcmp ult <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_NGE_UQ); +} + +__mmask32 test_mm512_cmp_pbh_mask_ngt_uq(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_ngt_uq + // CHECK: fcmp ule <32 x bfloat> %{{.*}}, %{{.*}} + return 
_mm512_cmp_pbh_mask(a, b, _CMP_NGT_UQ); +} + +__mmask32 test_mm512_cmp_pbh_mask_false_os(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_false_os + // CHECK: fcmp false <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_FALSE_OS); +} + +__mmask32 test_mm512_cmp_pbh_mask_neq_os(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_neq_os + // CHECK: fcmp one <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_NEQ_OS); +} + +__mmask32 test_mm512_cmp_pbh_mask_ge_oq(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_ge_oq + // CHECK: fcmp oge <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_GE_OQ); +} + +__mmask32 test_mm512_cmp_pbh_mask_gt_oq(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_gt_oq + // CHECK: fcmp ogt <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_GT_OQ); +} + +__mmask32 test_mm512_cmp_pbh_mask_true_us(__m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_cmp_pbh_mask_true_us + // CHECK: fcmp true <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_cmp_pbh_mask(a, b, _CMP_TRUE_US); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_eq_oq(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: @test_mm512_mask_cmp_pbh_mask_eq_oq + // CHECK: fcmp oeq <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OQ); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_lt_os(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_lt_os + // CHECK: fcmp olt <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OS); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_le_os(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_le_os + // CHECK: fcmp ole <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return 
_mm512_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OS); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_unord_q(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_unord_q + // CHECK: fcmp uno <32 x bfloat> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_Q); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_neq_uq(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_neq_uq + // CHECK: fcmp une <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_UQ); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_nlt_us(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nlt_us + // CHECK: fcmp uge <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_US); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_nle_us(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nle_us + // CHECK: fcmp ugt <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_US); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_ord_q(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ord_q + // CHECK: fcmp ord <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_Q); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_eq_uq(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_eq_uq + // CHECK: fcmp ueq <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_UQ); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_nge_us(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nge_us + // CHECK: fcmp ult <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 
x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_US); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_ngt_us(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ngt_us + // CHECK: fcmp ule <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_US); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_false_oq(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_false_oq + // CHECK: fcmp false <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OQ); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_neq_oq(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_neq_oq + // CHECK: fcmp one <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OQ); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_ge_os(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ge_os + // CHECK: fcmp oge <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OS); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_gt_os(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_gt_os + // CHECK: fcmp ogt <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OS); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_true_uq(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_true_uq + // CHECK: fcmp true <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_UQ); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_eq_os(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: 
test_mm512_mask_cmp_pbh_mask_eq_os + // CHECK: fcmp oeq <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OS); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_lt_oq(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_lt_oq + // CHECK: fcmp olt <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OQ); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_le_oq(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_le_oq + // CHECK: fcmp ole <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OQ); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_unord_s(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_unord_s + // CHECK: fcmp uno <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_S); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_neq_us(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_neq_us + // CHECK: fcmp une <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_US); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_nlt_uq(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nlt_uq + // CHECK: fcmp uge <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_UQ); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_nle_uq(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nle_uq + // CHECK: fcmp ugt <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_UQ); +} + +__mmask32 
test_mm512_mask_cmp_pbh_mask_ord_s(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ord_s + // CHECK: fcmp ord <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_S); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_eq_us(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_eq_us + // CHECK: fcmp ueq <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_US); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_nge_uq(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nge_uq + // CHECK: fcmp ult <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_UQ); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_ngt_uq(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ngt_uq + // CHECK: fcmp ule <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_UQ); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_false_os(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_false_os + // CHECK: fcmp false <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OS); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_neq_os(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_neq_os + // CHECK: fcmp one <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OS); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_ge_oq(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ge_oq + // CHECK: fcmp oge <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, 
%{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OQ); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_gt_oq(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_gt_oq + // CHECK: fcmp ogt <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OQ); +} + +__mmask32 test_mm512_mask_cmp_pbh_mask_true_us(__mmask32 m, __m512bh a, __m512bh b) { + // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_true_us + // CHECK: fcmp true <32 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <32 x i1> %{{.*}}, %{{.*}} + return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_US); +} + +__mmask32 test_mm512_mask_fpclass_pbh_mask(__mmask32 __U, __m512bh __A) { + // CHECK-LABEL: @test_mm512_mask_fpclass_pbh_mask + // CHECK: @llvm.x86.avx10.fpclass.nepbf16.512 + return _mm512_mask_fpclass_pbh_mask(__U, __A, 4); +} + +__mmask32 test_mm512_fpclass_pbh_mask(__m512bh __A) { + // CHECK-LABEL: @test_mm512_fpclass_pbh_mask + // CHECK: @llvm.x86.avx10.fpclass.nepbf16.512 + return _mm512_fpclass_pbh_mask(__A, 4); +} + +__m512bh test_mm512_scalef_pbh(__m512bh __A, __m512bh __B) { + // CHECK-LABEL: @test_mm512_scalef_pbh + // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.512 + return _mm512_scalef_pbh(__A, __B); +} + +__m512bh test_mm512_mask_scalef_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + // CHECK-LABEL: @test_mm512_mask_scalef_pbh + // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.512 + return _mm512_mask_scalef_pbh(__W, __U, __A, __B); +} + +__m512bh test_mm512_maskz_scalef_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + // CHECK-LABEL: @test_mm512_maskz_scalef_pbh + // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.512 + return _mm512_maskz_scalef_pbh(__U, __A, __B); +} + +__m512bh test_mm512_rcp_pbh(__m512bh __A) { + // CHECK-LABEL: @test_mm512_rcp_pbh + // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.512 + return _mm512_rcp_pbh(__A); +} + +__m512bh test_mm512_mask_rcp_pbh(__m512bh __W, 
__mmask32 __U, __m512bh __A) { + // CHECK-LABEL: @test_mm512_mask_rcp_pbh + // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.512 + return (__m512bh)_mm512_mask_rcp_pbh(__W, __U, __A); +} + +__m512bh test_mm512_maskz_rcp_pbh(__mmask32 __U, __m512bh __A) { + // CHECK-LABEL: @test_mm512_maskz_rcp_pbh + // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.512 + return _mm512_maskz_rcp_pbh(__U, __A); +} + +__m512bh test_mm512_getexp_pbh(__m512bh __A) { + // CHECK-LABEL: @test_mm512_getexp_pbh + // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.512 + return _mm512_getexp_pbh(__A); +} + +__m512bh test_mm512_mask_getexp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { + // CHECK-LABEL: @test_mm512_mask_getexp_pbh + // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.512 + return _mm512_mask_getexp_pbh(__W, __U, __A); +} + +__m512bh test_mm512_maskz_getexp_pbh(__mmask32 __U, __m512bh __A) { + // CHECK-LABEL: @test_mm512_maskz_getexp_pbh + // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.512 + return _mm512_maskz_getexp_pbh(__U, __A); +} + +__m512bh test_mm512_rsqrt_pbh(__m512bh __A) { + // CHECK-LABEL: @test_mm512_rsqrt_pbh + // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.512 + return _mm512_rsqrt_pbh(__A); +} + +__m512bh test_mm512_mask_rsqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { + // CHECK-LABEL: @test_mm512_mask_rsqrt_pbh + // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.512 + return (__m512bh)_mm512_mask_rsqrt_pbh(__W, __U, __A); +} + +__m512bh test_mm512_maskz_rsqrt_pbh(__mmask32 __U, __m512bh __A) { + // CHECK-LABEL: @test_mm512_maskz_rsqrt_pbh + // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.512 + return _mm512_maskz_rsqrt_pbh(__U, __A); +} + +__m512bh test_mm512_reducene_pbh(__m512bh __A) { + // CHECK-LABEL: @test_mm512_reducene_pbh + // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.512 + return _mm512_reducene_pbh(__A, 3); +} + +__m512bh test_mm512_mask_reducene_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { + // CHECK-LABEL: @test_mm512_mask_reducene_pbh + // CHECK:
@llvm.x86.avx10.mask.reduce.nepbf16.512 + return _mm512_mask_reducene_pbh(__W, __U, __A, 1); +} + +__m512bh test_mm512_maskz_reducene_pbh(__mmask32 __U, __m512bh __A) { + // CHECK-LABEL: @test_mm512_maskz_reducene_pbh + // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.512 + return _mm512_maskz_reducene_pbh(__U, __A, 1); +} + +__m512bh test_mm512_roundscalene_pbh(__m512bh __A) { + // CHECK-LABEL: @test_mm512_roundscalene_pbh + // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.512 + return _mm512_roundscalene_pbh(__A, 3); +} + +__m512bh test_mm512_mask_roundscalene_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { + // CHECK-LABEL: @test_mm512_mask_roundscalene_pbh + // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.512 + return _mm512_mask_roundscalene_pbh(__W, __U, __A, 1); +} + +__m512bh test_mm512_maskz_roundscalene_pbh(__mmask32 __U, __m512bh __A) { + // CHECK-LABEL: @test_mm512_maskz_roundscalene_pbh + // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.512 + return _mm512_maskz_roundscalene_pbh(__U, __A, 1 ); +} + +__m512bh test_mm512_getmant_pbh(__m512bh __A) { + // CHECK-LABEL: @test_mm512_getmant_pbh + // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.512 + return _mm512_getmant_pbh(__A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); +} + +__m512bh test_mm512_mask_getmant_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { + // CHECK-LABEL: @test_mm512_mask_getmant_pbh + // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.512 + return _mm512_mask_getmant_pbh(__W, __U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); +} + +__m512bh test_mm512_maskz_getmant_pbh(__mmask32 __U, __m512bh __A) { + // CHECK-LABEL: @test_mm512_maskz_getmant_pbh + // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.512 + return _mm512_maskz_getmant_pbh(__U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); +} + +__m512bh test_mm512_sqrt_pbh(__m512bh __A) { + // CHECK-LABEL: @test_mm512_sqrt_pbh + // CHECK: %{{.*}} = call <32 x bfloat> @llvm.sqrt.v32bf16(<32 x bfloat> %{{.*}}) + return _mm512_sqrt_pbh(__A); +} + +__m512bh
test_mm512_mask_sqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { + // CHECK-LABEL: @test_mm512_mask_sqrt_pbh + // CHECK: %{{.*}} = call <32 x bfloat> @llvm.sqrt.v32bf16(<32 x bfloat> %{{.*}}) + // CHECK: bitcast i32 %{{.*}} to <32 x i1> + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return (__m512bh)_mm512_mask_sqrt_pbh(__W, __U, __A); +} + +__m512bh test_mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) { + // CHECK-LABEL: @test_mm512_maskz_sqrt_pbh + // CHECK: %{{.*}} = call <32 x bfloat> @llvm.sqrt.v32bf16(<32 x bfloat> %{{.*}}) + // CHECK: bitcast i32 %{{.*}} to <32 x i1> + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_maskz_sqrt_pbh(__U, __A); +} + +__m512bh test_mm512_fmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { + // CHECK-LABEL: @test_mm512_fmaddne_pbh + // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}) + return _mm512_fmaddne_pbh(__A, __B, __C); +} + +__m512bh test_mm512_mask_fmaddne_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) { + // CHECK-LABEL: @test_mm512_mask_fmaddne_pbh + // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}) + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_mask_fmaddne_pbh(__A, __U, __B, __C); +} + +__m512bh test_mm512_mask3_fmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) { + // CHECK-LABEL: @test_mm512_mask3_fmaddne_pbh + // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}) + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_mask3_fmaddne_pbh(__A, __B, __C, __U); +} + +__m512bh test_mm512_maskz_fmaddne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { + // CHECK-LABEL: @test_mm512_maskz_fmaddne_pbh 
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}) + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_maskz_fmaddne_pbh(__U, __A, __B, __C); +} + +__m512bh test_mm512_fmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { + // CHECK-LABEL: @test_mm512_fmsubne_pbh + // CHECK: fneg + // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}) + return _mm512_fmsubne_pbh(__A, __B, __C); +} + +__m512bh test_mm512_mask_fmsubne_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) { + // CHECK-LABEL: @test_mm512_mask_fmsubne_pbh + // CHECK: fneg + // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}) + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_mask_fmsubne_pbh(__A, __U, __B, __C); +} + +__m512bh test_mm512_mask3_fmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) { + // CHECK-LABEL: @test_mm512_mask3_fmsubne_pbh + // CHECK: fneg + // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}) + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_mask3_fmsubne_pbh(__A, __B, __C, __U); +} + +__m512bh test_mm512_maskz_fmsubne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { + // CHECK-LABEL: @test_mm512_maskz_fmsubne_pbh + // CHECK: fneg + // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}) + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_maskz_fmsubne_pbh(__U, __A, __B, __C); +} + +__m512bh test_mm512_fnmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { + // CHECK-LABEL: @test_mm512_fnmaddne_pbh + // CHECK: fneg + // CHECK: call <32 x 
bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}) + return _mm512_fnmaddne_pbh(__A, __B, __C); +} + +__m512bh test_mm512_mask_fnmaddne_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) { + // CHECK-LABEL: @test_mm512_mask_fnmaddne_pbh + // CHECK: fneg + // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}) + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_mask_fnmaddne_pbh(__A, __U, __B, __C); +} + +__m512bh test_mm512_mask3_fnmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) { + // CHECK-LABEL: @test_mm512_mask3_fnmaddne_pbh + // CHECK: fneg + // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}) + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_mask3_fnmaddne_pbh(__A, __B, __C, __U); +} + +__m512bh test_mm512_maskz_fnmaddne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { + // CHECK-LABEL: @test_mm512_maskz_fnmaddne_pbh + // CHECK: fneg + // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}) + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_maskz_fnmaddne_pbh(__U, __A, __B, __C); +} + +__m512bh test_mm512_fnmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { + // CHECK-LABEL: @test_mm512_fnmsubne_pbh + // CHECK: fneg + // CHECK: fneg + // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}) + return _mm512_fnmsubne_pbh(__A, __B, __C); +} + +__m512bh test_mm512_mask_fnmsubne_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) { + // CHECK-LABEL: @test_mm512_mask_fnmsubne_pbh + // CHECK: fneg + // CHECK: fneg + // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, 
<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}) + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_mask_fnmsubne_pbh(__A, __U, __B, __C); +} + +__m512bh test_mm512_mask3_fnmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) { + // CHECK-LABEL: @test_mm512_mask3_fnmsubne_pbh + // CHECK: fneg + // CHECK: fneg + // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}) + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_mask3_fnmsubne_pbh(__A, __B, __C, __U); +} + +__m512bh test_mm512_maskz_fnmsubne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { + // CHECK-LABEL: @test_mm512_maskz_fnmsubne_pbh + // CHECK: fneg + // CHECK: fneg + // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}) + // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}} + return _mm512_maskz_fnmsubne_pbh(__U, __A, __B, __C); +} diff --git a/clang/test/CodeGen/X86/avx10_2bf16-builtins.c b/clang/test/CodeGen/X86/avx10_2bf16-builtins.c new file mode 100644 index 00000000000000..cd94edcf58ea2f --- /dev/null +++ b/clang/test/CodeGen/X86/avx10_2bf16-builtins.c @@ -0,0 +1,2082 @@ +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2-256 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2-256 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s + +#include <immintrin.h> + +__m256bh test_mm256_setzero_pbh() { + // CHECK-LABEL: @test_mm256_setzero_pbh + // CHECK: zeroinitializer + return _mm256_setzero_pbh(); +} + +__m128bh test_mm_setzero_pbh() { + // CHECK-LABEL: @test_mm_setzero_pbh + // CHECK: zeroinitializer + return _mm_setzero_pbh(); +} +
+__m256bh test_mm256_undefined_pbh(void) { + // CHECK-LABEL: @test_mm256_undefined_pbh + // CHECK: ret <16 x bfloat> zeroinitializer + return _mm256_undefined_pbh(); +} + +__m128bh test_mm_undefined_pbh(void) { + // CHECK-LABEL: @test_mm_undefined_pbh + // CHECK: ret <8 x bfloat> zeroinitializer + return _mm_undefined_pbh(); +} + +__bf16 test_mm_cvtsbh_bf16(__m128bh __A) { + // CHECK-LABEL: @test_mm_cvtsbh_bf16 + // CHECK: extractelement <8 x bfloat> %{{.*}}, i32 0 + return _mm_cvtsbh_bf16(__A); +} + +__bf16 test_mm256_cvtsbh_bf16(__m256bh __A) { + // CHECK-LABEL: @test_mm256_cvtsbh_bf16 + // CHECK: extractelement <16 x bfloat> %{{.*}}, i32 0 + return _mm256_cvtsbh_bf16(__A); +} + +__m128bh test_mm_set_sbh(__bf16 h) { + // CHECK-LABEL: @test_mm_set_sbh + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 0 + // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 1 + // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 2 + // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 3 + // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 4 + // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 5 + // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 6 + // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 7 + return _mm_set_sbh(h); +} + +__m128bh test_mm_set1_pbh(__bf16 h) { + // CHECK-LABEL: @test_mm_set1_pbh + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 0 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 1 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 2 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 3 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 4 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 5 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 6 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 7 + return _mm_set1_pbh(h); +} + +__m256bh test_mm256_set1_pbh(__bf16 h) { + // CHECK-LABEL: @test_mm256_set1_pbh + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 
0 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 1 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 2 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 3 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 4 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 5 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 6 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 7 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 8 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 9 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 10 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 11 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 12 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 13 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 14 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 15 + return _mm256_set1_pbh(h); +} + +__m128bh test_mm_set_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, + __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8) { + // CHECK-LABEL: @test_mm_set_pbh + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 0 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 1 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 2 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 3 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 4 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 5 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 6 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 7 + return _mm_set_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8); +} + +__m256bh test_mm256_set_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, + __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8, + __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12, + __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16) { + // CHECK-LABEL: @test_mm256_set_pbh + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 0 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 1 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 2 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 3 + // 
CHECK: insertelement <16 x bfloat> {{.*}}, i32 4 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 5 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 6 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 7 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 8 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 9 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 10 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 11 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 12 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 13 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 14 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 15 + return _mm256_set_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, + bf9, bf10, bf11, bf12, bf13, bf14, bf15, bf16); +} + +__m128bh test_mm_setr_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, + __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8) { + // CHECK-LABEL: @test_mm_setr_pbh + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 0 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 1 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 2 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 3 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 4 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 5 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 6 + // CHECK: insertelement <8 x bfloat> {{.*}}, i32 7 + return _mm_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8); +} + +__m256bh test_mm256_setr_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, + __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8, + __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12, + __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16) { + // CHECK-LABEL: @test_mm256_setr_pbh + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 0 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 1 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 2 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 3 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 4 + // CHECK: insertelement <16 
x bfloat> {{.*}}, i32 5 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 6 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 7 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 8 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 9 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 10 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 11 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 12 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 13 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 14 + // CHECK: insertelement <16 x bfloat> {{.*}}, i32 15 + return _mm256_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, + bf9, bf10, bf11, bf12, bf13, bf14, bf15, bf16); +} + +__m128 test_mm_castpbf16_ps(__m128bh A) { + // CHECK-LABEL: test_mm_castpbf16_ps + // CHECK: bitcast <8 x bfloat> %{{.*}} to <4 x float> + return _mm_castpbf16_ps(A); +} + +__m256 test_mm256_castpbf16_ps(__m256bh A) { + // CHECK-LABEL: test_mm256_castpbf16_ps + // CHECK: bitcast <16 x bfloat> %{{.*}} to <8 x float> + return _mm256_castpbf16_ps(A); +} + +__m128i test_mm_castpbf16_si128(__m128bh A) { + // CHECK-LABEL: test_mm_castpbf16_si128 + // CHECK: bitcast <8 x bfloat> %{{.*}} to <2 x i64> + return _mm_castpbf16_si128(A); +} + +__m256i test_mm256_castpbf16_si256(__m256bh A) { + // CHECK-LABEL: test_mm256_castpbf16_si256 + // CHECK: bitcast <16 x bfloat> %{{.*}} to <4 x i64> + return _mm256_castpbf16_si256(A); +} + +__m128bh test_mm_castps_pbh(__m128 A) { + // CHECK-LABEL: test_mm_castps_pbh + // CHECK: bitcast <4 x float> %{{.*}} to <8 x bfloat> + return _mm_castps_pbh(A); +} + +__m256bh test_mm256_castps_pbh(__m256 A) { + // CHECK-LABEL: test_mm256_castps_pbh + // CHECK: bitcast <8 x float> %{{.*}} to <16 x bfloat> + return _mm256_castps_pbh(A); +} + +__m128bh test_mm_castpd_pbh(__m128d A) { + // CHECK-LABEL: test_mm_castpd_pbh + // CHECK: bitcast <2 x double> %{{.*}} to <8 x bfloat> + return _mm_castpd_pbh(A); +} + +__m256bh test_mm256_castpd_pbh(__m256d A) { + // CHECK-LABEL: 
test_mm256_castpd_pbh + // CHECK: bitcast <4 x double> %{{.*}} to <16 x bfloat> + return _mm256_castpd_pbh(A); +} + +__m128bh test_mm_castsi128_pbh(__m128i A) { + // CHECK-LABEL: test_mm_castsi128_pbh + // CHECK: bitcast <2 x i64> %{{.*}} to <8 x bfloat> + return _mm_castsi128_pbh(A); +} + +__m256bh test_mm256_castsi256_pbh(__m256i A) { + // CHECK-LABEL: test_mm256_castsi256_pbh + // CHECK: bitcast <4 x i64> %{{.*}} to <16 x bfloat> + return _mm256_castsi256_pbh(A); +} + +__m128d test_mm_castpbf16_pd(__m128bh A) { + // CHECK-LABEL: test_mm_castpbf16_pd + // CHECK: bitcast <8 x bfloat> %{{.*}} to <2 x double> + return _mm_castpbf16_pd(A); +} + +__m128bh test_mm256_castpbf16256_pbh128(__m256bh __a) { + // CHECK-LABEL: test_mm256_castpbf16256_pbh128 + // CHECK: shufflevector <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <8 x i32> + return _mm256_castpbf16256_pbh128(__a); +} + +__m256bh test_mm256_castpbf16128_pbh256(__m128bh __a) { + // CHECK-LABEL: test_mm256_castpbf16128_pbh256 + // CHECK: shufflevector <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <16 x i32> + return _mm256_castpbf16128_pbh256(__a); +} + +__m256d test_mm256_castpbf16_pd(__m256bh A) { + // CHECK-LABEL: test_mm256_castpbf16_pd + // CHECK: bitcast <16 x bfloat> %{{.*}} to <4 x double> + return _mm256_castpbf16_pd(A); +} + +__m256bh test_mm256_zextpbf16128_pbh256(__m128bh __a) { + // CHECK-LABEL: test_mm256_zextpbf16128_pbh256 + // CHECK: shufflevector <8 x bfloat> %{{.*}}, <8 x bfloat> {{.*}}, <16 x i32> + return _mm256_zextpbf16128_pbh256(__a); +} + +__m128bh test_mm_abs_pbh(__m128bh a) { + // CHECK-LABEL: @test_mm_abs_pbh + // CHECK: and <4 x i32> + return _mm_abs_pbh(a); +} + +__m256bh test_mm256_abs_pbh(__m256bh a) { + // CHECK-LABEL: @test_mm256_abs_pbh + // CHECK: and <8 x i32> + return _mm256_abs_pbh(a); +} + +__m256bh test_mm256_loadu_pbh(void *p) { + // CHECK-LABEL: @test_mm256_loadu_pbh + // CHECK: load <16 x bfloat>, ptr {{.*}}, align 1{{$}} + return _mm256_loadu_pbh(p); +} + +__m128bh 
test_mm_load_sbh(void const *A) { + // CHECK-LABEL: test_mm_load_sbh + // CHECK: %{{.*}} = call <8 x bfloat> @llvm.masked.load.v8bf16.p0(ptr %{{.*}}, i32 1, <8 x i1> bitcast (<1 x i8> <i8 1> to <8 x i1>), <8 x bfloat> %{{.*}}) + return _mm_load_sbh(A); +} + +__m256bh test_mm256_load_pbh(void *p) { + // CHECK-LABEL: @test_mm256_load_pbh + // CHECK: load <16 x bfloat>, ptr %{{.*}}, align 32 + return _mm256_load_pbh(p); +} + +__m128bh test_mm_load_pbh(void *p) { + // CHECK-LABEL: @test_mm_load_pbh + // CHECK: load <8 x bfloat>, ptr %{{.*}}, align 16 + return _mm_load_pbh(p); +} + +__m128bh test_mm_loadu_pbh(void *p) { + // CHECK-LABEL: @test_mm_loadu_pbh + // CHECK: load <8 x bfloat>, ptr {{.*}}, align 1{{$}} + return _mm_loadu_pbh(p); +} + +void test_mm_store_sbh(void *A, __m128bh B) { + // CHECK-LABEL: test_mm_store_sbh + // CHECK: extractelement <8 x bfloat> %{{.*}}, i32 0 + // CHECK: store bfloat %{{.*}}, ptr %{{.*}}, align 1{{$}} + _mm_store_sbh(A, B); +} + +void test_mm_mask_store_sbh(void *__P, __mmask8 __U, __m128bh __A) { + // CHECK-LABEL: @test_mm_mask_store_sbh + // CHECK: call void @llvm.masked.store.v8bf16.p0(<8 x bfloat> %{{.*}}, ptr %{{.*}}, i32 1, <8 x i1> %{{.*}}) + _mm_mask_store_sbh(__P, __U, __A); +} + +void test_mm256_store_pbh(void *p, __m256bh a) { + // CHECK-LABEL: @test_mm256_store_pbh + // CHECK: store <16 x bfloat> %{{.*}}, ptr %{{.*}}, align 32 + _mm256_store_pbh(p, a); +} + +void test_mm_store_pbh(void *p, __m128bh a) { + // CHECK-LABEL: @test_mm_store_pbh + // CHECK: store <8 x bfloat> %{{.*}}, ptr %{{.*}}, align 16 + _mm_store_pbh(p, a); +} + +__m128bh test_mm_mask_load_sbh(__m128bh __A, __mmask8 __U, const void *__W) { + // CHECK-LABEL: @test_mm_mask_load_sbh + // CHECK: %{{.*}} = call <8 x bfloat> @llvm.masked.load.v8bf16.p0(ptr %{{.*}}, i32 1, <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}) + return _mm_mask_load_sbh(__A, __U, __W); +} + +__m128bh test_mm_maskz_load_sbh(__mmask8 __U, const void *__W) { + // CHECK-LABEL: @test_mm_maskz_load_sbh + //
CHECK: %{{.*}} = call <8 x bfloat> @llvm.masked.load.v8bf16.p0(ptr %{{.*}}, i32 1, <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}) + return _mm_maskz_load_sbh(__U, __W); +} + +void test_mm256_storeu_pbh(void *p, __m256bh a) { + // CHECK-LABEL: @test_mm256_storeu_pbh + // CHECK: store <16 x bfloat> %{{.*}}, ptr %{{.*}}, align 1{{$}} + // CHECK-NEXT: ret void + _mm256_storeu_pbh(p, a); +} + +void test_mm_storeu_pbh(void *p, __m128bh a) { + // CHECK-LABEL: @test_mm_storeu_pbh + // CHECK: store <8 x bfloat> %{{.*}}, ptr %{{.*}}, align 1{{$}} + // CHECK-NEXT: ret void + _mm_storeu_pbh(p, a); +} + +__m128bh test_mm_move_sbh(__m128bh A, __m128bh B) { + // CHECK-LABEL: test_mm_move_sbh + // CHECK: extractelement <8 x bfloat> %{{.*}}, i32 0 + // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 0 + return _mm_move_sbh(A, B); +} + +__m128bh test_mm_mask_move_sbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { + // CHECK-LABEL: @test_mm_mask_move_sbh + // CHECK: [[EXT:%.*]] = extractelement <8 x bfloat> %{{.*}}, i32 0 + // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat [[EXT]], i32 0 + // CHECK: [[A:%.*]] = extractelement <8 x bfloat> [[VEC:%.*]], i64 0 + // CHECK-NEXT: [[B:%.*]] = extractelement <8 x bfloat> %{{.*}}, i64 0 + // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> + // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 + // CHECK-NEXT: [[SEL:%.*]] = select i1 %{{.*}}, bfloat [[A]], bfloat [[B]] + // CHECK-NEXT: insertelement <8 x bfloat> [[VEC]], bfloat [[SEL]], i64 0 + return _mm_mask_move_sbh(__W, __U, __A, __B); +} + +__m128bh test_mm_maskz_move_sbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + // CHECK-LABEL: @test_mm_maskz_move_sbh + // CHECK: [[EXT:%.*]] = extractelement <8 x bfloat> %{{.*}}, i32 0 + // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat [[EXT]], i32 0 + // CHECK: [[A:%.*]] = extractelement <8 x bfloat> [[VEC:%.*]], i64 0 + // CHECK-NEXT: [[B:%.*]] = extractelement <8 x bfloat> %{{.*}}, i64 0 + // CHECK-NEXT: bitcast i8 %{{.*}} to 
<8 x i1> + // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 + // CHECK-NEXT: [[SEL:%.*]] = select i1 %{{.*}}, bfloat [[A]], bfloat [[B]] + // CHECK-NEXT: insertelement <8 x bfloat> [[VEC]], bfloat [[SEL]], i64 0 + return _mm_maskz_move_sbh(__U, __A, __B); +} + +__m128bh test_mm_mask_blend_pbh(__mmask8 __U, __m128bh __A, __m128bh __W) { + // CHECK-LABEL: @test_mm_mask_blend_pbh + // CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return _mm_mask_blend_pbh(__U, __A, __W); +} + +__m256bh test_mm256_mask_blend_pbh(__mmask16 __U, __m256bh __A, __m256bh __W) { + // CHECK-LABEL: @test_mm256_mask_blend_pbh + // CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return _mm256_mask_blend_pbh(__U, __A, __W); +} + +__m128bh test_mm_permutex2var_pbh(__m128bh __A, __m128i __I, __m128bh __B) { + // CHECK-LABEL: @test_mm_permutex2var_pbh + // CHECK: %{{.*}} = bitcast <8 x bfloat> %{{.*}} to <8 x i16> + // CHECK: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16> + // CHECK: %{{.*}} = bitcast <8 x bfloat> %{{.*}} to <8 x i16> + // CHECK: %{{.*}} = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}) + // CHECK: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x bfloat> + return _mm_permutex2var_pbh(__A, __I, __B); +} + +__m256bh test_mm256_permutex2var_pbh(__m256bh __A, __m256i __I, __m256bh __B) { + // CHECK-LABEL: @test_mm256_permutex2var_pbh + // CHECK: %{{.*}} = bitcast <16 x bfloat> %{{.*}} to <16 x i16> + // CHECK: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16> + // CHECK: %{{.*}} = bitcast <16 x bfloat> %{{.*}} to <16 x i16> + // CHECK: %{{.*}} = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}) + // CHECK: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x bfloat> + return 
_mm256_permutex2var_pbh(__A, __I, __B); +} + +__m128bh test_mm_permutexvar_pbh(__m128i __A, __m128bh __B) { + // CHECK-LABEL: @test_mm_permutexvar_pbh + // CHECK: %{{.*}} = bitcast <8 x bfloat> %{{.*}} to <8 x i16> + // CHECK: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16> + // CHECK: %{{.*}} = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) + // CHECK: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x bfloat> + return _mm_permutexvar_pbh(__A, __B); +} + +__m256bh test_mm256_permutexvar_pbh(__m256i __A, __m256bh __B) { + // CHECK-LABEL: @test_mm256_permutexvar_pbh + // CHECK: %{{.*}} = bitcast <16 x bfloat> %{{.*}} to <16 x i16> + // CHECK: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16> + // CHECK: %{{.*}} = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) + // CHECK: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x bfloat> + return _mm256_permutexvar_pbh(__A, __B); +} + +__m256bh test_mm256_addne_pbh(__m256bh __A, __m256bh __B) { + // CHECK-LABEL: @test_mm256_addne_pbh + // CHECK: %{{.*}} = fadd <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_addne_pbh(__A, __B); +} + +__m256bh test_mm256_mask_addne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + // CHECK: %{{.*}} = fadd <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return (__m256bh)_mm256_mask_addne_pbh(__W, __U, __A, __B); +} + +__m256bh test_mm256_maskz_addne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + // CHECK: %{{.*}} = fadd <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return _mm256_maskz_addne_pbh(__U, __A, __B); +} + +__m128bh test_mm_addne_pbh(__m128bh __A, __m128bh __B) { + // CHECK-LABEL: @test_mm_addne_pbh + // CHECK: %{{.*}} = fadd <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_addne_pbh(__A, __B); +} + +__m128bh test_mm_mask_addne_pbh(__m128bh __W, __mmask16 __U, __m128bh 
__A, __m128bh __B) { + // CHECK: %{{.*}} = fadd <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return (__m128bh)_mm_mask_addne_pbh(__W, __U, __A, __B); +} + +__m128bh test_mm_maskz_addne_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) { + // CHECK: %{{.*}} = fadd <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return _mm_maskz_addne_pbh(__U, __A, __B); +} + +__m256bh test_mm256_subne_pbh(__m256bh __A, __m256bh __B) { + // CHECK-LABEL: @test_mm256_subne_pbh + // CHECK: %{{.*}} = fsub <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_subne_pbh(__A, __B); +} + +__m256bh test_mm256_mask_subne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + // CHECK: %{{.*}} = fsub <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return (__m256bh)_mm256_mask_subne_pbh(__W, __U, __A, __B); +} + +__m256bh test_mm256_maskz_subne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + // CHECK: %{{.*}} = fsub <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return _mm256_maskz_subne_pbh(__U, __A, __B); +} + +__m128bh test_mm_subne_pbh(__m128bh __A, __m128bh __B) { + // CHECK-LABEL: @test_mm_subne_pbh + // CHECK: %{{.*}} = fsub <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_subne_pbh(__A, __B); +} + +__m128bh test_mm_mask_subne_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) { + // CHECK: %{{.*}} = fsub <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return (__m128bh)_mm_mask_subne_pbh(__W, __U, __A, __B); +} + +__m128bh test_mm_maskz_subne_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) { + // CHECK: %{{.*}} = fsub <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return 
_mm_maskz_subne_pbh(__U, __A, __B); +} + +__m256bh test_mm256_mulne_pbh(__m256bh __A, __m256bh __B) { + // CHECK-LABEL: @test_mm256_mulne_pbh + // CHECK: %{{.*}} = fmul <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_mulne_pbh(__A, __B); +} + +__m256bh test_mm256_mask_mulne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + // CHECK: %{{.*}} = fmul <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return (__m256bh)_mm256_mask_mulne_pbh(__W, __U, __A, __B); +} + +__m256bh test_mm256_maskz_mulne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + // CHECK: %{{.*}} = fmul <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return _mm256_maskz_mulne_pbh(__U, __A, __B); +} + +__m128bh test_mm_mulne_pbh(__m128bh __A, __m128bh __B) { + // CHECK-LABEL: @test_mm_mulne_pbh + // CHECK: %{{.*}} = fmul <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_mulne_pbh(__A, __B); +} + +__m128bh test_mm_mask_mulne_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) { + // CHECK: %{{.*}} = fmul <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return (__m128bh)_mm_mask_mulne_pbh(__W, __U, __A, __B); +} + +__m128bh test_mm_maskz_mulne_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) { + // CHECK: %{{.*}} = fmul <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return _mm_maskz_mulne_pbh(__U, __A, __B); +} + +__m256bh test_mm256_divne_pbh(__m256bh __A, __m256bh __B) { + // CHECK-LABEL: @test_mm256_divne_pbh + // CHECK: %{{.*}} = fdiv <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_divne_pbh(__A, __B); +} + +__m256bh test_mm256_mask_divne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + // CHECK: %{{.*}} = fdiv <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> 
%{{.*}} + return (__m256bh)_mm256_mask_divne_pbh(__W, __U, __A, __B); +} + +__m256bh test_mm256_maskz_divne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + // CHECK: %{{.*}} = fdiv <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return _mm256_maskz_divne_pbh(__U, __A, __B); +} + +__m128bh test_mm_divne_pbh(__m128bh __A, __m128bh __B) { + // CHECK-LABEL: @test_mm_divne_pbh + // CHECK: %{{.*}} = fdiv <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_divne_pbh(__A, __B); +} + +__m128bh test_mm_mask_divne_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) { + // CHECK: %{{.*}} = fdiv <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return (__m128bh)_mm_mask_divne_pbh(__W, __U, __A, __B); +} + +__m128bh test_mm_maskz_divne_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) { + // CHECK: %{{.*}} = fdiv <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return _mm_maskz_divne_pbh(__U, __A, __B); +} + +__m256bh test_mm256_max_pbh(__m256bh __A, __m256bh __B) { + // CHECK-LABEL: @test_mm256_max_pbh + // CHECK: @llvm.x86.avx10.vmaxpbf16256( + return _mm256_max_pbh(__A, __B); +} + +__m256bh test_mm256_mask_max_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + // CHECK: @llvm.x86.avx10.vmaxpbf16256 + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return (__m256bh)_mm256_mask_max_pbh(__W, __U, __A, __B); +} + +__m256bh test_mm256_maskz_max_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + // CHECK: @llvm.x86.avx10.vmaxpbf16256 + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return _mm256_maskz_max_pbh(__U, __A, __B); +} + +__m128bh test_mm_max_pbh(__m128bh __A, __m128bh __B) { + // CHECK-LABEL: @test_mm_max_pbh + // CHECK: @llvm.x86.avx10.vmaxpbf16128( + return _mm_max_pbh(__A, __B); +} + +__m128bh 
test_mm_mask_max_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) { + // CHECK: @llvm.x86.avx10.vmaxpbf16128 + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return (__m128bh)_mm_mask_max_pbh(__W, __U, __A, __B); +} + +__m128bh test_mm_maskz_max_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) { + // CHECK: @llvm.x86.avx10.vmaxpbf16128 + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return _mm_maskz_max_pbh(__U, __A, __B); +} + +__m256bh test_mm256_min_pbh(__m256bh __A, __m256bh __B) { + // CHECK-LABEL: @test_mm256_min_pbh + // CHECK: @llvm.x86.avx10.vminpbf16256( + return _mm256_min_pbh(__A, __B); +} + +__m256bh test_mm256_mask_min_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + // CHECK: @llvm.x86.avx10.vminpbf16256 + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return (__m256bh)_mm256_mask_min_pbh(__W, __U, __A, __B); +} + +__m256bh test_mm256_maskz_min_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + // CHECK: @llvm.x86.avx10.vminpbf16256 + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return _mm256_maskz_min_pbh(__U, __A, __B); +} + +__m128bh test_mm_min_pbh(__m128bh __A, __m128bh __B) { + // CHECK-LABEL: @test_mm_min_pbh + // CHECK: @llvm.x86.avx10.vminpbf16128( + return _mm_min_pbh(__A, __B); +} + +__m128bh test_mm_mask_min_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) { + // CHECK: @llvm.x86.avx10.vminpbf16128 + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return (__m128bh)_mm_mask_min_pbh(__W, __U, __A, __B); +} + +__m128bh test_mm_maskz_min_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) { + // CHECK: @llvm.x86.avx10.vminpbf16128 + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return _mm_maskz_min_pbh(__U, __A, __B); +} + +int test_mm_comeqsbh(__m128bh __A, __m128bh __B) { + // CHECK-LABEL: 
test_mm_comeqsbh + // CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomsbf16eq(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}}) + return _mm_comeqsbh(__A, __B); +} + +int test_mm_comltsbh(__m128bh __A, __m128bh __B) { + // CHECK-LABEL: test_mm_comltsbh + // CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomsbf16lt(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}}) + return _mm_comltsbh(__A, __B); +} + +int test_mm_comlesbh(__m128bh __A, __m128bh __B) { + // CHECK-LABEL: test_mm_comlesbh + // CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomsbf16le(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}}) + return _mm_comlesbh(__A, __B); +} + +int test_mm_comgtsbh(__m128bh __A, __m128bh __B) { + // CHECK-LABEL: test_mm_comgtsbh + // CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomsbf16gt(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}}) + return _mm_comgtsbh(__A, __B); +} + +int test_mm_comgesbh(__m128bh __A, __m128bh __B) { + // CHECK-LABEL: test_mm_comgesbh + // CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomsbf16ge(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}}) + return _mm_comgesbh(__A, __B); +} + +int test_mm_comneqsbh(__m128bh __A, __m128bh __B) { + // CHECK-LABEL: test_mm_comneqsbh + // CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomsbf16neq(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}}) + return _mm_comneqsbh(__A, __B); +} + +__mmask16 test_mm256_cmp_pbh_mask_eq_oq(__m256bh a, __m256bh b) { + // CHECK-LABEL: @test_mm256_cmp_pbh_mask_eq_oq + // CHECK: fcmp oeq <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_EQ_OQ); +} + +__mmask16 test_mm256_cmp_pbh_mask_lt_os(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_lt_os + // CHECK: fcmp olt <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_LT_OS); +} + +__mmask16 test_mm256_cmp_pbh_mask_le_os(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_le_os + // CHECK: fcmp ole <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_LE_OS); +} + +__mmask16 test_mm256_cmp_pbh_mask_unord_q(__m256bh a, 
__m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_unord_q + // CHECK: fcmp uno <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_UNORD_Q); +} + +__mmask16 test_mm256_cmp_pbh_mask_neq_uq(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_neq_uq + // CHECK: fcmp une <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_NEQ_UQ); +} + +__mmask16 test_mm256_cmp_pbh_mask_nlt_us(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_nlt_us + // CHECK: fcmp uge <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_NLT_US); +} + +__mmask16 test_mm256_cmp_pbh_mask_nle_us(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_nle_us + // CHECK: fcmp ugt <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_NLE_US); +} + +__mmask16 test_mm256_cmp_pbh_mask_ord_q(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_ord_q + // CHECK: fcmp ord <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_ORD_Q); +} + +__mmask16 test_mm256_cmp_pbh_mask_eq_uq(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_eq_uq + // CHECK: fcmp ueq <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_EQ_UQ); +} + +__mmask16 test_mm256_cmp_pbh_mask_nge_us(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_nge_us + // CHECK: fcmp ult <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_NGE_US); +} + +__mmask16 test_mm256_cmp_pbh_mask_ngt_us(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_ngt_us + // CHECK: fcmp ule <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_NGT_US); +} + +__mmask16 test_mm256_cmp_pbh_mask_false_oq(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_false_oq + // CHECK: fcmp false <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_FALSE_OQ); +} + +__mmask16 
test_mm256_cmp_pbh_mask_neq_oq(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_neq_oq + // CHECK: fcmp one <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_NEQ_OQ); +} + +__mmask16 test_mm256_cmp_pbh_mask_ge_os(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_ge_os + // CHECK: fcmp oge <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_GE_OS); +} + +__mmask16 test_mm256_cmp_pbh_mask_gt_os(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_gt_os + // CHECK: fcmp ogt <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_GT_OS); +} + +__mmask16 test_mm256_cmp_pbh_mask_true_uq(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_true_uq + // CHECK: fcmp true <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_TRUE_UQ); +} + +__mmask16 test_mm256_cmp_pbh_mask_eq_os(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_eq_os + // CHECK: fcmp oeq <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_EQ_OS); +} + +__mmask16 test_mm256_cmp_pbh_mask_lt_oq(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_lt_oq + // CHECK: fcmp olt <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_LT_OQ); +} + +__mmask16 test_mm256_cmp_pbh_mask_le_oq(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_le_oq + // CHECK: fcmp ole <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_LE_OQ); +} + +__mmask16 test_mm256_cmp_pbh_mask_unord_s(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_unord_s + // CHECK: fcmp uno <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_UNORD_S); +} + +__mmask16 test_mm256_cmp_pbh_mask_neq_us(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_neq_us + // CHECK: fcmp une <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_NEQ_US); +} + +__mmask16 
test_mm256_cmp_pbh_mask_nlt_uq(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_nlt_uq + // CHECK: fcmp uge <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_NLT_UQ); +} + +__mmask16 test_mm256_cmp_pbh_mask_nle_uq(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_nle_uq + // CHECK: fcmp ugt <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_NLE_UQ); +} + +__mmask16 test_mm256_cmp_pbh_mask_ord_s(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_ord_s + // CHECK: fcmp ord <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_ORD_S); +} + +__mmask16 test_mm256_cmp_pbh_mask_eq_us(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_eq_us + // CHECK: fcmp ueq <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_EQ_US); +} + +__mmask16 test_mm256_cmp_pbh_mask_nge_uq(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_nge_uq + // CHECK: fcmp ult <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_NGE_UQ); +} + +__mmask16 test_mm256_cmp_pbh_mask_ngt_uq(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_ngt_uq + // CHECK: fcmp ule <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_NGT_UQ); +} + +__mmask16 test_mm256_cmp_pbh_mask_false_os(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_false_os + // CHECK: fcmp false <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_FALSE_OS); +} + +__mmask16 test_mm256_cmp_pbh_mask_neq_os(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_neq_os + // CHECK: fcmp one <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_NEQ_OS); +} + +__mmask16 test_mm256_cmp_pbh_mask_ge_oq(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_ge_oq + // CHECK: fcmp oge <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_GE_OQ); +} + 
+__mmask16 test_mm256_cmp_pbh_mask_gt_oq(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_gt_oq + // CHECK: fcmp ogt <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_GT_OQ); +} + +__mmask16 test_mm256_cmp_pbh_mask_true_us(__m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_cmp_pbh_mask_true_us + // CHECK: fcmp true <16 x bfloat> %{{.*}}, %{{.*}} + return _mm256_cmp_pbh_mask(a, b, _CMP_TRUE_US); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_eq_oq(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: @test_mm256_mask_cmp_pbh_mask_eq_oq + // CHECK: fcmp oeq <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OQ); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_lt_os(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_lt_os + // CHECK: fcmp olt <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OS); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_le_os(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_le_os + // CHECK: fcmp ole <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OS); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_unord_q(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_unord_q + // CHECK: fcmp uno <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_Q); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_neq_uq(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_neq_uq + // CHECK: fcmp une <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_UQ); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_nlt_us(__mmask16 m, __m256bh a, 
__m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nlt_us + // CHECK: fcmp uge <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_US); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_nle_us(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nle_us + // CHECK: fcmp ugt <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_US); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_ord_q(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ord_q + // CHECK: fcmp ord <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_Q); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_eq_uq(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_eq_uq + // CHECK: fcmp ueq <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_UQ); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_nge_us(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nge_us + // CHECK: fcmp ult <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_US); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_ngt_us(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ngt_us + // CHECK: fcmp ule <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_US); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_false_oq(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_false_oq + // CHECK: fcmp false <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, 
_CMP_FALSE_OQ); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_neq_oq(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_neq_oq + // CHECK: fcmp one <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OQ); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_ge_os(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ge_os + // CHECK: fcmp oge <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OS); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_gt_os(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_gt_os + // CHECK: fcmp ogt <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OS); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_true_uq(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_true_uq + // CHECK: fcmp true <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_UQ); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_eq_os(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_eq_os + // CHECK: fcmp oeq <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OS); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_lt_oq(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_lt_oq + // CHECK: fcmp olt <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OQ); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_le_oq(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_le_oq + // CHECK: fcmp ole <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: 
and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OQ); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_unord_s(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_unord_s + // CHECK: fcmp uno <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_S); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_neq_us(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_neq_us + // CHECK: fcmp une <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_US); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_nlt_uq(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nlt_uq + // CHECK: fcmp uge <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_UQ); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_nle_uq(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nle_uq + // CHECK: fcmp ugt <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_UQ); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_ord_s(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ord_s + // CHECK: fcmp ord <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_S); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_eq_us(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_eq_us + // CHECK: fcmp ueq <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_US); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_nge_uq(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: 
test_mm256_mask_cmp_pbh_mask_nge_uq + // CHECK: fcmp ult <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_UQ); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_ngt_uq(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ngt_uq + // CHECK: fcmp ule <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_UQ); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_false_os(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_false_os + // CHECK: fcmp false <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OS); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_neq_os(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_neq_os + // CHECK: fcmp one <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OS); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_ge_oq(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ge_oq + // CHECK: fcmp oge <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OQ); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_gt_oq(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_gt_oq + // CHECK: fcmp ogt <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OQ); +} + +__mmask16 test_mm256_mask_cmp_pbh_mask_true_us(__mmask16 m, __m256bh a, __m256bh b) { + // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_true_us + // CHECK: fcmp true <16 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <16 x i1> %{{.*}}, %{{.*}} + return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_US); +} + +__mmask8 
test_mm_cmp_pbh_mask_eq_oq(__m128bh a, __m128bh b) { + // CHECK-LABEL: @test_mm_cmp_pbh_mask_eq_oq + // CHECK: fcmp oeq <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_EQ_OQ); +} + +__mmask8 test_mm_cmp_pbh_mask_lt_os(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_lt_os + // CHECK: fcmp olt <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_LT_OS); +} + +__mmask8 test_mm_cmp_pbh_mask_le_os(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_le_os + // CHECK: fcmp ole <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_LE_OS); +} + +__mmask8 test_mm_cmp_pbh_mask_unord_q(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_unord_q + // CHECK: fcmp uno <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_UNORD_Q); +} + +__mmask8 test_mm_cmp_pbh_mask_neq_uq(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_neq_uq + // CHECK: fcmp une <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_NEQ_UQ); +} + +__mmask8 test_mm_cmp_pbh_mask_nlt_us(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_nlt_us + // CHECK: fcmp uge <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_NLT_US); +} + +__mmask8 test_mm_cmp_pbh_mask_nle_us(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_nle_us + // CHECK: fcmp ugt <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_NLE_US); +} + +__mmask8 test_mm_cmp_pbh_mask_ord_q(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_ord_q + // CHECK: fcmp ord <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_ORD_Q); +} + +__mmask8 test_mm_cmp_pbh_mask_eq_uq(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_eq_uq + // CHECK: fcmp ueq <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_EQ_UQ); +} + +__mmask8 test_mm_cmp_pbh_mask_nge_us(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_nge_us + 
// CHECK: fcmp ult <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_NGE_US); +} + +__mmask8 test_mm_cmp_pbh_mask_ngt_us(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_ngt_us + // CHECK: fcmp ule <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_NGT_US); +} + +__mmask8 test_mm_cmp_pbh_mask_false_oq(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_false_oq + // CHECK: fcmp false <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_FALSE_OQ); +} + +__mmask8 test_mm_cmp_pbh_mask_neq_oq(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_neq_oq + // CHECK: fcmp one <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_NEQ_OQ); +} + +__mmask8 test_mm_cmp_pbh_mask_ge_os(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_ge_os + // CHECK: fcmp oge <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_GE_OS); +} + +__mmask8 test_mm_cmp_pbh_mask_gt_os(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_gt_os + // CHECK: fcmp ogt <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_GT_OS); +} + +__mmask8 test_mm_cmp_pbh_mask_true_uq(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_true_uq + // CHECK: fcmp true <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_TRUE_UQ); +} + +__mmask8 test_mm_cmp_pbh_mask_eq_os(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_eq_os + // CHECK: fcmp oeq <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_EQ_OS); +} + +__mmask8 test_mm_cmp_pbh_mask_lt_oq(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_lt_oq + // CHECK: fcmp olt <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_LT_OQ); +} + +__mmask8 test_mm_cmp_pbh_mask_le_oq(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_le_oq + // CHECK: fcmp ole <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_LE_OQ); 
+} + +__mmask8 test_mm_cmp_pbh_mask_unord_s(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_unord_s + // CHECK: fcmp uno <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_UNORD_S); +} + +__mmask8 test_mm_cmp_pbh_mask_neq_us(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_neq_us + // CHECK: fcmp une <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_NEQ_US); +} + +__mmask8 test_mm_cmp_pbh_mask_nlt_uq(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_nlt_uq + // CHECK: fcmp uge <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_NLT_UQ); +} + +__mmask8 test_mm_cmp_pbh_mask_nle_uq(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_nle_uq + // CHECK: fcmp ugt <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_NLE_UQ); +} + +__mmask8 test_mm_cmp_pbh_mask_ord_s(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_ord_s + // CHECK: fcmp ord <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_ORD_S); +} + +__mmask8 test_mm_cmp_pbh_mask_eq_us(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_eq_us + // CHECK: fcmp ueq <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_EQ_US); +} + +__mmask8 test_mm_cmp_pbh_mask_nge_uq(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_nge_uq + // CHECK: fcmp ult <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_NGE_UQ); +} + +__mmask8 test_mm_cmp_pbh_mask_ngt_uq(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_ngt_uq + // CHECK: fcmp ule <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_NGT_UQ); +} + +__mmask8 test_mm_cmp_pbh_mask_false_os(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_false_os + // CHECK: fcmp false <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_FALSE_OS); +} + +__mmask8 test_mm_cmp_pbh_mask_neq_os(__m128bh a, __m128bh b) { + // CHECK-LABEL: 
test_mm_cmp_pbh_mask_neq_os + // CHECK: fcmp one <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_NEQ_OS); +} + +__mmask8 test_mm_cmp_pbh_mask_ge_oq(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_ge_oq + // CHECK: fcmp oge <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_GE_OQ); +} + +__mmask8 test_mm_cmp_pbh_mask_gt_oq(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_gt_oq + // CHECK: fcmp ogt <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_GT_OQ); +} + +__mmask8 test_mm_cmp_pbh_mask_true_us(__m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_cmp_pbh_mask_true_us + // CHECK: fcmp true <8 x bfloat> %{{.*}}, %{{.*}} + return _mm_cmp_pbh_mask(a, b, _CMP_TRUE_US); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_eq_oq(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_eq_oq + // CHECK: fcmp oeq <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OQ); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_lt_os(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_lt_os + // CHECK: fcmp olt <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OS); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_le_os(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_le_os + // CHECK: fcmp ole <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OS); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_unord_q(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_unord_q + // CHECK: fcmp uno <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_Q); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_neq_uq(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: 
test_mm_mask_cmp_pbh_mask_neq_uq + // CHECK: fcmp une <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_UQ); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_nlt_us(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nlt_us + // CHECK: fcmp uge <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_US); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_nle_us(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nle_us + // CHECK: fcmp ugt <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_US); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_ord_q(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ord_q + // CHECK: fcmp ord <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_Q); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_eq_uq(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_eq_uq + // CHECK: fcmp ueq <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_UQ); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_nge_us(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nge_us + // CHECK: fcmp ult <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_US); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_ngt_us(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ngt_us + // CHECK: fcmp ule <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_US); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_false_oq(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: 
test_mm_mask_cmp_pbh_mask_false_oq + // CHECK: fcmp false <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OQ); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_neq_oq(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_neq_oq + // CHECK: fcmp one <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OQ); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_ge_os(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ge_os + // CHECK: fcmp oge <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OS); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_gt_os(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_gt_os + // CHECK: fcmp ogt <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OS); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_true_uq(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_true_uq + // CHECK: fcmp true <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_UQ); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_eq_os(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_eq_os + // CHECK: fcmp oeq <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OS); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_lt_oq(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_lt_oq + // CHECK: fcmp olt <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OQ); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_le_oq(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: 
test_mm_mask_cmp_pbh_mask_le_oq + // CHECK: fcmp ole <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OQ); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_unord_s(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_unord_s + // CHECK: fcmp uno <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_S); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_neq_us(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_neq_us + // CHECK: fcmp une <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_US); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_nlt_uq(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nlt_uq + // CHECK: fcmp uge <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_UQ); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_nle_uq(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nle_uq + // CHECK: fcmp ugt <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_UQ); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_ord_s(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ord_s + // CHECK: fcmp ord <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_S); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_eq_us(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_eq_us + // CHECK: fcmp ueq <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_US); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_nge_uq(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: 
test_mm_mask_cmp_pbh_mask_nge_uq + // CHECK: fcmp ult <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_UQ); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_ngt_uq(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ngt_uq + // CHECK: fcmp ule <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_UQ); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_false_os(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_false_os + // CHECK: fcmp false <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OS); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_neq_os(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_neq_os + // CHECK: fcmp one <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OS); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_ge_oq(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ge_oq + // CHECK: fcmp oge <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OQ); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_gt_oq(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_gt_oq + // CHECK: fcmp ogt <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OQ); +} + +__mmask8 test_mm_mask_cmp_pbh_mask_true_us(__mmask8 m, __m128bh a, __m128bh b) { + // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_true_us + // CHECK: fcmp true <8 x bfloat> %{{.*}}, %{{.*}} + // CHECK: and <8 x i1> %{{.*}}, %{{.*}} + return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_US); +} + + +__mmask16 test_mm256_mask_fpclass_pbh_mask(__mmask16 __U, __m256bh __A) { + // CHECK-LABEL: 
@test_mm256_mask_fpclass_pbh_mask + // CHECK: @llvm.x86.avx10.fpclass.nepbf16.256 + return _mm256_mask_fpclass_pbh_mask(__U, __A, 4); +} + +__mmask16 test_mm256_fpclass_pbh_mask(__m256bh __A) { + // CHECK-LABEL: @test_mm256_fpclass_pbh_mask + // CHECK: @llvm.x86.avx10.fpclass.nepbf16.256 + return _mm256_fpclass_pbh_mask(__A, 4); +} + +__mmask8 test_mm_mask_fpclass_pbh_mask(__mmask8 __U, __m128bh __A) { + // CHECK-LABEL: @test_mm_mask_fpclass_pbh_mask + // CHECK: @llvm.x86.avx10.fpclass.nepbf16.128 + return _mm_mask_fpclass_pbh_mask(__U, __A, 4); +} + +__mmask8 test_mm_fpclass_pbh_mask(__m128bh __A) { + // CHECK-LABEL: @test_mm_fpclass_pbh_mask + // CHECK: @llvm.x86.avx10.fpclass.nepbf16.128 + return _mm_fpclass_pbh_mask(__A, 4); +} + +__m256bh test_mm256_scalef_pbh(__m256bh __A, __m256bh __B) { + // CHECK-LABEL: @test_mm256_scalef_pbh + // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.256 + return _mm256_scalef_pbh(__A, __B); +} + +__m256bh test_mm256_mask_scalef_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + // CHECK-LABEL: @test_mm256_mask_scalef_pbh + // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.256 + return _mm256_mask_scalef_pbh(__W, __U, __A, __B); +} + +__m256bh test_mm256_maskz_scalef_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + // CHECK-LABEL: @test_mm256_maskz_scalef_pbh + // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.256 + return _mm256_maskz_scalef_pbh(__U, __A, __B); +} + +__m256bh test_mm256_rcp_pbh(__m256bh __A) { + // CHECK-LABEL: @test_mm256_rcp_pbh + // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.256 + return _mm256_rcp_pbh(__A); +} + +__m256bh test_mm256_mask_rcp_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { + // CHECK-LABEL: @test_mm256_mask_rcp_pbh + // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.256 + return (__m256bh)_mm256_mask_rcp_pbh(__W, __U, __A); +} + +__m256bh test_mm256_maskz_rcp_pbh(__mmask16 __U, __m256bh __A) { + // CHECK-LABEL: @test_mm256_maskz_rcp_pbh + // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.256 + return 
_mm256_maskz_rcp_pbh(__U, __A); +} + +__m256bh test_mm256_getexp_pbh(__m256bh __A) { + // CHECK-LABEL: @test_mm256_getexp_pbh + // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.256 + return _mm256_getexp_pbh(__A); +} + +__m256bh test_mm256_mask_getexp_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { + // CHECK-LABEL: @test_mm256_mask_getexp_pbh + // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.256 + return _mm256_mask_getexp_pbh(__W, __U, __A); +} + +__m256bh test_mm256_maskz_getexp_pbh(__mmask16 __U, __m256bh __A) { + // CHECK-LABEL: @test_mm256_maskz_getexp_pbh + // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.256 + return _mm256_maskz_getexp_pbh(__U, __A); +} + +__m256bh test_mm256_rsqrt_pbh(__m256bh __A) { + // CHECK-LABEL: @test_mm256_rsqrt_pbh + // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.256 + return _mm256_rsqrt_pbh(__A); +} + +__m256bh test_mm256_mask_rsqrt_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { + // CHECK-LABEL: @test_mm256_mask_rsqrt_pbh + // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.256 + return (__m256bh)_mm256_mask_rsqrt_pbh(__W, __U, __A); +} + +__m256bh test_mm256_maskz_rsqrt_pbh(__mmask16 __U, __m256bh __A) { + // CHECK-LABEL: @test_mm256_maskz_rsqrt_pbh + // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.256 + return _mm256_maskz_rsqrt_pbh(__U, __A); +} + +__m256bh test_mm256_reducene_pbh(__m256bh __A) { + // CHECK-LABEL: @test_mm256_reducene_pbh + // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.256 + return _mm256_reducene_pbh(__A, 3); +} + +__m256bh test_mm256_mask_reducene_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { + // CHECK-LABEL: @test_mm256_mask_reducene_pbh + // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.256 + return _mm256_mask_reducene_pbh(__W, __U, __A, 1); +} + +__m256bh test_mm256_maskz_reducene_pbh(__mmask16 __U, __m256bh __A) { + // CHECK-LABEL: @test_mm256_maskz_reducene_pbh + // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.256 + return _mm256_maskz_reducene_pbh(__U, __A, 1); +} + +__m256bh test_mm256_roundscalene_pbh(__m256bh 
__A) { + // CHECK-LABEL: @test_mm256_roundscalene_pbh + // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.256 + return _mm256_roundscalene_pbh(__A, 3); +} + +__m256bh test_mm256_mask_roundscalene_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { + // CHECK-LABEL: @test_mm256_mask_roundscalene_pbh + // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.256 + return _mm256_mask_roundscalene_pbh(__W, __U, __A, 1); +} + +__m256bh test_mm256_maskz_roundscalene_pbh(__mmask16 __U, __m256bh __A) { + // CHECK-LABEL: @test_mm256_maskz_roundscalene_pbh + // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.256 + return _mm256_maskz_roundscalene_pbh(__U, __A, 1 ); +} + +__m256bh test_mm256_getmant_pbh(__m256bh __A) { + // CHECK-LABEL: @test_mm256_getmant_pbh + // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.256 + return _mm256_getmant_pbh(__A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); +} + +__m256bh test_mm256_mask_getmant_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { + // CHECK-LABEL: @test_mm256_mask_getmant_pbh + // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.256 + return _mm256_mask_getmant_pbh(__W, __U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); +} + +__m256bh test_mm256_maskz_getmant_pbh(__mmask16 __U, __m256bh __A) { + // CHECK-LABEL: @test_mm256_maskz_getmant_pbh + // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.256 + return _mm256_maskz_getmant_pbh(__U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); +} + +__m256bh test_mm256_sqrt_pbh(__m256bh __A) { + // CHECK-LABEL: @test_mm256_sqrt_pbh + // CHECK: call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> %{{.*}}) + return _mm256_sqrt_pbh(__A); +} + +__m256bh test_mm256_mask_sqrt_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { + // CHECK-LABEL: @test_mm256_mask_sqrt_pbh + // CHECK: @llvm.sqrt.v16bf16 + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return (__m256bh)_mm256_mask_sqrt_pbh(__W, __U, __A); +} + +__m256bh test_mm256_maskz_sqrt_pbh(__mmask16 __U, __m256bh __A) { + // CHECK-LABEL: 
@test_mm256_maskz_sqrt_pbh + // CHECK: @llvm.sqrt.v16bf16 + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return _mm256_maskz_sqrt_pbh(__U, __A); +} + +__m128bh test_mm_scalef_pbh(__m128bh __A, __m128bh __B) { + // CHECK-LABEL: @test_mm_scalef_pbh + // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.128 + return _mm_scalef_pbh(__A, __B); +} + +__m128bh test_mm_mask_scalef_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { + // CHECK-LABEL: @test_mm_mask_scalef_pbh + // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.128 + return _mm_mask_scalef_pbh(__W, __U, __A, __B); +} + +__m128bh test_mm_maskz_scalef_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + // CHECK-LABEL: @test_mm_maskz_scalef_pbh + // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.128 + return _mm_maskz_scalef_pbh(__U, __A, __B); +} + +__m128bh test_mm_rcp_pbh(__m128bh __A) { + // CHECK-LABEL: @test_mm_rcp_pbh + // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.128 + return _mm_rcp_pbh(__A); +} + +__m128bh test_mm_mask_rcp_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { + // CHECK-LABEL: @test_mm_mask_rcp_pbh + // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.128 + return (__m128bh)_mm_mask_rcp_pbh(__W, __U, __A); +} + +__m128bh test_mm_maskz_rcp_pbh(__mmask8 __U, __m128bh __A) { + // CHECK-LABEL: @test_mm_maskz_rcp_pbh + // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.128 + return _mm_maskz_rcp_pbh(__U, __A); +} + +__m128bh test_mm_getexp_pbh(__m128bh __A) { + // CHECK-LABEL: @test_mm_getexp_pbh + // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.128 + return _mm_getexp_pbh(__A); +} + +__m128bh test_mm_mask_getexp_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { + // CHECK-LABEL: @test_mm_mask_getexp_pbh + // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.128 + return _mm_mask_getexp_pbh(__W, __U, __A); +} + +__m128bh test_mm_maskz_getexp_pbh(__mmask8 __U, __m128bh __A) { + // CHECK-LABEL: @test_mm_maskz_getexp_pbh + // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.128 + return 
_mm_maskz_getexp_pbh(__U, __A); +} + +__m128bh test_mm_rsqrt_pbh(__m128bh __A) { + // CHECK-LABEL: @test_mm_rsqrt_pbh + // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.128 + return _mm_rsqrt_pbh(__A); +} + +__m128bh test_mm_mask_rsqrt_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { + // CHECK-LABEL: @test_mm_mask_rsqrt_pbh + // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.128 + return (__m128bh)_mm_mask_rsqrt_pbh(__W, __U, __A); +} + +__m128bh test_mm_maskz_rsqrt_pbh(__mmask8 __U, __m128bh __A) { + // CHECK-LABEL: @test_mm_maskz_rsqrt_pbh + // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.128 + return _mm_maskz_rsqrt_pbh(__U, __A); +} + +__m128bh test_mm_reducene_pbh(__m128bh __A) { + // CHECK-LABEL: @test_mm_reducene_pbh + // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.128 + return _mm_reducene_pbh(__A, 3); +} + +__m128bh test_mm_mask_reducene_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { + // CHECK-LABEL: @test_mm_mask_reducene_pbh + // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.128 + return _mm_mask_reducene_pbh(__W, __U, __A, 1); +} + +__m128bh test_mm_maskz_reducene_pbh(__mmask8 __U, __m128bh __A) { + // CHECK-LABEL: @test_mm_maskz_reducene_pbh + // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.128 + return _mm_maskz_reducene_pbh(__U, __A, 1); +} + +__m128bh test_mm_roundscalene_pbh(__m128bh __A) { + // CHECK-LABEL: @test_mm_roundscalene_pbh + // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.128 + return _mm_roundscalene_pbh(__A, 3); +} + +__m128bh test_mm_mask_roundscalene_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { + // CHECK-LABEL: @test_mm_mask_roundscalene_pbh + // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.128 + return _mm_mask_roundscalene_pbh(__W, __U, __A, 1); +} + +__m128bh test_mm_maskz_roundscalene_pbh(__mmask8 __U, __m128bh __A) { + // CHECK-LABEL: @test_mm_maskz_roundscalene_pbh + // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.128 + return _mm_maskz_roundscalene_pbh(__U, __A, 1 ); +} + +__m128bh test_mm_getmant_pbh(__m128bh __A) { + // CHECK-LABEL: 
@test_mm_getmant_pbh + // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.128 + return _mm_getmant_pbh(__A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); +} + +__m128bh test_mm_mask_getmant_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { + // CHECK-LABEL: @test_mm_mask_getmant_pbh + // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.128 + return _mm_mask_getmant_pbh(__W, __U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); +} + +__m128bh test_mm_maskz_getmant_pbh(__mmask8 __U, __m128bh __A) { + // CHECK-LABEL: @test_mm_maskz_getmant_pbh + // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.128 + return _mm_maskz_getmant_pbh(__U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); +} + +__m128bh test_mm_sqrt_pbh(__m128bh __A) { + // CHECK-LABEL: @test_mm_sqrt_pbh + // CHECK: call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> {{.*}}) + return _mm_sqrt_pbh(__A); +} + +__m128bh test_mm_mask_sqrt_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { + // CHECK-LABEL: @test_mm_mask_sqrt_pbh + // CHECK: call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> {{.*}}) + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return (__m128bh)_mm_mask_sqrt_pbh(__W, __U, __A); +} + +__m128bh test_mm_maskz_sqrt_pbh(__mmask8 __U, __m128bh __A) { + // CHECK-LABEL: @test_mm_maskz_sqrt_pbh + // CHECK: call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> {{.*}}) + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return _mm_maskz_sqrt_pbh(__U, __A); +} + +__m256bh test_mm256_fmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { + // CHECK-LABEL: @test_mm256_fmaddne_pbh + // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}) + return _mm256_fmaddne_pbh(__A, __B, __C); +} + +__m256bh test_mm256_mask_fmaddne_pbh(__m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { + // CHECK-LABEL: @test_mm256_mask_fmaddne_pbh + // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x 
bfloat> %{{.*}}) + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return _mm256_mask_fmaddne_pbh(__A, __U, __B, __C); +} + +__m256bh test_mm256_mask3_fmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) { + // CHECK-LABEL: @test_mm256_mask3_fmaddne_pbh + // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}) + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return _mm256_mask3_fmaddne_pbh(__A, __B, __C, __U); +} + +__m256bh test_mm256_maskz_fmaddne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) { + // CHECK-LABEL: @test_mm256_maskz_fmaddne_pbh + // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}) + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return _mm256_maskz_fmaddne_pbh(__U, __A, __B, __C); +} + +__m256bh test_mm256_fmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { + // CHECK-LABEL: @test_mm256_fmsubne_pbh + // CHECK: fneg + // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}) + return _mm256_fmsubne_pbh(__A, __B, __C); +} + +__m256bh test_mm256_mask_fmsubne_pbh(__m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { + // CHECK-LABEL: @test_mm256_mask_fmsubne_pbh + // CHECK: fneg + // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}) + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return _mm256_mask_fmsubne_pbh(__A, __U, __B, __C); +} + +__m256bh test_mm256_mask3_fmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) { + // CHECK-LABEL: @test_mm256_mask3_fmsubne_pbh + // CHECK: fneg + // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}) + // CHECK: select 
<16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return _mm256_mask3_fmsubne_pbh(__A, __B, __C, __U); +} + +__m256bh test_mm256_maskz_fmsubne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) { + // CHECK-LABEL: @test_mm256_maskz_fmsubne_pbh + // CHECK: fneg + // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}) + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return _mm256_maskz_fmsubne_pbh(__U, __A, __B, __C); +} + +__m256bh test_mm256_fnmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { + // CHECK-LABEL: @test_mm256_fnmaddne_pbh + // CHECK: fneg + // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}) + return _mm256_fnmaddne_pbh(__A, __B, __C); +} + +__m256bh test_mm256_mask_fnmaddne_pbh(__m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { + // CHECK-LABEL: @test_mm256_mask_fnmaddne_pbh + // CHECK: fneg + // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}) + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return _mm256_mask_fnmaddne_pbh(__A, __U, __B, __C); +} + +__m256bh test_mm256_mask3_fnmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) { + // CHECK-LABEL: @test_mm256_mask3_fnmaddne_pbh + // CHECK: fneg + // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}) + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return _mm256_mask3_fnmaddne_pbh(__A, __B, __C, __U); +} + +__m256bh test_mm256_maskz_fnmaddne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) { + // CHECK-LABEL: @test_mm256_maskz_fnmaddne_pbh + // CHECK: fneg + // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}) + // CHECK: 
select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return _mm256_maskz_fnmaddne_pbh(__U, __A, __B, __C); +} + +__m256bh test_mm256_fnmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { + // CHECK-LABEL: @test_mm256_fnmsubne_pbh + // CHECK: fneg + // CHECK: fneg + // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}) + return _mm256_fnmsubne_pbh(__A, __B, __C); +} + +__m256bh test_mm256_mask_fnmsubne_pbh(__m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { + // CHECK-LABEL: @test_mm256_mask_fnmsubne_pbh + // CHECK: fneg + // CHECK: fneg + // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}) + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return _mm256_mask_fnmsubne_pbh(__A, __U, __B, __C); +} + +__m256bh test_mm256_mask3_fnmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) { + // CHECK-LABEL: @test_mm256_mask3_fnmsubne_pbh + // CHECK: fneg + // CHECK: fneg + // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}) + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return _mm256_mask3_fnmsubne_pbh(__A, __B, __C, __U); +} + +__m256bh test_mm256_maskz_fnmsubne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) { + // CHECK-LABEL: @test_mm256_maskz_fnmsubne_pbh + // CHECK: fneg + // CHECK: fneg + // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}) + // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}} + return _mm256_maskz_fnmsubne_pbh(__U, __A, __B, __C); +} + +__m128bh test_mm_fmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { + // CHECK-LABEL: @test_mm_fmaddne_pbh + // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x 
bfloat> %{{.*}}) + return _mm_fmaddne_pbh(__A, __B, __C); +} + +__m128bh test_mm_mask_fmaddne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) { + // CHECK-LABEL: @test_mm_mask_fmaddne_pbh + // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}) + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return _mm_mask_fmaddne_pbh(__A, __U, __B, __C); +} + +__m128bh test_mm_mask3_fmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) { + // CHECK-LABEL: @test_mm_mask3_fmaddne_pbh + // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}) + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return _mm_mask3_fmaddne_pbh(__A, __B, __C, __U); +} + +__m128bh test_mm_maskz_fmaddne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { + // CHECK-LABEL: @test_mm_maskz_fmaddne_pbh + // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}) + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return _mm_maskz_fmaddne_pbh(__U, __A, __B, __C); +} + +__m128bh test_mm_fmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { + // CHECK-LABEL: @test_mm_fmsubne_pbh + // CHECK: fneg + // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}) + return _mm_fmsubne_pbh(__A, __B, __C); +} + +__m128bh test_mm_mask_fmsubne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) { + // CHECK-LABEL: @test_mm_mask_fmsubne_pbh + // CHECK: fneg + // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}) + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return _mm_mask_fmsubne_pbh(__A, __U, __B, __C); +} + +__m128bh test_mm_mask3_fmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, 
__mmask8 __U) { + // CHECK-LABEL: @test_mm_mask3_fmsubne_pbh + // CHECK: fneg + // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}) + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return _mm_mask3_fmsubne_pbh(__A, __B, __C, __U); +} + +__m128bh test_mm_maskz_fmsubne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { + // CHECK-LABEL: @test_mm_maskz_fmsubne_pbh + // CHECK: fneg + // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}) + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return _mm_maskz_fmsubne_pbh(__U, __A, __B, __C); +} + +__m128bh test_mm_fnmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { + // CHECK-LABEL: @test_mm_fnmaddne_pbh + // CHECK: fneg + // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}) + return _mm_fnmaddne_pbh(__A, __B, __C); +} + +__m128bh test_mm_mask_fnmaddne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) { + // CHECK-LABEL: @test_mm_mask_fnmaddne_pbh + // CHECK: fneg + // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}) + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return _mm_mask_fnmaddne_pbh(__A, __U, __B, __C); +} + +__m128bh test_mm_mask3_fnmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) { + // CHECK-LABEL: @test_mm_mask3_fnmaddne_pbh + // CHECK: fneg + // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}) + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return _mm_mask3_fnmaddne_pbh(__A, __B, __C, __U); +} + +__m128bh test_mm_maskz_fnmaddne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { + // CHECK-LABEL: @test_mm_maskz_fnmaddne_pbh + // CHECK: fneg 
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}) + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return _mm_maskz_fnmaddne_pbh(__U, __A, __B, __C); +} + +__m128bh test_mm_fnmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { + // CHECK-LABEL: @test_mm_fnmsubne_pbh + // CHECK: fneg + // CHECK: fneg + // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}) + return _mm_fnmsubne_pbh(__A, __B, __C); +} + +__m128bh test_mm_mask_fnmsubne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) { + // CHECK-LABEL: @test_mm_mask_fnmsubne_pbh + // CHECK: fneg + // CHECK: fneg + // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}) + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return _mm_mask_fnmsubne_pbh(__A, __U, __B, __C); +} + +__m128bh test_mm_mask3_fnmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) { + // CHECK-LABEL: @test_mm_mask3_fnmsubne_pbh + // CHECK: fneg + // CHECK: fneg + // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}) + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return _mm_mask3_fnmsubne_pbh(__A, __B, __C, __U); +} + +__m128bh test_mm_maskz_fnmsubne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { + // CHECK-LABEL: @test_mm_maskz_fnmsubne_pbh + // CHECK: fneg + // CHECK: fneg + // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}) + // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}} + return _mm_maskz_fnmsubne_pbh(__U, __A, __B, __C); +} diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 8d000ed1e4f859..fafa5051bfb1b4 100644 --- 
a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -7219,3 +7219,256 @@ def int_x86_avx10_mask_vcvtneph2hf8s512 : ClangBuiltin<"__builtin_ia32_vcvtneph2 DefaultAttrsIntrinsic<[llvm_v32i8_ty], [llvm_v32f16_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; } + +//===----------------------------------------------------------------------===// +let TargetPrefix = "x86" in { +def int_x86_avx10_vaddnepbf16512 : ClangBuiltin<"__builtin_ia32_vaddnepbf16512">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vaddnepbf16256 : ClangBuiltin<"__builtin_ia32_vaddnepbf16256">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vaddnepbf16128 : ClangBuiltin<"__builtin_ia32_vaddnepbf16128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vsubnepbf16512 : ClangBuiltin<"__builtin_ia32_vsubnepbf16512">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vsubnepbf16256 : ClangBuiltin<"__builtin_ia32_vsubnepbf16256">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vsubnepbf16128 : ClangBuiltin<"__builtin_ia32_vsubnepbf16128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vmulnepbf16512 : ClangBuiltin<"__builtin_ia32_vmulnepbf16512">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vmulnepbf16256 : ClangBuiltin<"__builtin_ia32_vmulnepbf16256">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vmulnepbf16128 : ClangBuiltin<"__builtin_ia32_vmulnepbf16128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty], + [IntrNoMem]>; 
+def int_x86_avx10_vdivnepbf16512 : ClangBuiltin<"__builtin_ia32_vdivnepbf16512">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vdivnepbf16256 : ClangBuiltin<"__builtin_ia32_vdivnepbf16256">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vdivnepbf16128 : ClangBuiltin<"__builtin_ia32_vdivnepbf16128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vmaxpbf16512 : ClangBuiltin<"__builtin_ia32_vmaxpbf16512">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vmaxpbf16256 : ClangBuiltin<"__builtin_ia32_vmaxpbf16256">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vmaxpbf16128 : ClangBuiltin<"__builtin_ia32_vmaxpbf16128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vminpbf16512 : ClangBuiltin<"__builtin_ia32_vminpbf16512">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vminpbf16256 : ClangBuiltin<"__builtin_ia32_vminpbf16256">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vminpbf16128 : ClangBuiltin<"__builtin_ia32_vminpbf16128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vcomsbf16eq : ClangBuiltin<"__builtin_ia32_vcomsbf16eq">, + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vcomsbf16lt : ClangBuiltin<"__builtin_ia32_vcomsbf16lt">, + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v8bf16_ty,llvm_v8bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vcomsbf16le : ClangBuiltin<"__builtin_ia32_vcomsbf16le">, + 
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vcomsbf16gt : ClangBuiltin<"__builtin_ia32_vcomsbf16gt">, + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vcomsbf16ge : ClangBuiltin<"__builtin_ia32_vcomsbf16ge">, + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vcomsbf16neq : ClangBuiltin<"__builtin_ia32_vcomsbf16neq">, + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_mask_rsqrt_nepbf16_128 : ClangBuiltin<"__builtin_ia32_vrsqrtpbf16128_mask">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i8_ty], + [IntrNoMem]>; +def int_x86_avx10_mask_rsqrt_nepbf16_256 : ClangBuiltin<"__builtin_ia32_vrsqrtpbf16256_mask">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i16_ty], + [IntrNoMem]>; +def int_x86_avx10_mask_rsqrt_nepbf16_512 : ClangBuiltin<"__builtin_ia32_vrsqrtpbf16512_mask">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_x86_avx10_mask_rcp_nepbf16_128 : ClangBuiltin<"__builtin_ia32_vrcppbf16128_mask">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i8_ty], + [IntrNoMem]>; +def int_x86_avx10_mask_rcp_nepbf16_256 : ClangBuiltin<"__builtin_ia32_vrcppbf16256_mask">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i16_ty], + [IntrNoMem]>; +def int_x86_avx10_mask_rcp_nepbf16_512 : ClangBuiltin<"__builtin_ia32_vrcppbf16512_mask">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_x86_avx10_mask_reduce_nepbf16_128 : ClangBuiltin<"__builtin_ia32_vreducenepbf16128_mask">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_i32_ty, llvm_v8bf16_ty, llvm_i8_ty], + 
[IntrNoMem, ImmArg>]>; +def int_x86_avx10_mask_reduce_nepbf16_256 : ClangBuiltin<"__builtin_ia32_vreducenepbf16256_mask">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_i32_ty, llvm_v16bf16_ty, llvm_i16_ty], + [IntrNoMem, ImmArg>]>; +def int_x86_avx10_mask_reduce_nepbf16_512 : ClangBuiltin<"__builtin_ia32_vreducenepbf16512_mask">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_i32_ty, llvm_v32bf16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_x86_avx10_fpclass_nepbf16_128 : + DefaultAttrsIntrinsic<[llvm_v8i1_ty], [llvm_v8bf16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_x86_avx10_fpclass_nepbf16_256 : + DefaultAttrsIntrinsic<[llvm_v16i1_ty], [llvm_v16bf16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_x86_avx10_fpclass_nepbf16_512 : + DefaultAttrsIntrinsic<[llvm_v32i1_ty], [llvm_v32bf16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_x86_avx10_mask_getexp_nepbf16_128 : ClangBuiltin<"__builtin_ia32_vgetexppbf16128_mask">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i8_ty], + [IntrNoMem]>; +def int_x86_avx10_mask_getexp_nepbf16_256 : ClangBuiltin<"__builtin_ia32_vgetexppbf16256_mask">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i16_ty], + [IntrNoMem]>; +def int_x86_avx10_mask_getexp_nepbf16_512 : ClangBuiltin<"__builtin_ia32_vgetexppbf16512_mask">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_x86_avx10_mask_getmant_nepbf16_128 : ClangBuiltin<"__builtin_ia32_vgetmantpbf16128_mask">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_i32_ty, llvm_v8bf16_ty, llvm_i8_ty], + [IntrNoMem, ImmArg>]>; +def int_x86_avx10_mask_getmant_nepbf16_256 : ClangBuiltin<"__builtin_ia32_vgetmantpbf16256_mask">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_i32_ty, llvm_v16bf16_ty, llvm_i16_ty], + [IntrNoMem, ImmArg>]>; +def 
int_x86_avx10_mask_getmant_nepbf16_512 : ClangBuiltin<"__builtin_ia32_vgetmantpbf16512_mask">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_i32_ty, llvm_v32bf16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_x86_avx10_mask_rndscale_nepbf16_128 : ClangBuiltin<"__builtin_ia32_vrndscalenepbf16_128_mask">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_i32_ty, llvm_v8bf16_ty, llvm_i8_ty], + [IntrNoMem, ImmArg>]>; +def int_x86_avx10_mask_rndscale_nepbf16_256 : ClangBuiltin<"__builtin_ia32_vrndscalenepbf16_256_mask">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_i32_ty, llvm_v16bf16_ty, llvm_i16_ty], + [IntrNoMem, ImmArg>]>; +def int_x86_avx10_mask_rndscale_nepbf16_512 : ClangBuiltin<"__builtin_ia32_vrndscalenepbf16_mask">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_i32_ty, llvm_v32bf16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_x86_avx10_mask_scalef_nepbf16_128 : ClangBuiltin<"__builtin_ia32_vscalefpbf16128_mask">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i8_ty], + [IntrNoMem]>; +def int_x86_avx10_mask_scalef_nepbf16_256 : ClangBuiltin<"__builtin_ia32_vscalefpbf16256_mask">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i16_ty], + [IntrNoMem]>; +def int_x86_avx10_mask_scalef_nepbf16_512 : ClangBuiltin<"__builtin_ia32_vscalefpbf16512_mask">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_x86_avx10_vfmadd213nepbf16256 : ClangBuiltin<"__builtin_ia32_vfmadd213nepbf16256">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfmadd213nepbf16128 : ClangBuiltin<"__builtin_ia32_vfmadd213nepbf16128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ], + [IntrNoMem]>; +def 
int_x86_avx10_vfmadd132nepbf16512 : ClangBuiltin<"__builtin_ia32_vfmadd132nepbf16512">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfmadd132nepbf16256 : ClangBuiltin<"__builtin_ia32_vfmadd132nepbf16256">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfmadd132nepbf16128 : ClangBuiltin<"__builtin_ia32_vfmadd132nepbf16128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ], + [IntrNoMem]>; +def int_x86_avx10_vfmadd231nepbf16512 : ClangBuiltin<"__builtin_ia32_vfmadd231nepbf16512">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfmadd231nepbf16256 : ClangBuiltin<"__builtin_ia32_vfmadd231nepbf16256">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfmadd231nepbf16128 : ClangBuiltin<"__builtin_ia32_vfmadd231nepbf16128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ], + [IntrNoMem]>; +def int_x86_avx10_vfmsub213nepbf16512 : ClangBuiltin<"__builtin_ia32_vfmsub213nepbf16512">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfmsub213nepbf16256 : ClangBuiltin<"__builtin_ia32_vfmsub213nepbf16256">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfmsub213nepbf16128 : ClangBuiltin<"__builtin_ia32_vfmsub213nepbf16128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ], + [IntrNoMem]>; +def int_x86_avx10_vfmsub132nepbf16512 : ClangBuiltin<"__builtin_ia32_vfmsub132nepbf16512">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, 
llvm_v32bf16_ty, llvm_v32bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfmsub132nepbf16256 : ClangBuiltin<"__builtin_ia32_vfmsub132nepbf16256">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfmsub132nepbf16128 : ClangBuiltin<"__builtin_ia32_vfmsub132nepbf16128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ], + [IntrNoMem]>; +def int_x86_avx10_vfmsub231nepbf16512 : ClangBuiltin<"__builtin_ia32_vfmsub231nepbf16512">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfmsub231nepbf16256 : ClangBuiltin<"__builtin_ia32_vfmsub231nepbf16256">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfmsub231nepbf16128 : ClangBuiltin<"__builtin_ia32_vfmsub231nepbf16128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ], + [IntrNoMem]>; +def int_x86_avx10_vfnmadd213nepbf16512 : ClangBuiltin<"__builtin_ia32_vfnmadd213nepbf16512">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfnmadd213nepbf16256 : ClangBuiltin<"__builtin_ia32_vfnmadd213nepbf16256">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfnmadd213nepbf16128 : ClangBuiltin<"__builtin_ia32_vfnmadd213nepbf16128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ], + [IntrNoMem]>; +def int_x86_avx10_vfnmadd132nepbf16512 : ClangBuiltin<"__builtin_ia32_vfnmadd132nepbf16512">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfnmadd132nepbf16256 : ClangBuiltin<"__builtin_ia32_vfnmadd132nepbf16256">, + 
DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfnmadd132nepbf16128 : ClangBuiltin<"__builtin_ia32_vfnmadd132nepbf16128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ], + [IntrNoMem]>; +def int_x86_avx10_vfnmadd231nepbf16512 : ClangBuiltin<"__builtin_ia32_vfnmadd231nepbf16512">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfnmadd231nepbf16256 : ClangBuiltin<"__builtin_ia32_vfnmadd231nepbf16256">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfnmadd231nepbf16128 : ClangBuiltin<"__builtin_ia32_vfnmadd231nepbf16128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ], + [IntrNoMem]>; +def int_x86_avx10_vfnmsub213nepbf16512 : ClangBuiltin<"__builtin_ia32_vfnmsub213nepbf16512">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfnmsub213nepbf16256 : ClangBuiltin<"__builtin_ia32_vfnmsub213nepbf16256">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfnmsub213nepbf16128 : ClangBuiltin<"__builtin_ia32_vfnmsub213nepbf16128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ], + [IntrNoMem]>; +def int_x86_avx10_vfnmsub132nepbf16512 : ClangBuiltin<"__builtin_ia32_vfnmsub132nepbf16512">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfnmsub132nepbf16256 : ClangBuiltin<"__builtin_ia32_vfnmsub132nepbf16256">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty], + [IntrNoMem]>; +def 
int_x86_avx10_vfnmsub132nepbf16128 : ClangBuiltin<"__builtin_ia32_vfnmsub132nepbf16128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ], + [IntrNoMem]>; +def int_x86_avx10_vfnmsub231nepbf16512 : ClangBuiltin<"__builtin_ia32_vfnmsub231nepbf16512">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfnmsub231nepbf16256 : ClangBuiltin<"__builtin_ia32_vfnmsub231nepbf16256">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty], + [IntrNoMem]>; +def int_x86_avx10_vfnmsub231nepbf16128 : ClangBuiltin<"__builtin_ia32_vfnmsub231nepbf16128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ], + [IntrNoMem]>; +} diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 2b6b0ad16bcf76..03f49306c2b7b5 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -3305,11 +3305,13 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, if ((PatchedName.starts_with("cmp") || PatchedName.starts_with("vcmp")) && (PatchedName.ends_with("ss") || PatchedName.ends_with("sd") || PatchedName.ends_with("sh") || PatchedName.ends_with("ph") || - PatchedName.ends_with("ps") || PatchedName.ends_with("pd"))) { + PatchedName.ends_with("pbf16") || PatchedName.ends_with("ps") || + PatchedName.ends_with("pd"))) { bool IsVCMP = PatchedName[0] == 'v'; unsigned CCIdx = IsVCMP ? 4 : 3; + unsigned suffixLength = PatchedName.ends_with("pbf16") ? 
5 : 2; unsigned CC = StringSwitch( - PatchedName.slice(CCIdx, PatchedName.size() - 2)) + PatchedName.slice(CCIdx, PatchedName.size() - suffixLength)) .Case("eq", 0x00) .Case("eq_oq", 0x00) .Case("lt", 0x01) @@ -3372,6 +3374,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, PatchedName = "vcmpsh"; else if (PatchedName.ends_with("ph")) PatchedName = "vcmpph"; + else if (PatchedName.ends_with("pbf16")) + PatchedName = "vcmppbf16"; else llvm_unreachable("Unexpected suffix!"); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp index 33104524c5a890..8fcc1c10d93a04 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp @@ -167,6 +167,15 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI, case X86::VCMPPHZrmbi: case X86::VCMPPHZrmbik: case X86::VCMPPHZrrib: case X86::VCMPPHZrribk: case X86::VCMPSHZrrib_Int: case X86::VCMPSHZrrib_Intk: + case X86::VCMPPBF16Z128rmi: case X86::VCMPPBF16Z128rri: + case X86::VCMPPBF16Z256rmi: case X86::VCMPPBF16Z256rri: + case X86::VCMPPBF16Zrmi: case X86::VCMPPBF16Zrri: + case X86::VCMPPBF16Z128rmik: case X86::VCMPPBF16Z128rrik: + case X86::VCMPPBF16Z256rmik: case X86::VCMPPBF16Z256rrik: + case X86::VCMPPBF16Zrmik: case X86::VCMPPBF16Zrrik: + case X86::VCMPPBF16Z128rmbi: case X86::VCMPPBF16Z128rmbik: + case X86::VCMPPBF16Z256rmbi: case X86::VCMPPBF16Z256rmbik: + case X86::VCMPPBF16Zrmbi: case X86::VCMPPBF16Zrmbik: if (Imm >= 0 && Imm <= 31) { OS << '\t'; printCMPMnemonic(MI, /*IsVCMP*/true, OS); @@ -205,7 +214,8 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI, printwordmem(MI, CurOp--, OS); else printdwordmem(MI, CurOp--, OS); - } else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD) { + } else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD && + (Desc.TSFlags & X86II::OpMapMask) != X86II::TA) { assert((Desc.TSFlags & 
X86II::OpMapMask) != X86II::TA && "Unexpected op map!"); printqwordmem(MI, CurOp--, OS); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp index ad1f2dc532d1c2..e7ba13215feb59 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp @@ -309,6 +309,17 @@ void X86InstPrinterCommon::printCMPMnemonic(const MCInst *MI, bool IsVCmp, case X86::VCMPSHZrmi_Intk: case X86::VCMPSHZrri_Intk: OS << "sh\t"; break; + case X86::VCMPPBF16Z128rmi: case X86::VCMPPBF16Z128rri: + case X86::VCMPPBF16Z256rmi: case X86::VCMPPBF16Z256rri: + case X86::VCMPPBF16Zrmi: case X86::VCMPPBF16Zrri: + case X86::VCMPPBF16Z128rmik: case X86::VCMPPBF16Z128rrik: + case X86::VCMPPBF16Z256rmik: case X86::VCMPPBF16Z256rrik: + case X86::VCMPPBF16Zrmik: case X86::VCMPPBF16Zrrik: + case X86::VCMPPBF16Z128rmbi: case X86::VCMPPBF16Z128rmbik: + case X86::VCMPPBF16Z256rmbi: case X86::VCMPPBF16Z256rmbik: + case X86::VCMPPBF16Zrmbi: case X86::VCMPPBF16Zrmbik: + OS << "pbf16\t"; + break; } } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp index 7c8459a546516e..39600ffcadd8ee 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp @@ -146,6 +146,15 @@ bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS case X86::VCMPPHZrmbi: case X86::VCMPPHZrmbik: case X86::VCMPPHZrrib: case X86::VCMPPHZrribk: case X86::VCMPSHZrrib_Int: case X86::VCMPSHZrrib_Intk: + case X86::VCMPPBF16Z128rmi: case X86::VCMPPBF16Z128rri: + case X86::VCMPPBF16Z256rmi: case X86::VCMPPBF16Z256rri: + case X86::VCMPPBF16Zrmi: case X86::VCMPPBF16Zrri: + case X86::VCMPPBF16Z128rmik: case X86::VCMPPBF16Z128rrik: + case X86::VCMPPBF16Z256rmik: case X86::VCMPPBF16Z256rrik: + case X86::VCMPPBF16Zrmik: case 
X86::VCMPPBF16Zrrik: + case X86::VCMPPBF16Z128rmbi: case X86::VCMPPBF16Z128rmbik: + case X86::VCMPPBF16Z256rmbi: case X86::VCMPPBF16Z256rmbik: + case X86::VCMPPBF16Zrmbi: case X86::VCMPPBF16Zrmbik: if (Imm >= 0 && Imm <= 31) { OS << '\t'; printCMPMnemonic(MI, /*IsVCMP*/true, OS); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f7878c78a5231c..451881e1d61415 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2360,6 +2360,31 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CONCAT_VECTORS, MVT::v32bf16, Custom); } + if (!Subtarget.useSoftFloat() && Subtarget.hasAVX10_2()) { + addRegisterClass(MVT::v8bf16, &X86::VR128XRegClass); + addRegisterClass(MVT::v16bf16, &X86::VR256XRegClass); + addRegisterClass(MVT::v32bf16, &X86::VR512RegClass); + + setOperationAction(ISD::FADD, MVT::v32bf16, Legal); + setOperationAction(ISD::FSUB, MVT::v32bf16, Legal); + setOperationAction(ISD::FMUL, MVT::v32bf16, Legal); + setOperationAction(ISD::FDIV, MVT::v32bf16, Legal); + setOperationAction(ISD::FSQRT, MVT::v32bf16, Legal); + setOperationAction(ISD::FMA, MVT::v32bf16, Legal); + setOperationAction(ISD::SETCC, MVT::v32bf16, Custom); + if (Subtarget.hasVLX()) { + for (auto VT : {MVT::v8bf16, MVT::v16bf16}) { + setOperationAction(ISD::FADD, VT, Legal); + setOperationAction(ISD::FSUB, VT, Legal); + setOperationAction(ISD::FMUL, VT, Legal); + setOperationAction(ISD::FDIV, VT, Legal); + setOperationAction(ISD::FSQRT, VT, Legal); + setOperationAction(ISD::FMA, VT, Legal); + setOperationAction(ISD::SETCC, VT, Custom); + } + } + } + if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); @@ -12212,7 +12237,8 @@ static bool isShuffleFoldableLoad(SDValue V) { template static bool isSoftF16(T VT, const X86Subtarget &Subtarget) { T EltVT = 
VT.getScalarType(); - return EltVT == MVT::bf16 || (EltVT == MVT::f16 && !Subtarget.hasFP16()); + return (EltVT == MVT::bf16 && !Subtarget.hasAVX10_2()) || + (EltVT == MVT::f16 && !Subtarget.hasFP16()); } /// Try to lower insertion of a single element into a zero vector. @@ -23265,7 +23291,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, if (isFP) { MVT EltVT = Op0.getSimpleValueType().getVectorElementType(); - assert(EltVT == MVT::f16 || EltVT == MVT::f32 || EltVT == MVT::f64); + assert(EltVT == MVT::bf16 || EltVT == MVT::f16 || EltVT == MVT::f32 || + EltVT == MVT::f64); if (isSoftF16(EltVT, Subtarget)) return SDValue(); @@ -23282,7 +23309,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, Op0.getSimpleValueType().is512BitVector())) { #ifndef NDEBUG unsigned Num = VT.getVectorNumElements(); - assert(Num <= 16 || (Num == 32 && EltVT == MVT::f16)); + assert(Num <= 16 || + (Num == 32 && (EltVT == MVT::f16 || EltVT == MVT::bf16))); #endif Opc = IsStrict ? 
X86ISD::STRICT_CMPM : X86ISD::CMPM; } else { @@ -54159,7 +54187,8 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, EVT ScalarVT = VT.getScalarType(); if (((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA()) && - !(ScalarVT == MVT::f16 && Subtarget.hasFP16())) + !(ScalarVT == MVT::f16 && Subtarget.hasFP16()) && + !(ScalarVT == MVT::bf16 && Subtarget.hasAVX10_2())) return SDValue(); auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) { diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index a518347cfcd82e..b0eb210b687b19 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -910,3 +910,318 @@ multiclass avx10_convert_2op_nomb, AVX512XDIi8Base, T_MAP5, EVEX, EVEX_CD8<16, CD8VH>; + +//------------------------------------------------- +// AVX10 BF16 instructions +//------------------------------------------------- + +// VADDNEPBF16, VSUBNEPBF16, VMULNEPBF16, VDIVNEPBF16, VMAXPBF16, VMINPBF16 +multiclass avx10_fp_binopne_int_pbf16 opc, string OpcodeStr, + X86SchedWriteSizes sched, + bit IsCommutable = 0> { + let Predicates = [HasAVX10_2_512] in + defm Z : avx512_fp_packed("int_x86_avx10_"#OpcodeStr#"pbf16512"), + !cast("int_x86_avx10_"#OpcodeStr#"pbf16512"), + v32bf16_info, sched.PH.ZMM, IsCommutable>, EVEX_V512, + T_MAP5, PD, EVEX_CD8<16, CD8VF>; + let Predicates = [HasAVX10_2] in { + defm Z128 : avx512_fp_packed("int_x86_avx10_"#OpcodeStr#"pbf16128"), + !cast("int_x86_avx10_"#OpcodeStr#"pbf16128"), + v8bf16x_info, sched.PH.XMM, IsCommutable>, EVEX_V128, + T_MAP5, PD, EVEX_CD8<16, CD8VF>; + defm Z256 : avx512_fp_packed("int_x86_avx10_"#OpcodeStr#"pbf16256"), + !cast("int_x86_avx10_"#OpcodeStr#"pbf16256"), + v16bf16x_info, sched.PH.YMM, IsCommutable>, EVEX_V256, + T_MAP5, PD, EVEX_CD8<16, CD8VF>; + } +} + +multiclass avx10_fp_binop_pbf16 opc, string OpcodeStr, SDPatternOperator OpNode, + X86SchedWriteSizes sched, + bit IsCommutable = 0, + SDPatternOperator 
MaskOpNode = OpNode> { + let Predicates = [HasAVX10_2_512] in + defm Z : avx512_fp_packed, EVEX_V512, + T_MAP5, PD, EVEX_CD8<16, CD8VF>; + let Predicates = [HasAVX10_2] in { + defm Z128 : avx512_fp_packed, EVEX_V128, + T_MAP5, PD, EVEX_CD8<16, CD8VF>; + defm Z256 : avx512_fp_packed, EVEX_V256, + T_MAP5, PD, EVEX_CD8<16, CD8VF>; + } +} + +let Uses = [], mayRaiseFPException = 0 in { +defm VADDNEPBF16 : avx10_fp_binop_pbf16<0x58, "vaddne", fadd, SchedWriteFAddSizes, 1>; +defm VSUBNEPBF16 : avx10_fp_binop_pbf16<0x5C, "vsubne", fsub, SchedWriteFAddSizes, 0>; +defm VMULNEPBF16 : avx10_fp_binop_pbf16<0x59, "vmulne", fmul, SchedWriteFMulSizes, 1>; +defm VDIVNEPBF16 : avx10_fp_binop_pbf16<0x5E, "vdivne", fdiv, SchedWriteFDivSizes, 0>; +defm VMINPBF16 : avx10_fp_binopne_int_pbf16<0x5D, "vmin", SchedWriteFCmpSizes, 0>; +defm VMAXPBF16 : avx10_fp_binopne_int_pbf16<0x5F, "vmax", SchedWriteFCmpSizes, 0>; +} + +// VCOMSBF16 +let Uses = [], mayRaiseFPException = 0, + Defs = [EFLAGS], Predicates = [HasAVX10_2] in { + //TODO: Replace null_frag with X86fcmp to support lowering `fcmp oeq bfloat *` + //which may require extend supports on BFR16X, loadbf16, ... 
+ defm VCOMSBF16Z : sse12_ord_cmp<0x2F, FR16X, null_frag, bf16, f16mem, loadf16, + "comsbf16", SSEPackedSingle>, T_MAP5, PD, EVEX, + VEX_LIG, EVEX_CD8<16, CD8VT1>; + + let isCodeGenOnly = 1 in { + defm VCOMSBF16Z : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8bf16, f16mem, + sse_load_bf16, "comsbf16", SSEPackedSingle>, + T_MAP5, PD, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>; + } +} + +// VCMPPBF16 +multiclass avx10_vcmp_common_bf16 { + let mayRaiseFPException = 0 in { + defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, $src2, $src1", "$src1, $src2, $cc", + (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), + (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), + 1>, Sched<[sched]>; + + defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, + (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, $src2, $src1", "$src1, $src2, $cc", + (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), + timm:$cc), + (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), + timm:$cc)>, + Sched<[sched.Folded, sched.ReadAfterFold]>; + + defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, ${src2}"#_.BroadcastStr#", $src1", + "$src1, ${src2}"#_.BroadcastStr#", $cc", + (X86cmpm (_.VT _.RC:$src1), + (_.VT (_.BroadcastLdFrag addr:$src2)), + timm:$cc), + (X86cmpm_su (_.VT _.RC:$src1), + (_.VT (_.BroadcastLdFrag addr:$src2)), + timm:$cc)>, + EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; + } +} + +multiclass avx10_vcmp_bf16 { + let Predicates = [HasAVX10_2_512] in + defm Z : avx10_vcmp_common_bf16, EVEX_V512; + let Predicates = [HasAVX10_2] in { + defm Z128 : avx10_vcmp_common_bf16, EVEX_V128; + defm Z256 : avx10_vcmp_common_bf16, EVEX_V256; + } +} + +defm VCMPPBF16 : avx10_vcmp_bf16, + AVX512XDIi8Base, EVEX, VVVV, + EVEX_CD8<16, CD8VF>, TA; + + +// 
VSQRTNEPBF16 +multiclass avx10_sqrt_packed_bf16 opc, string OpcodeStr, + X86SchedWriteSizes sched> { + let Predicates = [HasAVX10_2_512] in + defm Z : avx512_sqrt_packed, + EVEX_V512, PD, T_MAP5, EVEX_CD8<16, CD8VF>; + let Predicates = [HasAVX10_2] in { + defm Z128 : avx512_sqrt_packed, + EVEX_V128, PD, T_MAP5, EVEX_CD8<16, CD8VF>; + defm Z256 : avx512_sqrt_packed, + EVEX_V256, PD, T_MAP5, EVEX_CD8<16, CD8VF>; + } +} + +let Uses = [], mayRaiseFPException = 0 in +defm VSQRTNEPBF16 : avx10_sqrt_packed_bf16<0x51, "vsqrtne", SchedWriteFSqrtSizes>; + +// VRSQRTPBF16, VRCPPBF16, VGETEXPPBF16 +multiclass avx10_fp14_pbf16 opc, string OpcodeStr, SDNode OpNode, + X86SchedWriteWidths sched> { + let Predicates = [HasAVX10_2_512] in + defm PBF16Z : avx512_fp14_p, + EVEX_V512; + let Predicates = [HasAVX10_2] in { + defm PBF16Z128 : avx512_fp14_p, + EVEX_V128; + defm PBF16Z256 : avx512_fp14_p, + EVEX_V256; + } +} + +defm VRSQRT : avx10_fp14_pbf16<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>, + T_MAP6, PS, EVEX_CD8<16, CD8VF>; +defm VRCP : avx10_fp14_pbf16<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>, + T_MAP6, PS, EVEX_CD8<16, CD8VF>; +defm VGETEXP : avx10_fp14_pbf16<0x42, "vgetexp", X86fgetexp, SchedWriteFRnd>, + T_MAP5, EVEX_CD8<16, CD8VF>; + +// VSCALEFPBF16 +multiclass avx10_fp_scalef_bf16 opc, string OpcodeStr, + X86SchedWriteWidths sched> { + let Predicates = [HasAVX10_2_512] in + defm Z : avx512_fp_scalef_p, + EVEX_V512, T_MAP6, PS, EVEX_CD8<16, CD8VF>; + let Predicates = [HasAVX10_2] in { + defm Z128 : avx512_fp_scalef_p, + EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PS; + defm Z256 : avx512_fp_scalef_p, + EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PS; + } +} + +let Uses = [], mayRaiseFPException = 0 in +defm VSCALEFPBF16 : avx10_fp_scalef_bf16<0x2C, "vscalef", SchedWriteFAdd>; + +// VREDUCENEPBF16, VRNDSCALENEPBF16, VGETMANTPBF16 +multiclass avx10_common_unary_fp_packed_imm_bf16 opc, SDPatternOperator OpNode, + SDPatternOperator MaskOpNode, X86SchedWriteWidths sched> { 
+ let Predicates = [HasAVX10_2_512] in + defm Z : avx512_unary_fp_packed_imm, EVEX_V512; + let Predicates = [HasAVX10_2] in { + defm Z128 : avx512_unary_fp_packed_imm, EVEX_V128; + defm Z256 : avx512_unary_fp_packed_imm, EVEX_V256; + } +} + +let Uses = [], mayRaiseFPException = 0 in { +defm VREDUCENEPBF16 : avx10_common_unary_fp_packed_imm_bf16<"vreducene", avx512vl_bf16_info, 0x56, + X86VReduce, X86VReduce, SchedWriteFRnd>, + AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>; +defm VRNDSCALENEPBF16 : avx10_common_unary_fp_packed_imm_bf16<"vrndscalene", avx512vl_bf16_info, 0x08, + X86any_VRndScale, X86VRndScale, SchedWriteFRnd>, + AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>; +defm VGETMANTPBF16 : avx10_common_unary_fp_packed_imm_bf16<"vgetmant", avx512vl_bf16_info, 0x26, + X86VGetMant, X86VGetMant, SchedWriteFRnd>, + AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>; +} + +// VFPCLASSPBF16 +multiclass avx10_fp_fpclass_bf16 opcVec, + X86SchedWriteWidths sched> { + let Predicates = [HasAVX10_2_512] in + defm Z : avx512_vector_fpclass>, EVEX_V512; + let Predicates = [HasAVX10_2] in { + defm Z128 : avx512_vector_fpclass>, EVEX_V128; + defm Z256 : avx512_vector_fpclass>, EVEX_V256; + } +} + +defm VFPCLASSPBF16 : avx10_fp_fpclass_bf16<"vfpclass", 0x66, SchedWriteFCmp>, + AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>; + +// VF[,N]M[ADD,SUB][132,213,231]NEPBF16 +multiclass avx10_fma3p_213_bf16 opc, string OpcodeStr, + SDPatternOperator OpNode, SDNode MaskOpNode, + X86SchedWriteWidths sched> { + let Predicates = [HasAVX10_2_512] in + defm Z : avx512_fma3p_213_rm, EVEX_V512, T_MAP6, PS, + EVEX_CD8<16, CD8VF>; + let Predicates = [HasAVX10_2] in { + defm Z128 : avx512_fma3p_213_rm, EVEX_V128, T_MAP6, PS, + EVEX_CD8<16, CD8VF>; + defm Z256 : avx512_fma3p_213_rm, EVEX_V256, T_MAP6, PS, + EVEX_CD8<16, CD8VF>; + } +} + +let Uses = [], mayRaiseFPException = 0 in { +defm VFMADD213NEPBF16 : avx10_fma3p_213_bf16<0xA8, "vfmadd213nepbf16", any_fma, + fma, SchedWriteFMA>; +defm 
VFMSUB213NEPBF16 : avx10_fma3p_213_bf16<0xAA, "vfmsub213nepbf16", X86any_Fmsub, + X86Fmsub, SchedWriteFMA>; +defm VFNMADD213NEPBF16 : avx10_fma3p_213_bf16<0xAC, "vfnmadd213nepbf16", X86any_Fnmadd, + X86Fnmadd, SchedWriteFMA>; +defm VFNMSUB213NEPBF16 : avx10_fma3p_213_bf16<0xAE, "vfnmsub213nepbf16", X86any_Fnmsub, + X86Fnmsub, SchedWriteFMA>; +} + +multiclass avx10_fma3p_231_bf16 opc, string OpcodeStr, + SDPatternOperator OpNode, SDNode MaskOpNode, + X86SchedWriteWidths sched> { + let Predicates = [HasAVX10_2_512] in + defm Z : avx512_fma3p_231_rm, EVEX_V512, T_MAP6, PS, + EVEX_CD8<16, CD8VF>; + let Predicates = [HasAVX10_2] in { + defm Z128 : avx512_fma3p_231_rm, EVEX_V128, T_MAP6, PS, + EVEX_CD8<16, CD8VF>; + defm Z256 : avx512_fma3p_231_rm, EVEX_V256, T_MAP6, PS, + EVEX_CD8<16, CD8VF>; + } +} + +let Uses = [], mayRaiseFPException = 0 in { +defm VFMADD231NEPBF16 : avx10_fma3p_231_bf16<0xB8, "vfmadd231nepbf16", any_fma, + fma, SchedWriteFMA>; +defm VFMSUB231NEPBF16 : avx10_fma3p_231_bf16<0xBA, "vfmsub231nepbf16", X86any_Fmsub, + X86Fmsub, SchedWriteFMA>; +defm VFNMADD231NEPBF16 : avx10_fma3p_231_bf16<0xBC, "vfnmadd231nepbf16", X86any_Fnmadd, + X86Fnmadd, SchedWriteFMA>; +defm VFNMSUB231NEPBF16 : avx10_fma3p_231_bf16<0xBE, "vfnmsub231nepbf16", X86any_Fnmsub, + X86Fnmsub, SchedWriteFMA>; +} + +multiclass avx10_fma3p_132_bf16 opc, string OpcodeStr, + SDPatternOperator OpNode, SDNode MaskOpNode, + X86SchedWriteWidths sched> { + let Predicates = [HasAVX10_2_512] in + defm Z : avx512_fma3p_132_rm, EVEX_V512, T_MAP6, PS, + EVEX_CD8<16, CD8VF>; + let Predicates = [HasAVX10_2] in { + defm Z128 : avx512_fma3p_132_rm, EVEX_V128, T_MAP6, PS, + EVEX_CD8<16, CD8VF>; + defm Z256 : avx512_fma3p_132_rm, EVEX_V256, T_MAP6, PS, + EVEX_CD8<16, CD8VF>; + } +} + +let Uses = [], mayRaiseFPException = 0 in { +defm VFMADD132NEPBF16 : avx10_fma3p_132_bf16<0x98, "vfmadd132nepbf16", any_fma, + fma, SchedWriteFMA>; +defm VFMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9A, "vfmsub132nepbf16", 
X86any_Fmsub, + X86Fmsub, SchedWriteFMA>; +defm VFNMADD132NEPBF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_Fnmadd, + X86Fnmadd, SchedWriteFMA>; +defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub, + X86Fnmsub, SchedWriteFMA>; +} diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 88d1eb59862433..c9885242131238 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2495,8 +2495,8 @@ multiclass avx512_scalar_fpclass opc, string OpcodeStr, // fpclass(reg_vec, broadcast(eltVt), imm) multiclass avx512_vector_fpclass opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, - string mem>{ - let ExeDomain = _.ExeDomain, Uses = [MXCSR] in { + string mem, list _Uses = [MXCSR]>{ + let ExeDomain = _.ExeDomain, Uses = _Uses in { def rr : AVX512, SDTCVecEltisVT<0, i1>, SDTCisVec<1>, SDTCisSameAs<2, 1>, SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>]>; + def X86MaskCmpMaskCC : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>, SDTCisVec<1>, SDTCisSameAs<2, 1>, @@ -1139,6 +1140,10 @@ def X86SubVBroadcastld256 : PatFrag<(ops node:$src), // only load a single element. // FIXME: We should add more canolicalizing in DAGCombine. Particulary removing // the simple_load case. 
+def sse_load_bf16 : PatFrags<(ops node:$ptr), + [(v8bf16 (simple_load node:$ptr)), + (v8bf16 (X86vzload16 node:$ptr)), + (v8bf16 (scalar_to_vector (loadf16 node:$ptr)))]>; def sse_load_f16 : PatFrags<(ops node:$ptr), [(v8f16 (simple_load node:$ptr)), (v8f16 (X86vzload16 node:$ptr)), diff --git a/llvm/lib/Target/X86/X86InstrUtils.td b/llvm/lib/Target/X86/X86InstrUtils.td index 208af630a352d7..531268b41da968 100644 --- a/llvm/lib/Target/X86/X86InstrUtils.td +++ b/llvm/lib/Target/X86/X86InstrUtils.td @@ -313,7 +313,7 @@ def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">; def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">; def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">; def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">; -def v32bf16_info: X86VectorVTInfo<32, bf16, VR512, "pbh">; +def v32bf16_info: X86VectorVTInfo<32, bf16, VR512, "pbf16">; def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">; def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">; @@ -323,7 +323,7 @@ def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">; def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">; def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">; def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">; -def v16bf16x_info: X86VectorVTInfo<16, bf16, VR256X, "pbh">; +def v16bf16x_info: X86VectorVTInfo<16, bf16, VR256X, "pbf16">; def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">; def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">; @@ -332,7 +332,7 @@ def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">; def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">; def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">; def v8f16x_info : X86VectorVTInfo<8, f16, VR128X, "ph">; -def v8bf16x_info : X86VectorVTInfo<8, bf16, VR128X, "pbh">; +def v8bf16x_info : X86VectorVTInfo<8, bf16, VR128X, "pbf16">; def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">; def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">; diff --git 
a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 68c1ce072549b9..4f39e66e22c238 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -389,6 +389,54 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx_vpermilvar_ps, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0), X86_INTRINSIC_DATA(avx_vpermilvar_ps_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0), + X86_INTRINSIC_DATA(avx10_fpclass_nepbf16_128, INTR_TYPE_2OP, + X86ISD::VFPCLASS, 0), + X86_INTRINSIC_DATA(avx10_fpclass_nepbf16_256, INTR_TYPE_2OP, + X86ISD::VFPCLASS, 0), + X86_INTRINSIC_DATA(avx10_fpclass_nepbf16_512, INTR_TYPE_2OP, + X86ISD::VFPCLASS, 0), + X86_INTRINSIC_DATA(avx10_mask_getexp_nepbf16_128, INTR_TYPE_1OP_MASK, + X86ISD::FGETEXP, 0), + X86_INTRINSIC_DATA(avx10_mask_getexp_nepbf16_256, INTR_TYPE_1OP_MASK, + X86ISD::FGETEXP, 0), + X86_INTRINSIC_DATA(avx10_mask_getexp_nepbf16_512, INTR_TYPE_1OP_MASK, + X86ISD::FGETEXP, 0), + X86_INTRINSIC_DATA(avx10_mask_getmant_nepbf16_128, INTR_TYPE_2OP_MASK, + X86ISD::VGETMANT, 0), + X86_INTRINSIC_DATA(avx10_mask_getmant_nepbf16_256, INTR_TYPE_2OP_MASK, + X86ISD::VGETMANT, 0), + X86_INTRINSIC_DATA(avx10_mask_getmant_nepbf16_512, INTR_TYPE_2OP_MASK, + X86ISD::VGETMANT, 0), + X86_INTRINSIC_DATA(avx10_mask_rcp_nepbf16_128, INTR_TYPE_1OP_MASK, + X86ISD::RCP14, 0), + X86_INTRINSIC_DATA(avx10_mask_rcp_nepbf16_256, INTR_TYPE_1OP_MASK, + X86ISD::RCP14, 0), + X86_INTRINSIC_DATA(avx10_mask_rcp_nepbf16_512, INTR_TYPE_1OP_MASK, + X86ISD::RCP14, 0), + X86_INTRINSIC_DATA(avx10_mask_reduce_nepbf16_128, INTR_TYPE_2OP_MASK, + X86ISD::VREDUCE, 0), + X86_INTRINSIC_DATA(avx10_mask_reduce_nepbf16_256, INTR_TYPE_2OP_MASK, + X86ISD::VREDUCE, 0), + X86_INTRINSIC_DATA(avx10_mask_reduce_nepbf16_512, INTR_TYPE_2OP_MASK, + X86ISD::VREDUCE, 0), + X86_INTRINSIC_DATA(avx10_mask_rndscale_nepbf16_128, INTR_TYPE_2OP_MASK, + X86ISD::VRNDSCALE, 0), + X86_INTRINSIC_DATA(avx10_mask_rndscale_nepbf16_256, 
INTR_TYPE_2OP_MASK, + X86ISD::VRNDSCALE, 0), + X86_INTRINSIC_DATA(avx10_mask_rndscale_nepbf16_512, INTR_TYPE_2OP_MASK, + X86ISD::VRNDSCALE, 0), + X86_INTRINSIC_DATA(avx10_mask_rsqrt_nepbf16_128, INTR_TYPE_1OP_MASK, + X86ISD::RSQRT14, 0), + X86_INTRINSIC_DATA(avx10_mask_rsqrt_nepbf16_256, INTR_TYPE_1OP_MASK, + X86ISD::RSQRT14, 0), + X86_INTRINSIC_DATA(avx10_mask_rsqrt_nepbf16_512, INTR_TYPE_1OP_MASK, + X86ISD::RSQRT14, 0), + X86_INTRINSIC_DATA(avx10_mask_scalef_nepbf16_128, INTR_TYPE_2OP_MASK, + X86ISD::SCALEF, 0), + X86_INTRINSIC_DATA(avx10_mask_scalef_nepbf16_256, INTR_TYPE_2OP_MASK, + X86ISD::SCALEF, 0), + X86_INTRINSIC_DATA(avx10_mask_scalef_nepbf16_512, INTR_TYPE_2OP_MASK, + X86ISD::SCALEF, 0), X86_INTRINSIC_DATA(avx10_mask_vcmppd256, CMP_MASK_CC, X86ISD::CMPMM, X86ISD::CMPMM_SAE), X86_INTRINSIC_DATA(avx10_mask_vcmpph256, CMP_MASK_CC, X86ISD::CMPMM, @@ -655,6 +703,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::FADD_RND), X86_INTRINSIC_DATA(avx10_vaddps256, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND), + X86_INTRINSIC_DATA(avx10_vcomsbf16eq, COMI, X86ISD::COMI, ISD::SETEQ), + X86_INTRINSIC_DATA(avx10_vcomsbf16ge, COMI, X86ISD::COMI, ISD::SETGE), + X86_INTRINSIC_DATA(avx10_vcomsbf16gt, COMI, X86ISD::COMI, ISD::SETGT), + X86_INTRINSIC_DATA(avx10_vcomsbf16le, COMI, X86ISD::COMI, ISD::SETLE), + X86_INTRINSIC_DATA(avx10_vcomsbf16lt, COMI, X86ISD::COMI, ISD::SETLT), + X86_INTRINSIC_DATA(avx10_vcomsbf16neq, COMI, X86ISD::COMI, ISD::SETNE), X86_INTRINSIC_DATA(avx10_vcvtne2ph2bf8128, INTR_TYPE_2OP, X86ISD::VCVTNE2PH2BF8, 0), X86_INTRINSIC_DATA(avx10_vcvtne2ph2bf8256, INTR_TYPE_2OP, diff --git a/llvm/test/CodeGen/X86/avx10.2-fma-commute.ll b/llvm/test/CodeGen/X86/avx10.2-fma-commute.ll new file mode 100644 index 00000000000000..c41e03ba637cbe --- /dev/null +++ b/llvm/test/CodeGen/X86/avx10.2-fma-commute.ll @@ -0,0 +1,1244 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s 
--mtriple=x86_64-unknown-unknown -mattr=avx10.2-512 | FileCheck %s + +define <8 x bfloat> @fma_123_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) { +; CHECK-LABEL: fma_123_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd213nepbf16 %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: retq + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) + ret <8 x bfloat> %a +} + +define <8 x bfloat> @fma_213_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) { +; CHECK-LABEL: fma_213_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd213nepbf16 %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: retq + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %x, <8 x bfloat> %z) + ret <8 x bfloat> %a +} + +define <8 x bfloat> @fma_231_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) { +; CHECK-LABEL: fma_231_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd231nepbf16 %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: retq + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %z, <8 x bfloat> %x) + ret <8 x bfloat> %a +} + +define <8 x bfloat> @fma_321_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) { +; CHECK-LABEL: fma_321_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd231nepbf16 %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: retq + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %y, <8 x bfloat> %x) + ret <8 x bfloat> %a +} + +define <8 x bfloat> @fma_132_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) { +; CHECK-LABEL: fma_132_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd213nepbf16 %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: retq + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %z, <8 x bfloat> %y) + ret <8 x bfloat> %a +} + +define <8 x bfloat> @fma_312_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) { +; CHECK-LABEL: fma_312_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd213nepbf16 %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: retq + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x 
bfloat> %z, <8 x bfloat> %x, <8 x bfloat> %y) + ret <8 x bfloat> %a +} + +define <8 x bfloat> @fma_load_123_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp) { +; CHECK-LABEL: fma_load_123_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %xmm1, %xmm0 +; CHECK-NEXT: retq + %z = load <8 x bfloat>, ptr %zp + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) + ret <8 x bfloat> %a +} + +define <8 x bfloat> @fma_load_213_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp) { +; CHECK-LABEL: fma_load_213_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %xmm1, %xmm0 +; CHECK-NEXT: retq + %z = load <8 x bfloat>, ptr %zp + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %x, <8 x bfloat> %z) + ret <8 x bfloat> %a +} + +define <8 x bfloat> @fma_load_231_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp) { +; CHECK-LABEL: fma_load_231_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %xmm1, %xmm0 +; CHECK-NEXT: retq + %z = load <8 x bfloat>, ptr %zp + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %z, <8 x bfloat> %x) + ret <8 x bfloat> %a +} + +define <8 x bfloat> @fma_load_321_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp) { +; CHECK-LABEL: fma_load_321_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %xmm1, %xmm0 +; CHECK-NEXT: retq + %z = load <8 x bfloat>, ptr %zp + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %y, <8 x bfloat> %x) + ret <8 x bfloat> %a +} + +define <8 x bfloat> @fma_load_132_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp) { +; CHECK-LABEL: fma_load_132_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %xmm1, %xmm0 +; CHECK-NEXT: retq + %z = load <8 x bfloat>, ptr %zp + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %z, <8 x bfloat> %y) + ret <8 x bfloat> %a +} + +define <8 x bfloat> @fma_load_312_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, 
ptr %zp) { +; CHECK-LABEL: fma_load_312_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %xmm1, %xmm0 +; CHECK-NEXT: retq + %z = load <8 x bfloat>, ptr %zp + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %x, <8 x bfloat> %y) + ret <8 x bfloat> %a +} + +define <8 x bfloat> @fma_mask_123_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { +; CHECK-LABEL: fma_mask_123_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} +; CHECK-NEXT: retq + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_mask_213_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { +; CHECK-LABEL: fma_mask_213_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %x, <8 x bfloat> %z) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_mask_231_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { +; CHECK-LABEL: fma_mask_231_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %z, <8 x bfloat> %x) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_mask_321_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { +; CHECK-LABEL: fma_mask_321_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 %xmm1, %xmm2, %xmm0 
{%k1} +; CHECK-NEXT: retq + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %y, <8 x bfloat> %x) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_mask_132_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { +; CHECK-LABEL: fma_mask_132_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd132nepbf16 %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %z, <8 x bfloat> %y) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_mask_312_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { +; CHECK-LABEL: fma_mask_312_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 %xmm1, %xmm2, %xmm0 {%k1} +; CHECK-NEXT: retq + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %x, <8 x bfloat> %y) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_maskz_123_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { +; CHECK-LABEL: fma_maskz_123_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} +; CHECK-NEXT: retq + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_maskz_213_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { +; CHECK-LABEL: fma_maskz_213_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} +; CHECK-NEXT: retq + %a 
= call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %x, <8 x bfloat> %z) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_maskz_231_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { +; CHECK-LABEL: fma_maskz_231_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 %xmm1, %xmm2, %xmm0 {%k1} {z} +; CHECK-NEXT: retq + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %z, <8 x bfloat> %x) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_maskz_321_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { +; CHECK-LABEL: fma_maskz_321_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 %xmm1, %xmm2, %xmm0 {%k1} {z} +; CHECK-NEXT: retq + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %y, <8 x bfloat> %x) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_maskz_132_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { +; CHECK-LABEL: fma_maskz_132_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 %xmm1, %xmm2, %xmm0 {%k1} {z} +; CHECK-NEXT: retq + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %z, <8 x bfloat> %y) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_maskz_312_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) { +; CHECK-LABEL: fma_maskz_312_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 %xmm1, %xmm2, %xmm0 {%k1} {z} +; 
CHECK-NEXT: retq + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %x, <8 x bfloat> %y) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_mask_load_123_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { +; CHECK-LABEL: fma_mask_load_123_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq + %z = load <8 x bfloat>, ptr %zp + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_mask_load_213_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { +; CHECK-LABEL: fma_mask_load_213_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq + %z = load <8 x bfloat>, ptr %zp + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %x, <8 x bfloat> %z) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_mask_load_231_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { +; CHECK-LABEL: fma_mask_load_231_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq + %z = load <8 x bfloat>, ptr %zp + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %z, <8 x bfloat> %x) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_mask_load_321_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { +; CHECK-LABEL: fma_mask_load_321_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd 
%esi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq + %z = load <8 x bfloat>, ptr %zp + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %y, <8 x bfloat> %x) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_mask_load_132_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { +; CHECK-LABEL: fma_mask_load_132_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq + %z = load <8 x bfloat>, ptr %zp + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %z, <8 x bfloat> %y) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_mask_load_312_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { +; CHECK-LABEL: fma_mask_load_312_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq + %z = load <8 x bfloat>, ptr %zp + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %x, <8 x bfloat> %y) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_maskz_load_123_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { +; CHECK-LABEL: fma_maskz_load_123_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %xmm1, %xmm0 {%k1} {z} +; CHECK-NEXT: retq + %z = load <8 x bfloat>, ptr %zp + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_maskz_load_213_v8bf16(<8 x bfloat> %x, <8 x 
bfloat> %y, ptr %zp, i8 %mask) { +; CHECK-LABEL: fma_maskz_load_213_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %xmm1, %xmm0 {%k1} {z} +; CHECK-NEXT: retq + %z = load <8 x bfloat>, ptr %zp + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %x, <8 x bfloat> %z) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_maskz_load_231_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { +; CHECK-LABEL: fma_maskz_load_231_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %xmm1, %xmm0 {%k1} {z} +; CHECK-NEXT: retq + %z = load <8 x bfloat>, ptr %zp + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %z, <8 x bfloat> %x) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_maskz_load_321_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { +; CHECK-LABEL: fma_maskz_load_321_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %xmm1, %xmm0 {%k1} {z} +; CHECK-NEXT: retq + %z = load <8 x bfloat>, ptr %zp + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %y, <8 x bfloat> %x) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_maskz_load_132_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { +; CHECK-LABEL: fma_maskz_load_132_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %xmm1, %xmm0 {%k1} {z} +; CHECK-NEXT: retq + %z = load <8 x bfloat>, ptr %zp + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %z, <8 x bfloat> %y) + %b = bitcast i8 %mask to <8 x i1> + 
%c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer + ret <8 x bfloat> %c +} + +define <8 x bfloat> @fma_maskz_load_312_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) { +; CHECK-LABEL: fma_maskz_load_312_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %xmm1, %xmm0 {%k1} {z} +; CHECK-NEXT: retq + %z = load <8 x bfloat>, ptr %zp + %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %x, <8 x bfloat> %y) + %b = bitcast i8 %mask to <8 x i1> + %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer + ret <8 x bfloat> %c +} + +define <16 x bfloat> @fma_123_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) { +; CHECK-LABEL: fma_123_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd213nepbf16 %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: retq + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) + ret <16 x bfloat> %a +} + +define <16 x bfloat> @fma_213_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) { +; CHECK-LABEL: fma_213_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd213nepbf16 %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: retq + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %x, <16 x bfloat> %z) + ret <16 x bfloat> %a +} + +define <16 x bfloat> @fma_231_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) { +; CHECK-LABEL: fma_231_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd231nepbf16 %ymm1, %ymm2, %ymm0 +; CHECK-NEXT: retq + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %z, <16 x bfloat> %x) + ret <16 x bfloat> %a +} + +define <16 x bfloat> @fma_321_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) { +; CHECK-LABEL: fma_321_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd231nepbf16 %ymm1, %ymm2, %ymm0 +; CHECK-NEXT: retq + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %y, <16 x bfloat> %x) + ret 
<16 x bfloat> %a +} + +define <16 x bfloat> @fma_132_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) { +; CHECK-LABEL: fma_132_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd213nepbf16 %ymm1, %ymm2, %ymm0 +; CHECK-NEXT: retq + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %z, <16 x bfloat> %y) + ret <16 x bfloat> %a +} + +define <16 x bfloat> @fma_312_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) { +; CHECK-LABEL: fma_312_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd213nepbf16 %ymm1, %ymm2, %ymm0 +; CHECK-NEXT: retq + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %x, <16 x bfloat> %y) + ret <16 x bfloat> %a +} + +define <16 x bfloat> @fma_load_123_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp) { +; CHECK-LABEL: fma_load_123_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %ymm1, %ymm0 +; CHECK-NEXT: retq + %z = load <16 x bfloat>, ptr %zp + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) + ret <16 x bfloat> %a +} + +define <16 x bfloat> @fma_load_213_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp) { +; CHECK-LABEL: fma_load_213_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %ymm1, %ymm0 +; CHECK-NEXT: retq + %z = load <16 x bfloat>, ptr %zp + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %x, <16 x bfloat> %z) + ret <16 x bfloat> %a +} + +define <16 x bfloat> @fma_load_231_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp) { +; CHECK-LABEL: fma_load_231_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %ymm1, %ymm0 +; CHECK-NEXT: retq + %z = load <16 x bfloat>, ptr %zp + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %z, <16 x bfloat> %x) + ret <16 x bfloat> %a +} + +define <16 x bfloat> @fma_load_321_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp) { +; CHECK-LABEL: fma_load_321_v16bf16: +; CHECK: # 
%bb.0: +; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %ymm1, %ymm0 +; CHECK-NEXT: retq + %z = load <16 x bfloat>, ptr %zp + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %y, <16 x bfloat> %x) + ret <16 x bfloat> %a +} + +define <16 x bfloat> @fma_load_132_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp) { +; CHECK-LABEL: fma_load_132_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %ymm1, %ymm0 +; CHECK-NEXT: retq + %z = load <16 x bfloat>, ptr %zp + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %z, <16 x bfloat> %y) + ret <16 x bfloat> %a +} + +define <16 x bfloat> @fma_load_312_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp) { +; CHECK-LABEL: fma_load_312_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %ymm1, %ymm0 +; CHECK-NEXT: retq + %z = load <16 x bfloat>, ptr %zp + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %x, <16 x bfloat> %y) + ret <16 x bfloat> %a +} + +define <16 x bfloat> @fma_mask_123_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { +; CHECK-LABEL: fma_mask_123_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} +; CHECK-NEXT: retq + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_mask_213_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { +; CHECK-LABEL: fma_mask_213_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %x, <16 x bfloat> %z) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x + ret <16 x 
bfloat> %c +} + +define <16 x bfloat> @fma_mask_231_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { +; CHECK-LABEL: fma_mask_231_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 %ymm2, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %z, <16 x bfloat> %x) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_mask_321_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { +; CHECK-LABEL: fma_mask_321_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 %ymm1, %ymm2, %ymm0 {%k1} +; CHECK-NEXT: retq + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %y, <16 x bfloat> %x) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_mask_132_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { +; CHECK-LABEL: fma_mask_132_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd132nepbf16 %ymm2, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %z, <16 x bfloat> %y) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_mask_312_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { +; CHECK-LABEL: fma_mask_312_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 %ymm1, %ymm2, %ymm0 {%k1} +; CHECK-NEXT: retq + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %x, <16 x bfloat> %y) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x 
bfloat> %x + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_maskz_123_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { +; CHECK-LABEL: fma_maskz_123_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} +; CHECK-NEXT: retq + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_maskz_213_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { +; CHECK-LABEL: fma_maskz_213_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} +; CHECK-NEXT: retq + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %x, <16 x bfloat> %z) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_maskz_231_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { +; CHECK-LABEL: fma_maskz_231_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 %ymm1, %ymm2, %ymm0 {%k1} {z} +; CHECK-NEXT: retq + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %z, <16 x bfloat> %x) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_maskz_321_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { +; CHECK-LABEL: fma_maskz_321_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 %ymm1, %ymm2, %ymm0 {%k1} {z} +; CHECK-NEXT: retq + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %y, <16 x bfloat> %x) + %b = 
bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_maskz_132_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { +; CHECK-LABEL: fma_maskz_132_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 %ymm1, %ymm2, %ymm0 {%k1} {z} +; CHECK-NEXT: retq + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %z, <16 x bfloat> %y) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_maskz_312_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) { +; CHECK-LABEL: fma_maskz_312_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 %ymm1, %ymm2, %ymm0 {%k1} {z} +; CHECK-NEXT: retq + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %x, <16 x bfloat> %y) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_mask_load_123_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { +; CHECK-LABEL: fma_mask_load_123_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq + %z = load <16 x bfloat>, ptr %zp + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_mask_load_213_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { +; CHECK-LABEL: fma_mask_load_213_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %ymm1, %ymm0 {%k1} +; CHECK-NEXT: 
retq + %z = load <16 x bfloat>, ptr %zp + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %x, <16 x bfloat> %z) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_mask_load_231_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { +; CHECK-LABEL: fma_mask_load_231_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq + %z = load <16 x bfloat>, ptr %zp + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %z, <16 x bfloat> %x) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_mask_load_321_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { +; CHECK-LABEL: fma_mask_load_321_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq + %z = load <16 x bfloat>, ptr %zp + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %y, <16 x bfloat> %x) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_mask_load_132_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { +; CHECK-LABEL: fma_mask_load_132_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq + %z = load <16 x bfloat>, ptr %zp + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %z, <16 x bfloat> %y) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_mask_load_312_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { +; 
CHECK-LABEL: fma_mask_load_312_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq + %z = load <16 x bfloat>, ptr %zp + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %x, <16 x bfloat> %y) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_maskz_load_123_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { +; CHECK-LABEL: fma_maskz_load_123_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %ymm1, %ymm0 {%k1} {z} +; CHECK-NEXT: retq + %z = load <16 x bfloat>, ptr %zp + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_maskz_load_213_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { +; CHECK-LABEL: fma_maskz_load_213_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %ymm1, %ymm0 {%k1} {z} +; CHECK-NEXT: retq + %z = load <16 x bfloat>, ptr %zp + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %x, <16 x bfloat> %z) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_maskz_load_231_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { +; CHECK-LABEL: fma_maskz_load_231_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %ymm1, %ymm0 {%k1} {z} +; CHECK-NEXT: retq + %z = load <16 x bfloat>, ptr %zp + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %z, <16 x bfloat> %x) + %b = bitcast i16 %mask to 
<16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_maskz_load_321_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { +; CHECK-LABEL: fma_maskz_load_321_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %ymm1, %ymm0 {%k1} {z} +; CHECK-NEXT: retq + %z = load <16 x bfloat>, ptr %zp + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %y, <16 x bfloat> %x) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_maskz_load_132_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { +; CHECK-LABEL: fma_maskz_load_132_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %ymm1, %ymm0 {%k1} {z} +; CHECK-NEXT: retq + %z = load <16 x bfloat>, ptr %zp + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %z, <16 x bfloat> %y) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer + ret <16 x bfloat> %c +} + +define <16 x bfloat> @fma_maskz_load_312_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) { +; CHECK-LABEL: fma_maskz_load_312_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %ymm1, %ymm0 {%k1} {z} +; CHECK-NEXT: retq + %z = load <16 x bfloat>, ptr %zp + %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %x, <16 x bfloat> %y) + %b = bitcast i16 %mask to <16 x i1> + %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer + ret <16 x bfloat> %c +} + +define <32 x bfloat> @fma_123_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) { +; CHECK-LABEL: fma_123_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd213nepbf16 %zmm2, %zmm1, %zmm0 +; 
CHECK-NEXT: retq + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) + ret <32 x bfloat> %a +} + +define <32 x bfloat> @fma_213_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) { +; CHECK-LABEL: fma_213_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd213nepbf16 %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: retq + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %x, <32 x bfloat> %z) + ret <32 x bfloat> %a +} + +define <32 x bfloat> @fma_231_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) { +; CHECK-LABEL: fma_231_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd231nepbf16 %zmm1, %zmm2, %zmm0 +; CHECK-NEXT: retq + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %z, <32 x bfloat> %x) + ret <32 x bfloat> %a +} + +define <32 x bfloat> @fma_321_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) { +; CHECK-LABEL: fma_321_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd231nepbf16 %zmm1, %zmm2, %zmm0 +; CHECK-NEXT: retq + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %y, <32 x bfloat> %x) + ret <32 x bfloat> %a +} + +define <32 x bfloat> @fma_132_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) { +; CHECK-LABEL: fma_132_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd213nepbf16 %zmm1, %zmm2, %zmm0 +; CHECK-NEXT: retq + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %z, <32 x bfloat> %y) + ret <32 x bfloat> %a +} + +define <32 x bfloat> @fma_312_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) { +; CHECK-LABEL: fma_312_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd213nepbf16 %zmm1, %zmm2, %zmm0 +; CHECK-NEXT: retq + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %x, <32 x bfloat> %y) + ret <32 x bfloat> %a +} + +define <32 x bfloat> @fma_load_123_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp) { +; CHECK-LABEL: fma_load_123_v32bf16: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %zmm1, %zmm0 +; CHECK-NEXT: retq + %z = load <32 x bfloat>, ptr %zp + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) + ret <32 x bfloat> %a +} + +define <32 x bfloat> @fma_load_213_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp) { +; CHECK-LABEL: fma_load_213_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %zmm1, %zmm0 +; CHECK-NEXT: retq + %z = load <32 x bfloat>, ptr %zp + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %x, <32 x bfloat> %z) + ret <32 x bfloat> %a +} + +define <32 x bfloat> @fma_load_231_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp) { +; CHECK-LABEL: fma_load_231_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %zmm1, %zmm0 +; CHECK-NEXT: retq + %z = load <32 x bfloat>, ptr %zp + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %z, <32 x bfloat> %x) + ret <32 x bfloat> %a +} + +define <32 x bfloat> @fma_load_321_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp) { +; CHECK-LABEL: fma_load_321_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %zmm1, %zmm0 +; CHECK-NEXT: retq + %z = load <32 x bfloat>, ptr %zp + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %y, <32 x bfloat> %x) + ret <32 x bfloat> %a +} + +define <32 x bfloat> @fma_load_132_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp) { +; CHECK-LABEL: fma_load_132_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %zmm1, %zmm0 +; CHECK-NEXT: retq + %z = load <32 x bfloat>, ptr %zp + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %z, <32 x bfloat> %y) + ret <32 x bfloat> %a +} + +define <32 x bfloat> @fma_load_312_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp) { +; CHECK-LABEL: fma_load_312_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %zmm1, %zmm0 +; CHECK-NEXT: retq 
+ %z = load <32 x bfloat>, ptr %zp + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %x, <32 x bfloat> %y) + ret <32 x bfloat> %a +} + +define <32 x bfloat> @fma_mask_123_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { +; CHECK-LABEL: fma_mask_123_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} +; CHECK-NEXT: retq + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_mask_213_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { +; CHECK-LABEL: fma_mask_213_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %x, <32 x bfloat> %z) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_mask_231_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { +; CHECK-LABEL: fma_mask_231_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 %zmm2, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %z, <32 x bfloat> %x) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_mask_321_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { +; CHECK-LABEL: fma_mask_321_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 %zmm1, %zmm2, %zmm0 {%k1} +; CHECK-NEXT: retq + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x 
bfloat> %z, <32 x bfloat> %y, <32 x bfloat> %x) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_mask_132_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { +; CHECK-LABEL: fma_mask_132_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd132nepbf16 %zmm2, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %z, <32 x bfloat> %y) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_mask_312_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { +; CHECK-LABEL: fma_mask_312_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 %zmm1, %zmm2, %zmm0 {%k1} +; CHECK-NEXT: retq + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %x, <32 x bfloat> %y) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_maskz_123_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { +; CHECK-LABEL: fma_maskz_123_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: retq + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_maskz_213_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { +; CHECK-LABEL: fma_maskz_213_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: retq + %a = 
call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %x, <32 x bfloat> %z) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_maskz_231_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { +; CHECK-LABEL: fma_maskz_231_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 %zmm1, %zmm2, %zmm0 {%k1} {z} +; CHECK-NEXT: retq + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %z, <32 x bfloat> %x) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_maskz_321_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { +; CHECK-LABEL: fma_maskz_321_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 %zmm1, %zmm2, %zmm0 {%k1} {z} +; CHECK-NEXT: retq + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %y, <32 x bfloat> %x) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_maskz_132_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { +; CHECK-LABEL: fma_maskz_132_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 %zmm1, %zmm2, %zmm0 {%k1} {z} +; CHECK-NEXT: retq + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %z, <32 x bfloat> %y) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_maskz_312_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) { +; CHECK-LABEL: fma_maskz_312_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %edi, 
%k1 +; CHECK-NEXT: vfmadd213nepbf16 %zmm1, %zmm2, %zmm0 {%k1} {z} +; CHECK-NEXT: retq + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %x, <32 x bfloat> %y) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_mask_load_123_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { +; CHECK-LABEL: fma_mask_load_123_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq + %z = load <32 x bfloat>, ptr %zp + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_mask_load_213_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { +; CHECK-LABEL: fma_mask_load_213_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq + %z = load <32 x bfloat>, ptr %zp + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %x, <32 x bfloat> %z) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_mask_load_231_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { +; CHECK-LABEL: fma_mask_load_231_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq + %z = load <32 x bfloat>, ptr %zp + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %z, <32 x bfloat> %x) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x + ret <32 x bfloat> %c +} + +define <32 x bfloat> 
@fma_mask_load_321_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { +; CHECK-LABEL: fma_mask_load_321_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq + %z = load <32 x bfloat>, ptr %zp + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %y, <32 x bfloat> %x) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_mask_load_132_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { +; CHECK-LABEL: fma_mask_load_132_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq + %z = load <32 x bfloat>, ptr %zp + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %z, <32 x bfloat> %y) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_mask_load_312_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { +; CHECK-LABEL: fma_mask_load_312_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq + %z = load <32 x bfloat>, ptr %zp + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %x, <32 x bfloat> %y) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_maskz_load_123_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { +; CHECK-LABEL: fma_maskz_load_123_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: retq + %z = load <32 x bfloat>, ptr %zp + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> 
%y, <32 x bfloat> %z) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_maskz_load_213_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { +; CHECK-LABEL: fma_maskz_load_213_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: retq + %z = load <32 x bfloat>, ptr %zp + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %x, <32 x bfloat> %z) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_maskz_load_231_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { +; CHECK-LABEL: fma_maskz_load_231_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: retq + %z = load <32 x bfloat>, ptr %zp + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %z, <32 x bfloat> %x) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_maskz_load_321_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { +; CHECK-LABEL: fma_maskz_load_321_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: retq + %z = load <32 x bfloat>, ptr %zp + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %y, <32 x bfloat> %x) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_maskz_load_132_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { +; CHECK-LABEL: fma_maskz_load_132_v32bf16: +; 
CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: retq + %z = load <32 x bfloat>, ptr %zp + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %z, <32 x bfloat> %y) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer + ret <32 x bfloat> %c +} + +define <32 x bfloat> @fma_maskz_load_312_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) { +; CHECK-LABEL: fma_maskz_load_312_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: retq + %z = load <32 x bfloat>, ptr %zp + %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %x, <32 x bfloat> %y) + %b = bitcast i32 %mask to <32 x i1> + %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer + ret <32 x bfloat> %c +} diff --git a/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll b/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll new file mode 100644 index 00000000000000..33c40ac6bb32c7 --- /dev/null +++ b/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll @@ -0,0 +1,587 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86 + +define <32 x bfloat> @test_int_x86_avx10_vaddnepbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_vaddnepbf16512: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x58,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = fadd <32 x bfloat> %x1, %x2 + ret <32 x bfloat> %res +} + +define <32 x bfloat> 
@test_int_x86_avx10_mask_add_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_mask_add_nepbf16_512: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vaddnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x58,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_mask_add_nepbf16_512: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vaddnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x58,0xc2] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i32 %msk to <32 x i1> + %res0 = fadd <32 x bfloat> %x1, %x2 + %res = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> %src + ret <32 x bfloat> %res +} + +define <32 x bfloat> @test_int_x86_avx10_maskz_add_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_maskz_add_nepbf16_512: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vaddnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x58,0xc2] +; X64-NEXT: vaddnepbf16 (%rsi), %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x58,0x0e] +; X64-NEXT: vaddnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x58,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_maskz_add_nepbf16_512: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vaddnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x58,0xc2] +; X86-NEXT: vaddnepbf16 (%eax), %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x58,0x08] +; X86-NEXT: vaddnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x58,0xc1] 
+; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i32 %msk to <32 x i1> + %val = load <32 x bfloat>, ptr %ptr + %res0 = fadd <32 x bfloat> %x1, %x2 + %res1 = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> zeroinitializer + %t2 = fadd <32 x bfloat> %x1, %val + %res2 = select <32 x i1> %mask, <32 x bfloat> %t2, <32 x bfloat> zeroinitializer + %res3 = fadd <32 x bfloat> %res1, %res2 + ret <32 x bfloat> %res3 +} + +define <32 x bfloat> @test_int_x86_avx10_sub_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_sub_nepbf16_512: +; CHECK: # %bb.0: +; CHECK-NEXT: vsubnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x5c,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = fsub <32 x bfloat> %x1, %x2 + ret <32 x bfloat> %res +} + +define <32 x bfloat> @test_int_x86_avx10_mask_sub_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_mask_sub_nepbf16_512: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vsubnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x5c,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_mask_sub_nepbf16_512: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vsubnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x5c,0xc2] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i32 %msk to <32 x i1> + %res0 = fsub <32 x bfloat> %x1, %x2 + %res = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> %src + ret <32 x bfloat> %res +} + +define <32 x bfloat> @test_int_x86_avx10_maskz_sub_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_maskz_sub_nepbf16_512: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vsubnepbf16 
%zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x5c,0xc2] +; X64-NEXT: vsubnepbf16 (%rsi), %zmm1, %zmm1 # encoding: [0x62,0xf5,0x75,0x48,0x5c,0x0e] +; X64-NEXT: vsubnepbf16 %zmm1, %zmm0, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x5c,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_maskz_sub_nepbf16_512: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vsubnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x5c,0xc2] +; X86-NEXT: vsubnepbf16 (%eax), %zmm1, %zmm1 # encoding: [0x62,0xf5,0x75,0x48,0x5c,0x08] +; X86-NEXT: vsubnepbf16 %zmm1, %zmm0, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x5c,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i32 %msk to <32 x i1> + %val = load <32 x bfloat>, ptr %ptr + %res0 = fsub <32 x bfloat> %x1, %x2 + %res1 = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> zeroinitializer + %t2 = fsub <32 x bfloat> %x1, %val + %res2 = select <32 x i1> %mask, <32 x bfloat> %t2, <32 x bfloat> zeroinitializer + %res3 = fsub <32 x bfloat> %res1, %res2 + ret <32 x bfloat> %res3 +} + +declare <32 x bfloat> @llvm.x86.avx10.vmulnepbf16512(<32 x bfloat>, <32 x bfloat>) + +define <32 x bfloat> @test_int_x86_avx10_mul_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_mul_nepbf16_512: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x59,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = fmul <32 x bfloat> %x1, %x2 + ret <32 x bfloat> %res +} + +define <32 x bfloat> @test_int_x86_avx10_mask_mul_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_mask_mul_nepbf16_512: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; 
X64-NEXT: vmulnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x59,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_mask_mul_nepbf16_512: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vmulnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x59,0xc2] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i32 %msk to <32 x i1> + %res0 = fmul <32 x bfloat> %x1, %x2 + %res = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> %src + ret <32 x bfloat> %res +} + +define <32 x bfloat> @test_int_x86_avx10_maskz_mul_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_maskz_mul_nepbf16_512: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vmulnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x59,0xc2] +; X64-NEXT: vmulnepbf16 (%rsi), %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x59,0x0e] +; X64-NEXT: vmulnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x59,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_maskz_mul_nepbf16_512: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vmulnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x59,0xc2] +; X86-NEXT: vmulnepbf16 (%eax), %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x59,0x08] +; X86-NEXT: vmulnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x59,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i32 %msk to <32 x i1> + %val = load <32 x bfloat>, ptr %ptr + %res0 = fmul <32 x bfloat> %x1, %x2 + %res1 = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> zeroinitializer + %t2 = fmul <32 x 
bfloat> %x1, %val + %res2 = select <32 x i1> %mask, <32 x bfloat> %t2, <32 x bfloat> zeroinitializer + %res3 = fmul <32 x bfloat> %res1, %res2 + ret <32 x bfloat> %res3 +} + +define <32 x bfloat> @test_int_x86_avx10_div_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_div_nepbf16_512: +; CHECK: # %bb.0: +; CHECK-NEXT: vdivnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x5e,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = fdiv <32 x bfloat> %x1, %x2 + ret <32 x bfloat> %res +} + +define <32 x bfloat> @test_int_x86_avx10_mask_div_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_mask_div_nepbf16_512: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vdivnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x5e,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_mask_div_nepbf16_512: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vdivnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x5e,0xc2] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i32 %msk to <32 x i1> + %res0 = fdiv <32 x bfloat> %x1, %x2 + %res = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> %src + ret <32 x bfloat> %res +} + +define <32 x bfloat> @test_int_x86_avx10_maskz_div_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_maskz_div_nepbf16_512: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vdivnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x5e,0xc2] +; X64-NEXT: vdivnepbf16 (%rsi), %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x5e,0x0e] +; X64-NEXT: vdivnepbf16 %zmm1, %zmm0, %zmm0 # encoding: 
[0x62,0xf5,0x7d,0x48,0x5e,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_maskz_div_nepbf16_512: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vdivnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x5e,0xc2] +; X86-NEXT: vdivnepbf16 (%eax), %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x5e,0x08] +; X86-NEXT: vdivnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x5e,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i32 %msk to <32 x i1> + %val = load <32 x bfloat>, ptr %ptr + %res0 = fdiv <32 x bfloat> %x1, %x2 + %res1 = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> zeroinitializer + %t2 = fdiv <32 x bfloat> %x1, %val + %res2 = select <32 x i1> %mask, <32 x bfloat> %t2, <32 x bfloat> zeroinitializer + %res3 = fdiv <32 x bfloat> %res1, %res2 + ret <32 x bfloat> %res3 +} + +define i32 @test_int_x86_avx10_vcmppbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_vcmppbf16512: +; CHECK: # %bb.0: +; CHECK-NEXT: vcmpunordpbf16 %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7f,0x48,0xc2,0xc1,0x03] +; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %1 = fcmp uno <32 x bfloat> %x1, %x2 + %res = bitcast <32 x i1> %1 to i32 + ret i32 %res +} + +; FIXME: _mm512_mask_cmp_p[s|h]_mask is not using {k2} but gcc does +define i32 @test_int_x86_avx10_vcmppbf16512_mask2(<32 x bfloat> %x1, <32 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_vcmppbf16512_mask2: +; CHECK: # %bb.0: +; CHECK-NEXT: vcmpeqpbf16 %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7f,0x48,0xc2,0xc1,0x00] +; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: andl $3, %eax # encoding: [0x83,0xe0,0x03] +; 
CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %1 = fcmp oeq <32 x bfloat> %x1, %x2 + %2 = and <32 x i1> %1, + %3 = bitcast <32 x i1> %2 to i32 + ret i32 %3 +} + +define <32 x bfloat> @test_sqrt_nepbf16_512(<32 x bfloat> %a0) { +; CHECK-LABEL: test_sqrt_nepbf16_512: +; CHECK: # %bb.0: +; CHECK-NEXT: vsqrtnepbf16 %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x51,0xc0] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %1 = tail call <32 x bfloat> @llvm.sqrt.v32bf16(<32 x bfloat> %a0) + ret <32 x bfloat> %1 +} + +define <32 x bfloat> @test_mm512_mask_sqrt_pbh(<32 x bfloat> %__W, i32 %__U, <32 x bfloat> %__A) { +; X64-LABEL: test_mm512_mask_sqrt_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vsqrtnepbf16 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x51,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm512_mask_sqrt_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vsqrtnepbf16 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x51,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %0 = tail call <32 x bfloat> @llvm.sqrt.v32bf16(<32 x bfloat> %__A) + %1 = bitcast i32 %__U to <32 x i1> + %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__W + ret <32 x bfloat> %2 +} + +define <32 x bfloat> @test_mm512_maskz_sqrt_pbh(i32 %__U, <32 x bfloat>%__A) { +; X64-LABEL: test_mm512_maskz_sqrt_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vsqrtnepbf16 %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x51,0xc0] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm512_maskz_sqrt_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vsqrtnepbf16 %zmm0, %zmm0 {%k1} {z} # encoding: 
[0x62,0xf5,0x7d,0xc9,0x51,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %0 = tail call <32 x bfloat> @llvm.sqrt.v32bf16(<32 x bfloat> %__A) + %1 = bitcast i32 %__U to <32 x i1> + %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> zeroinitializer + ret <32 x bfloat> %2 +} + +define <32 x bfloat> @test_mm512_fmaddne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) { +; CHECK-LABEL: test_mm512_fmaddne_pbh: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmadd213nepbf16 %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x74,0x48,0xa8,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) + ret <32 x bfloat> %0 +} + +define <32 x bfloat> @test_mm512_mask_fmaddne_pbh(<32 x bfloat> %__A, i32 %__U, <32 x bfloat> %__B, <32 x bfloat> %__C) { +; X64-LABEL: test_mm512_mask_fmaddne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfmadd132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x98,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm512_mask_fmaddne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfmadd132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x98,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) + %1 = bitcast i32 %__U to <32 x i1> + %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__A + ret <32 x bfloat> %2 +} + +define <32 x bfloat> @test_mm512_mask3_fmaddne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C, i32 %__U) { +; X64-LABEL: test_mm512_mask3_fmaddne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfmadd231nepbf16 %zmm1, %zmm0, %zmm2 
{%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xb8,0xd1] +; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm512_mask3_fmaddne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfmadd231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xb8,0xd1] +; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) + %1 = bitcast i32 %__U to <32 x i1> + %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__C + ret <32 x bfloat> %2 +} + +define <32 x bfloat> @test_mm512_maskz_fmaddne_pbh(i32 %__U, <32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) { +; X64-LABEL: test_mm512_maskz_fmaddne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfmadd213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xa8,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm512_maskz_fmaddne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfmadd213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xa8,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) + %1 = bitcast i32 %__U to <32 x i1> + %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> zeroinitializer + ret <32 x bfloat> %2 +} + +define <32 x bfloat> @test_mm512_fmsubne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) { +; CHECK-LABEL: test_mm512_fmsubne_pbh: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmsub213nepbf16 %zmm2, %zmm1, %zmm0 # encoding: 
[0x62,0xf6,0x74,0x48,0xaa,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %fneg.i = fneg <32 x bfloat> %__C + %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %fneg.i) + ret <32 x bfloat> %0 +} + +define <32 x bfloat> @test_mm512_mask_fmsubne_pbh(<32 x bfloat> %__A, i32 %__U, <32 x bfloat> %__B, <32 x bfloat> %__C) { +; X64-LABEL: test_mm512_mask_fmsubne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfmsub132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x9a,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm512_mask_fmsubne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfmsub132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x9a,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <32 x bfloat> %__C + %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %fneg.i.i) + %1 = bitcast i32 %__U to <32 x i1> + %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__A + ret <32 x bfloat> %2 +} + +define <32 x bfloat> @test_mm512_mask3_fmsubne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C, i32 %__U) { +; X64-LABEL: test_mm512_mask3_fmsubne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfmsub231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xba,0xd1] +; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm512_mask3_fmsubne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfmsub231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xba,0xd1] +; X86-NEXT: vmovaps 
%zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <32 x bfloat> %__C + %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %fneg.i.i) + %1 = bitcast i32 %__U to <32 x i1> + %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__C + ret <32 x bfloat> %2 +} + +define <32 x bfloat> @test_mm512_maskz_fmsubne_pbh(i32 %__U, <32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) { +; X64-LABEL: test_mm512_maskz_fmsubne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfmsub213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xaa,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm512_maskz_fmsubne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfmsub213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xaa,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <32 x bfloat> %__C + %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %fneg.i.i) + %1 = bitcast i32 %__U to <32 x i1> + %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> zeroinitializer + ret <32 x bfloat> %2 +} + +define <32 x bfloat> @test_mm512_fnmaddne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) { +; CHECK-LABEL: test_mm512_fnmaddne_pbh: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfnmadd213nepbf16 %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x74,0x48,0xac,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %fneg.i = fneg <32 x bfloat> %__B + %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i, <32 x bfloat> %__C) + ret <32 x bfloat> %0 +} + +define <32 x bfloat> @test_mm512_mask_fnmaddne_pbh(<32 x bfloat> %__A, i32 %__U, <32 x bfloat> %__B, <32 
x bfloat> %__C) { +; X64-LABEL: test_mm512_mask_fnmaddne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfnmadd132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x9c,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm512_mask_fnmaddne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfnmadd132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x9c,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <32 x bfloat> %__B + %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i.i, <32 x bfloat> %__C) + %1 = bitcast i32 %__U to <32 x i1> + %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__A + ret <32 x bfloat> %2 +} + +define <32 x bfloat> @test_mm512_mask3_fnmaddne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C, i32 %__U) { +; X64-LABEL: test_mm512_mask3_fnmaddne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfnmadd231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xbc,0xd1] +; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm512_mask3_fnmaddne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfnmadd231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xbc,0xd1] +; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <32 x bfloat> %__B + %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i.i, <32 x bfloat> %__C) + %1 = bitcast i32 %__U to <32 x i1> + %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__C + 
ret <32 x bfloat> %2 +} + +define <32 x bfloat> @test_mm512_maskz_fnmaddne_pbh(i32 %__U, <32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) { +; X64-LABEL: test_mm512_maskz_fnmaddne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfnmadd213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xac,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm512_maskz_fnmaddne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfnmadd213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xac,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <32 x bfloat> %__B + %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i.i, <32 x bfloat> %__C) + %1 = bitcast i32 %__U to <32 x i1> + %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> zeroinitializer + ret <32 x bfloat> %2 +} + +define <32 x bfloat> @test_mm512_fnmsubne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) { +; CHECK-LABEL: test_mm512_fnmsubne_pbh: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfnmsub213nepbf16 %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x74,0x48,0xae,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %fneg.i = fneg <32 x bfloat> %__B + %fneg1.i = fneg <32 x bfloat> %__C + %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i, <32 x bfloat> %fneg1.i) + ret <32 x bfloat> %0 +} + +define <32 x bfloat> @test_mm512_mask_fnmsubne_pbh(<32 x bfloat> %__A, i32 %__U, <32 x bfloat> %__B, <32 x bfloat> %__C) { +; X64-LABEL: test_mm512_mask_fnmsubne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfnmsub132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x9e,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: 
test_mm512_mask_fnmsubne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfnmsub132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x9e,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <32 x bfloat> %__B + %fneg1.i.i = fneg <32 x bfloat> %__C + %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i.i, <32 x bfloat> %fneg1.i.i) + %1 = bitcast i32 %__U to <32 x i1> + %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__A + ret <32 x bfloat> %2 +} + +define <32 x bfloat> @test_mm512_mask3_fnmsubne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C, i32 %__U) { +; X64-LABEL: test_mm512_mask3_fnmsubne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfnmsub231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xbe,0xd1] +; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm512_mask3_fnmsubne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfnmsub231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xbe,0xd1] +; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <32 x bfloat> %__B + %fneg1.i.i = fneg <32 x bfloat> %__C + %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i.i, <32 x bfloat> %fneg1.i.i) + %1 = bitcast i32 %__U to <32 x i1> + %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__C + ret <32 x bfloat> %2 +} + +define <32 x bfloat> @test_mm512_maskz_fnmsubne_pbh(i32 %__U, <32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) { +; X64-LABEL: test_mm512_maskz_fnmsubne_pbh: +; X64: # %bb.0: # 
%entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfnmsub213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xae,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm512_maskz_fnmsubne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfnmsub213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xae,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <32 x bfloat> %__B + %fneg1.i.i = fneg <32 x bfloat> %__C + %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i.i, <32 x bfloat> %fneg1.i.i) + %1 = bitcast i32 %__U to <32 x i1> + %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> zeroinitializer + ret <32 x bfloat> %2 +} diff --git a/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll new file mode 100644 index 00000000000000..7b81d547db085c --- /dev/null +++ b/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll @@ -0,0 +1,230 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86 + +declare <32 x bfloat> @llvm.x86.avx10.vminpbf16512(<32 x bfloat>, <32 x bfloat>) + +define <32 x bfloat> @test_int_x86_avx10_min_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_min_nepbf16_512: +; CHECK: # %bb.0: +; CHECK-NEXT: vminpbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x5d,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res0 = call <32 x bfloat> @llvm.x86.avx10.vminpbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2) + 
ret <32 x bfloat> %res0 +} + +define <32 x bfloat> @test_int_x86_avx10_maskz_min_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk) { +; X64-LABEL: test_int_x86_avx10_maskz_min_nepbf16_512: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vminpbf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x5d,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_maskz_min_nepbf16_512: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vminpbf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x5d,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i32 %msk to <32 x i1> + %res0 = call <32 x bfloat> @llvm.x86.avx10.vminpbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2) + %res1 = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> zeroinitializer + ret <32 x bfloat> %res1 +} + +declare <32 x bfloat> @llvm.x86.avx10.vmaxpbf16512(<32 x bfloat>, <32 x bfloat>) + +define <32 x bfloat> @test_int_x86_avx10_max_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_max_nepbf16_512: +; CHECK: # %bb.0: +; CHECK-NEXT: vmaxpbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x5f,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res0 = call <32 x bfloat> @llvm.x86.avx10.vmaxpbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2) + ret <32 x bfloat> %res0 +} + +define <32 x bfloat> @test_int_x86_avx10_maskz_max_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk) { +; X64-LABEL: test_int_x86_avx10_maskz_max_nepbf16_512: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vmaxpbf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x5f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_maskz_max_nepbf16_512: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: 
[0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vmaxpbf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x5f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i32 %msk to <32 x i1> + %res0 = call <32 x bfloat> @llvm.x86.avx10.vmaxpbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2) + %res1 = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> zeroinitializer + ret <32 x bfloat> %res1 +} + +declare <32 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.512(<32 x bfloat>, <32 x bfloat>, i32) + +define <32 x bfloat> @test_rsqrt_nepbf16_512(<32 x bfloat> %a0) { +; CHECK-LABEL: test_rsqrt_nepbf16_512: +; CHECK: # %bb.0: +; CHECK-NEXT: vrsqrtpbf16 %zmm0, %zmm0 # encoding: [0x62,0xf6,0x7c,0x48,0x4e,0xc0] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <32 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.512(<32 x bfloat> %a0, <32 x bfloat> zeroinitializer, i32 -1) + ret <32 x bfloat> %res +} + +declare <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat>, i32) + +define i32 @test_int_x86_avx512_fpclass_nepbf16_512(<32 x bfloat> %x0) { +; CHECK-LABEL: test_int_x86_avx512_fpclass_nepbf16_512: +; CHECK: # %bb.0: +; CHECK-NEXT: vfpclasspbf16 $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7f,0x48,0x66,0xc8,0x02] +; CHECK-NEXT: vfpclasspbf16 $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x66,0xc0,0x04] +; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat> %x0, i32 4) + %res1 = call <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat> %x0, i32 2) + %1 = and <32 x i1> %res1, %res + %2 = bitcast <32 x i1> %1 to i32 + ret i32 %2 +} + +declare <32 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.512(<32 x bfloat>, <32 x bfloat>, i32) + +define <32 x bfloat> @test_rcp_nepbf16_512(<32 x bfloat> %a0, <32 x bfloat> %a1, i32 %mask) { +; X64-LABEL: 
test_rcp_nepbf16_512: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vrcppbf16 %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0x4c,0xc8] +; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_rcp_nepbf16_512: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vrcppbf16 %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0x4c,0xc8] +; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %res = call <32 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.512(<32 x bfloat> %a0, <32 x bfloat> %a1, i32 %mask) + ret <32 x bfloat> %res +} + +declare <32 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.512(<32 x bfloat>, i32, <32 x bfloat>, i32) + +define <32 x bfloat>@test_int_x86_avx512_mask_reduce_nepbf16_512(<32 x bfloat> %x0, <32 x bfloat> %x2, i32 %x3) { +; X64-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_512: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vreducenepbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x56,0xc8,0x08] +; X64-NEXT: vreducenepbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x56,0xc0,0x04] +; X64-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_512: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vreducenepbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x56,0xc8,0x08] +; X86-NEXT: vreducenepbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x56,0xc0,0x04] +; X86-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0] +; X86-NEXT: retl # encoding: [0xc3] + %res = call <32 x bfloat> 
@llvm.x86.avx10.mask.reduce.nepbf16.512(<32 x bfloat> %x0, i32 8, <32 x bfloat> %x2, i32 %x3) + %res1 = call <32 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.512(<32 x bfloat> %x0, i32 4, <32 x bfloat> %x2, i32 -1) + %res2 = fadd <32 x bfloat> %res, %res1 + ret <32 x bfloat> %res2 +} + +declare <32 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.512(<32 x bfloat>, i32, <32 x bfloat>, i32) + +define <32 x bfloat>@test_int_x86_avx512_mask_rndscale_nepbf16_512(<32 x bfloat> %x0, <32 x bfloat> %x2, i32 %x3) { +; X64-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_512: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vrndscalenepbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x08,0xc8,0x08] +; X64-NEXT: vrndscalenepbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x08,0xc0,0x04] +; X64-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_512: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vrndscalenepbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x08,0xc8,0x08] +; X86-NEXT: vrndscalenepbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x08,0xc0,0x04] +; X86-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0] +; X86-NEXT: retl # encoding: [0xc3] + %res = call <32 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.512(<32 x bfloat> %x0, i32 8, <32 x bfloat> %x2, i32 %x3) + %res1 = call <32 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.512(<32 x bfloat> %x0, i32 4, <32 x bfloat> %x2, i32 -1) + %res2 = fadd <32 x bfloat> %res, %res1 + ret <32 x bfloat> %res2 +} + +declare <32 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.512(<32 x bfloat>, <32 x bfloat>, i32) + +define <32 x bfloat>@test_int_x86_avx512_mask_getexp_nepbf16_512(<32 x bfloat> %x0, <32 x bfloat> %x1, i32 
%x2) { +; X64-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_512: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vgetexppbf16 %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x42,0xc0] +; X64-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xc8] +; X64-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_512: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vgetexppbf16 %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x42,0xc0] +; X86-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xc8] +; X86-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0] +; X86-NEXT: retl # encoding: [0xc3] + %res1 = call <32 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.512(<32 x bfloat> %x0, <32 x bfloat> %x1, i32 %x2) + %res2 = call <32 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.512(<32 x bfloat> %x0, <32 x bfloat> zeroinitializer, i32 -1) + %res3 = fadd <32 x bfloat> %res1, %res2 + ret <32 x bfloat> %res3 +} + +declare <32 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.512(<32 x bfloat>, i32, <32 x bfloat>, i32) + +define <32 x bfloat>@test_int_x86_avx512_mask_getmant_nepbf16_512(<32 x bfloat> %x0, <32 x bfloat> %x2, i32 %x3) { +; X64-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_512: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vgetmantpbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x26,0xc8,0x08] +; X64-NEXT: vgetmantpbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x26,0xc0,0x04] +; X64-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_512: +; X86: # %bb.0: +; X86-NEXT: kmovd 
{{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vgetmantpbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x26,0xc8,0x08] +; X86-NEXT: vgetmantpbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x26,0xc0,0x04] +; X86-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0] +; X86-NEXT: retl # encoding: [0xc3] + %res = call <32 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.512(<32 x bfloat> %x0, i32 8, <32 x bfloat> %x2, i32 %x3) + %res1 = call <32 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.512(<32 x bfloat> %x0, i32 4, <32 x bfloat> %x2, i32 -1) + %res2 = fadd <32 x bfloat> %res, %res1 + ret <32 x bfloat> %res2 +} + +declare <32 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.512(<32 x bfloat>, <32 x bfloat>, <32 x bfloat>, i32) + +define <32 x bfloat>@test_int_x86_avx512_mask_scalef_nepbf16_512(<32 x bfloat> %x0, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %x3) { +; X64-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_512: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vscalefpbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf6,0x7c,0x48,0x2c,0xc1] +; X64-NEXT: vmovdqu16 %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xd0] +; X64-NEXT: vaddnepbf16 %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf5,0x6d,0x48,0x58,0xc0] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_512: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vscalefpbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf6,0x7c,0x48,0x2c,0xc1] +; X86-NEXT: vmovdqu16 %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xd0] +; X86-NEXT: vaddnepbf16 %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf5,0x6d,0x48,0x58,0xc0] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i32 %x3 to <32 x i1> + %res1 = call <32 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.512(<32 x bfloat> %x0, <32 
x bfloat> %x1, <32 x bfloat> %x2, i32 %x3) + %res2 = call <32 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.512(<32 x bfloat> %x0, <32 x bfloat> %x1, <32 x bfloat> zeroinitializer, i32 -1) + %res3 = fadd <32 x bfloat> %res1, %res2 + ret <32 x bfloat> %res3 +} diff --git a/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll b/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll new file mode 100644 index 00000000000000..e0f5679e8ac96d --- /dev/null +++ b/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll @@ -0,0 +1,1168 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86 + +define <16 x bfloat> @test_int_x86_avx10_add_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_add_nepbf16_256: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = fadd <16 x bfloat> %x1, %x2 + ret <16 x bfloat> %res +} + +define <16 x bfloat> @test_int_x86_avx10_mask_add_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_mask_add_nepbf16_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vaddnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x58,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_mask_add_nepbf16_256: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vaddnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x58,0xc2] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i16 %msk to <16 x i1> 
+ %res0 = fadd <16 x bfloat> %x1, %x2 + %res = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> %src + ret <16 x bfloat> %res +} +define <16 x bfloat> @test_int_x86_avx10_maskz_add_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_maskz_add_nepbf16_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vaddnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x58,0xc2] +; X64-NEXT: vaddnepbf16 (%rsi), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x58,0x0e] +; X64-NEXT: vaddnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_maskz_add_nepbf16_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vaddnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x58,0xc2] +; X86-NEXT: vaddnepbf16 (%eax), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x58,0x08] +; X86-NEXT: vaddnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i16 %msk to <16 x i1> + %val = load <16 x bfloat>, ptr %ptr + %res0 = fadd <16 x bfloat> %x1, %x2 + %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer + %t2 = fadd <16 x bfloat> %x1, %val + %res2 = select <16 x i1> %mask, <16 x bfloat> %t2, <16 x bfloat> zeroinitializer + %res3 = fadd <16 x bfloat> %res1, %res2 + ret <16 x bfloat> %res3 +} + +define <8 x bfloat> @test_int_x86_avx10_add_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_add_nepbf16_128: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x58,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: 
[0xc3] + %res = fadd <8 x bfloat> %x1, %x2 + ret <8 x bfloat> %res +} + +define <8 x bfloat> @test_int_x86_avx10_mask_add_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_mask_add_nepbf16_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vaddnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x58,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_mask_add_nepbf16_128: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vaddnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x58,0xc2] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i8 %msk to <8 x i1> + %res0 = fadd <8 x bfloat> %x1, %x2 + %res = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> %src + ret <8 x bfloat> %res +} + +define <8 x bfloat> @test_int_x86_avx10_maskz_add_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_maskz_add_nepbf16_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vaddnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x58,0xc2] +; X64-NEXT: vaddnepbf16 (%rsi), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x58,0x0e] +; X64-NEXT: vaddnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x58,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_maskz_add_nepbf16_128: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vaddnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x58,0xc2] +; X86-NEXT: vaddnepbf16 (%eax), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x58,0x08] +; X86-NEXT: 
vaddnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x58,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i8 %msk to <8 x i1> + %val = load <8 x bfloat>, ptr %ptr + %res0 = fadd <8 x bfloat> %x1, %x2 + %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer + %t2 = fadd <8 x bfloat> %x1, %val + %res2 = select <8 x i1> %mask, <8 x bfloat> %t2, <8 x bfloat> zeroinitializer + %res3 = fadd <8 x bfloat> %res1, %res2 + ret <8 x bfloat> %res3 +} + +define <16 x bfloat> @test_int_x86_avx10_sub_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_sub_nepbf16_256: +; CHECK: # %bb.0: +; CHECK-NEXT: vsubnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5c,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = fsub <16 x bfloat> %x1, %x2 + ret <16 x bfloat> %res +} + +define <16 x bfloat> @test_int_x86_avx10_mask_sub_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_mask_sub_nepbf16_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vsubnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x5c,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_mask_sub_nepbf16_256: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vsubnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x5c,0xc2] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i16 %msk to <16 x i1> + %res0 = fsub <16 x bfloat> %x1, %x2 + %res = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> %src + ret <16 x bfloat> %res +} + +define <16 x bfloat> @test_int_x86_avx10_maskz_sub_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_maskz_sub_nepbf16_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # 
encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vsubnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5c,0xc2] +; X64-NEXT: vsubnepbf16 (%rsi), %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x5c,0x0e] +; X64-NEXT: vsubnepbf16 %ymm1, %ymm0, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x5c,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_maskz_sub_nepbf16_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vsubnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5c,0xc2] +; X86-NEXT: vsubnepbf16 (%eax), %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x5c,0x08] +; X86-NEXT: vsubnepbf16 %ymm1, %ymm0, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x5c,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i16 %msk to <16 x i1> + %val = load <16 x bfloat>, ptr %ptr + %res0 = fsub <16 x bfloat> %x1, %x2 + %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer + %t2 = fsub <16 x bfloat> %x1, %val + %res2 = select <16 x i1> %mask, <16 x bfloat> %t2, <16 x bfloat> zeroinitializer + %res3 = fsub <16 x bfloat> %res1, %res2 + ret <16 x bfloat> %res3 +} + +define <8 x bfloat> @test_int_x86_avx10_sub_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_sub_nepbf16_128: +; CHECK: # %bb.0: +; CHECK-NEXT: vsubnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5c,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = fsub <8 x bfloat> %x1, %x2 + ret <8 x bfloat> %res +} + +define <8 x bfloat> @test_int_x86_avx10_mask_sub_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_mask_sub_nepbf16_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vsubnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # 
encoding: [0x62,0xf5,0x75,0x09,0x5c,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_mask_sub_nepbf16_128: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vsubnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x5c,0xc2] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i8 %msk to <8 x i1> + %res0 = fsub <8 x bfloat> %x1, %x2 + %res = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> %src + ret <8 x bfloat> %res +} + +define <8 x bfloat> @test_int_x86_avx10_maskz_sub_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_maskz_sub_nepbf16_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vsubnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5c,0xc2] +; X64-NEXT: vsubnepbf16 (%rsi), %xmm1, %xmm1 # encoding: [0x62,0xf5,0x75,0x08,0x5c,0x0e] +; X64-NEXT: vsubnepbf16 %xmm1, %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x5c,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_maskz_sub_nepbf16_128: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vsubnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5c,0xc2] +; X86-NEXT: vsubnepbf16 (%eax), %xmm1, %xmm1 # encoding: [0x62,0xf5,0x75,0x08,0x5c,0x08] +; X86-NEXT: vsubnepbf16 %xmm1, %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x5c,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i8 %msk to <8 x i1> + %val = load <8 x bfloat>, ptr %ptr + %res0 = fsub <8 x bfloat> %x1, %x2 + %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer + %t2 = fsub <8 x bfloat> %x1, %val + %res2 = select <8 x i1> %mask, <8 x bfloat> %t2, <8 x bfloat> 
zeroinitializer + %res3 = fsub <8 x bfloat> %res1, %res2 + ret <8 x bfloat> %res3 +} + +define <16 x bfloat> @test_int_x86_avx10_mul_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_mul_nepbf16_256: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x59,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = fmul <16 x bfloat> %x1, %x2 + ret <16 x bfloat> %res +} + +define <16 x bfloat> @test_int_x86_avx10_mask_mul_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_mask_mul_nepbf16_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vmulnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x59,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_mask_mul_nepbf16_256: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vmulnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x59,0xc2] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i16 %msk to <16 x i1> + %res0 = fmul <16 x bfloat> %x1, %x2 + %res = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> %src + ret <16 x bfloat> %res +} + +define <16 x bfloat> @test_int_x86_avx10_maskz_mul_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_maskz_mul_nepbf16_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vmulnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x59,0xc2] +; X64-NEXT: vmulnepbf16 (%rsi), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x59,0x0e] +; X64-NEXT: vmulnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x59,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: 
test_int_x86_avx10_maskz_mul_nepbf16_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vmulnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x59,0xc2] +; X86-NEXT: vmulnepbf16 (%eax), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x59,0x08] +; X86-NEXT: vmulnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x59,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i16 %msk to <16 x i1> + %val = load <16 x bfloat>, ptr %ptr + %res0 = fmul <16 x bfloat> %x1, %x2 + %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer + %t2 = fmul <16 x bfloat> %x1, %val + %res2 = select <16 x i1> %mask, <16 x bfloat> %t2, <16 x bfloat> zeroinitializer + %res3 = fmul <16 x bfloat> %res1, %res2 + ret <16 x bfloat> %res3 +} + +define <8 x bfloat> @test_int_x86_avx10_mul_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_mul_nepbf16_128: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x59,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = fmul <8 x bfloat> %x1, %x2 + ret <8 x bfloat> %res +} + +define <8 x bfloat> @test_int_x86_avx10_mask_mul_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_mask_mul_nepbf16_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vmulnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x59,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_mask_mul_nepbf16_128: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vmulnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x59,0xc2] +; X86-NEXT: retl # encoding: 
[0xc3] + %mask = bitcast i8 %msk to <8 x i1> + %res0 = fmul <8 x bfloat> %x1, %x2 + %res = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> %src + ret <8 x bfloat> %res +} + +define <8 x bfloat> @test_int_x86_avx10_maskz_mul_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_maskz_mul_nepbf16_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vmulnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x59,0xc2] +; X64-NEXT: vmulnepbf16 (%rsi), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x59,0x0e] +; X64-NEXT: vmulnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x59,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_maskz_mul_nepbf16_128: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vmulnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x59,0xc2] +; X86-NEXT: vmulnepbf16 (%eax), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x59,0x08] +; X86-NEXT: vmulnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x59,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i8 %msk to <8 x i1> + %val = load <8 x bfloat>, ptr %ptr + %res0 = fmul <8 x bfloat> %x1, %x2 + %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer + %t2 = fmul <8 x bfloat> %x1, %val + %res2 = select <8 x i1> %mask, <8 x bfloat> %t2, <8 x bfloat> zeroinitializer + %res3 = fmul <8 x bfloat> %res1, %res2 + ret <8 x bfloat> %res3 +} + +define <16 x bfloat> @test_int_x86_avx10_div_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_div_nepbf16_256: +; CHECK: # %bb.0: +; CHECK-NEXT: vdivnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5e,0xc1] +; CHECK-NEXT: 
ret{{[l|q]}} # encoding: [0xc3] + %res = fdiv <16 x bfloat> %x1, %x2 + ret <16 x bfloat> %res +} + +define <16 x bfloat> @test_int_x86_avx10_mask_div_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_mask_div_nepbf16_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vdivnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x5e,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_mask_div_nepbf16_256: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vdivnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x5e,0xc2] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i16 %msk to <16 x i1> + %res0 = fdiv <16 x bfloat> %x1, %x2 + %res = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> %src + ret <16 x bfloat> %res +} + +; FIXME: assembly order is different from fp16 ones +define <16 x bfloat> @test_int_x86_avx10_maskz_div_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_maskz_div_nepbf16_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vdivnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5e,0xc2] +; X64-NEXT: vdivnepbf16 (%rsi), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5e,0x0e] +; X64-NEXT: vdivnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5e,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_maskz_div_nepbf16_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vdivnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5e,0xc2] +; X86-NEXT: vdivnepbf16 
(%eax), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5e,0x08] +; X86-NEXT: vdivnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5e,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i16 %msk to <16 x i1> + %val = load <16 x bfloat>, ptr %ptr + %res0 = fdiv <16 x bfloat> %x1, %x2 + %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer + %t2 = fdiv <16 x bfloat> %x1, %val + %res2 = select <16 x i1> %mask, <16 x bfloat> %t2, <16 x bfloat> zeroinitializer + %res3 = fdiv <16 x bfloat> %res1, %res2 + ret <16 x bfloat> %res3 +} + +define <8 x bfloat> @test_int_x86_avx10_div_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_div_nepbf16_128: +; CHECK: # %bb.0: +; CHECK-NEXT: vdivnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5e,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = fdiv <8 x bfloat> %x1, %x2 + ret <8 x bfloat> %res +} + +define <8 x bfloat> @test_int_x86_avx10_mask_div_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_mask_div_nepbf16_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vdivnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x5e,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_mask_div_nepbf16_128: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vdivnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x5e,0xc2] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i8 %msk to <8 x i1> + %res0 = fdiv <8 x bfloat> %x1, %x2 + %res = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> %src + ret <8 x bfloat> %res +} + +; FIXME: assembly order is different from fp16 ones +define <8 x bfloat> @test_int_x86_avx10_maskz_div_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x 
bfloat> %x2, i8 %msk, ptr %ptr) { +; X64-LABEL: test_int_x86_avx10_maskz_div_nepbf16_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vdivnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5e,0xc2] +; X64-NEXT: vdivnepbf16 (%rsi), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5e,0x0e] +; X64-NEXT: vdivnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5e,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_maskz_div_nepbf16_128: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vdivnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5e,0xc2] +; X86-NEXT: vdivnepbf16 (%eax), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5e,0x08] +; X86-NEXT: vdivnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5e,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i8 %msk to <8 x i1> + %val = load <8 x bfloat>, ptr %ptr + %res0 = fdiv <8 x bfloat> %x1, %x2 + %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer + %t2 = fdiv <8 x bfloat> %x1, %val + %res2 = select <8 x i1> %mask, <8 x bfloat> %t2, <8 x bfloat> zeroinitializer + %res3 = fdiv <8 x bfloat> %res1, %res2 + ret <8 x bfloat> %res3 +} + +define i16 @test_int_x86_avx10_vcmppbf16256(<16 x bfloat> %x1, <16 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_vcmppbf16256: +; CHECK: # %bb.0: +; CHECK-NEXT: vcmpunordpbf16 %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7f,0x28,0xc2,0xc1,0x03] +; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %1 = fcmp uno <16 x bfloat> %x1, %x2 + %res = bitcast <16 x i1> %1 to i16 + ret i16 %res 
+} + +define i16 @test_int_x86_avx10_vcmppbf16256_mask2(<16 x bfloat> %x1, <16 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_vcmppbf16256_mask2: +; CHECK: # %bb.0: +; CHECK-NEXT: vcmpeqpbf16 %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7f,0x28,0xc2,0xc1,0x00] +; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: andl $3, %eax # encoding: [0x83,0xe0,0x03] +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %1 = fcmp oeq <16 x bfloat> %x1, %x2 + %2 = and <16 x i1> %1, <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false> + %3 = bitcast <16 x i1> %2 to i16 + ret i16 %3 +} + +define i8 @test_int_x86_avx10_vcmppbf16128(<8 x bfloat> %x1, <8 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_vcmppbf16128: +; CHECK: # %bb.0: +; CHECK-NEXT: vcmpunordpbf16 %xmm1, %xmm0, %k0 # encoding: [0x62,0xf3,0x7f,0x08,0xc2,0xc1,0x03] +; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %1 = fcmp uno <8 x bfloat> %x1, %x2 + %res = bitcast <8 x i1> %1 to i8 + ret i8 %res +} + +define i8 @test_int_x86_avx10_vcmppbf16128_mask2(<8 x bfloat> %x1, <8 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_vcmppbf16128_mask2: +; CHECK: # %bb.0: +; CHECK-NEXT: vcmpeqpbf16 %xmm1, %xmm0, %k0 # encoding: [0x62,0xf3,0x7f,0x08,0xc2,0xc1,0x00] +; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: andb $3, %al # encoding: [0x24,0x03] +; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %1 = fcmp oeq <8 x bfloat> %x1, %x2 + %2 = and <8 x i1> %1, <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false> + %3 = bitcast <8 x i1> %2 to i8 + ret i8 %3 +} + +define <16 x bfloat> @test_sqrt_nepbf16_256(<16 x bfloat> %a0) { +; CHECK-LABEL: test_sqrt_nepbf16_256: +; CHECK: # %bb.0: +; CHECK-NEXT: vsqrtnepbf16 %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x51,0xc0] +; CHECK-NEXT: ret{{[l|q]}} # encoding:
[0xc3] + %1 = tail call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> %a0) + ret <16 x bfloat> %1 +} + +define <16 x bfloat> @test_mm256_mask_sqrt_pbh(<16 x bfloat> %__W, i16 %__U, <16 x bfloat> %__A) { +; X64-LABEL: test_mm256_mask_sqrt_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vsqrtnepbf16 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x51,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm256_mask_sqrt_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vsqrtnepbf16 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x51,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %0 = tail call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> %__A) + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__W + ret <16 x bfloat> %2 +} + +define <16 x bfloat> @test_mm256_maskz_sqrt_pbh(i16 %__U, <16 x bfloat>%__A) { +; X64-LABEL: test_mm256_maskz_sqrt_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vsqrtnepbf16 %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x51,0xc0] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm256_maskz_sqrt_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vsqrtnepbf16 %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x51,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %0 = tail call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> %__A) + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer + ret <16 x bfloat> %2 +} + +define <8 x bfloat> @test_sqrt_nepbf16_128(<8 x bfloat> %a0) { +; CHECK-LABEL: test_sqrt_nepbf16_128: +; CHECK: # %bb.0: +; CHECK-NEXT: vsqrtnepbf16 %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x51,0xc0] 
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %1 = tail call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %a0) + ret <8 x bfloat> %1 +} + +define <8 x bfloat> @test_mm_mask_sqrt_pbh(<8 x bfloat> %__W, i8 %__U, <8 x bfloat> %__A) { +; X64-LABEL: test_mm_mask_sqrt_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vsqrtnepbf16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x51,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm_mask_sqrt_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vsqrtnepbf16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x51,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %0 = tail call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %__A) + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__W + ret <8 x bfloat> %2 +} + +define <8 x bfloat> @test_mm_maskz_sqrt_pbh(i8 %__U, <8 x bfloat>%__A) { +; X64-LABEL: test_mm_maskz_sqrt_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vsqrtnepbf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x51,0xc0] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm_maskz_sqrt_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vsqrtnepbf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x51,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %0 = tail call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %__A) + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer + ret <8 x bfloat> %2 +} + +define <16 x bfloat> @test_mm256_fmaddne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) { +; CHECK-LABEL: test_mm256_fmaddne_pbh: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmadd213nepbf16 %ymm2, 
%ymm1, %ymm0 # encoding: [0x62,0xf6,0x74,0x28,0xa8,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) + ret <16 x bfloat> %0 +} + +define <16 x bfloat> @test_mm256_mask_fmaddne_pbh(<16 x bfloat> %__A, i16 zeroext %__U, <16 x bfloat> %__B, <16 x bfloat> %__C) { +; X64-LABEL: test_mm256_mask_fmaddne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfmadd132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x98,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm256_mask_fmaddne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfmadd132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x98,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__A + ret <16 x bfloat> %2 +} + +define <16 x bfloat> @test_mm256_mask3_fmaddne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C, i16 zeroext %__U) { +; X64-LABEL: test_mm256_mask3_fmaddne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfmadd231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xb8,0xd1] +; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm256_mask3_fmaddne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfmadd231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xb8,0xd1] +; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX 
Compression encoding: [0xc5,0xfc,0x28,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__C + ret <16 x bfloat> %2 +} + +define <16 x bfloat> @test_mm256_maskz_fmaddne_pbh(i16 zeroext %__U, <16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) { +; X64-LABEL: test_mm256_maskz_fmaddne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfmadd213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xa8,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm256_maskz_fmaddne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfmadd213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xa8,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer + ret <16 x bfloat> %2 +} + +define <16 x bfloat> @test_mm256_fmsubne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) { +; CHECK-LABEL: test_mm256_fmsubne_pbh: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmsub213nepbf16 %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x74,0x28,0xaa,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %fneg.i = fneg <16 x bfloat> %__C + %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %fneg.i) + ret <16 x bfloat> %0 +} + +define <16 x bfloat> @test_mm256_mask_fmsubne_pbh(<16 x bfloat> %__A, i16 zeroext %__U, <16 x bfloat> %__B, <16 x bfloat> %__C) { +; X64-LABEL: test_mm256_mask_fmsubne_pbh: +; X64: # %bb.0: # %entry +; 
X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfmsub132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9a,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm256_mask_fmsubne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfmsub132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9a,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <16 x bfloat> %__C + %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %fneg.i.i) + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__A + ret <16 x bfloat> %2 +} + +define <16 x bfloat> @test_mm256_mask3_fmsubne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C, i16 zeroext %__U) { +; X64-LABEL: test_mm256_mask3_fmsubne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfmsub231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xba,0xd1] +; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm256_mask3_fmsubne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfmsub231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xba,0xd1] +; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <16 x bfloat> %__C + %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %fneg.i.i) + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__C + ret <16 x bfloat> %2 +} + +define <16 x bfloat> 
@test_mm256_maskz_fmsubne_pbh(i16 zeroext %__U, <16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) { +; X64-LABEL: test_mm256_maskz_fmsubne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfmsub213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xaa,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm256_maskz_fmsubne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfmsub213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xaa,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <16 x bfloat> %__C + %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %fneg.i.i) + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer + ret <16 x bfloat> %2 +} + +define <16 x bfloat> @test_mm256_fnmaddne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) { +; CHECK-LABEL: test_mm256_fnmaddne_pbh: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfnmadd213nepbf16 %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x74,0x28,0xac,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %fneg.i = fneg <16 x bfloat> %__B + %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i, <16 x bfloat> %__C) + ret <16 x bfloat> %0 +} + +define <16 x bfloat> @test_mm256_mask_fnmaddne_pbh(<16 x bfloat> %__A, i16 zeroext %__U, <16 x bfloat> %__B, <16 x bfloat> %__C) { +; X64-LABEL: test_mm256_mask_fnmaddne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfnmadd132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9c,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm256_mask_fnmaddne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovw 
{{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfnmadd132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9c,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <16 x bfloat> %__B + %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %__C) + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__A + ret <16 x bfloat> %2 +} + +define <16 x bfloat> @test_mm256_mask3_fnmaddne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C, i16 zeroext %__U) { +; X64-LABEL: test_mm256_mask3_fnmaddne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfnmadd231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xbc,0xd1] +; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm256_mask3_fnmaddne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfnmadd231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xbc,0xd1] +; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <16 x bfloat> %__B + %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %__C) + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__C + ret <16 x bfloat> %2 +} + +define <16 x bfloat> @test_mm256_maskz_fnmaddne_pbh(i16 zeroext %__U, <16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) { +; X64-LABEL: test_mm256_maskz_fnmaddne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfnmadd213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # 
encoding: [0x62,0xf6,0x74,0xa9,0xac,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm256_maskz_fnmaddne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfnmadd213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xac,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <16 x bfloat> %__B + %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %__C) + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer + ret <16 x bfloat> %2 +} + +define <16 x bfloat> @test_mm256_fnmsubne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) { +; CHECK-LABEL: test_mm256_fnmsubne_pbh: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfnmsub213nepbf16 %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x74,0x28,0xae,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %fneg.i = fneg <16 x bfloat> %__B + %fneg1.i = fneg <16 x bfloat> %__C + %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i, <16 x bfloat> %fneg1.i) + ret <16 x bfloat> %0 +} + +define <16 x bfloat> @test_mm256_mask_fnmsubne_pbh(<16 x bfloat> %__A, i16 zeroext %__U, <16 x bfloat> %__B, <16 x bfloat> %__C) { +; X64-LABEL: test_mm256_mask_fnmsubne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfnmsub132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9e,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm256_mask_fnmsubne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfnmsub132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9e,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <16 x bfloat> %__B + %fneg1.i.i = fneg <16 x 
bfloat> %__C + %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %fneg1.i.i) + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__A + ret <16 x bfloat> %2 +} + +define <16 x bfloat> @test_mm256_mask3_fnmsubne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C, i16 zeroext %__U) { +; X64-LABEL: test_mm256_mask3_fnmsubne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfnmsub231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xbe,0xd1] +; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm256_mask3_fnmsubne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfnmsub231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xbe,0xd1] +; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <16 x bfloat> %__B + %fneg1.i.i = fneg <16 x bfloat> %__C + %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %fneg1.i.i) + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__C + ret <16 x bfloat> %2 +} + +define <16 x bfloat> @test_mm256_maskz_fnmsubne_pbh(i16 zeroext %__U, <16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) { +; X64-LABEL: test_mm256_maskz_fnmsubne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfnmsub213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xae,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm256_maskz_fnmsubne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovw 
{{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfnmsub213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xae,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <16 x bfloat> %__B + %fneg1.i.i = fneg <16 x bfloat> %__C + %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %fneg1.i.i) + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer + ret <16 x bfloat> %2 +} + +define <8 x bfloat> @test_mm_fmaddne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) { +; CHECK-LABEL: test_mm_fmaddne_pbh: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmadd213nepbf16 %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf6,0x74,0x08,0xa8,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) + ret <8 x bfloat> %0 +} + +define <8 x bfloat> @test_mm_mask_fmaddne_pbh(<8 x bfloat> %__A, i8 zeroext %__U, <8 x bfloat> %__B, <8 x bfloat> %__C) { +; X64-LABEL: test_mm_mask_fmaddne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfmadd132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x98,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm_mask_fmaddne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfmadd132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x98,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__A + ret <8 x bfloat> %2 +} + +define <8 x bfloat> @test_mm_mask3_fmaddne_pbh(<8 x bfloat> %__A, <8 x bfloat> 
%__B, <8 x bfloat> %__C, i8 zeroext %__U) { +; X64-LABEL: test_mm_mask3_fmaddne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfmadd231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xb8,0xd1] +; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm_mask3_fmaddne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfmadd231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xb8,0xd1] +; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__C + ret <8 x bfloat> %2 +} + +define <8 x bfloat> @test_mm_maskz_fmaddne_pbh(i8 zeroext %__U, <8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) { +; X64-LABEL: test_mm_maskz_fmaddne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfmadd213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xa8,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm_maskz_fmaddne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfmadd213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xa8,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer + ret <8 x bfloat> %2 +} + +define <8 x bfloat> 
@test_mm_fmsubne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) { +; CHECK-LABEL: test_mm_fmsubne_pbh: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmsub213nepbf16 %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf6,0x74,0x08,0xaa,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %fneg.i = fneg <8 x bfloat> %__C + %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %fneg.i) + ret <8 x bfloat> %0 +} + +define <8 x bfloat> @test_mm_mask_fmsubne_pbh(<8 x bfloat> %__A, i8 zeroext %__U, <8 x bfloat> %__B, <8 x bfloat> %__C) { +; X64-LABEL: test_mm_mask_fmsubne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfmsub132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9a,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm_mask_fmsubne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfmsub132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9a,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <8 x bfloat> %__C + %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %fneg.i.i) + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__A + ret <8 x bfloat> %2 +} + +define <8 x bfloat> @test_mm_mask3_fmsubne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C, i8 zeroext %__U) { +; X64-LABEL: test_mm_mask3_fmsubne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfmsub231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xba,0xd1] +; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm_mask3_fmsubne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovb 
{{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfmsub231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xba,0xd1] +; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <8 x bfloat> %__C + %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %fneg.i.i) + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__C + ret <8 x bfloat> %2 +} + +define <8 x bfloat> @test_mm_maskz_fmsubne_pbh(i8 zeroext %__U, <8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) { +; X64-LABEL: test_mm_maskz_fmsubne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfmsub213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xaa,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm_maskz_fmsubne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfmsub213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xaa,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <8 x bfloat> %__C + %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %fneg.i.i) + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer + ret <8 x bfloat> %2 +} + +define <8 x bfloat> @test_mm_fnmaddne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) { +; CHECK-LABEL: test_mm_fnmaddne_pbh: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfnmadd213nepbf16 %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf6,0x74,0x08,0xac,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %fneg.i = fneg <8 x bfloat> %__B + %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i, <8 
x bfloat> %__C) + ret <8 x bfloat> %0 +} + +define <8 x bfloat> @test_mm_mask_fnmaddne_pbh(<8 x bfloat> %__A, i8 zeroext %__U, <8 x bfloat> %__B, <8 x bfloat> %__C) { +; X64-LABEL: test_mm_mask_fnmaddne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfnmadd132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9c,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm_mask_fnmaddne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfnmadd132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9c,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <8 x bfloat> %__B + %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %__C) + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__A + ret <8 x bfloat> %2 +} + +define <8 x bfloat> @test_mm_mask3_fnmaddne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C, i8 zeroext %__U) { +; X64-LABEL: test_mm_mask3_fnmaddne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfnmadd231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xbc,0xd1] +; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm_mask3_fnmaddne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfnmadd231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xbc,0xd1] +; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <8 x bfloat> %__B + %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x 
bfloat> %fneg.i.i, <8 x bfloat> %__C) + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__C + ret <8 x bfloat> %2 +} + +define <8 x bfloat> @test_mm_maskz_fnmaddne_pbh(i8 zeroext %__U, <8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) { +; X64-LABEL: test_mm_maskz_fnmaddne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfnmadd213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xac,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm_maskz_fnmaddne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfnmadd213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xac,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <8 x bfloat> %__B + %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %__C) + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer + ret <8 x bfloat> %2 +} + +define <8 x bfloat> @test_mm_fnmsubne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) { +; CHECK-LABEL: test_mm_fnmsubne_pbh: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfnmsub213nepbf16 %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf6,0x74,0x08,0xae,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %fneg.i = fneg <8 x bfloat> %__B + %fneg1.i = fneg <8 x bfloat> %__C + %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i, <8 x bfloat> %fneg1.i) + ret <8 x bfloat> %0 +} + +define <8 x bfloat> @test_mm_mask_fnmsubne_pbh(<8 x bfloat> %__A, i8 zeroext %__U, <8 x bfloat> %__B, <8 x bfloat> %__C) { +; X64-LABEL: test_mm_mask_fnmsubne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfnmsub132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: 
[0x62,0xf6,0x6c,0x09,0x9e,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm_mask_fnmsubne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfnmsub132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9e,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <8 x bfloat> %__B + %fneg1.i.i = fneg <8 x bfloat> %__C + %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %fneg1.i.i) + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__A + ret <8 x bfloat> %2 +} + +define <8 x bfloat> @test_mm_mask3_fnmsubne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C, i8 zeroext %__U) { +; X64-LABEL: test_mm_mask3_fnmsubne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfnmsub231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xbe,0xd1] +; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm_mask3_fnmsubne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfnmsub231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xbe,0xd1] +; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <8 x bfloat> %__B + %fneg1.i.i = fneg <8 x bfloat> %__C + %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %fneg1.i.i) + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__C + ret <8 x bfloat> %2 +} + +define <8 x bfloat> @test_mm_maskz_fnmsubne_pbh(i8 zeroext %__U, <8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> 
%__C) { +; X64-LABEL: test_mm_maskz_fnmsubne_pbh: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vfnmsub213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xae,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_mm_maskz_fnmsubne_pbh: +; X86: # %bb.0: # %entry +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vfnmsub213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xae,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +entry: + %fneg.i.i = fneg <8 x bfloat> %__B + %fneg1.i.i = fneg <8 x bfloat> %__C + %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %fneg1.i.i) + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer + ret <8 x bfloat> %2 +} diff --git a/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll new file mode 100644 index 00000000000000..559d866b55cc7b --- /dev/null +++ b/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll @@ -0,0 +1,602 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86 + +declare <16 x bfloat> @llvm.x86.avx10.vminpbf16256(<16 x bfloat>, <16 x bfloat>) + +define <16 x bfloat> @test_int_x86_avx10_min_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_min_nepbf16_256: +; CHECK: # %bb.0: +; CHECK-NEXT: vminpbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5d,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res0 = call <16 x bfloat> @llvm.x86.avx10.vminpbf16256(<16 
x bfloat> %x1, <16 x bfloat> %x2) + ret <16 x bfloat> %res0 +} + +define <16 x bfloat> @test_int_x86_avx10_maskz_min_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk) { +; X64-LABEL: test_int_x86_avx10_maskz_min_nepbf16_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vminpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x5d,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_maskz_min_nepbf16_256: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vminpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x5d,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i16 %msk to <16 x i1> + %res0 = call <16 x bfloat> @llvm.x86.avx10.vminpbf16256(<16 x bfloat> %x1, <16 x bfloat> %x2) + %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer + ret <16 x bfloat> %res1 +} + +declare <8 x bfloat> @llvm.x86.avx10.vminpbf16128(<8 x bfloat>, <8 x bfloat>) + +define <8 x bfloat> @test_int_x86_avx10_min_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_min_nepbf16_128: +; CHECK: # %bb.0: +; CHECK-NEXT: vminpbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5d,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res0 = call <8 x bfloat> @llvm.x86.avx10.vminpbf16128(<8 x bfloat> %x1, <8 x bfloat> %x2) + ret <8 x bfloat> %res0 +} + +define <8 x bfloat> @test_int_x86_avx10_maskz_min_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk) { +; X64-LABEL: test_int_x86_avx10_maskz_min_nepbf16_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vminpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x5d,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_maskz_min_nepbf16_128: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 
# encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vminpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x5d,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i8 %msk to <8 x i1> + %res0 = call <8 x bfloat> @llvm.x86.avx10.vminpbf16128(<8 x bfloat> %x1, <8 x bfloat> %x2) + %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer + ret <8 x bfloat> %res1 +} + +declare <16 x bfloat> @llvm.x86.avx10.vmaxpbf16256(<16 x bfloat>, <16 x bfloat>) + +define <16 x bfloat> @test_int_x86_avx10_max_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_max_nepbf16_256: +; CHECK: # %bb.0: +; CHECK-NEXT: vmaxpbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5f,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res0 = call <16 x bfloat> @llvm.x86.avx10.vmaxpbf16256(<16 x bfloat> %x1, <16 x bfloat> %x2) + ret <16 x bfloat> %res0 +} + +define <16 x bfloat> @test_int_x86_avx10_maskz_max_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk) { +; X64-LABEL: test_int_x86_avx10_maskz_max_nepbf16_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vmaxpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x5f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_maskz_max_nepbf16_256: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vmaxpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x5f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i16 %msk to <16 x i1> + %res0 = call <16 x bfloat> @llvm.x86.avx10.vmaxpbf16256(<16 x bfloat> %x1, <16 x bfloat> %x2) + %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer + ret <16 x bfloat> %res1 +} + +declare <8 x bfloat> @llvm.x86.avx10.vmaxpbf16128(<8 x bfloat>, <8 x bfloat>) + +define <8 x bfloat> 
@test_int_x86_avx10_max_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) { +; CHECK-LABEL: test_int_x86_avx10_max_nepbf16_128: +; CHECK: # %bb.0: +; CHECK-NEXT: vmaxpbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5f,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res0 = call <8 x bfloat> @llvm.x86.avx10.vmaxpbf16128(<8 x bfloat> %x1, <8 x bfloat> %x2) + ret <8 x bfloat> %res0 +} + +define <8 x bfloat> @test_int_x86_avx10_maskz_max_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk) { +; X64-LABEL: test_int_x86_avx10_maskz_max_nepbf16_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vmaxpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x5f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx10_maskz_max_nepbf16_128: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vmaxpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x5f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i8 %msk to <8 x i1> + %res0 = call <8 x bfloat> @llvm.x86.avx10.vmaxpbf16128(<8 x bfloat> %x1, <8 x bfloat> %x2) + %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer + ret <8 x bfloat> %res1 +} + +declare i32 @llvm.x86.avx10.vcomsbf16eq(<8 x bfloat>, <8 x bfloat>) +declare i32 @llvm.x86.avx10.vcomsbf16lt(<8 x bfloat>, <8 x bfloat>) +declare i32 @llvm.x86.avx10.vcomsbf16le(<8 x bfloat>, <8 x bfloat>) +declare i32 @llvm.x86.avx10.vcomsbf16gt(<8 x bfloat>, <8 x bfloat>) +declare i32 @llvm.x86.avx10.vcomsbf16ge(<8 x bfloat>, <8 x bfloat>) +declare i32 @llvm.x86.avx10.vcomsbf16neq(<8 x bfloat>, <8 x bfloat>) + +define i32 @test_x86_avx10_com_nesbf16_eq(<8 x bfloat> %a0, <8 x bfloat> %a1) { +; CHECK-LABEL: test_x86_avx10_com_nesbf16_eq: +; CHECK: # %bb.0: +; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1] +; CHECK-NEXT: setnp %al # 
encoding: [0x0f,0x9b,0xc0] +; CHECK-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] +; CHECK-NEXT: andb %al, %cl # encoding: [0x20,0xc1] +; CHECK-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call i32 @llvm.x86.avx10.vcomsbf16eq(<8 x bfloat> %a0, <8 x bfloat> %a1) + ret i32 %res +} + +define i32 @test_x86_avx10_com_nesbf16_lt(<8 x bfloat> %a0, <8 x bfloat> %a1) { +; CHECK-LABEL: test_x86_avx10_com_nesbf16_lt: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; CHECK-NEXT: vcomsbf16 %xmm0, %xmm1 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc8] +; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call i32 @llvm.x86.avx10.vcomsbf16lt(<8 x bfloat> %a0, <8 x bfloat> %a1) + ret i32 %res +} + +define i32 @test_x86_avx10_com_nesbf16_le(<8 x bfloat> %a0, <8 x bfloat> %a1) { +; CHECK-LABEL: test_x86_avx10_com_nesbf16_le: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; CHECK-NEXT: vcomsbf16 %xmm0, %xmm1 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc8] +; CHECK-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call i32 @llvm.x86.avx10.vcomsbf16le(<8 x bfloat> %a0, <8 x bfloat> %a1) + ret i32 %res +} + +define i32 @test_x86_avx10_com_nesbf16_gt(<8 x bfloat> %a0, <8 x bfloat> %a1) { +; CHECK-LABEL: test_x86_avx10_com_nesbf16_gt: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1] +; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call i32 @llvm.x86.avx10.vcomsbf16gt(<8 x bfloat> %a0, <8 x bfloat> %a1) + ret i32 %res +} + +define i32 @test_x86_avx10_com_nesbf16_neq(<8 x bfloat> %a0, <8 x bfloat> %a1) { +; CHECK-LABEL: test_x86_avx10_com_nesbf16_neq: +; CHECK: # %bb.0: +; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 #
encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1] +; CHECK-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] +; CHECK-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] +; CHECK-NEXT: orb %al, %cl # encoding: [0x08,0xc1] +; CHECK-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call i32 @llvm.x86.avx10.vcomsbf16neq(<8 x bfloat> %a0, <8 x bfloat> %a1) + ret i32 %res +} + +declare <8 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.128(<8 x bfloat>, <8 x bfloat>, i8) +declare <16 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.256(<16 x bfloat>, <16 x bfloat>, i16) + +define <8 x bfloat> @test_rsqrt_nepbf16_128(<8 x bfloat> %a0) { +; CHECK-LABEL: test_rsqrt_nepbf16_128: +; CHECK: # %bb.0: +; CHECK-NEXT: vrsqrtpbf16 %xmm0, %xmm0 # encoding: [0x62,0xf6,0x7c,0x08,0x4e,0xc0] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <8 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.128(<8 x bfloat> %a0, <8 x bfloat> zeroinitializer, i8 -1) + ret <8 x bfloat> %res +} + +define <16 x bfloat> @test_rsqrt_nepbf16_256(<16 x bfloat> %a0) { +; CHECK-LABEL: test_rsqrt_nepbf16_256: +; CHECK: # %bb.0: +; CHECK-NEXT: vrsqrtpbf16 %ymm0, %ymm0 # encoding: [0x62,0xf6,0x7c,0x28,0x4e,0xc0] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <16 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.256(<16 x bfloat> %a0, <16 x bfloat> zeroinitializer, i16 -1) + ret <16 x bfloat> %res +} + +declare <8 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.128(<8 x bfloat>, <8 x bfloat>, i8) +declare <16 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.256(<16 x bfloat>, <16 x bfloat>, i16) + +define <8 x bfloat> @test_rcp_nepbf16_128(<8 x bfloat> %a0, <8 x bfloat> %a1, i8 %mask) { +; X64-LABEL: test_rcp_nepbf16_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vrcppbf16 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0x4c,0xc8] +; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] +; 
X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_rcp_nepbf16_128: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vrcppbf16 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0x4c,0xc8] +; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %res = call <8 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.128(<8 x bfloat> %a0, <8 x bfloat> %a1, i8 %mask) + ret <8 x bfloat> %res +} + +define <16 x bfloat> @test_rcp_nepbf16_256(<16 x bfloat> %a0, <16 x bfloat> %a1, i16 %mask) { +; X64-LABEL: test_rcp_nepbf16_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vrcppbf16 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0x4c,0xc8] +; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_rcp_nepbf16_256: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vrcppbf16 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0x4c,0xc8] +; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %res = call <16 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.256(<16 x bfloat> %a0, <16 x bfloat> %a1, i16 %mask) + ret <16 x bfloat> %res +} + +declare <8 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.128(<8 x bfloat>, i32, <8 x bfloat>, i8) +declare <16 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.256(<16 x bfloat>, i32, <16 x bfloat>, i16) + +define <8 x bfloat>@test_int_x86_avx512_mask_reduce_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x2, i8 %x3) { +; X64-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vreducenepbf16 $8, %xmm0, %xmm1 {%k1} # encoding: 
[0x62,0xf3,0x7f,0x09,0x56,0xc8,0x08] +; X64-NEXT: vreducenepbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x56,0xc0,0x04] +; X64-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_128: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vreducenepbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x56,0xc8,0x08] +; X86-NEXT: vreducenepbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x56,0xc0,0x04] +; X86-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0] +; X86-NEXT: retl # encoding: [0xc3] + %res = call <8 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.128(<8 x bfloat> %x0, i32 8, <8 x bfloat> %x2, i8 %x3) + %res1 = call <8 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.128(<8 x bfloat> %x0, i32 4, <8 x bfloat> %x2, i8 -1) + %res2 = fadd <8 x bfloat> %res, %res1 + ret <8 x bfloat> %res2 +} + +define <16 x bfloat>@test_int_x86_avx512_mask_reduce_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x2, i16 %x3) { +; X64-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vreducenepbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x56,0xc8,0x08] +; X64-NEXT: vreducenepbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x56,0xc0,0x04] +; X64-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_256: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vreducenepbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x56,0xc8,0x08] +; X86-NEXT: vreducenepbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x56,0xc0,0x04] +; X86-NEXT: 
vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0] +; X86-NEXT: retl # encoding: [0xc3] + %res = call <16 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.256(<16 x bfloat> %x0, i32 8, <16 x bfloat> %x2, i16 %x3) + %res1 = call <16 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.256(<16 x bfloat> %x0, i32 4, <16 x bfloat> %x2, i16 -1) + %res2 = fadd <16 x bfloat> %res, %res1 + ret <16 x bfloat> %res2 +} + +declare <8 x i1> @llvm.x86.avx10.fpclass.nepbf16.128(<8 x bfloat>, i32) +declare <16 x i1> @llvm.x86.avx10.fpclass.nepbf16.256(<16 x bfloat>, i32) + +define i8 @test_int_x86_avx512_fpclass_nepbf16_128(<8 x bfloat> %x0) { +; CHECK-LABEL: test_int_x86_avx512_fpclass_nepbf16_128: +; CHECK: # %bb.0: +; CHECK-NEXT: vfpclasspbf16 $2, %xmm0, %k1 # encoding: [0x62,0xf3,0x7f,0x08,0x66,0xc8,0x02] +; CHECK-NEXT: vfpclasspbf16 $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x66,0xc0,0x04] +; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <8 x i1> @llvm.x86.avx10.fpclass.nepbf16.128(<8 x bfloat> %x0, i32 4) + %res1 = call <8 x i1> @llvm.x86.avx10.fpclass.nepbf16.128(<8 x bfloat> %x0, i32 2) + %1 = and <8 x i1> %res1, %res + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +define i16 @test_int_x86_avx512_fpclass_nepbf16_256(<16 x bfloat> %x0) { +; CHECK-LABEL: test_int_x86_avx512_fpclass_nepbf16_256: +; CHECK: # %bb.0: +; CHECK-NEXT: vfpclasspbf16 $2, %ymm0, %k1 # encoding: [0x62,0xf3,0x7f,0x28,0x66,0xc8,0x02] +; CHECK-NEXT: vfpclasspbf16 $4, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x66,0xc0,0x04] +; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <16 x i1> @llvm.x86.avx10.fpclass.nepbf16.256(<16 x bfloat> %x0, i32 4) + %res1 = call 
<16 x i1> @llvm.x86.avx10.fpclass.nepbf16.256(<16 x bfloat> %x0, i32 2) + %1 = and <16 x i1> %res1, %res + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +} + +declare <8 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.128(<8 x bfloat>, <8 x bfloat>, i8) +declare <16 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.256(<16 x bfloat>, <16 x bfloat>, i16) + +define <8 x bfloat>@test_int_x86_avx512_getexp_nepbf16_128(<8 x bfloat> %x0) { +; CHECK-LABEL: test_int_x86_avx512_getexp_nepbf16_128: +; CHECK: # %bb.0: +; CHECK-NEXT: vgetexppbf16 %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x42,0xc0] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <8 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> zeroinitializer, i8 -1) + ret <8 x bfloat> %res +} + +define <8 x bfloat>@test_int_x86_avx512_mask_getexp_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x1, i8 %x2) { +; X64-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vgetexppbf16 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x42,0xc8] +; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_128: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vgetexppbf16 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x42,0xc8] +; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %res = call <8 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> %x1, i8 %x2) + ret <8 x bfloat> %res +} + +define <8 x bfloat>@test_int_x86_avx512_maskz_getexp_nepbf16_128(<8 x bfloat> %x0, i8 %x2) { +; X64-LABEL: test_int_x86_avx512_maskz_getexp_nepbf16_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: 
[0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vgetexppbf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x42,0xc0] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx512_maskz_getexp_nepbf16_128: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vgetexppbf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x42,0xc0] +; X86-NEXT: retl # encoding: [0xc3] + %res = call <8 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> zeroinitializer, i8 %x2) + ret <8 x bfloat> %res +} + +define <16 x bfloat>@test_int_x86_avx512_getexp_nepbf16_256(<16 x bfloat> %x0) { +; CHECK-LABEL: test_int_x86_avx512_getexp_nepbf16_256: +; CHECK: # %bb.0: +; CHECK-NEXT: vgetexppbf16 %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x42,0xc0] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <16 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> zeroinitializer, i16 -1) + ret <16 x bfloat> %res +} + +define <16 x bfloat>@test_int_x86_avx512_mask_getexp_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x1, i16 %x2) { +; X64-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vgetexppbf16 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x42,0xc8] +; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_256: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vgetexppbf16 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x42,0xc8] +; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %res = call <16 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> 
%x1, i16 %x2) + ret <16 x bfloat> %res +} + +define <16 x bfloat>@test_int_x86_avx512_maskz_getexp_nepbf16_256(<16 x bfloat> %x0, i16 %x2) { +; X64-LABEL: test_int_x86_avx512_maskz_getexp_nepbf16_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vgetexppbf16 %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x42,0xc0] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx512_maskz_getexp_nepbf16_256: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vgetexppbf16 %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x42,0xc0] +; X86-NEXT: retl # encoding: [0xc3] + %res = call <16 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> zeroinitializer, i16 %x2) + ret <16 x bfloat> %res +} + +declare <8 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.128(<8 x bfloat>, i32, <8 x bfloat>, i8) +declare <16 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.256(<16 x bfloat>, i32, <16 x bfloat>, i16) + +define <8 x bfloat>@test_int_x86_avx512_mask_getmant_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x2, i8 %x3) { +; X64-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vgetmantpbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x26,0xc8,0x08] +; X64-NEXT: vgetmantpbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x26,0xc0,0x04] +; X64-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_128: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vgetmantpbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x26,0xc8,0x08] +; X86-NEXT: vgetmantpbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x26,0xc0,0x04] +; 
X86-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0] +; X86-NEXT: retl # encoding: [0xc3] + %res = call <8 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.128(<8 x bfloat> %x0, i32 8, <8 x bfloat> %x2, i8 %x3) + %res1 = call <8 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.128(<8 x bfloat> %x0, i32 4, <8 x bfloat> %x2, i8 -1) + %res2 = fadd <8 x bfloat> %res, %res1 + ret <8 x bfloat> %res2 +} + +define <16 x bfloat>@test_int_x86_avx512_mask_getmant_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x2, i16 %x3) { +; X64-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vgetmantpbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x26,0xc8,0x08] +; X64-NEXT: vgetmantpbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x26,0xc0,0x04] +; X64-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_256: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vgetmantpbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x26,0xc8,0x08] +; X86-NEXT: vgetmantpbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x26,0xc0,0x04] +; X86-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0] +; X86-NEXT: retl # encoding: [0xc3] + %res = call <16 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.256(<16 x bfloat> %x0, i32 8, <16 x bfloat> %x2, i16 %x3) + %res1 = call <16 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.256(<16 x bfloat> %x0, i32 4, <16 x bfloat> %x2, i16 -1) + %res2 = fadd <16 x bfloat> %res, %res1 + ret <16 x bfloat> %res2 +} + +declare <8 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.128(<8 x bfloat>, i32, <8 x bfloat>, i8) +declare <16 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.256(<16 x bfloat>, i32, <16 x 
bfloat>, i16) + +define <8 x bfloat>@test_int_x86_avx512_mask_rndscale_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x2, i8 %x3) { +; X64-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vrndscalenepbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x08,0xc8,0x08] +; X64-NEXT: vrndscalenepbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x08,0xc0,0x04] +; X64-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_128: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vrndscalenepbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x08,0xc8,0x08] +; X86-NEXT: vrndscalenepbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x08,0xc0,0x04] +; X86-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0] +; X86-NEXT: retl # encoding: [0xc3] + %res = call <8 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.128(<8 x bfloat> %x0, i32 8, <8 x bfloat> %x2, i8 %x3) + %res1 = call <8 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.128(<8 x bfloat> %x0, i32 4, <8 x bfloat> %x2, i8 -1) + %res2 = fadd <8 x bfloat> %res, %res1 + ret <8 x bfloat> %res2 +} + +define <16 x bfloat>@test_int_x86_avx512_mask_rndscale_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x2, i16 %x3) { +; X64-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vrndscalenepbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x08,0xc8,0x08] +; X64-NEXT: vrndscalenepbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x08,0xc0,0x04] +; X64-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: 
test_int_x86_avx512_mask_rndscale_nepbf16_256: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vrndscalenepbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x08,0xc8,0x08] +; X86-NEXT: vrndscalenepbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x08,0xc0,0x04] +; X86-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0] +; X86-NEXT: retl # encoding: [0xc3] + %res = call <16 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.256(<16 x bfloat> %x0, i32 8, <16 x bfloat> %x2, i16 %x3) + %res1 = call <16 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.256(<16 x bfloat> %x0, i32 4, <16 x bfloat> %x2, i16 -1) + %res2 = fadd <16 x bfloat> %res, %res1 + ret <16 x bfloat> %res2 +} + +declare <8 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.128(<8 x bfloat>, <8 x bfloat>, <8 x bfloat>, i8) +declare <16 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.256(<16 x bfloat>, <16 x bfloat>, <16 x bfloat>, i16) + +define <8 x bfloat>@test_int_x86_avx512_scalef_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x1) { +; CHECK-LABEL: test_int_x86_avx512_scalef_nepbf16_128: +; CHECK: # %bb.0: +; CHECK-NEXT: vscalefpbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf6,0x7c,0x08,0x2c,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <8 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> %x1, <8 x bfloat> zeroinitializer, i8 -1) + ret <8 x bfloat> %res +} + +define <8 x bfloat>@test_int_x86_avx512_mask_scalef_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %x3) { +; X64-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vscalefpbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0x2c,0xd1] +; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; 
X86-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_128: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vscalefpbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0x2c,0xd1] +; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i8 %x3 to <8 x i1> + %res = call <8 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %x3) + ret <8 x bfloat> %res +} + +define <8 x bfloat>@test_int_x86_avx512_maskz_scalef_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x1, i8 %x3) { +; X64-LABEL: test_int_x86_avx512_maskz_scalef_nepbf16_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vscalefpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x7c,0x89,0x2c,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx512_maskz_scalef_nepbf16_128: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vscalefpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x7c,0x89,0x2c,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i8 %x3 to <8 x i1> + %res = call <8 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> %x1, <8 x bfloat> zeroinitializer, i8 %x3) + ret <8 x bfloat> %res +} + +define <16 x bfloat>@test_int_x86_avx512_scalef_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x1) { +; CHECK-LABEL: test_int_x86_avx512_scalef_nepbf16_256: +; CHECK: # %bb.0: +; CHECK-NEXT: vscalefpbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf6,0x7c,0x28,0x2c,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <16 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> %x1, <16 x bfloat> zeroinitializer, i16 -1) + ret <16 x bfloat> %res +} + +define <16 x 
bfloat>@test_int_x86_avx512_mask_scalef_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %x3) { +; X64-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vscalefpbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0x2c,0xd1] +; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_256: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vscalefpbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0x2c,0xd1] +; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i16 %x3 to <16 x i1> + %res = call <16 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %x3) + ret <16 x bfloat> %res +} + +define <16 x bfloat>@test_int_x86_avx512_maskz_scalef_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x1, i16 %x3) { +; X64-LABEL: test_int_x86_avx512_maskz_scalef_nepbf16_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vscalefpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x7c,0xa9,0x2c,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_avx512_maskz_scalef_nepbf16_256: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vscalefpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x7c,0xa9,0x2c,0xc1] +; X86-NEXT: retl # encoding: [0xc3] + %mask = bitcast i16 %x3 to <16 x i1> + %res = call <16 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> %x1, <16 x bfloat> zeroinitializer, i16 %x3) + ret <16 x bfloat> %res +} diff --git 
a/llvm/test/MC/Disassembler/X86/avx10.2-bf16-32.txt b/llvm/test/MC/Disassembler/X86/avx10.2-bf16-32.txt new file mode 100644 index 00000000000000..8cc53db077e4f7 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/avx10.2-bf16-32.txt @@ -0,0 +1,3015 @@ +# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=i386 --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: vaddnepbf16 %ymm4, %ymm3, %ymm2 +# INTEL: vaddnepbf16 ymm2, ymm3, ymm4 +0x62,0xf5,0x65,0x28,0x58,0xd4 + +# ATT: vaddnepbf16 %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vaddnepbf16 ymm2 {k7}, ymm3, ymm4 +0x62,0xf5,0x65,0x2f,0x58,0xd4 + +# ATT: vaddnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vaddnepbf16 ymm2 {k7} {z}, ymm3, ymm4 +0x62,0xf5,0x65,0xaf,0x58,0xd4 + +# ATT: vaddnepbf16 %zmm4, %zmm3, %zmm2 +# INTEL: vaddnepbf16 zmm2, zmm3, zmm4 +0x62,0xf5,0x65,0x48,0x58,0xd4 + +# ATT: vaddnepbf16 %zmm4, %zmm3, %zmm2 {%k7} +# INTEL: vaddnepbf16 zmm2 {k7}, zmm3, zmm4 +0x62,0xf5,0x65,0x4f,0x58,0xd4 + +# ATT: vaddnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +# INTEL: vaddnepbf16 zmm2 {k7} {z}, zmm3, zmm4 +0x62,0xf5,0x65,0xcf,0x58,0xd4 + +# ATT: vaddnepbf16 %xmm4, %xmm3, %xmm2 +# INTEL: vaddnepbf16 xmm2, xmm3, xmm4 +0x62,0xf5,0x65,0x08,0x58,0xd4 + +# ATT: vaddnepbf16 %xmm4, %xmm3, %xmm2 {%k7} +# INTEL: vaddnepbf16 xmm2 {k7}, xmm3, xmm4 +0x62,0xf5,0x65,0x0f,0x58,0xd4 + +# ATT: vaddnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +# INTEL: vaddnepbf16 xmm2 {k7} {z}, xmm3, xmm4 +0x62,0xf5,0x65,0x8f,0x58,0xd4 + +# ATT: vaddnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vaddnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x65,0x48,0x58,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vaddnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +# INTEL: vaddnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x65,0x4f,0x58,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vaddnepbf16 (%eax){1to32}, %zmm3, %zmm2 +# INTEL: vaddnepbf16 zmm2, zmm3, word 
ptr [eax]{1to32} +0x62,0xf5,0x65,0x58,0x58,0x10 + +# ATT: vaddnepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vaddnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf5,0x65,0x48,0x58,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vaddnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vaddnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf5,0x65,0xcf,0x58,0x51,0x7f + +# ATT: vaddnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +# INTEL: vaddnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +0x62,0xf5,0x65,0xdf,0x58,0x52,0x80 + +# ATT: vaddnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vaddnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x65,0x28,0x58,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vaddnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +# INTEL: vaddnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x65,0x2f,0x58,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vaddnepbf16 (%eax){1to16}, %ymm3, %ymm2 +# INTEL: vaddnepbf16 ymm2, ymm3, word ptr [eax]{1to16} +0x62,0xf5,0x65,0x38,0x58,0x10 + +# ATT: vaddnepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vaddnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf5,0x65,0x28,0x58,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vaddnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vaddnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf5,0x65,0xaf,0x58,0x51,0x7f + +# ATT: vaddnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +# INTEL: vaddnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +0x62,0xf5,0x65,0xbf,0x58,0x52,0x80 + +# ATT: vaddnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vaddnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x65,0x08,0x58,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vaddnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +# INTEL: vaddnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x65,0x0f,0x58,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vaddnepbf16 (%eax){1to8}, %xmm3, %xmm2 +# INTEL: 
vaddnepbf16 xmm2, xmm3, word ptr [eax]{1to8} +0x62,0xf5,0x65,0x18,0x58,0x10 + +# ATT: vaddnepbf16 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vaddnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf5,0x65,0x08,0x58,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vaddnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vaddnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf5,0x65,0x8f,0x58,0x51,0x7f + +# ATT: vaddnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +# INTEL: vaddnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +0x62,0xf5,0x65,0x9f,0x58,0x52,0x80 + +# ATT: vcmppbf16 $123, %ymm4, %ymm3, %k5 +# INTEL: vcmppbf16 k5, ymm3, ymm4, 123 +0x62,0xf3,0x67,0x28,0xc2,0xec,0x7b + +# ATT: vcmppbf16 $123, %ymm4, %ymm3, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, ymm3, ymm4, 123 +0x62,0xf3,0x67,0x2f,0xc2,0xec,0x7b + +# ATT: vcmppbf16 $123, %xmm4, %xmm3, %k5 +# INTEL: vcmppbf16 k5, xmm3, xmm4, 123 +0x62,0xf3,0x67,0x08,0xc2,0xec,0x7b + +# ATT: vcmppbf16 $123, %xmm4, %xmm3, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, xmm3, xmm4, 123 +0x62,0xf3,0x67,0x0f,0xc2,0xec,0x7b + +# ATT: vcmppbf16 $123, %zmm4, %zmm3, %k5 +# INTEL: vcmppbf16 k5, zmm3, zmm4, 123 +0x62,0xf3,0x67,0x48,0xc2,0xec,0x7b + +# ATT: vcmppbf16 $123, %zmm4, %zmm3, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, zmm3, zmm4, 123 +0x62,0xf3,0x67,0x4f,0xc2,0xec,0x7b + +# ATT: vcmppbf16 $123, 268435456(%esp,%esi,8), %zmm3, %k5 +# INTEL: vcmppbf16 k5, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123 +0x62,0xf3,0x67,0x48,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b + +# ATT: vcmppbf16 $123, 291(%edi,%eax,4), %zmm3, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123 +0x62,0xf3,0x67,0x4f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b + +# ATT: vcmppbf16 $123, (%eax){1to32}, %zmm3, %k5 +# INTEL: vcmppbf16 k5, zmm3, word ptr [eax]{1to32}, 123 +0x62,0xf3,0x67,0x58,0xc2,0x28,0x7b + +# ATT: vcmppbf16 $123, -2048(,%ebp,2), %zmm3, %k5 +# INTEL: vcmppbf16 k5, zmm3, zmmword ptr [2*ebp - 2048], 123 
+0x62,0xf3,0x67,0x48,0xc2,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b + +# ATT: vcmppbf16 $123, 8128(%ecx), %zmm3, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, zmm3, zmmword ptr [ecx + 8128], 123 +0x62,0xf3,0x67,0x4f,0xc2,0x69,0x7f,0x7b + +# ATT: vcmppbf16 $123, -256(%edx){1to32}, %zmm3, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, zmm3, word ptr [edx - 256]{1to32}, 123 +0x62,0xf3,0x67,0x5f,0xc2,0x6a,0x80,0x7b + +# ATT: vcmppbf16 $123, 268435456(%esp,%esi,8), %xmm3, %k5 +# INTEL: vcmppbf16 k5, xmm3, xmmword ptr [esp + 8*esi + 268435456], 123 +0x62,0xf3,0x67,0x08,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b + +# ATT: vcmppbf16 $123, 291(%edi,%eax,4), %xmm3, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123 +0x62,0xf3,0x67,0x0f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b + +# ATT: vcmppbf16 $123, (%eax){1to8}, %xmm3, %k5 +# INTEL: vcmppbf16 k5, xmm3, word ptr [eax]{1to8}, 123 +0x62,0xf3,0x67,0x18,0xc2,0x28,0x7b + +# ATT: vcmppbf16 $123, -512(,%ebp,2), %xmm3, %k5 +# INTEL: vcmppbf16 k5, xmm3, xmmword ptr [2*ebp - 512], 123 +0x62,0xf3,0x67,0x08,0xc2,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b + +# ATT: vcmppbf16 $123, 2032(%ecx), %xmm3, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, xmm3, xmmword ptr [ecx + 2032], 123 +0x62,0xf3,0x67,0x0f,0xc2,0x69,0x7f,0x7b + +# ATT: vcmppbf16 $123, -256(%edx){1to8}, %xmm3, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, xmm3, word ptr [edx - 256]{1to8}, 123 +0x62,0xf3,0x67,0x1f,0xc2,0x6a,0x80,0x7b + +# ATT: vcmppbf16 $123, 268435456(%esp,%esi,8), %ymm3, %k5 +# INTEL: vcmppbf16 k5, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123 +0x62,0xf3,0x67,0x28,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b + +# ATT: vcmppbf16 $123, 291(%edi,%eax,4), %ymm3, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291], 123 +0x62,0xf3,0x67,0x2f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b + +# ATT: vcmppbf16 $123, (%eax){1to16}, %ymm3, %k5 +# INTEL: vcmppbf16 k5, ymm3, word ptr [eax]{1to16}, 123 +0x62,0xf3,0x67,0x38,0xc2,0x28,0x7b + +# ATT: vcmppbf16 $123, 
-1024(,%ebp,2), %ymm3, %k5 +# INTEL: vcmppbf16 k5, ymm3, ymmword ptr [2*ebp - 1024], 123 +0x62,0xf3,0x67,0x28,0xc2,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b + +# ATT: vcmppbf16 $123, 4064(%ecx), %ymm3, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, ymm3, ymmword ptr [ecx + 4064], 123 +0x62,0xf3,0x67,0x2f,0xc2,0x69,0x7f,0x7b + +# ATT: vcmppbf16 $123, -256(%edx){1to16}, %ymm3, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, ymm3, word ptr [edx - 256]{1to16}, 123 +0x62,0xf3,0x67,0x3f,0xc2,0x6a,0x80,0x7b + +# ATT: vcomsbf16 %xmm3, %xmm2 +# INTEL: vcomsbf16 xmm2, xmm3 +0x62,0xf5,0x7d,0x08,0x2f,0xd3 + +# ATT: vcomsbf16 268435456(%esp,%esi,8), %xmm2 +# INTEL: vcomsbf16 xmm2, word ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x7d,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vcomsbf16 291(%edi,%eax,4), %xmm2 +# INTEL: vcomsbf16 xmm2, word ptr [edi + 4*eax + 291] +0x62,0xf5,0x7d,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vcomsbf16 (%eax), %xmm2 +# INTEL: vcomsbf16 xmm2, word ptr [eax] +0x62,0xf5,0x7d,0x08,0x2f,0x10 + +# ATT: vcomsbf16 -64(,%ebp,2), %xmm2 +# INTEL: vcomsbf16 xmm2, word ptr [2*ebp - 64] +0x62,0xf5,0x7d,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vcomsbf16 254(%ecx), %xmm2 +# INTEL: vcomsbf16 xmm2, word ptr [ecx + 254] +0x62,0xf5,0x7d,0x08,0x2f,0x51,0x7f + +# ATT: vcomsbf16 -256(%edx), %xmm2 +# INTEL: vcomsbf16 xmm2, word ptr [edx - 256] +0x62,0xf5,0x7d,0x08,0x2f,0x52,0x80 + +# ATT: vdivnepbf16 %ymm4, %ymm3, %ymm2 +# INTEL: vdivnepbf16 ymm2, ymm3, ymm4 +0x62,0xf5,0x65,0x28,0x5e,0xd4 + +# ATT: vdivnepbf16 %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vdivnepbf16 ymm2 {k7}, ymm3, ymm4 +0x62,0xf5,0x65,0x2f,0x5e,0xd4 + +# ATT: vdivnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vdivnepbf16 ymm2 {k7} {z}, ymm3, ymm4 +0x62,0xf5,0x65,0xaf,0x5e,0xd4 + +# ATT: vdivnepbf16 %zmm4, %zmm3, %zmm2 +# INTEL: vdivnepbf16 zmm2, zmm3, zmm4 +0x62,0xf5,0x65,0x48,0x5e,0xd4 + +# ATT: vdivnepbf16 %zmm4, %zmm3, %zmm2 {%k7} +# INTEL: vdivnepbf16 zmm2 {k7}, zmm3, zmm4 +0x62,0xf5,0x65,0x4f,0x5e,0xd4 + +# 
ATT: vdivnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +# INTEL: vdivnepbf16 zmm2 {k7} {z}, zmm3, zmm4 +0x62,0xf5,0x65,0xcf,0x5e,0xd4 + +# ATT: vdivnepbf16 %xmm4, %xmm3, %xmm2 +# INTEL: vdivnepbf16 xmm2, xmm3, xmm4 +0x62,0xf5,0x65,0x08,0x5e,0xd4 + +# ATT: vdivnepbf16 %xmm4, %xmm3, %xmm2 {%k7} +# INTEL: vdivnepbf16 xmm2 {k7}, xmm3, xmm4 +0x62,0xf5,0x65,0x0f,0x5e,0xd4 + +# ATT: vdivnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +# INTEL: vdivnepbf16 xmm2 {k7} {z}, xmm3, xmm4 +0x62,0xf5,0x65,0x8f,0x5e,0xd4 + +# ATT: vdivnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vdivnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x65,0x48,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vdivnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +# INTEL: vdivnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x65,0x4f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vdivnepbf16 (%eax){1to32}, %zmm3, %zmm2 +# INTEL: vdivnepbf16 zmm2, zmm3, word ptr [eax]{1to32} +0x62,0xf5,0x65,0x58,0x5e,0x10 + +# ATT: vdivnepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vdivnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf5,0x65,0x48,0x5e,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vdivnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vdivnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf5,0x65,0xcf,0x5e,0x51,0x7f + +# ATT: vdivnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +# INTEL: vdivnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +0x62,0xf5,0x65,0xdf,0x5e,0x52,0x80 + +# ATT: vdivnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vdivnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x65,0x28,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vdivnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +# INTEL: vdivnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x65,0x2f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vdivnepbf16 (%eax){1to16}, %ymm3, %ymm2 +# INTEL: vdivnepbf16 ymm2, ymm3, word ptr [eax]{1to16} 
+0x62,0xf5,0x65,0x38,0x5e,0x10 + +# ATT: vdivnepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vdivnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf5,0x65,0x28,0x5e,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vdivnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vdivnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf5,0x65,0xaf,0x5e,0x51,0x7f + +# ATT: vdivnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +# INTEL: vdivnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +0x62,0xf5,0x65,0xbf,0x5e,0x52,0x80 + +# ATT: vdivnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vdivnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x65,0x08,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vdivnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +# INTEL: vdivnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x65,0x0f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vdivnepbf16 (%eax){1to8}, %xmm3, %xmm2 +# INTEL: vdivnepbf16 xmm2, xmm3, word ptr [eax]{1to8} +0x62,0xf5,0x65,0x18,0x5e,0x10 + +# ATT: vdivnepbf16 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vdivnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf5,0x65,0x08,0x5e,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vdivnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vdivnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf5,0x65,0x8f,0x5e,0x51,0x7f + +# ATT: vdivnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +# INTEL: vdivnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +0x62,0xf5,0x65,0x9f,0x5e,0x52,0x80 + +# ATT: vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 +# INTEL: vfmadd132nepbf16 ymm2, ymm3, ymm4 +0x62,0xf6,0x64,0x28,0x98,0xd4 + +# ATT: vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vfmadd132nepbf16 ymm2 {k7}, ymm3, ymm4 +0x62,0xf6,0x64,0x2f,0x98,0xd4 + +# ATT: vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +0x62,0xf6,0x64,0xaf,0x98,0xd4 + +# ATT: vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 +# INTEL: 
vfmadd132nepbf16 zmm2, zmm3, zmm4 +0x62,0xf6,0x64,0x48,0x98,0xd4 + +# ATT: vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +# INTEL: vfmadd132nepbf16 zmm2 {k7}, zmm3, zmm4 +0x62,0xf6,0x64,0x4f,0x98,0xd4 + +# ATT: vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +0x62,0xf6,0x64,0xcf,0x98,0xd4 + +# ATT: vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 +# INTEL: vfmadd132nepbf16 xmm2, xmm3, xmm4 +0x62,0xf6,0x64,0x08,0x98,0xd4 + +# ATT: vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +# INTEL: vfmadd132nepbf16 xmm2 {k7}, xmm3, xmm4 +0x62,0xf6,0x64,0x0f,0x98,0xd4 + +# ATT: vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +0x62,0xf6,0x64,0x8f,0x98,0xd4 + +# ATT: vfmadd132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vfmadd132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x48,0x98,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfmadd132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +# INTEL: vfmadd132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x4f,0x98,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfmadd132nepbf16 (%eax){1to32}, %zmm3, %zmm2 +# INTEL: vfmadd132nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +0x62,0xf6,0x64,0x58,0x98,0x10 + +# ATT: vfmadd132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vfmadd132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf6,0x64,0x48,0x98,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfmadd132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf6,0x64,0xcf,0x98,0x51,0x7f + +# ATT: vfmadd132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +0x62,0xf6,0x64,0xdf,0x98,0x52,0x80 + +# ATT: vfmadd132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vfmadd132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] 
+0x62,0xf6,0x64,0x28,0x98,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfmadd132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +# INTEL: vfmadd132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x2f,0x98,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfmadd132nepbf16 (%eax){1to16}, %ymm3, %ymm2 +# INTEL: vfmadd132nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +0x62,0xf6,0x64,0x38,0x98,0x10 + +# ATT: vfmadd132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vfmadd132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf6,0x64,0x28,0x98,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfmadd132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf6,0x64,0xaf,0x98,0x51,0x7f + +# ATT: vfmadd132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +0x62,0xf6,0x64,0xbf,0x98,0x52,0x80 + +# ATT: vfmadd132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vfmadd132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x08,0x98,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfmadd132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +# INTEL: vfmadd132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x0f,0x98,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfmadd132nepbf16 (%eax){1to8}, %xmm3, %xmm2 +# INTEL: vfmadd132nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +0x62,0xf6,0x64,0x18,0x98,0x10 + +# ATT: vfmadd132nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vfmadd132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf6,0x64,0x08,0x98,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfmadd132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf6,0x64,0x8f,0x98,0x51,0x7f + +# ATT: vfmadd132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +0x62,0xf6,0x64,0x9f,0x98,0x52,0x80 + +# 
ATT: vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 +# INTEL: vfmadd213nepbf16 ymm2, ymm3, ymm4 +0x62,0xf6,0x64,0x28,0xa8,0xd4 + +# ATT: vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vfmadd213nepbf16 ymm2 {k7}, ymm3, ymm4 +0x62,0xf6,0x64,0x2f,0xa8,0xd4 + +# ATT: vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +0x62,0xf6,0x64,0xaf,0xa8,0xd4 + +# ATT: vfmadd213nepbf16 %zmm4, %zmm3, %zmm2 +# INTEL: vfmadd213nepbf16 zmm2, zmm3, zmm4 +0x62,0xf6,0x64,0x48,0xa8,0xd4 + +# ATT: vfmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +# INTEL: vfmadd213nepbf16 zmm2 {k7}, zmm3, zmm4 +0x62,0xf6,0x64,0x4f,0xa8,0xd4 + +# ATT: vfmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +0x62,0xf6,0x64,0xcf,0xa8,0xd4 + +# ATT: vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 +# INTEL: vfmadd213nepbf16 xmm2, xmm3, xmm4 +0x62,0xf6,0x64,0x08,0xa8,0xd4 + +# ATT: vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +# INTEL: vfmadd213nepbf16 xmm2 {k7}, xmm3, xmm4 +0x62,0xf6,0x64,0x0f,0xa8,0xd4 + +# ATT: vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +0x62,0xf6,0x64,0x8f,0xa8,0xd4 + +# ATT: vfmadd213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vfmadd213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x48,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfmadd213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +# INTEL: vfmadd213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x4f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfmadd213nepbf16 (%eax){1to32}, %zmm3, %zmm2 +# INTEL: vfmadd213nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +0x62,0xf6,0x64,0x58,0xa8,0x10 + +# ATT: vfmadd213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vfmadd213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf6,0x64,0x48,0xa8,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfmadd213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vfmadd213nepbf16 zmm2 {k7} 
{z}, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf6,0x64,0xcf,0xa8,0x51,0x7f + +# ATT: vfmadd213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +0x62,0xf6,0x64,0xdf,0xa8,0x52,0x80 + +# ATT: vfmadd213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vfmadd213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x28,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfmadd213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +# INTEL: vfmadd213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x2f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfmadd213nepbf16 (%eax){1to16}, %ymm3, %ymm2 +# INTEL: vfmadd213nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +0x62,0xf6,0x64,0x38,0xa8,0x10 + +# ATT: vfmadd213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vfmadd213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf6,0x64,0x28,0xa8,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfmadd213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf6,0x64,0xaf,0xa8,0x51,0x7f + +# ATT: vfmadd213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +0x62,0xf6,0x64,0xbf,0xa8,0x52,0x80 + +# ATT: vfmadd213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vfmadd213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x08,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfmadd213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +# INTEL: vfmadd213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x0f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfmadd213nepbf16 (%eax){1to8}, %xmm3, %xmm2 +# INTEL: vfmadd213nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +0x62,0xf6,0x64,0x18,0xa8,0x10 + +# ATT: vfmadd213nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vfmadd213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] 
+0x62,0xf6,0x64,0x08,0xa8,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfmadd213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf6,0x64,0x8f,0xa8,0x51,0x7f + +# ATT: vfmadd213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +0x62,0xf6,0x64,0x9f,0xa8,0x52,0x80 + +# ATT: vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 +# INTEL: vfmadd231nepbf16 ymm2, ymm3, ymm4 +0x62,0xf6,0x64,0x28,0xb8,0xd4 + +# ATT: vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vfmadd231nepbf16 ymm2 {k7}, ymm3, ymm4 +0x62,0xf6,0x64,0x2f,0xb8,0xd4 + +# ATT: vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +0x62,0xf6,0x64,0xaf,0xb8,0xd4 + +# ATT: vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 +# INTEL: vfmadd231nepbf16 zmm2, zmm3, zmm4 +0x62,0xf6,0x64,0x48,0xb8,0xd4 + +# ATT: vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +# INTEL: vfmadd231nepbf16 zmm2 {k7}, zmm3, zmm4 +0x62,0xf6,0x64,0x4f,0xb8,0xd4 + +# ATT: vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +0x62,0xf6,0x64,0xcf,0xb8,0xd4 + +# ATT: vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 +# INTEL: vfmadd231nepbf16 xmm2, xmm3, xmm4 +0x62,0xf6,0x64,0x08,0xb8,0xd4 + +# ATT: vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +# INTEL: vfmadd231nepbf16 xmm2 {k7}, xmm3, xmm4 +0x62,0xf6,0x64,0x0f,0xb8,0xd4 + +# ATT: vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +0x62,0xf6,0x64,0x8f,0xb8,0xd4 + +# ATT: vfmadd231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vfmadd231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x48,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfmadd231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +# INTEL: vfmadd231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x4f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: 
vfmadd231nepbf16 (%eax){1to32}, %zmm3, %zmm2 +# INTEL: vfmadd231nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +0x62,0xf6,0x64,0x58,0xb8,0x10 + +# ATT: vfmadd231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vfmadd231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf6,0x64,0x48,0xb8,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfmadd231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf6,0x64,0xcf,0xb8,0x51,0x7f + +# ATT: vfmadd231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +0x62,0xf6,0x64,0xdf,0xb8,0x52,0x80 + +# ATT: vfmadd231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vfmadd231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x28,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfmadd231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +# INTEL: vfmadd231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x2f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfmadd231nepbf16 (%eax){1to16}, %ymm3, %ymm2 +# INTEL: vfmadd231nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +0x62,0xf6,0x64,0x38,0xb8,0x10 + +# ATT: vfmadd231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vfmadd231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf6,0x64,0x28,0xb8,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfmadd231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf6,0x64,0xaf,0xb8,0x51,0x7f + +# ATT: vfmadd231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +0x62,0xf6,0x64,0xbf,0xb8,0x52,0x80 + +# ATT: vfmadd231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vfmadd231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x08,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfmadd231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} 
+# INTEL: vfmadd231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x0f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfmadd231nepbf16 (%eax){1to8}, %xmm3, %xmm2 +# INTEL: vfmadd231nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +0x62,0xf6,0x64,0x18,0xb8,0x10 + +# ATT: vfmadd231nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vfmadd231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf6,0x64,0x08,0xb8,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfmadd231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf6,0x64,0x8f,0xb8,0x51,0x7f + +# ATT: vfmadd231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +0x62,0xf6,0x64,0x9f,0xb8,0x52,0x80 + +# ATT: vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 +# INTEL: vfmsub132nepbf16 ymm2, ymm3, ymm4 +0x62,0xf6,0x64,0x28,0x9a,0xd4 + +# ATT: vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vfmsub132nepbf16 ymm2 {k7}, ymm3, ymm4 +0x62,0xf6,0x64,0x2f,0x9a,0xd4 + +# ATT: vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +0x62,0xf6,0x64,0xaf,0x9a,0xd4 + +# ATT: vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 +# INTEL: vfmsub132nepbf16 zmm2, zmm3, zmm4 +0x62,0xf6,0x64,0x48,0x9a,0xd4 + +# ATT: vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +# INTEL: vfmsub132nepbf16 zmm2 {k7}, zmm3, zmm4 +0x62,0xf6,0x64,0x4f,0x9a,0xd4 + +# ATT: vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +0x62,0xf6,0x64,0xcf,0x9a,0xd4 + +# ATT: vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 +# INTEL: vfmsub132nepbf16 xmm2, xmm3, xmm4 +0x62,0xf6,0x64,0x08,0x9a,0xd4 + +# ATT: vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +# INTEL: vfmsub132nepbf16 xmm2 {k7}, xmm3, xmm4 +0x62,0xf6,0x64,0x0f,0x9a,0xd4 + +# ATT: vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +0x62,0xf6,0x64,0x8f,0x9a,0xd4 + +# 
ATT: vfmsub132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vfmsub132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x48,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfmsub132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +# INTEL: vfmsub132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x4f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfmsub132nepbf16 (%eax){1to32}, %zmm3, %zmm2 +# INTEL: vfmsub132nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +0x62,0xf6,0x64,0x58,0x9a,0x10 + +# ATT: vfmsub132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vfmsub132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf6,0x64,0x48,0x9a,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfmsub132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf6,0x64,0xcf,0x9a,0x51,0x7f + +# ATT: vfmsub132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +0x62,0xf6,0x64,0xdf,0x9a,0x52,0x80 + +# ATT: vfmsub132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vfmsub132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x28,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfmsub132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +# INTEL: vfmsub132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x2f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfmsub132nepbf16 (%eax){1to16}, %ymm3, %ymm2 +# INTEL: vfmsub132nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +0x62,0xf6,0x64,0x38,0x9a,0x10 + +# ATT: vfmsub132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vfmsub132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf6,0x64,0x28,0x9a,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfmsub132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf6,0x64,0xaf,0x9a,0x51,0x7f + +# ATT: vfmsub132nepbf16 -256(%edx){1to16}, 
%ymm3, %ymm2 {%k7} {z} +# INTEL: vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +0x62,0xf6,0x64,0xbf,0x9a,0x52,0x80 + +# ATT: vfmsub132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vfmsub132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x08,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfmsub132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +# INTEL: vfmsub132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x0f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfmsub132nepbf16 (%eax){1to8}, %xmm3, %xmm2 +# INTEL: vfmsub132nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +0x62,0xf6,0x64,0x18,0x9a,0x10 + +# ATT: vfmsub132nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vfmsub132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf6,0x64,0x08,0x9a,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfmsub132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf6,0x64,0x8f,0x9a,0x51,0x7f + +# ATT: vfmsub132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +0x62,0xf6,0x64,0x9f,0x9a,0x52,0x80 + +# ATT: vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 +# INTEL: vfmsub213nepbf16 ymm2, ymm3, ymm4 +0x62,0xf6,0x64,0x28,0xaa,0xd4 + +# ATT: vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vfmsub213nepbf16 ymm2 {k7}, ymm3, ymm4 +0x62,0xf6,0x64,0x2f,0xaa,0xd4 + +# ATT: vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +0x62,0xf6,0x64,0xaf,0xaa,0xd4 + +# ATT: vfmsub213nepbf16 %zmm4, %zmm3, %zmm2 +# INTEL: vfmsub213nepbf16 zmm2, zmm3, zmm4 +0x62,0xf6,0x64,0x48,0xaa,0xd4 + +# ATT: vfmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +# INTEL: vfmsub213nepbf16 zmm2 {k7}, zmm3, zmm4 +0x62,0xf6,0x64,0x4f,0xaa,0xd4 + +# ATT: vfmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +0x62,0xf6,0x64,0xcf,0xaa,0xd4 + +# 
ATT: vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 +# INTEL: vfmsub213nepbf16 xmm2, xmm3, xmm4 +0x62,0xf6,0x64,0x08,0xaa,0xd4 + +# ATT: vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +# INTEL: vfmsub213nepbf16 xmm2 {k7}, xmm3, xmm4 +0x62,0xf6,0x64,0x0f,0xaa,0xd4 + +# ATT: vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +0x62,0xf6,0x64,0x8f,0xaa,0xd4 + +# ATT: vfmsub213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vfmsub213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x48,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfmsub213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +# INTEL: vfmsub213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x4f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfmsub213nepbf16 (%eax){1to32}, %zmm3, %zmm2 +# INTEL: vfmsub213nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +0x62,0xf6,0x64,0x58,0xaa,0x10 + +# ATT: vfmsub213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vfmsub213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf6,0x64,0x48,0xaa,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfmsub213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf6,0x64,0xcf,0xaa,0x51,0x7f + +# ATT: vfmsub213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +0x62,0xf6,0x64,0xdf,0xaa,0x52,0x80 + +# ATT: vfmsub213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vfmsub213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x28,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfmsub213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +# INTEL: vfmsub213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x2f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfmsub213nepbf16 (%eax){1to16}, %ymm3, %ymm2 +# INTEL: vfmsub213nepbf16 ymm2, ymm3, word ptr [eax]{1to16} 
+0x62,0xf6,0x64,0x38,0xaa,0x10 + +# ATT: vfmsub213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vfmsub213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf6,0x64,0x28,0xaa,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfmsub213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf6,0x64,0xaf,0xaa,0x51,0x7f + +# ATT: vfmsub213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +0x62,0xf6,0x64,0xbf,0xaa,0x52,0x80 + +# ATT: vfmsub213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vfmsub213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x08,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfmsub213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +# INTEL: vfmsub213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x0f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfmsub213nepbf16 (%eax){1to8}, %xmm3, %xmm2 +# INTEL: vfmsub213nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +0x62,0xf6,0x64,0x18,0xaa,0x10 + +# ATT: vfmsub213nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vfmsub213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf6,0x64,0x08,0xaa,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfmsub213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf6,0x64,0x8f,0xaa,0x51,0x7f + +# ATT: vfmsub213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +0x62,0xf6,0x64,0x9f,0xaa,0x52,0x80 + +# ATT: vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 +# INTEL: vfmsub231nepbf16 ymm2, ymm3, ymm4 +0x62,0xf6,0x64,0x28,0xba,0xd4 + +# ATT: vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vfmsub231nepbf16 ymm2 {k7}, ymm3, ymm4 +0x62,0xf6,0x64,0x2f,0xba,0xd4 + +# ATT: vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymm4 
+0x62,0xf6,0x64,0xaf,0xba,0xd4 + +# ATT: vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 +# INTEL: vfmsub231nepbf16 zmm2, zmm3, zmm4 +0x62,0xf6,0x64,0x48,0xba,0xd4 + +# ATT: vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +# INTEL: vfmsub231nepbf16 zmm2 {k7}, zmm3, zmm4 +0x62,0xf6,0x64,0x4f,0xba,0xd4 + +# ATT: vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +0x62,0xf6,0x64,0xcf,0xba,0xd4 + +# ATT: vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 +# INTEL: vfmsub231nepbf16 xmm2, xmm3, xmm4 +0x62,0xf6,0x64,0x08,0xba,0xd4 + +# ATT: vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +# INTEL: vfmsub231nepbf16 xmm2 {k7}, xmm3, xmm4 +0x62,0xf6,0x64,0x0f,0xba,0xd4 + +# ATT: vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +0x62,0xf6,0x64,0x8f,0xba,0xd4 + +# ATT: vfmsub231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vfmsub231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x48,0xba,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfmsub231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +# INTEL: vfmsub231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x4f,0xba,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfmsub231nepbf16 (%eax){1to32}, %zmm3, %zmm2 +# INTEL: vfmsub231nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +0x62,0xf6,0x64,0x58,0xba,0x10 + +# ATT: vfmsub231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vfmsub231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf6,0x64,0x48,0xba,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfmsub231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf6,0x64,0xcf,0xba,0x51,0x7f + +# ATT: vfmsub231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +0x62,0xf6,0x64,0xdf,0xba,0x52,0x80 + +# ATT: vfmsub231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vfmsub231nepbf16 ymm2, ymm3, 
ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x28,0xba,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfmsub231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +# INTEL: vfmsub231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x2f,0xba,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfmsub231nepbf16 (%eax){1to16}, %ymm3, %ymm2 +# INTEL: vfmsub231nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +0x62,0xf6,0x64,0x38,0xba,0x10 + +# ATT: vfmsub231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vfmsub231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf6,0x64,0x28,0xba,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfmsub231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf6,0x64,0xaf,0xba,0x51,0x7f + +# ATT: vfmsub231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +0x62,0xf6,0x64,0xbf,0xba,0x52,0x80 + +# ATT: vfmsub231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vfmsub231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x08,0xba,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfmsub231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +# INTEL: vfmsub231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x0f,0xba,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfmsub231nepbf16 (%eax){1to8}, %xmm3, %xmm2 +# INTEL: vfmsub231nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +0x62,0xf6,0x64,0x18,0xba,0x10 + +# ATT: vfmsub231nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vfmsub231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf6,0x64,0x08,0xba,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfmsub231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf6,0x64,0x8f,0xba,0x51,0x7f + +# ATT: vfmsub231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} 
+0x62,0xf6,0x64,0x9f,0xba,0x52,0x80 + +# ATT: vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 +# INTEL: vfnmadd132nepbf16 ymm2, ymm3, ymm4 +0x62,0xf6,0x64,0x28,0x9c,0xd4 + +# ATT: vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vfnmadd132nepbf16 ymm2 {k7}, ymm3, ymm4 +0x62,0xf6,0x64,0x2f,0x9c,0xd4 + +# ATT: vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +0x62,0xf6,0x64,0xaf,0x9c,0xd4 + +# ATT: vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 +# INTEL: vfnmadd132nepbf16 zmm2, zmm3, zmm4 +0x62,0xf6,0x64,0x48,0x9c,0xd4 + +# ATT: vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +# INTEL: vfnmadd132nepbf16 zmm2 {k7}, zmm3, zmm4 +0x62,0xf6,0x64,0x4f,0x9c,0xd4 + +# ATT: vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +0x62,0xf6,0x64,0xcf,0x9c,0xd4 + +# ATT: vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 +# INTEL: vfnmadd132nepbf16 xmm2, xmm3, xmm4 +0x62,0xf6,0x64,0x08,0x9c,0xd4 + +# ATT: vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +# INTEL: vfnmadd132nepbf16 xmm2 {k7}, xmm3, xmm4 +0x62,0xf6,0x64,0x0f,0x9c,0xd4 + +# ATT: vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +0x62,0xf6,0x64,0x8f,0x9c,0xd4 + +# ATT: vfnmadd132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vfnmadd132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x48,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfnmadd132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +# INTEL: vfnmadd132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x4f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfnmadd132nepbf16 (%eax){1to32}, %zmm3, %zmm2 +# INTEL: vfnmadd132nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +0x62,0xf6,0x64,0x58,0x9c,0x10 + +# ATT: vfnmadd132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vfnmadd132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf6,0x64,0x48,0x9c,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfnmadd132nepbf16 
8128(%ecx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf6,0x64,0xcf,0x9c,0x51,0x7f + +# ATT: vfnmadd132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +0x62,0xf6,0x64,0xdf,0x9c,0x52,0x80 + +# ATT: vfnmadd132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vfnmadd132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x28,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfnmadd132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +# INTEL: vfnmadd132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x2f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfnmadd132nepbf16 (%eax){1to16}, %ymm3, %ymm2 +# INTEL: vfnmadd132nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +0x62,0xf6,0x64,0x38,0x9c,0x10 + +# ATT: vfnmadd132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vfnmadd132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf6,0x64,0x28,0x9c,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfnmadd132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf6,0x64,0xaf,0x9c,0x51,0x7f + +# ATT: vfnmadd132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +0x62,0xf6,0x64,0xbf,0x9c,0x52,0x80 + +# ATT: vfnmadd132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vfnmadd132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x08,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfnmadd132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +# INTEL: vfnmadd132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x0f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfnmadd132nepbf16 (%eax){1to8}, %xmm3, %xmm2 +# INTEL: vfnmadd132nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +0x62,0xf6,0x64,0x18,0x9c,0x10 + +# ATT: vfnmadd132nepbf16 -512(,%ebp,2), 
%xmm3, %xmm2 +# INTEL: vfnmadd132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf6,0x64,0x08,0x9c,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfnmadd132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf6,0x64,0x8f,0x9c,0x51,0x7f + +# ATT: vfnmadd132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +0x62,0xf6,0x64,0x9f,0x9c,0x52,0x80 + +# ATT: vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 +# INTEL: vfnmadd213nepbf16 ymm2, ymm3, ymm4 +0x62,0xf6,0x64,0x28,0xac,0xd4 + +# ATT: vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vfnmadd213nepbf16 ymm2 {k7}, ymm3, ymm4 +0x62,0xf6,0x64,0x2f,0xac,0xd4 + +# ATT: vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +0x62,0xf6,0x64,0xaf,0xac,0xd4 + +# ATT: vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 +# INTEL: vfnmadd213nepbf16 zmm2, zmm3, zmm4 +0x62,0xf6,0x64,0x48,0xac,0xd4 + +# ATT: vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +# INTEL: vfnmadd213nepbf16 zmm2 {k7}, zmm3, zmm4 +0x62,0xf6,0x64,0x4f,0xac,0xd4 + +# ATT: vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +0x62,0xf6,0x64,0xcf,0xac,0xd4 + +# ATT: vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 +# INTEL: vfnmadd213nepbf16 xmm2, xmm3, xmm4 +0x62,0xf6,0x64,0x08,0xac,0xd4 + +# ATT: vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +# INTEL: vfnmadd213nepbf16 xmm2 {k7}, xmm3, xmm4 +0x62,0xf6,0x64,0x0f,0xac,0xd4 + +# ATT: vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +0x62,0xf6,0x64,0x8f,0xac,0xd4 + +# ATT: vfnmadd213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vfnmadd213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x48,0xac,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfnmadd213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +# INTEL: vfnmadd213nepbf16 zmm2 {k7}, 
zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x4f,0xac,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfnmadd213nepbf16 (%eax){1to32}, %zmm3, %zmm2 +# INTEL: vfnmadd213nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +0x62,0xf6,0x64,0x58,0xac,0x10 + +# ATT: vfnmadd213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vfnmadd213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf6,0x64,0x48,0xac,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfnmadd213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf6,0x64,0xcf,0xac,0x51,0x7f + +# ATT: vfnmadd213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +0x62,0xf6,0x64,0xdf,0xac,0x52,0x80 + +# ATT: vfnmadd213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vfnmadd213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x28,0xac,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfnmadd213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +# INTEL: vfnmadd213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x2f,0xac,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfnmadd213nepbf16 (%eax){1to16}, %ymm3, %ymm2 +# INTEL: vfnmadd213nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +0x62,0xf6,0x64,0x38,0xac,0x10 + +# ATT: vfnmadd213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vfnmadd213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf6,0x64,0x28,0xac,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfnmadd213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf6,0x64,0xaf,0xac,0x51,0x7f + +# ATT: vfnmadd213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +0x62,0xf6,0x64,0xbf,0xac,0x52,0x80 + +# ATT: vfnmadd213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vfnmadd213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 
268435456] +0x62,0xf6,0x64,0x08,0xac,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfnmadd213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +# INTEL: vfnmadd213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x0f,0xac,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfnmadd213nepbf16 (%eax){1to8}, %xmm3, %xmm2 +# INTEL: vfnmadd213nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +0x62,0xf6,0x64,0x18,0xac,0x10 + +# ATT: vfnmadd213nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vfnmadd213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf6,0x64,0x08,0xac,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfnmadd213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf6,0x64,0x8f,0xac,0x51,0x7f + +# ATT: vfnmadd213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +0x62,0xf6,0x64,0x9f,0xac,0x52,0x80 + +# ATT: vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 +# INTEL: vfnmadd231nepbf16 ymm2, ymm3, ymm4 +0x62,0xf6,0x64,0x28,0xbc,0xd4 + +# ATT: vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vfnmadd231nepbf16 ymm2 {k7}, ymm3, ymm4 +0x62,0xf6,0x64,0x2f,0xbc,0xd4 + +# ATT: vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +0x62,0xf6,0x64,0xaf,0xbc,0xd4 + +# ATT: vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 +# INTEL: vfnmadd231nepbf16 zmm2, zmm3, zmm4 +0x62,0xf6,0x64,0x48,0xbc,0xd4 + +# ATT: vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +# INTEL: vfnmadd231nepbf16 zmm2 {k7}, zmm3, zmm4 +0x62,0xf6,0x64,0x4f,0xbc,0xd4 + +# ATT: vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +0x62,0xf6,0x64,0xcf,0xbc,0xd4 + +# ATT: vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 +# INTEL: vfnmadd231nepbf16 xmm2, xmm3, xmm4 +0x62,0xf6,0x64,0x08,0xbc,0xd4 + +# ATT: vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +# INTEL: vfnmadd231nepbf16 xmm2 {k7}, xmm3, xmm4 
+0x62,0xf6,0x64,0x0f,0xbc,0xd4 + +# ATT: vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +0x62,0xf6,0x64,0x8f,0xbc,0xd4 + +# ATT: vfnmadd231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vfnmadd231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x48,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfnmadd231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +# INTEL: vfnmadd231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x4f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfnmadd231nepbf16 (%eax){1to32}, %zmm3, %zmm2 +# INTEL: vfnmadd231nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +0x62,0xf6,0x64,0x58,0xbc,0x10 + +# ATT: vfnmadd231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vfnmadd231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf6,0x64,0x48,0xbc,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfnmadd231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf6,0x64,0xcf,0xbc,0x51,0x7f + +# ATT: vfnmadd231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +0x62,0xf6,0x64,0xdf,0xbc,0x52,0x80 + +# ATT: vfnmadd231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vfnmadd231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x28,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfnmadd231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +# INTEL: vfnmadd231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x2f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfnmadd231nepbf16 (%eax){1to16}, %ymm3, %ymm2 +# INTEL: vfnmadd231nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +0x62,0xf6,0x64,0x38,0xbc,0x10 + +# ATT: vfnmadd231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vfnmadd231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf6,0x64,0x28,0xbc,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: 
vfnmadd231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf6,0x64,0xaf,0xbc,0x51,0x7f + +# ATT: vfnmadd231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +0x62,0xf6,0x64,0xbf,0xbc,0x52,0x80 + +# ATT: vfnmadd231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vfnmadd231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x08,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfnmadd231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +# INTEL: vfnmadd231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x0f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfnmadd231nepbf16 (%eax){1to8}, %xmm3, %xmm2 +# INTEL: vfnmadd231nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +0x62,0xf6,0x64,0x18,0xbc,0x10 + +# ATT: vfnmadd231nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vfnmadd231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf6,0x64,0x08,0xbc,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfnmadd231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf6,0x64,0x8f,0xbc,0x51,0x7f + +# ATT: vfnmadd231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +0x62,0xf6,0x64,0x9f,0xbc,0x52,0x80 + +# ATT: vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 +# INTEL: vfnmsub132nepbf16 ymm2, ymm3, ymm4 +0x62,0xf6,0x64,0x28,0x9e,0xd4 + +# ATT: vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vfnmsub132nepbf16 ymm2 {k7}, ymm3, ymm4 +0x62,0xf6,0x64,0x2f,0x9e,0xd4 + +# ATT: vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +0x62,0xf6,0x64,0xaf,0x9e,0xd4 + +# ATT: vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 +# INTEL: vfnmsub132nepbf16 zmm2, zmm3, zmm4 +0x62,0xf6,0x64,0x48,0x9e,0xd4 + +# ATT: vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 
{%k7} +# INTEL: vfnmsub132nepbf16 zmm2 {k7}, zmm3, zmm4 +0x62,0xf6,0x64,0x4f,0x9e,0xd4 + +# ATT: vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +0x62,0xf6,0x64,0xcf,0x9e,0xd4 + +# ATT: vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 +# INTEL: vfnmsub132nepbf16 xmm2, xmm3, xmm4 +0x62,0xf6,0x64,0x08,0x9e,0xd4 + +# ATT: vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +# INTEL: vfnmsub132nepbf16 xmm2 {k7}, xmm3, xmm4 +0x62,0xf6,0x64,0x0f,0x9e,0xd4 + +# ATT: vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +0x62,0xf6,0x64,0x8f,0x9e,0xd4 + +# ATT: vfnmsub132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vfnmsub132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x48,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfnmsub132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +# INTEL: vfnmsub132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x4f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfnmsub132nepbf16 (%eax){1to32}, %zmm3, %zmm2 +# INTEL: vfnmsub132nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +0x62,0xf6,0x64,0x58,0x9e,0x10 + +# ATT: vfnmsub132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vfnmsub132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf6,0x64,0x48,0x9e,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfnmsub132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf6,0x64,0xcf,0x9e,0x51,0x7f + +# ATT: vfnmsub132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +0x62,0xf6,0x64,0xdf,0x9e,0x52,0x80 + +# ATT: vfnmsub132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vfnmsub132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x28,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfnmsub132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +# INTEL: 
vfnmsub132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x2f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfnmsub132nepbf16 (%eax){1to16}, %ymm3, %ymm2 +# INTEL: vfnmsub132nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +0x62,0xf6,0x64,0x38,0x9e,0x10 + +# ATT: vfnmsub132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vfnmsub132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf6,0x64,0x28,0x9e,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfnmsub132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf6,0x64,0xaf,0x9e,0x51,0x7f + +# ATT: vfnmsub132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +0x62,0xf6,0x64,0xbf,0x9e,0x52,0x80 + +# ATT: vfnmsub132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vfnmsub132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x08,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfnmsub132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +# INTEL: vfnmsub132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x0f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfnmsub132nepbf16 (%eax){1to8}, %xmm3, %xmm2 +# INTEL: vfnmsub132nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +0x62,0xf6,0x64,0x18,0x9e,0x10 + +# ATT: vfnmsub132nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vfnmsub132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf6,0x64,0x08,0x9e,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfnmsub132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf6,0x64,0x8f,0x9e,0x51,0x7f + +# ATT: vfnmsub132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +0x62,0xf6,0x64,0x9f,0x9e,0x52,0x80 + +# ATT: vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2 +# INTEL: vfnmsub213nepbf16 ymm2, ymm3, ymm4 
+0x62,0xf6,0x64,0x28,0xae,0xd4 + +# ATT: vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vfnmsub213nepbf16 ymm2 {k7}, ymm3, ymm4 +0x62,0xf6,0x64,0x2f,0xae,0xd4 + +# ATT: vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +0x62,0xf6,0x64,0xaf,0xae,0xd4 + +# ATT: vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 +# INTEL: vfnmsub213nepbf16 zmm2, zmm3, zmm4 +0x62,0xf6,0x64,0x48,0xae,0xd4 + +# ATT: vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +# INTEL: vfnmsub213nepbf16 zmm2 {k7}, zmm3, zmm4 +0x62,0xf6,0x64,0x4f,0xae,0xd4 + +# ATT: vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +0x62,0xf6,0x64,0xcf,0xae,0xd4 + +# ATT: vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 +# INTEL: vfnmsub213nepbf16 xmm2, xmm3, xmm4 +0x62,0xf6,0x64,0x08,0xae,0xd4 + +# ATT: vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +# INTEL: vfnmsub213nepbf16 xmm2 {k7}, xmm3, xmm4 +0x62,0xf6,0x64,0x0f,0xae,0xd4 + +# ATT: vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +0x62,0xf6,0x64,0x8f,0xae,0xd4 + +# ATT: vfnmsub213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vfnmsub213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x48,0xae,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfnmsub213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +# INTEL: vfnmsub213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x4f,0xae,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfnmsub213nepbf16 (%eax){1to32}, %zmm3, %zmm2 +# INTEL: vfnmsub213nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +0x62,0xf6,0x64,0x58,0xae,0x10 + +# ATT: vfnmsub213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vfnmsub213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf6,0x64,0x48,0xae,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfnmsub213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] 
+0x62,0xf6,0x64,0xcf,0xae,0x51,0x7f + +# ATT: vfnmsub213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +0x62,0xf6,0x64,0xdf,0xae,0x52,0x80 + +# ATT: vfnmsub213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vfnmsub213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x28,0xae,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfnmsub213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +# INTEL: vfnmsub213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x2f,0xae,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfnmsub213nepbf16 (%eax){1to16}, %ymm3, %ymm2 +# INTEL: vfnmsub213nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +0x62,0xf6,0x64,0x38,0xae,0x10 + +# ATT: vfnmsub213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vfnmsub213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf6,0x64,0x28,0xae,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfnmsub213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf6,0x64,0xaf,0xae,0x51,0x7f + +# ATT: vfnmsub213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +0x62,0xf6,0x64,0xbf,0xae,0x52,0x80 + +# ATT: vfnmsub213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vfnmsub213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x08,0xae,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfnmsub213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +# INTEL: vfnmsub213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x0f,0xae,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfnmsub213nepbf16 (%eax){1to8}, %xmm3, %xmm2 +# INTEL: vfnmsub213nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +0x62,0xf6,0x64,0x18,0xae,0x10 + +# ATT: vfnmsub213nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vfnmsub213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] 
+0x62,0xf6,0x64,0x08,0xae,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfnmsub213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf6,0x64,0x8f,0xae,0x51,0x7f + +# ATT: vfnmsub213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +0x62,0xf6,0x64,0x9f,0xae,0x52,0x80 + +# ATT: vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 +# INTEL: vfnmsub231nepbf16 ymm2, ymm3, ymm4 +0x62,0xf6,0x64,0x28,0xbe,0xd4 + +# ATT: vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vfnmsub231nepbf16 ymm2 {k7}, ymm3, ymm4 +0x62,0xf6,0x64,0x2f,0xbe,0xd4 + +# ATT: vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +0x62,0xf6,0x64,0xaf,0xbe,0xd4 + +# ATT: vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 +# INTEL: vfnmsub231nepbf16 zmm2, zmm3, zmm4 +0x62,0xf6,0x64,0x48,0xbe,0xd4 + +# ATT: vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +# INTEL: vfnmsub231nepbf16 zmm2 {k7}, zmm3, zmm4 +0x62,0xf6,0x64,0x4f,0xbe,0xd4 + +# ATT: vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +0x62,0xf6,0x64,0xcf,0xbe,0xd4 + +# ATT: vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2 +# INTEL: vfnmsub231nepbf16 xmm2, xmm3, xmm4 +0x62,0xf6,0x64,0x08,0xbe,0xd4 + +# ATT: vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +# INTEL: vfnmsub231nepbf16 xmm2 {k7}, xmm3, xmm4 +0x62,0xf6,0x64,0x0f,0xbe,0xd4 + +# ATT: vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +0x62,0xf6,0x64,0x8f,0xbe,0xd4 + +# ATT: vfnmsub231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vfnmsub231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x48,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfnmsub231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +# INTEL: vfnmsub231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] 
+0x62,0xf6,0x64,0x4f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfnmsub231nepbf16 (%eax){1to32}, %zmm3, %zmm2 +# INTEL: vfnmsub231nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +0x62,0xf6,0x64,0x58,0xbe,0x10 + +# ATT: vfnmsub231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vfnmsub231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf6,0x64,0x48,0xbe,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfnmsub231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf6,0x64,0xcf,0xbe,0x51,0x7f + +# ATT: vfnmsub231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +# INTEL: vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +0x62,0xf6,0x64,0xdf,0xbe,0x52,0x80 + +# ATT: vfnmsub231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vfnmsub231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x28,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfnmsub231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +# INTEL: vfnmsub231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x2f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfnmsub231nepbf16 (%eax){1to16}, %ymm3, %ymm2 +# INTEL: vfnmsub231nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +0x62,0xf6,0x64,0x38,0xbe,0x10 + +# ATT: vfnmsub231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vfnmsub231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf6,0x64,0x28,0xbe,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfnmsub231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf6,0x64,0xaf,0xbe,0x51,0x7f + +# ATT: vfnmsub231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +# INTEL: vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +0x62,0xf6,0x64,0xbf,0xbe,0x52,0x80 + +# ATT: vfnmsub231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vfnmsub231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] 
+0x62,0xf6,0x64,0x08,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vfnmsub231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +# INTEL: vfnmsub231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x0f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vfnmsub231nepbf16 (%eax){1to8}, %xmm3, %xmm2 +# INTEL: vfnmsub231nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +0x62,0xf6,0x64,0x18,0xbe,0x10 + +# ATT: vfnmsub231nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vfnmsub231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf6,0x64,0x08,0xbe,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfnmsub231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf6,0x64,0x8f,0xbe,0x51,0x7f + +# ATT: vfnmsub231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +# INTEL: vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +0x62,0xf6,0x64,0x9f,0xbe,0x52,0x80 + +# ATT: vfpclasspbf16 $123, %zmm3, %k5 +# INTEL: vfpclasspbf16 k5, zmm3, 123 +0x62,0xf3,0x7f,0x48,0x66,0xeb,0x7b + +# ATT: vfpclasspbf16 $123, %zmm3, %k5 {%k7} +# INTEL: vfpclasspbf16 k5 {k7}, zmm3, 123 +0x62,0xf3,0x7f,0x4f,0x66,0xeb,0x7b + +# ATT: vfpclasspbf16 $123, %ymm3, %k5 +# INTEL: vfpclasspbf16 k5, ymm3, 123 +0x62,0xf3,0x7f,0x28,0x66,0xeb,0x7b + +# ATT: vfpclasspbf16 $123, %ymm3, %k5 {%k7} +# INTEL: vfpclasspbf16 k5 {k7}, ymm3, 123 +0x62,0xf3,0x7f,0x2f,0x66,0xeb,0x7b + +# ATT: vfpclasspbf16 $123, %xmm3, %k5 +# INTEL: vfpclasspbf16 k5, xmm3, 123 +0x62,0xf3,0x7f,0x08,0x66,0xeb,0x7b + +# ATT: vfpclasspbf16 $123, %xmm3, %k5 {%k7} +# INTEL: vfpclasspbf16 k5 {k7}, xmm3, 123 +0x62,0xf3,0x7f,0x0f,0x66,0xeb,0x7b + +# ATT: vfpclasspbf16x $123, 268435456(%esp,%esi,8), %k5 +# INTEL: vfpclasspbf16 k5, xmmword ptr [esp + 8*esi + 268435456], 123 +0x62,0xf3,0x7f,0x08,0x66,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b + +# ATT: vfpclasspbf16x $123, 291(%edi,%eax,4), %k5 {%k7} +# INTEL: vfpclasspbf16 k5 {k7}, xmmword ptr [edi + 4*eax + 291], 123 
+0x62,0xf3,0x7f,0x0f,0x66,0xac,0x87,0x23,0x01,0x00,0x00,0x7b + +# ATT: vfpclasspbf16 $123, (%eax){1to8}, %k5 +# INTEL: vfpclasspbf16 k5, word ptr [eax]{1to8}, 123 +0x62,0xf3,0x7f,0x18,0x66,0x28,0x7b + +# ATT: vfpclasspbf16x $123, -512(,%ebp,2), %k5 +# INTEL: vfpclasspbf16 k5, xmmword ptr [2*ebp - 512], 123 +0x62,0xf3,0x7f,0x08,0x66,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b + +# ATT: vfpclasspbf16x $123, 2032(%ecx), %k5 {%k7} +# INTEL: vfpclasspbf16 k5 {k7}, xmmword ptr [ecx + 2032], 123 +0x62,0xf3,0x7f,0x0f,0x66,0x69,0x7f,0x7b + +# ATT: vfpclasspbf16 $123, -256(%edx){1to8}, %k5 {%k7} +# INTEL: vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to8}, 123 +0x62,0xf3,0x7f,0x1f,0x66,0x6a,0x80,0x7b + +# ATT: vfpclasspbf16 $123, (%eax){1to16}, %k5 +# INTEL: vfpclasspbf16 k5, word ptr [eax]{1to16}, 123 +0x62,0xf3,0x7f,0x38,0x66,0x28,0x7b + +# ATT: vfpclasspbf16y $123, -1024(,%ebp,2), %k5 +# INTEL: vfpclasspbf16 k5, ymmword ptr [2*ebp - 1024], 123 +0x62,0xf3,0x7f,0x28,0x66,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b + +# ATT: vfpclasspbf16y $123, 4064(%ecx), %k5 {%k7} +# INTEL: vfpclasspbf16 k5 {k7}, ymmword ptr [ecx + 4064], 123 +0x62,0xf3,0x7f,0x2f,0x66,0x69,0x7f,0x7b + +# ATT: vfpclasspbf16 $123, -256(%edx){1to16}, %k5 {%k7} +# INTEL: vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to16}, 123 +0x62,0xf3,0x7f,0x3f,0x66,0x6a,0x80,0x7b + +# ATT: vfpclasspbf16 $123, (%eax){1to32}, %k5 +# INTEL: vfpclasspbf16 k5, word ptr [eax]{1to32}, 123 +0x62,0xf3,0x7f,0x58,0x66,0x28,0x7b + +# ATT: vfpclasspbf16z $123, -2048(,%ebp,2), %k5 +# INTEL: vfpclasspbf16 k5, zmmword ptr [2*ebp - 2048], 123 +0x62,0xf3,0x7f,0x48,0x66,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b + +# ATT: vfpclasspbf16z $123, 8128(%ecx), %k5 {%k7} +# INTEL: vfpclasspbf16 k5 {k7}, zmmword ptr [ecx + 8128], 123 +0x62,0xf3,0x7f,0x4f,0x66,0x69,0x7f,0x7b + +# ATT: vfpclasspbf16 $123, -256(%edx){1to32}, %k5 {%k7} +# INTEL: vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to32}, 123 +0x62,0xf3,0x7f,0x5f,0x66,0x6a,0x80,0x7b + +# ATT: vgetexppbf16 %xmm3, %xmm2 +# 
INTEL: vgetexppbf16 xmm2, xmm3 +0x62,0xf5,0x7d,0x08,0x42,0xd3 + +# ATT: vgetexppbf16 %xmm3, %xmm2 {%k7} +# INTEL: vgetexppbf16 xmm2 {k7}, xmm3 +0x62,0xf5,0x7d,0x0f,0x42,0xd3 + +# ATT: vgetexppbf16 %xmm3, %xmm2 {%k7} {z} +# INTEL: vgetexppbf16 xmm2 {k7} {z}, xmm3 +0x62,0xf5,0x7d,0x8f,0x42,0xd3 + +# ATT: vgetexppbf16 %zmm3, %zmm2 +# INTEL: vgetexppbf16 zmm2, zmm3 +0x62,0xf5,0x7d,0x48,0x42,0xd3 + +# ATT: vgetexppbf16 %zmm3, %zmm2 {%k7} +# INTEL: vgetexppbf16 zmm2 {k7}, zmm3 +0x62,0xf5,0x7d,0x4f,0x42,0xd3 + +# ATT: vgetexppbf16 %zmm3, %zmm2 {%k7} {z} +# INTEL: vgetexppbf16 zmm2 {k7} {z}, zmm3 +0x62,0xf5,0x7d,0xcf,0x42,0xd3 + +# ATT: vgetexppbf16 %ymm3, %ymm2 +# INTEL: vgetexppbf16 ymm2, ymm3 +0x62,0xf5,0x7d,0x28,0x42,0xd3 + +# ATT: vgetexppbf16 %ymm3, %ymm2 {%k7} +# INTEL: vgetexppbf16 ymm2 {k7}, ymm3 +0x62,0xf5,0x7d,0x2f,0x42,0xd3 + +# ATT: vgetexppbf16 %ymm3, %ymm2 {%k7} {z} +# INTEL: vgetexppbf16 ymm2 {k7} {z}, ymm3 +0x62,0xf5,0x7d,0xaf,0x42,0xd3 + +# ATT: vgetexppbf16 268435456(%esp,%esi,8), %xmm2 +# INTEL: vgetexppbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x7d,0x08,0x42,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vgetexppbf16 291(%edi,%eax,4), %xmm2 {%k7} +# INTEL: vgetexppbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x7d,0x0f,0x42,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vgetexppbf16 (%eax){1to8}, %xmm2 +# INTEL: vgetexppbf16 xmm2, word ptr [eax]{1to8} +0x62,0xf5,0x7d,0x18,0x42,0x10 + +# ATT: vgetexppbf16 -512(,%ebp,2), %xmm2 +# INTEL: vgetexppbf16 xmm2, xmmword ptr [2*ebp - 512] +0x62,0xf5,0x7d,0x08,0x42,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vgetexppbf16 2032(%ecx), %xmm2 {%k7} {z} +# INTEL: vgetexppbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032] +0x62,0xf5,0x7d,0x8f,0x42,0x51,0x7f + +# ATT: vgetexppbf16 -256(%edx){1to8}, %xmm2 {%k7} {z} +# INTEL: vgetexppbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8} +0x62,0xf5,0x7d,0x9f,0x42,0x52,0x80 + +# ATT: vgetexppbf16 268435456(%esp,%esi,8), %ymm2 +# INTEL: vgetexppbf16 ymm2, ymmword ptr [esp + 
8*esi + 268435456] +0x62,0xf5,0x7d,0x28,0x42,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vgetexppbf16 291(%edi,%eax,4), %ymm2 {%k7} +# INTEL: vgetexppbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x7d,0x2f,0x42,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vgetexppbf16 (%eax){1to16}, %ymm2 +# INTEL: vgetexppbf16 ymm2, word ptr [eax]{1to16} +0x62,0xf5,0x7d,0x38,0x42,0x10 + +# ATT: vgetexppbf16 -1024(,%ebp,2), %ymm2 +# INTEL: vgetexppbf16 ymm2, ymmword ptr [2*ebp - 1024] +0x62,0xf5,0x7d,0x28,0x42,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vgetexppbf16 4064(%ecx), %ymm2 {%k7} {z} +# INTEL: vgetexppbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064] +0x62,0xf5,0x7d,0xaf,0x42,0x51,0x7f + +# ATT: vgetexppbf16 -256(%edx){1to16}, %ymm2 {%k7} {z} +# INTEL: vgetexppbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16} +0x62,0xf5,0x7d,0xbf,0x42,0x52,0x80 + +# ATT: vgetexppbf16 268435456(%esp,%esi,8), %zmm2 +# INTEL: vgetexppbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x7d,0x48,0x42,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vgetexppbf16 291(%edi,%eax,4), %zmm2 {%k7} +# INTEL: vgetexppbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x7d,0x4f,0x42,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vgetexppbf16 (%eax){1to32}, %zmm2 +# INTEL: vgetexppbf16 zmm2, word ptr [eax]{1to32} +0x62,0xf5,0x7d,0x58,0x42,0x10 + +# ATT: vgetexppbf16 -2048(,%ebp,2), %zmm2 +# INTEL: vgetexppbf16 zmm2, zmmword ptr [2*ebp - 2048] +0x62,0xf5,0x7d,0x48,0x42,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vgetexppbf16 8128(%ecx), %zmm2 {%k7} {z} +# INTEL: vgetexppbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128] +0x62,0xf5,0x7d,0xcf,0x42,0x51,0x7f + +# ATT: vgetexppbf16 -256(%edx){1to32}, %zmm2 {%k7} {z} +# INTEL: vgetexppbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32} +0x62,0xf5,0x7d,0xdf,0x42,0x52,0x80 + +# ATT: vgetmantpbf16 $123, %zmm3, %zmm2 +# INTEL: vgetmantpbf16 zmm2, zmm3, 123 +0x62,0xf3,0x7f,0x48,0x26,0xd3,0x7b + +# ATT: vgetmantpbf16 $123, %zmm3, %zmm2 {%k7} +# INTEL: vgetmantpbf16 zmm2 {k7}, zmm3, 123 
+0x62,0xf3,0x7f,0x4f,0x26,0xd3,0x7b + +# ATT: vgetmantpbf16 $123, %zmm3, %zmm2 {%k7} {z} +# INTEL: vgetmantpbf16 zmm2 {k7} {z}, zmm3, 123 +0x62,0xf3,0x7f,0xcf,0x26,0xd3,0x7b + +# ATT: vgetmantpbf16 $123, %ymm3, %ymm2 +# INTEL: vgetmantpbf16 ymm2, ymm3, 123 +0x62,0xf3,0x7f,0x28,0x26,0xd3,0x7b + +# ATT: vgetmantpbf16 $123, %ymm3, %ymm2 {%k7} +# INTEL: vgetmantpbf16 ymm2 {k7}, ymm3, 123 +0x62,0xf3,0x7f,0x2f,0x26,0xd3,0x7b + +# ATT: vgetmantpbf16 $123, %ymm3, %ymm2 {%k7} {z} +# INTEL: vgetmantpbf16 ymm2 {k7} {z}, ymm3, 123 +0x62,0xf3,0x7f,0xaf,0x26,0xd3,0x7b + +# ATT: vgetmantpbf16 $123, %xmm3, %xmm2 +# INTEL: vgetmantpbf16 xmm2, xmm3, 123 +0x62,0xf3,0x7f,0x08,0x26,0xd3,0x7b + +# ATT: vgetmantpbf16 $123, %xmm3, %xmm2 {%k7} +# INTEL: vgetmantpbf16 xmm2 {k7}, xmm3, 123 +0x62,0xf3,0x7f,0x0f,0x26,0xd3,0x7b + +# ATT: vgetmantpbf16 $123, %xmm3, %xmm2 {%k7} {z} +# INTEL: vgetmantpbf16 xmm2 {k7} {z}, xmm3, 123 +0x62,0xf3,0x7f,0x8f,0x26,0xd3,0x7b + +# ATT: vgetmantpbf16 $123, 268435456(%esp,%esi,8), %xmm2 +# INTEL: vgetmantpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123 +0x62,0xf3,0x7f,0x08,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b + +# ATT: vgetmantpbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7} +# INTEL: vgetmantpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123 +0x62,0xf3,0x7f,0x0f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b + +# ATT: vgetmantpbf16 $123, (%eax){1to8}, %xmm2 +# INTEL: vgetmantpbf16 xmm2, word ptr [eax]{1to8}, 123 +0x62,0xf3,0x7f,0x18,0x26,0x10,0x7b + +# ATT: vgetmantpbf16 $123, -512(,%ebp,2), %xmm2 +# INTEL: vgetmantpbf16 xmm2, xmmword ptr [2*ebp - 512], 123 +0x62,0xf3,0x7f,0x08,0x26,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b + +# ATT: vgetmantpbf16 $123, 2032(%ecx), %xmm2 {%k7} {z} +# INTEL: vgetmantpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123 +0x62,0xf3,0x7f,0x8f,0x26,0x51,0x7f,0x7b + +# ATT: vgetmantpbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z} +# INTEL: vgetmantpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123 +0x62,0xf3,0x7f,0x9f,0x26,0x52,0x80,0x7b + +# 
ATT: vgetmantpbf16 $123, 268435456(%esp,%esi,8), %ymm2 +# INTEL: vgetmantpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123 +0x62,0xf3,0x7f,0x28,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b + +# ATT: vgetmantpbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7} +# INTEL: vgetmantpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123 +0x62,0xf3,0x7f,0x2f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b + +# ATT: vgetmantpbf16 $123, (%eax){1to16}, %ymm2 +# INTEL: vgetmantpbf16 ymm2, word ptr [eax]{1to16}, 123 +0x62,0xf3,0x7f,0x38,0x26,0x10,0x7b + +# ATT: vgetmantpbf16 $123, -1024(,%ebp,2), %ymm2 +# INTEL: vgetmantpbf16 ymm2, ymmword ptr [2*ebp - 1024], 123 +0x62,0xf3,0x7f,0x28,0x26,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b + +# ATT: vgetmantpbf16 $123, 4064(%ecx), %ymm2 {%k7} {z} +# INTEL: vgetmantpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123 +0x62,0xf3,0x7f,0xaf,0x26,0x51,0x7f,0x7b + +# ATT: vgetmantpbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z} +# INTEL: vgetmantpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123 +0x62,0xf3,0x7f,0xbf,0x26,0x52,0x80,0x7b + +# ATT: vgetmantpbf16 $123, 268435456(%esp,%esi,8), %zmm2 +# INTEL: vgetmantpbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123 +0x62,0xf3,0x7f,0x48,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b + +# ATT: vgetmantpbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7} +# INTEL: vgetmantpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123 +0x62,0xf3,0x7f,0x4f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b + +# ATT: vgetmantpbf16 $123, (%eax){1to32}, %zmm2 +# INTEL: vgetmantpbf16 zmm2, word ptr [eax]{1to32}, 123 +0x62,0xf3,0x7f,0x58,0x26,0x10,0x7b + +# ATT: vgetmantpbf16 $123, -2048(,%ebp,2), %zmm2 +# INTEL: vgetmantpbf16 zmm2, zmmword ptr [2*ebp - 2048], 123 +0x62,0xf3,0x7f,0x48,0x26,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b + +# ATT: vgetmantpbf16 $123, 8128(%ecx), %zmm2 {%k7} {z} +# INTEL: vgetmantpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123 +0x62,0xf3,0x7f,0xcf,0x26,0x51,0x7f,0x7b + +# ATT: vgetmantpbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z} +# INTEL: 
vgetmantpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123 +0x62,0xf3,0x7f,0xdf,0x26,0x52,0x80,0x7b + +# ATT: vmaxpbf16 %ymm4, %ymm3, %ymm2 +# INTEL: vmaxpbf16 ymm2, ymm3, ymm4 +0x62,0xf5,0x65,0x28,0x5f,0xd4 + +# ATT: vmaxpbf16 %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vmaxpbf16 ymm2 {k7}, ymm3, ymm4 +0x62,0xf5,0x65,0x2f,0x5f,0xd4 + +# ATT: vmaxpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vmaxpbf16 ymm2 {k7} {z}, ymm3, ymm4 +0x62,0xf5,0x65,0xaf,0x5f,0xd4 + +# ATT: vmaxpbf16 %zmm4, %zmm3, %zmm2 +# INTEL: vmaxpbf16 zmm2, zmm3, zmm4 +0x62,0xf5,0x65,0x48,0x5f,0xd4 + +# ATT: vmaxpbf16 %zmm4, %zmm3, %zmm2 {%k7} +# INTEL: vmaxpbf16 zmm2 {k7}, zmm3, zmm4 +0x62,0xf5,0x65,0x4f,0x5f,0xd4 + +# ATT: vmaxpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +# INTEL: vmaxpbf16 zmm2 {k7} {z}, zmm3, zmm4 +0x62,0xf5,0x65,0xcf,0x5f,0xd4 + +# ATT: vmaxpbf16 %xmm4, %xmm3, %xmm2 +# INTEL: vmaxpbf16 xmm2, xmm3, xmm4 +0x62,0xf5,0x65,0x08,0x5f,0xd4 + +# ATT: vmaxpbf16 %xmm4, %xmm3, %xmm2 {%k7} +# INTEL: vmaxpbf16 xmm2 {k7}, xmm3, xmm4 +0x62,0xf5,0x65,0x0f,0x5f,0xd4 + +# ATT: vmaxpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +# INTEL: vmaxpbf16 xmm2 {k7} {z}, xmm3, xmm4 +0x62,0xf5,0x65,0x8f,0x5f,0xd4 + +# ATT: vmaxpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vmaxpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x65,0x48,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vmaxpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +# INTEL: vmaxpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x65,0x4f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vmaxpbf16 (%eax){1to32}, %zmm3, %zmm2 +# INTEL: vmaxpbf16 zmm2, zmm3, word ptr [eax]{1to32} +0x62,0xf5,0x65,0x58,0x5f,0x10 + +# ATT: vmaxpbf16 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vmaxpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf5,0x65,0x48,0x5f,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vmaxpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vmaxpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf5,0x65,0xcf,0x5f,0x51,0x7f + +# ATT: 
vmaxpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +# INTEL: vmaxpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +0x62,0xf5,0x65,0xdf,0x5f,0x52,0x80 + +# ATT: vmaxpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vmaxpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x65,0x28,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vmaxpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +# INTEL: vmaxpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x65,0x2f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vmaxpbf16 (%eax){1to16}, %ymm3, %ymm2 +# INTEL: vmaxpbf16 ymm2, ymm3, word ptr [eax]{1to16} +0x62,0xf5,0x65,0x38,0x5f,0x10 + +# ATT: vmaxpbf16 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vmaxpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf5,0x65,0x28,0x5f,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vmaxpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vmaxpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf5,0x65,0xaf,0x5f,0x51,0x7f + +# ATT: vmaxpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +# INTEL: vmaxpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +0x62,0xf5,0x65,0xbf,0x5f,0x52,0x80 + +# ATT: vmaxpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vmaxpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x65,0x08,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vmaxpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +# INTEL: vmaxpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x65,0x0f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vmaxpbf16 (%eax){1to8}, %xmm3, %xmm2 +# INTEL: vmaxpbf16 xmm2, xmm3, word ptr [eax]{1to8} +0x62,0xf5,0x65,0x18,0x5f,0x10 + +# ATT: vmaxpbf16 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vmaxpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf5,0x65,0x08,0x5f,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vmaxpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vmaxpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf5,0x65,0x8f,0x5f,0x51,0x7f + +# ATT: vmaxpbf16 -256(%edx){1to8}, 
%xmm3, %xmm2 {%k7} {z} +# INTEL: vmaxpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +0x62,0xf5,0x65,0x9f,0x5f,0x52,0x80 + +# ATT: vminpbf16 %ymm4, %ymm3, %ymm2 +# INTEL: vminpbf16 ymm2, ymm3, ymm4 +0x62,0xf5,0x65,0x28,0x5d,0xd4 + +# ATT: vminpbf16 %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vminpbf16 ymm2 {k7}, ymm3, ymm4 +0x62,0xf5,0x65,0x2f,0x5d,0xd4 + +# ATT: vminpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vminpbf16 ymm2 {k7} {z}, ymm3, ymm4 +0x62,0xf5,0x65,0xaf,0x5d,0xd4 + +# ATT: vminpbf16 %zmm4, %zmm3, %zmm2 +# INTEL: vminpbf16 zmm2, zmm3, zmm4 +0x62,0xf5,0x65,0x48,0x5d,0xd4 + +# ATT: vminpbf16 %zmm4, %zmm3, %zmm2 {%k7} +# INTEL: vminpbf16 zmm2 {k7}, zmm3, zmm4 +0x62,0xf5,0x65,0x4f,0x5d,0xd4 + +# ATT: vminpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +# INTEL: vminpbf16 zmm2 {k7} {z}, zmm3, zmm4 +0x62,0xf5,0x65,0xcf,0x5d,0xd4 + +# ATT: vminpbf16 %xmm4, %xmm3, %xmm2 +# INTEL: vminpbf16 xmm2, xmm3, xmm4 +0x62,0xf5,0x65,0x08,0x5d,0xd4 + +# ATT: vminpbf16 %xmm4, %xmm3, %xmm2 {%k7} +# INTEL: vminpbf16 xmm2 {k7}, xmm3, xmm4 +0x62,0xf5,0x65,0x0f,0x5d,0xd4 + +# ATT: vminpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +# INTEL: vminpbf16 xmm2 {k7} {z}, xmm3, xmm4 +0x62,0xf5,0x65,0x8f,0x5d,0xd4 + +# ATT: vminpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vminpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x65,0x48,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vminpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +# INTEL: vminpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x65,0x4f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vminpbf16 (%eax){1to32}, %zmm3, %zmm2 +# INTEL: vminpbf16 zmm2, zmm3, word ptr [eax]{1to32} +0x62,0xf5,0x65,0x58,0x5d,0x10 + +# ATT: vminpbf16 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vminpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf5,0x65,0x48,0x5d,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vminpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vminpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] 
+0x62,0xf5,0x65,0xcf,0x5d,0x51,0x7f + +# ATT: vminpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +# INTEL: vminpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +0x62,0xf5,0x65,0xdf,0x5d,0x52,0x80 + +# ATT: vminpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vminpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x65,0x28,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vminpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +# INTEL: vminpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x65,0x2f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vminpbf16 (%eax){1to16}, %ymm3, %ymm2 +# INTEL: vminpbf16 ymm2, ymm3, word ptr [eax]{1to16} +0x62,0xf5,0x65,0x38,0x5d,0x10 + +# ATT: vminpbf16 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vminpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf5,0x65,0x28,0x5d,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vminpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vminpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf5,0x65,0xaf,0x5d,0x51,0x7f + +# ATT: vminpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +# INTEL: vminpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +0x62,0xf5,0x65,0xbf,0x5d,0x52,0x80 + +# ATT: vminpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vminpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x65,0x08,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vminpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +# INTEL: vminpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x65,0x0f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vminpbf16 (%eax){1to8}, %xmm3, %xmm2 +# INTEL: vminpbf16 xmm2, xmm3, word ptr [eax]{1to8} +0x62,0xf5,0x65,0x18,0x5d,0x10 + +# ATT: vminpbf16 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vminpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf5,0x65,0x08,0x5d,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vminpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vminpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] 
+0x62,0xf5,0x65,0x8f,0x5d,0x51,0x7f + +# ATT: vminpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +# INTEL: vminpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +0x62,0xf5,0x65,0x9f,0x5d,0x52,0x80 + +# ATT: vmulnepbf16 %ymm4, %ymm3, %ymm2 +# INTEL: vmulnepbf16 ymm2, ymm3, ymm4 +0x62,0xf5,0x65,0x28,0x59,0xd4 + +# ATT: vmulnepbf16 %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vmulnepbf16 ymm2 {k7}, ymm3, ymm4 +0x62,0xf5,0x65,0x2f,0x59,0xd4 + +# ATT: vmulnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vmulnepbf16 ymm2 {k7} {z}, ymm3, ymm4 +0x62,0xf5,0x65,0xaf,0x59,0xd4 + +# ATT: vmulnepbf16 %zmm4, %zmm3, %zmm2 +# INTEL: vmulnepbf16 zmm2, zmm3, zmm4 +0x62,0xf5,0x65,0x48,0x59,0xd4 + +# ATT: vmulnepbf16 %zmm4, %zmm3, %zmm2 {%k7} +# INTEL: vmulnepbf16 zmm2 {k7}, zmm3, zmm4 +0x62,0xf5,0x65,0x4f,0x59,0xd4 + +# ATT: vmulnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +# INTEL: vmulnepbf16 zmm2 {k7} {z}, zmm3, zmm4 +0x62,0xf5,0x65,0xcf,0x59,0xd4 + +# ATT: vmulnepbf16 %xmm4, %xmm3, %xmm2 +# INTEL: vmulnepbf16 xmm2, xmm3, xmm4 +0x62,0xf5,0x65,0x08,0x59,0xd4 + +# ATT: vmulnepbf16 %xmm4, %xmm3, %xmm2 {%k7} +# INTEL: vmulnepbf16 xmm2 {k7}, xmm3, xmm4 +0x62,0xf5,0x65,0x0f,0x59,0xd4 + +# ATT: vmulnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +# INTEL: vmulnepbf16 xmm2 {k7} {z}, xmm3, xmm4 +0x62,0xf5,0x65,0x8f,0x59,0xd4 + +# ATT: vmulnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vmulnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x65,0x48,0x59,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vmulnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +# INTEL: vmulnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x65,0x4f,0x59,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vmulnepbf16 (%eax){1to32}, %zmm3, %zmm2 +# INTEL: vmulnepbf16 zmm2, zmm3, word ptr [eax]{1to32} +0x62,0xf5,0x65,0x58,0x59,0x10 + +# ATT: vmulnepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vmulnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf5,0x65,0x48,0x59,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vmulnepbf16 
8128(%ecx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vmulnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf5,0x65,0xcf,0x59,0x51,0x7f + +# ATT: vmulnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +# INTEL: vmulnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +0x62,0xf5,0x65,0xdf,0x59,0x52,0x80 + +# ATT: vmulnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vmulnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x65,0x28,0x59,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vmulnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +# INTEL: vmulnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x65,0x2f,0x59,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vmulnepbf16 (%eax){1to16}, %ymm3, %ymm2 +# INTEL: vmulnepbf16 ymm2, ymm3, word ptr [eax]{1to16} +0x62,0xf5,0x65,0x38,0x59,0x10 + +# ATT: vmulnepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vmulnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf5,0x65,0x28,0x59,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vmulnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vmulnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf5,0x65,0xaf,0x59,0x51,0x7f + +# ATT: vmulnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +# INTEL: vmulnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +0x62,0xf5,0x65,0xbf,0x59,0x52,0x80 + +# ATT: vmulnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vmulnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x65,0x08,0x59,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vmulnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +# INTEL: vmulnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x65,0x0f,0x59,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vmulnepbf16 (%eax){1to8}, %xmm3, %xmm2 +# INTEL: vmulnepbf16 xmm2, xmm3, word ptr [eax]{1to8} +0x62,0xf5,0x65,0x18,0x59,0x10 + +# ATT: vmulnepbf16 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vmulnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf5,0x65,0x08,0x59,0x14,0x6d,0x00,0xfe,0xff,0xff + +# 
ATT: vmulnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vmulnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf5,0x65,0x8f,0x59,0x51,0x7f + +# ATT: vmulnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +# INTEL: vmulnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +0x62,0xf5,0x65,0x9f,0x59,0x52,0x80 + +# ATT: vrcppbf16 %xmm3, %xmm2 +# INTEL: vrcppbf16 xmm2, xmm3 +0x62,0xf6,0x7c,0x08,0x4c,0xd3 + +# ATT: vrcppbf16 %xmm3, %xmm2 {%k7} +# INTEL: vrcppbf16 xmm2 {k7}, xmm3 +0x62,0xf6,0x7c,0x0f,0x4c,0xd3 + +# ATT: vrcppbf16 %xmm3, %xmm2 {%k7} {z} +# INTEL: vrcppbf16 xmm2 {k7} {z}, xmm3 +0x62,0xf6,0x7c,0x8f,0x4c,0xd3 + +# ATT: vrcppbf16 %zmm3, %zmm2 +# INTEL: vrcppbf16 zmm2, zmm3 +0x62,0xf6,0x7c,0x48,0x4c,0xd3 + +# ATT: vrcppbf16 %zmm3, %zmm2 {%k7} +# INTEL: vrcppbf16 zmm2 {k7}, zmm3 +0x62,0xf6,0x7c,0x4f,0x4c,0xd3 + +# ATT: vrcppbf16 %zmm3, %zmm2 {%k7} {z} +# INTEL: vrcppbf16 zmm2 {k7} {z}, zmm3 +0x62,0xf6,0x7c,0xcf,0x4c,0xd3 + +# ATT: vrcppbf16 %ymm3, %ymm2 +# INTEL: vrcppbf16 ymm2, ymm3 +0x62,0xf6,0x7c,0x28,0x4c,0xd3 + +# ATT: vrcppbf16 %ymm3, %ymm2 {%k7} +# INTEL: vrcppbf16 ymm2 {k7}, ymm3 +0x62,0xf6,0x7c,0x2f,0x4c,0xd3 + +# ATT: vrcppbf16 %ymm3, %ymm2 {%k7} {z} +# INTEL: vrcppbf16 ymm2 {k7} {z}, ymm3 +0x62,0xf6,0x7c,0xaf,0x4c,0xd3 + +# ATT: vrcppbf16 268435456(%esp,%esi,8), %xmm2 +# INTEL: vrcppbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x7c,0x08,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vrcppbf16 291(%edi,%eax,4), %xmm2 {%k7} +# INTEL: vrcppbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x7c,0x0f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vrcppbf16 (%eax){1to8}, %xmm2 +# INTEL: vrcppbf16 xmm2, word ptr [eax]{1to8} +0x62,0xf6,0x7c,0x18,0x4c,0x10 + +# ATT: vrcppbf16 -512(,%ebp,2), %xmm2 +# INTEL: vrcppbf16 xmm2, xmmword ptr [2*ebp - 512] +0x62,0xf6,0x7c,0x08,0x4c,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vrcppbf16 2032(%ecx), %xmm2 {%k7} {z} +# INTEL: vrcppbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032] 
+0x62,0xf6,0x7c,0x8f,0x4c,0x51,0x7f + +# ATT: vrcppbf16 -256(%edx){1to8}, %xmm2 {%k7} {z} +# INTEL: vrcppbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8} +0x62,0xf6,0x7c,0x9f,0x4c,0x52,0x80 + +# ATT: vrcppbf16 268435456(%esp,%esi,8), %ymm2 +# INTEL: vrcppbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x7c,0x28,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vrcppbf16 291(%edi,%eax,4), %ymm2 {%k7} +# INTEL: vrcppbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x7c,0x2f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vrcppbf16 (%eax){1to16}, %ymm2 +# INTEL: vrcppbf16 ymm2, word ptr [eax]{1to16} +0x62,0xf6,0x7c,0x38,0x4c,0x10 + +# ATT: vrcppbf16 -1024(,%ebp,2), %ymm2 +# INTEL: vrcppbf16 ymm2, ymmword ptr [2*ebp - 1024] +0x62,0xf6,0x7c,0x28,0x4c,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vrcppbf16 4064(%ecx), %ymm2 {%k7} {z} +# INTEL: vrcppbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064] +0x62,0xf6,0x7c,0xaf,0x4c,0x51,0x7f + +# ATT: vrcppbf16 -256(%edx){1to16}, %ymm2 {%k7} {z} +# INTEL: vrcppbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16} +0x62,0xf6,0x7c,0xbf,0x4c,0x52,0x80 + +# ATT: vrcppbf16 268435456(%esp,%esi,8), %zmm2 +# INTEL: vrcppbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x7c,0x48,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vrcppbf16 291(%edi,%eax,4), %zmm2 {%k7} +# INTEL: vrcppbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x7c,0x4f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vrcppbf16 (%eax){1to32}, %zmm2 +# INTEL: vrcppbf16 zmm2, word ptr [eax]{1to32} +0x62,0xf6,0x7c,0x58,0x4c,0x10 + +# ATT: vrcppbf16 -2048(,%ebp,2), %zmm2 +# INTEL: vrcppbf16 zmm2, zmmword ptr [2*ebp - 2048] +0x62,0xf6,0x7c,0x48,0x4c,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vrcppbf16 8128(%ecx), %zmm2 {%k7} {z} +# INTEL: vrcppbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128] +0x62,0xf6,0x7c,0xcf,0x4c,0x51,0x7f + +# ATT: vrcppbf16 -256(%edx){1to32}, %zmm2 {%k7} {z} +# INTEL: vrcppbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32} 
+0x62,0xf6,0x7c,0xdf,0x4c,0x52,0x80 + +# ATT: vreducenepbf16 $123, %zmm3, %zmm2 +# INTEL: vreducenepbf16 zmm2, zmm3, 123 +0x62,0xf3,0x7f,0x48,0x56,0xd3,0x7b + +# ATT: vreducenepbf16 $123, %zmm3, %zmm2 {%k7} +# INTEL: vreducenepbf16 zmm2 {k7}, zmm3, 123 +0x62,0xf3,0x7f,0x4f,0x56,0xd3,0x7b + +# ATT: vreducenepbf16 $123, %zmm3, %zmm2 {%k7} {z} +# INTEL: vreducenepbf16 zmm2 {k7} {z}, zmm3, 123 +0x62,0xf3,0x7f,0xcf,0x56,0xd3,0x7b + +# ATT: vreducenepbf16 $123, %ymm3, %ymm2 +# INTEL: vreducenepbf16 ymm2, ymm3, 123 +0x62,0xf3,0x7f,0x28,0x56,0xd3,0x7b + +# ATT: vreducenepbf16 $123, %ymm3, %ymm2 {%k7} +# INTEL: vreducenepbf16 ymm2 {k7}, ymm3, 123 +0x62,0xf3,0x7f,0x2f,0x56,0xd3,0x7b + +# ATT: vreducenepbf16 $123, %ymm3, %ymm2 {%k7} {z} +# INTEL: vreducenepbf16 ymm2 {k7} {z}, ymm3, 123 +0x62,0xf3,0x7f,0xaf,0x56,0xd3,0x7b + +# ATT: vreducenepbf16 $123, %xmm3, %xmm2 +# INTEL: vreducenepbf16 xmm2, xmm3, 123 +0x62,0xf3,0x7f,0x08,0x56,0xd3,0x7b + +# ATT: vreducenepbf16 $123, %xmm3, %xmm2 {%k7} +# INTEL: vreducenepbf16 xmm2 {k7}, xmm3, 123 +0x62,0xf3,0x7f,0x0f,0x56,0xd3,0x7b + +# ATT: vreducenepbf16 $123, %xmm3, %xmm2 {%k7} {z} +# INTEL: vreducenepbf16 xmm2 {k7} {z}, xmm3, 123 +0x62,0xf3,0x7f,0x8f,0x56,0xd3,0x7b + +# ATT: vreducenepbf16 $123, 268435456(%esp,%esi,8), %xmm2 +# INTEL: vreducenepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123 +0x62,0xf3,0x7f,0x08,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b + +# ATT: vreducenepbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7} +# INTEL: vreducenepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123 +0x62,0xf3,0x7f,0x0f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b + +# ATT: vreducenepbf16 $123, (%eax){1to8}, %xmm2 +# INTEL: vreducenepbf16 xmm2, word ptr [eax]{1to8}, 123 +0x62,0xf3,0x7f,0x18,0x56,0x10,0x7b + +# ATT: vreducenepbf16 $123, -512(,%ebp,2), %xmm2 +# INTEL: vreducenepbf16 xmm2, xmmword ptr [2*ebp - 512], 123 +0x62,0xf3,0x7f,0x08,0x56,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b + +# ATT: vreducenepbf16 $123, 2032(%ecx), %xmm2 {%k7} {z} +# INTEL: 
vreducenepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123 +0x62,0xf3,0x7f,0x8f,0x56,0x51,0x7f,0x7b + +# ATT: vreducenepbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z} +# INTEL: vreducenepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123 +0x62,0xf3,0x7f,0x9f,0x56,0x52,0x80,0x7b + +# ATT: vreducenepbf16 $123, 268435456(%esp,%esi,8), %ymm2 +# INTEL: vreducenepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123 +0x62,0xf3,0x7f,0x28,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b + +# ATT: vreducenepbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7} +# INTEL: vreducenepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123 +0x62,0xf3,0x7f,0x2f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b + +# ATT: vreducenepbf16 $123, (%eax){1to16}, %ymm2 +# INTEL: vreducenepbf16 ymm2, word ptr [eax]{1to16}, 123 +0x62,0xf3,0x7f,0x38,0x56,0x10,0x7b + +# ATT: vreducenepbf16 $123, -1024(,%ebp,2), %ymm2 +# INTEL: vreducenepbf16 ymm2, ymmword ptr [2*ebp - 1024], 123 +0x62,0xf3,0x7f,0x28,0x56,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b + +# ATT: vreducenepbf16 $123, 4064(%ecx), %ymm2 {%k7} {z} +# INTEL: vreducenepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123 +0x62,0xf3,0x7f,0xaf,0x56,0x51,0x7f,0x7b + +# ATT: vreducenepbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z} +# INTEL: vreducenepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123 +0x62,0xf3,0x7f,0xbf,0x56,0x52,0x80,0x7b + +# ATT: vreducenepbf16 $123, 268435456(%esp,%esi,8), %zmm2 +# INTEL: vreducenepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123 +0x62,0xf3,0x7f,0x48,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b + +# ATT: vreducenepbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7} +# INTEL: vreducenepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123 +0x62,0xf3,0x7f,0x4f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b + +# ATT: vreducenepbf16 $123, (%eax){1to32}, %zmm2 +# INTEL: vreducenepbf16 zmm2, word ptr [eax]{1to32}, 123 +0x62,0xf3,0x7f,0x58,0x56,0x10,0x7b + +# ATT: vreducenepbf16 $123, -2048(,%ebp,2), %zmm2 +# INTEL: vreducenepbf16 zmm2, zmmword ptr [2*ebp - 2048], 123 
+0x62,0xf3,0x7f,0x48,0x56,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b + +# ATT: vreducenepbf16 $123, 8128(%ecx), %zmm2 {%k7} {z} +# INTEL: vreducenepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123 +0x62,0xf3,0x7f,0xcf,0x56,0x51,0x7f,0x7b + +# ATT: vreducenepbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z} +# INTEL: vreducenepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123 +0x62,0xf3,0x7f,0xdf,0x56,0x52,0x80,0x7b + +# ATT: vrndscalenepbf16 $123, %zmm3, %zmm2 +# INTEL: vrndscalenepbf16 zmm2, zmm3, 123 +0x62,0xf3,0x7f,0x48,0x08,0xd3,0x7b + +# ATT: vrndscalenepbf16 $123, %zmm3, %zmm2 {%k7} +# INTEL: vrndscalenepbf16 zmm2 {k7}, zmm3, 123 +0x62,0xf3,0x7f,0x4f,0x08,0xd3,0x7b + +# ATT: vrndscalenepbf16 $123, %zmm3, %zmm2 {%k7} {z} +# INTEL: vrndscalenepbf16 zmm2 {k7} {z}, zmm3, 123 +0x62,0xf3,0x7f,0xcf,0x08,0xd3,0x7b + +# ATT: vrndscalenepbf16 $123, %ymm3, %ymm2 +# INTEL: vrndscalenepbf16 ymm2, ymm3, 123 +0x62,0xf3,0x7f,0x28,0x08,0xd3,0x7b + +# ATT: vrndscalenepbf16 $123, %ymm3, %ymm2 {%k7} +# INTEL: vrndscalenepbf16 ymm2 {k7}, ymm3, 123 +0x62,0xf3,0x7f,0x2f,0x08,0xd3,0x7b + +# ATT: vrndscalenepbf16 $123, %ymm3, %ymm2 {%k7} {z} +# INTEL: vrndscalenepbf16 ymm2 {k7} {z}, ymm3, 123 +0x62,0xf3,0x7f,0xaf,0x08,0xd3,0x7b + +# ATT: vrndscalenepbf16 $123, %xmm3, %xmm2 +# INTEL: vrndscalenepbf16 xmm2, xmm3, 123 +0x62,0xf3,0x7f,0x08,0x08,0xd3,0x7b + +# ATT: vrndscalenepbf16 $123, %xmm3, %xmm2 {%k7} +# INTEL: vrndscalenepbf16 xmm2 {k7}, xmm3, 123 +0x62,0xf3,0x7f,0x0f,0x08,0xd3,0x7b + +# ATT: vrndscalenepbf16 $123, %xmm3, %xmm2 {%k7} {z} +# INTEL: vrndscalenepbf16 xmm2 {k7} {z}, xmm3, 123 +0x62,0xf3,0x7f,0x8f,0x08,0xd3,0x7b + +# ATT: vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %xmm2 +# INTEL: vrndscalenepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123 +0x62,0xf3,0x7f,0x08,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b + +# ATT: vrndscalenepbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7} +# INTEL: vrndscalenepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123 
+0x62,0xf3,0x7f,0x0f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b + +# ATT: vrndscalenepbf16 $123, (%eax){1to8}, %xmm2 +# INTEL: vrndscalenepbf16 xmm2, word ptr [eax]{1to8}, 123 +0x62,0xf3,0x7f,0x18,0x08,0x10,0x7b + +# ATT: vrndscalenepbf16 $123, -512(,%ebp,2), %xmm2 +# INTEL: vrndscalenepbf16 xmm2, xmmword ptr [2*ebp - 512], 123 +0x62,0xf3,0x7f,0x08,0x08,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b + +# ATT: vrndscalenepbf16 $123, 2032(%ecx), %xmm2 {%k7} {z} +# INTEL: vrndscalenepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123 +0x62,0xf3,0x7f,0x8f,0x08,0x51,0x7f,0x7b + +# ATT: vrndscalenepbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z} +# INTEL: vrndscalenepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123 +0x62,0xf3,0x7f,0x9f,0x08,0x52,0x80,0x7b + +# ATT: vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %ymm2 +# INTEL: vrndscalenepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123 +0x62,0xf3,0x7f,0x28,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b + +# ATT: vrndscalenepbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7} +# INTEL: vrndscalenepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123 +0x62,0xf3,0x7f,0x2f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b + +# ATT: vrndscalenepbf16 $123, (%eax){1to16}, %ymm2 +# INTEL: vrndscalenepbf16 ymm2, word ptr [eax]{1to16}, 123 +0x62,0xf3,0x7f,0x38,0x08,0x10,0x7b + +# ATT: vrndscalenepbf16 $123, -1024(,%ebp,2), %ymm2 +# INTEL: vrndscalenepbf16 ymm2, ymmword ptr [2*ebp - 1024], 123 +0x62,0xf3,0x7f,0x28,0x08,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b + +# ATT: vrndscalenepbf16 $123, 4064(%ecx), %ymm2 {%k7} {z} +# INTEL: vrndscalenepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123 +0x62,0xf3,0x7f,0xaf,0x08,0x51,0x7f,0x7b + +# ATT: vrndscalenepbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z} +# INTEL: vrndscalenepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123 +0x62,0xf3,0x7f,0xbf,0x08,0x52,0x80,0x7b + +# ATT: vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %zmm2 +# INTEL: vrndscalenepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123 
+0x62,0xf3,0x7f,0x48,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b + +# ATT: vrndscalenepbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7} +# INTEL: vrndscalenepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123 +0x62,0xf3,0x7f,0x4f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b + +# ATT: vrndscalenepbf16 $123, (%eax){1to32}, %zmm2 +# INTEL: vrndscalenepbf16 zmm2, word ptr [eax]{1to32}, 123 +0x62,0xf3,0x7f,0x58,0x08,0x10,0x7b + +# ATT: vrndscalenepbf16 $123, -2048(,%ebp,2), %zmm2 +# INTEL: vrndscalenepbf16 zmm2, zmmword ptr [2*ebp - 2048], 123 +0x62,0xf3,0x7f,0x48,0x08,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b + +# ATT: vrndscalenepbf16 $123, 8128(%ecx), %zmm2 {%k7} {z} +# INTEL: vrndscalenepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123 +0x62,0xf3,0x7f,0xcf,0x08,0x51,0x7f,0x7b + +# ATT: vrndscalenepbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z} +# INTEL: vrndscalenepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123 +0x62,0xf3,0x7f,0xdf,0x08,0x52,0x80,0x7b + +# ATT: vrsqrtpbf16 %xmm3, %xmm2 +# INTEL: vrsqrtpbf16 xmm2, xmm3 +0x62,0xf6,0x7c,0x08,0x4e,0xd3 + +# ATT: vrsqrtpbf16 %xmm3, %xmm2 {%k7} +# INTEL: vrsqrtpbf16 xmm2 {k7}, xmm3 +0x62,0xf6,0x7c,0x0f,0x4e,0xd3 + +# ATT: vrsqrtpbf16 %xmm3, %xmm2 {%k7} {z} +# INTEL: vrsqrtpbf16 xmm2 {k7} {z}, xmm3 +0x62,0xf6,0x7c,0x8f,0x4e,0xd3 + +# ATT: vrsqrtpbf16 %zmm3, %zmm2 +# INTEL: vrsqrtpbf16 zmm2, zmm3 +0x62,0xf6,0x7c,0x48,0x4e,0xd3 + +# ATT: vrsqrtpbf16 %zmm3, %zmm2 {%k7} +# INTEL: vrsqrtpbf16 zmm2 {k7}, zmm3 +0x62,0xf6,0x7c,0x4f,0x4e,0xd3 + +# ATT: vrsqrtpbf16 %zmm3, %zmm2 {%k7} {z} +# INTEL: vrsqrtpbf16 zmm2 {k7} {z}, zmm3 +0x62,0xf6,0x7c,0xcf,0x4e,0xd3 + +# ATT: vrsqrtpbf16 %ymm3, %ymm2 +# INTEL: vrsqrtpbf16 ymm2, ymm3 +0x62,0xf6,0x7c,0x28,0x4e,0xd3 + +# ATT: vrsqrtpbf16 %ymm3, %ymm2 {%k7} +# INTEL: vrsqrtpbf16 ymm2 {k7}, ymm3 +0x62,0xf6,0x7c,0x2f,0x4e,0xd3 + +# ATT: vrsqrtpbf16 %ymm3, %ymm2 {%k7} {z} +# INTEL: vrsqrtpbf16 ymm2 {k7} {z}, ymm3 +0x62,0xf6,0x7c,0xaf,0x4e,0xd3 + +# ATT: vrsqrtpbf16 268435456(%esp,%esi,8), %xmm2 +# INTEL: 
vrsqrtpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x7c,0x08,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vrsqrtpbf16 291(%edi,%eax,4), %xmm2 {%k7} +# INTEL: vrsqrtpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x7c,0x0f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vrsqrtpbf16 (%eax){1to8}, %xmm2 +# INTEL: vrsqrtpbf16 xmm2, word ptr [eax]{1to8} +0x62,0xf6,0x7c,0x18,0x4e,0x10 + +# ATT: vrsqrtpbf16 -512(,%ebp,2), %xmm2 +# INTEL: vrsqrtpbf16 xmm2, xmmword ptr [2*ebp - 512] +0x62,0xf6,0x7c,0x08,0x4e,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vrsqrtpbf16 2032(%ecx), %xmm2 {%k7} {z} +# INTEL: vrsqrtpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032] +0x62,0xf6,0x7c,0x8f,0x4e,0x51,0x7f + +# ATT: vrsqrtpbf16 -256(%edx){1to8}, %xmm2 {%k7} {z} +# INTEL: vrsqrtpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8} +0x62,0xf6,0x7c,0x9f,0x4e,0x52,0x80 + +# ATT: vrsqrtpbf16 268435456(%esp,%esi,8), %ymm2 +# INTEL: vrsqrtpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x7c,0x28,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vrsqrtpbf16 291(%edi,%eax,4), %ymm2 {%k7} +# INTEL: vrsqrtpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x7c,0x2f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vrsqrtpbf16 (%eax){1to16}, %ymm2 +# INTEL: vrsqrtpbf16 ymm2, word ptr [eax]{1to16} +0x62,0xf6,0x7c,0x38,0x4e,0x10 + +# ATT: vrsqrtpbf16 -1024(,%ebp,2), %ymm2 +# INTEL: vrsqrtpbf16 ymm2, ymmword ptr [2*ebp - 1024] +0x62,0xf6,0x7c,0x28,0x4e,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vrsqrtpbf16 4064(%ecx), %ymm2 {%k7} {z} +# INTEL: vrsqrtpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064] +0x62,0xf6,0x7c,0xaf,0x4e,0x51,0x7f + +# ATT: vrsqrtpbf16 -256(%edx){1to16}, %ymm2 {%k7} {z} +# INTEL: vrsqrtpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16} +0x62,0xf6,0x7c,0xbf,0x4e,0x52,0x80 + +# ATT: vrsqrtpbf16 268435456(%esp,%esi,8), %zmm2 +# INTEL: vrsqrtpbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x7c,0x48,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vrsqrtpbf16 
291(%edi,%eax,4), %zmm2 {%k7} +# INTEL: vrsqrtpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x7c,0x4f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vrsqrtpbf16 (%eax){1to32}, %zmm2 +# INTEL: vrsqrtpbf16 zmm2, word ptr [eax]{1to32} +0x62,0xf6,0x7c,0x58,0x4e,0x10 + +# ATT: vrsqrtpbf16 -2048(,%ebp,2), %zmm2 +# INTEL: vrsqrtpbf16 zmm2, zmmword ptr [2*ebp - 2048] +0x62,0xf6,0x7c,0x48,0x4e,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vrsqrtpbf16 8128(%ecx), %zmm2 {%k7} {z} +# INTEL: vrsqrtpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128] +0x62,0xf6,0x7c,0xcf,0x4e,0x51,0x7f + +# ATT: vrsqrtpbf16 -256(%edx){1to32}, %zmm2 {%k7} {z} +# INTEL: vrsqrtpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32} +0x62,0xf6,0x7c,0xdf,0x4e,0x52,0x80 + +# ATT: vscalefpbf16 %ymm4, %ymm3, %ymm2 +# INTEL: vscalefpbf16 ymm2, ymm3, ymm4 +0x62,0xf6,0x64,0x28,0x2c,0xd4 + +# ATT: vscalefpbf16 %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vscalefpbf16 ymm2 {k7}, ymm3, ymm4 +0x62,0xf6,0x64,0x2f,0x2c,0xd4 + +# ATT: vscalefpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vscalefpbf16 ymm2 {k7} {z}, ymm3, ymm4 +0x62,0xf6,0x64,0xaf,0x2c,0xd4 + +# ATT: vscalefpbf16 %zmm4, %zmm3, %zmm2 +# INTEL: vscalefpbf16 zmm2, zmm3, zmm4 +0x62,0xf6,0x64,0x48,0x2c,0xd4 + +# ATT: vscalefpbf16 %zmm4, %zmm3, %zmm2 {%k7} +# INTEL: vscalefpbf16 zmm2 {k7}, zmm3, zmm4 +0x62,0xf6,0x64,0x4f,0x2c,0xd4 + +# ATT: vscalefpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +# INTEL: vscalefpbf16 zmm2 {k7} {z}, zmm3, zmm4 +0x62,0xf6,0x64,0xcf,0x2c,0xd4 + +# ATT: vscalefpbf16 %xmm4, %xmm3, %xmm2 +# INTEL: vscalefpbf16 xmm2, xmm3, xmm4 +0x62,0xf6,0x64,0x08,0x2c,0xd4 + +# ATT: vscalefpbf16 %xmm4, %xmm3, %xmm2 {%k7} +# INTEL: vscalefpbf16 xmm2 {k7}, xmm3, xmm4 +0x62,0xf6,0x64,0x0f,0x2c,0xd4 + +# ATT: vscalefpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +# INTEL: vscalefpbf16 xmm2 {k7} {z}, xmm3, xmm4 +0x62,0xf6,0x64,0x8f,0x2c,0xd4 + +# ATT: vscalefpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vscalefpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] 
+0x62,0xf6,0x64,0x48,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vscalefpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +# INTEL: vscalefpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x4f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vscalefpbf16 (%eax){1to32}, %zmm3, %zmm2 +# INTEL: vscalefpbf16 zmm2, zmm3, word ptr [eax]{1to32} +0x62,0xf6,0x64,0x58,0x2c,0x10 + +# ATT: vscalefpbf16 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vscalefpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf6,0x64,0x48,0x2c,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vscalefpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vscalefpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf6,0x64,0xcf,0x2c,0x51,0x7f + +# ATT: vscalefpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +# INTEL: vscalefpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +0x62,0xf6,0x64,0xdf,0x2c,0x52,0x80 + +# ATT: vscalefpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vscalefpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x28,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vscalefpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +# INTEL: vscalefpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x2f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vscalefpbf16 (%eax){1to16}, %ymm3, %ymm2 +# INTEL: vscalefpbf16 ymm2, ymm3, word ptr [eax]{1to16} +0x62,0xf6,0x64,0x38,0x2c,0x10 + +# ATT: vscalefpbf16 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vscalefpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf6,0x64,0x28,0x2c,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vscalefpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vscalefpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf6,0x64,0xaf,0x2c,0x51,0x7f + +# ATT: vscalefpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +# INTEL: vscalefpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +0x62,0xf6,0x64,0xbf,0x2c,0x52,0x80 + +# ATT: vscalefpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vscalefpbf16 xmm2, 
xmm3, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf6,0x64,0x08,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vscalefpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +# INTEL: vscalefpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf6,0x64,0x0f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vscalefpbf16 (%eax){1to8}, %xmm3, %xmm2 +# INTEL: vscalefpbf16 xmm2, xmm3, word ptr [eax]{1to8} +0x62,0xf6,0x64,0x18,0x2c,0x10 + +# ATT: vscalefpbf16 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vscalefpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf6,0x64,0x08,0x2c,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vscalefpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vscalefpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf6,0x64,0x8f,0x2c,0x51,0x7f + +# ATT: vscalefpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +# INTEL: vscalefpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +0x62,0xf6,0x64,0x9f,0x2c,0x52,0x80 + +# ATT: vsqrtnepbf16 %xmm3, %xmm2 +# INTEL: vsqrtnepbf16 xmm2, xmm3 +0x62,0xf5,0x7d,0x08,0x51,0xd3 + +# ATT: vsqrtnepbf16 %xmm3, %xmm2 {%k7} +# INTEL: vsqrtnepbf16 xmm2 {k7}, xmm3 +0x62,0xf5,0x7d,0x0f,0x51,0xd3 + +# ATT: vsqrtnepbf16 %xmm3, %xmm2 {%k7} {z} +# INTEL: vsqrtnepbf16 xmm2 {k7} {z}, xmm3 +0x62,0xf5,0x7d,0x8f,0x51,0xd3 + +# ATT: vsqrtnepbf16 %zmm3, %zmm2 +# INTEL: vsqrtnepbf16 zmm2, zmm3 +0x62,0xf5,0x7d,0x48,0x51,0xd3 + +# ATT: vsqrtnepbf16 %zmm3, %zmm2 {%k7} +# INTEL: vsqrtnepbf16 zmm2 {k7}, zmm3 +0x62,0xf5,0x7d,0x4f,0x51,0xd3 + +# ATT: vsqrtnepbf16 %zmm3, %zmm2 {%k7} {z} +# INTEL: vsqrtnepbf16 zmm2 {k7} {z}, zmm3 +0x62,0xf5,0x7d,0xcf,0x51,0xd3 + +# ATT: vsqrtnepbf16 %ymm3, %ymm2 +# INTEL: vsqrtnepbf16 ymm2, ymm3 +0x62,0xf5,0x7d,0x28,0x51,0xd3 + +# ATT: vsqrtnepbf16 %ymm3, %ymm2 {%k7} +# INTEL: vsqrtnepbf16 ymm2 {k7}, ymm3 +0x62,0xf5,0x7d,0x2f,0x51,0xd3 + +# ATT: vsqrtnepbf16 %ymm3, %ymm2 {%k7} {z} +# INTEL: vsqrtnepbf16 ymm2 {k7} {z}, ymm3 +0x62,0xf5,0x7d,0xaf,0x51,0xd3 + +# ATT: vsqrtnepbf16 268435456(%esp,%esi,8), %xmm2 +# INTEL: 
vsqrtnepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x7d,0x08,0x51,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vsqrtnepbf16 291(%edi,%eax,4), %xmm2 {%k7} +# INTEL: vsqrtnepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x7d,0x0f,0x51,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vsqrtnepbf16 (%eax){1to8}, %xmm2 +# INTEL: vsqrtnepbf16 xmm2, word ptr [eax]{1to8} +0x62,0xf5,0x7d,0x18,0x51,0x10 + +# ATT: vsqrtnepbf16 -512(,%ebp,2), %xmm2 +# INTEL: vsqrtnepbf16 xmm2, xmmword ptr [2*ebp - 512] +0x62,0xf5,0x7d,0x08,0x51,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vsqrtnepbf16 2032(%ecx), %xmm2 {%k7} {z} +# INTEL: vsqrtnepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032] +0x62,0xf5,0x7d,0x8f,0x51,0x51,0x7f + +# ATT: vsqrtnepbf16 -256(%edx){1to8}, %xmm2 {%k7} {z} +# INTEL: vsqrtnepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8} +0x62,0xf5,0x7d,0x9f,0x51,0x52,0x80 + +# ATT: vsqrtnepbf16 268435456(%esp,%esi,8), %ymm2 +# INTEL: vsqrtnepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x7d,0x28,0x51,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vsqrtnepbf16 291(%edi,%eax,4), %ymm2 {%k7} +# INTEL: vsqrtnepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x7d,0x2f,0x51,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vsqrtnepbf16 (%eax){1to16}, %ymm2 +# INTEL: vsqrtnepbf16 ymm2, word ptr [eax]{1to16} +0x62,0xf5,0x7d,0x38,0x51,0x10 + +# ATT: vsqrtnepbf16 -1024(,%ebp,2), %ymm2 +# INTEL: vsqrtnepbf16 ymm2, ymmword ptr [2*ebp - 1024] +0x62,0xf5,0x7d,0x28,0x51,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vsqrtnepbf16 4064(%ecx), %ymm2 {%k7} {z} +# INTEL: vsqrtnepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064] +0x62,0xf5,0x7d,0xaf,0x51,0x51,0x7f + +# ATT: vsqrtnepbf16 -256(%edx){1to16}, %ymm2 {%k7} {z} +# INTEL: vsqrtnepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16} +0x62,0xf5,0x7d,0xbf,0x51,0x52,0x80 + +# ATT: vsqrtnepbf16 268435456(%esp,%esi,8), %zmm2 +# INTEL: vsqrtnepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x7d,0x48,0x51,0x94,0xf4,0x00,0x00,0x00,0x10 + +# 
ATT: vsqrtnepbf16 291(%edi,%eax,4), %zmm2 {%k7} +# INTEL: vsqrtnepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x7d,0x4f,0x51,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vsqrtnepbf16 (%eax){1to32}, %zmm2 +# INTEL: vsqrtnepbf16 zmm2, word ptr [eax]{1to32} +0x62,0xf5,0x7d,0x58,0x51,0x10 + +# ATT: vsqrtnepbf16 -2048(,%ebp,2), %zmm2 +# INTEL: vsqrtnepbf16 zmm2, zmmword ptr [2*ebp - 2048] +0x62,0xf5,0x7d,0x48,0x51,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vsqrtnepbf16 8128(%ecx), %zmm2 {%k7} {z} +# INTEL: vsqrtnepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128] +0x62,0xf5,0x7d,0xcf,0x51,0x51,0x7f + +# ATT: vsqrtnepbf16 -256(%edx){1to32}, %zmm2 {%k7} {z} +# INTEL: vsqrtnepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32} +0x62,0xf5,0x7d,0xdf,0x51,0x52,0x80 + +# ATT: vsubnepbf16 %ymm4, %ymm3, %ymm2 +# INTEL: vsubnepbf16 ymm2, ymm3, ymm4 +0x62,0xf5,0x65,0x28,0x5c,0xd4 + +# ATT: vsubnepbf16 %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vsubnepbf16 ymm2 {k7}, ymm3, ymm4 +0x62,0xf5,0x65,0x2f,0x5c,0xd4 + +# ATT: vsubnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vsubnepbf16 ymm2 {k7} {z}, ymm3, ymm4 +0x62,0xf5,0x65,0xaf,0x5c,0xd4 + +# ATT: vsubnepbf16 %zmm4, %zmm3, %zmm2 +# INTEL: vsubnepbf16 zmm2, zmm3, zmm4 +0x62,0xf5,0x65,0x48,0x5c,0xd4 + +# ATT: vsubnepbf16 %zmm4, %zmm3, %zmm2 {%k7} +# INTEL: vsubnepbf16 zmm2 {k7}, zmm3, zmm4 +0x62,0xf5,0x65,0x4f,0x5c,0xd4 + +# ATT: vsubnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +# INTEL: vsubnepbf16 zmm2 {k7} {z}, zmm3, zmm4 +0x62,0xf5,0x65,0xcf,0x5c,0xd4 + +# ATT: vsubnepbf16 %xmm4, %xmm3, %xmm2 +# INTEL: vsubnepbf16 xmm2, xmm3, xmm4 +0x62,0xf5,0x65,0x08,0x5c,0xd4 + +# ATT: vsubnepbf16 %xmm4, %xmm3, %xmm2 {%k7} +# INTEL: vsubnepbf16 xmm2 {k7}, xmm3, xmm4 +0x62,0xf5,0x65,0x0f,0x5c,0xd4 + +# ATT: vsubnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +# INTEL: vsubnepbf16 xmm2 {k7} {z}, xmm3, xmm4 +0x62,0xf5,0x65,0x8f,0x5c,0xd4 + +# ATT: vsubnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vsubnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] 
+0x62,0xf5,0x65,0x48,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vsubnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +# INTEL: vsubnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x65,0x4f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vsubnepbf16 (%eax){1to32}, %zmm3, %zmm2 +# INTEL: vsubnepbf16 zmm2, zmm3, word ptr [eax]{1to32} +0x62,0xf5,0x65,0x58,0x5c,0x10 + +# ATT: vsubnepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vsubnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf5,0x65,0x48,0x5c,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vsubnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vsubnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf5,0x65,0xcf,0x5c,0x51,0x7f + +# ATT: vsubnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +# INTEL: vsubnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +0x62,0xf5,0x65,0xdf,0x5c,0x52,0x80 + +# ATT: vsubnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vsubnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x65,0x28,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vsubnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +# INTEL: vsubnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x65,0x2f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vsubnepbf16 (%eax){1to16}, %ymm3, %ymm2 +# INTEL: vsubnepbf16 ymm2, ymm3, word ptr [eax]{1to16} +0x62,0xf5,0x65,0x38,0x5c,0x10 + +# ATT: vsubnepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vsubnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf5,0x65,0x28,0x5c,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vsubnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vsubnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf5,0x65,0xaf,0x5c,0x51,0x7f + +# ATT: vsubnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +# INTEL: vsubnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +0x62,0xf5,0x65,0xbf,0x5c,0x52,0x80 + +# ATT: vsubnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vsubnepbf16 xmm2, xmm3, xmmword ptr [esp 
+ 8*esi + 268435456] +0x62,0xf5,0x65,0x08,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vsubnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +# INTEL: vsubnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf5,0x65,0x0f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vsubnepbf16 (%eax){1to8}, %xmm3, %xmm2 +# INTEL: vsubnepbf16 xmm2, xmm3, word ptr [eax]{1to8} +0x62,0xf5,0x65,0x18,0x5c,0x10 + +# ATT: vsubnepbf16 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vsubnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf5,0x65,0x08,0x5c,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vsubnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vsubnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf5,0x65,0x8f,0x5c,0x51,0x7f + +# ATT: vsubnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +# INTEL: vsubnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +0x62,0xf5,0x65,0x9f,0x5c,0x52,0x80 + diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-bf16-64.txt b/llvm/test/MC/Disassembler/X86/avx10.2-bf16-64.txt new file mode 100644 index 00000000000000..953ef8dd8a14c9 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/avx10.2-bf16-64.txt @@ -0,0 +1,3015 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: vaddnepbf16 %ymm24, %ymm23, %ymm22 +# INTEL: vaddnepbf16 ymm22, ymm23, ymm24 +0x62,0x85,0x45,0x20,0x58,0xf0 + +# ATT: vaddnepbf16 %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vaddnepbf16 ymm22 {k7}, ymm23, ymm24 +0x62,0x85,0x45,0x27,0x58,0xf0 + +# ATT: vaddnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vaddnepbf16 ymm22 {k7} {z}, ymm23, ymm24 +0x62,0x85,0x45,0xa7,0x58,0xf0 + +# ATT: vaddnepbf16 %zmm24, %zmm23, %zmm22 +# INTEL: vaddnepbf16 zmm22, zmm23, zmm24 +0x62,0x85,0x45,0x40,0x58,0xf0 + +# ATT: vaddnepbf16 %zmm24, %zmm23, %zmm22 {%k7} +# INTEL: vaddnepbf16 zmm22 {k7}, zmm23, zmm24 +0x62,0x85,0x45,0x47,0x58,0xf0 + +# ATT: 
vaddnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +# INTEL: vaddnepbf16 zmm22 {k7} {z}, zmm23, zmm24 +0x62,0x85,0x45,0xc7,0x58,0xf0 + +# ATT: vaddnepbf16 %xmm24, %xmm23, %xmm22 +# INTEL: vaddnepbf16 xmm22, xmm23, xmm24 +0x62,0x85,0x45,0x00,0x58,0xf0 + +# ATT: vaddnepbf16 %xmm24, %xmm23, %xmm22 {%k7} +# INTEL: vaddnepbf16 xmm22 {k7}, xmm23, xmm24 +0x62,0x85,0x45,0x07,0x58,0xf0 + +# ATT: vaddnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +# INTEL: vaddnepbf16 xmm22 {k7} {z}, xmm23, xmm24 +0x62,0x85,0x45,0x87,0x58,0xf0 + +# ATT: vaddnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vaddnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x45,0x40,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vaddnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +# INTEL: vaddnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x45,0x47,0x58,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vaddnepbf16 (%rip){1to32}, %zmm23, %zmm22 +# INTEL: vaddnepbf16 zmm22, zmm23, word ptr [rip]{1to32} +0x62,0xe5,0x45,0x50,0x58,0x35,0x00,0x00,0x00,0x00 + +# ATT: vaddnepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vaddnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe5,0x45,0x40,0x58,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vaddnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vaddnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe5,0x45,0xc7,0x58,0x71,0x7f + +# ATT: vaddnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +# INTEL: vaddnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +0x62,0xe5,0x45,0xd7,0x58,0x72,0x80 + +# ATT: vaddnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vaddnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x45,0x20,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vaddnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +# INTEL: vaddnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x45,0x27,0x58,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vaddnepbf16 (%rip){1to16}, %ymm23, %ymm22 +# 
INTEL: vaddnepbf16 ymm22, ymm23, word ptr [rip]{1to16} +0x62,0xe5,0x45,0x30,0x58,0x35,0x00,0x00,0x00,0x00 + +# ATT: vaddnepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vaddnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe5,0x45,0x20,0x58,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vaddnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vaddnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe5,0x45,0xa7,0x58,0x71,0x7f + +# ATT: vaddnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +# INTEL: vaddnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +0x62,0xe5,0x45,0xb7,0x58,0x72,0x80 + +# ATT: vaddnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vaddnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x45,0x00,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vaddnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +# INTEL: vaddnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x45,0x07,0x58,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vaddnepbf16 (%rip){1to8}, %xmm23, %xmm22 +# INTEL: vaddnepbf16 xmm22, xmm23, word ptr [rip]{1to8} +0x62,0xe5,0x45,0x10,0x58,0x35,0x00,0x00,0x00,0x00 + +# ATT: vaddnepbf16 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vaddnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe5,0x45,0x00,0x58,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vaddnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +# INTEL: vaddnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe5,0x45,0x87,0x58,0x71,0x7f + +# ATT: vaddnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +# INTEL: vaddnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +0x62,0xe5,0x45,0x97,0x58,0x72,0x80 + +# ATT: vcmppbf16 $123, %ymm24, %ymm23, %k5 +# INTEL: vcmppbf16 k5, ymm23, ymm24, 123 +0x62,0x93,0x47,0x20,0xc2,0xe8,0x7b + +# ATT: vcmppbf16 $123, %ymm24, %ymm23, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, ymm23, ymm24, 123 +0x62,0x93,0x47,0x27,0xc2,0xe8,0x7b + +# ATT: vcmppbf16 $123, %xmm24, %xmm23, %k5 +# INTEL: vcmppbf16 k5, xmm23, 
xmm24, 123 +0x62,0x93,0x47,0x00,0xc2,0xe8,0x7b + +# ATT: vcmppbf16 $123, %xmm24, %xmm23, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, xmm23, xmm24, 123 +0x62,0x93,0x47,0x07,0xc2,0xe8,0x7b + +# ATT: vcmppbf16 $123, %zmm24, %zmm23, %k5 +# INTEL: vcmppbf16 k5, zmm23, zmm24, 123 +0x62,0x93,0x47,0x40,0xc2,0xe8,0x7b + +# ATT: vcmppbf16 $123, %zmm24, %zmm23, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, zmm23, zmm24, 123 +0x62,0x93,0x47,0x47,0xc2,0xe8,0x7b + +# ATT: vcmppbf16 $123, 268435456(%rbp,%r14,8), %zmm23, %k5 +# INTEL: vcmppbf16 k5, zmm23, zmmword ptr [rbp + 8*r14 + 268435456], 123 +0x62,0xb3,0x47,0x40,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b + +# ATT: vcmppbf16 $123, 291(%r8,%rax,4), %zmm23, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291], 123 +0x62,0xd3,0x47,0x47,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b + +# ATT: vcmppbf16 $123, (%rip){1to32}, %zmm23, %k5 +# INTEL: vcmppbf16 k5, zmm23, word ptr [rip]{1to32}, 123 +0x62,0xf3,0x47,0x50,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b + +# ATT: vcmppbf16 $123, -2048(,%rbp,2), %zmm23, %k5 +# INTEL: vcmppbf16 k5, zmm23, zmmword ptr [2*rbp - 2048], 123 +0x62,0xf3,0x47,0x40,0xc2,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b + +# ATT: vcmppbf16 $123, 8128(%rcx), %zmm23, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, zmm23, zmmword ptr [rcx + 8128], 123 +0x62,0xf3,0x47,0x47,0xc2,0x69,0x7f,0x7b + +# ATT: vcmppbf16 $123, -256(%rdx){1to32}, %zmm23, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, zmm23, word ptr [rdx - 256]{1to32}, 123 +0x62,0xf3,0x47,0x57,0xc2,0x6a,0x80,0x7b + +# ATT: vcmppbf16 $123, 268435456(%rbp,%r14,8), %xmm23, %k5 +# INTEL: vcmppbf16 k5, xmm23, xmmword ptr [rbp + 8*r14 + 268435456], 123 +0x62,0xb3,0x47,0x00,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b + +# ATT: vcmppbf16 $123, 291(%r8,%rax,4), %xmm23, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291], 123 +0x62,0xd3,0x47,0x07,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b + +# ATT: vcmppbf16 $123, (%rip){1to8}, %xmm23, %k5 +# INTEL: vcmppbf16 k5, xmm23, word ptr 
[rip]{1to8}, 123 +0x62,0xf3,0x47,0x10,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b + +# ATT: vcmppbf16 $123, -512(,%rbp,2), %xmm23, %k5 +# INTEL: vcmppbf16 k5, xmm23, xmmword ptr [2*rbp - 512], 123 +0x62,0xf3,0x47,0x00,0xc2,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b + +# ATT: vcmppbf16 $123, 2032(%rcx), %xmm23, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, xmm23, xmmword ptr [rcx + 2032], 123 +0x62,0xf3,0x47,0x07,0xc2,0x69,0x7f,0x7b + +# ATT: vcmppbf16 $123, -256(%rdx){1to8}, %xmm23, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, xmm23, word ptr [rdx - 256]{1to8}, 123 +0x62,0xf3,0x47,0x17,0xc2,0x6a,0x80,0x7b + +# ATT: vcmppbf16 $123, 268435456(%rbp,%r14,8), %ymm23, %k5 +# INTEL: vcmppbf16 k5, ymm23, ymmword ptr [rbp + 8*r14 + 268435456], 123 +0x62,0xb3,0x47,0x20,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b + +# ATT: vcmppbf16 $123, 291(%r8,%rax,4), %ymm23, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291], 123 +0x62,0xd3,0x47,0x27,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b + +# ATT: vcmppbf16 $123, (%rip){1to16}, %ymm23, %k5 +# INTEL: vcmppbf16 k5, ymm23, word ptr [rip]{1to16}, 123 +0x62,0xf3,0x47,0x30,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b + +# ATT: vcmppbf16 $123, -1024(,%rbp,2), %ymm23, %k5 +# INTEL: vcmppbf16 k5, ymm23, ymmword ptr [2*rbp - 1024], 123 +0x62,0xf3,0x47,0x20,0xc2,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b + +# ATT: vcmppbf16 $123, 4064(%rcx), %ymm23, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, ymm23, ymmword ptr [rcx + 4064], 123 +0x62,0xf3,0x47,0x27,0xc2,0x69,0x7f,0x7b + +# ATT: vcmppbf16 $123, -256(%rdx){1to16}, %ymm23, %k5 {%k7} +# INTEL: vcmppbf16 k5 {k7}, ymm23, word ptr [rdx - 256]{1to16}, 123 +0x62,0xf3,0x47,0x37,0xc2,0x6a,0x80,0x7b + +# ATT: vcomsbf16 %xmm23, %xmm22 +# INTEL: vcomsbf16 xmm22, xmm23 +0x62,0xa5,0x7d,0x08,0x2f,0xf7 + +# ATT: vcomsbf16 268435456(%rbp,%r14,8), %xmm22 +# INTEL: vcomsbf16 xmm22, word ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x7d,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vcomsbf16 291(%r8,%rax,4), %xmm22 +# INTEL: vcomsbf16 xmm22, word 
ptr [r8 + 4*rax + 291] +0x62,0xc5,0x7d,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vcomsbf16 (%rip), %xmm22 +# INTEL: vcomsbf16 xmm22, word ptr [rip] +0x62,0xe5,0x7d,0x08,0x2f,0x35,0x00,0x00,0x00,0x00 + +# ATT: vcomsbf16 -64(,%rbp,2), %xmm22 +# INTEL: vcomsbf16 xmm22, word ptr [2*rbp - 64] +0x62,0xe5,0x7d,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vcomsbf16 254(%rcx), %xmm22 +# INTEL: vcomsbf16 xmm22, word ptr [rcx + 254] +0x62,0xe5,0x7d,0x08,0x2f,0x71,0x7f + +# ATT: vcomsbf16 -256(%rdx), %xmm22 +# INTEL: vcomsbf16 xmm22, word ptr [rdx - 256] +0x62,0xe5,0x7d,0x08,0x2f,0x72,0x80 + +# ATT: vdivnepbf16 %ymm24, %ymm23, %ymm22 +# INTEL: vdivnepbf16 ymm22, ymm23, ymm24 +0x62,0x85,0x45,0x20,0x5e,0xf0 + +# ATT: vdivnepbf16 %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vdivnepbf16 ymm22 {k7}, ymm23, ymm24 +0x62,0x85,0x45,0x27,0x5e,0xf0 + +# ATT: vdivnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vdivnepbf16 ymm22 {k7} {z}, ymm23, ymm24 +0x62,0x85,0x45,0xa7,0x5e,0xf0 + +# ATT: vdivnepbf16 %zmm24, %zmm23, %zmm22 +# INTEL: vdivnepbf16 zmm22, zmm23, zmm24 +0x62,0x85,0x45,0x40,0x5e,0xf0 + +# ATT: vdivnepbf16 %zmm24, %zmm23, %zmm22 {%k7} +# INTEL: vdivnepbf16 zmm22 {k7}, zmm23, zmm24 +0x62,0x85,0x45,0x47,0x5e,0xf0 + +# ATT: vdivnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +# INTEL: vdivnepbf16 zmm22 {k7} {z}, zmm23, zmm24 +0x62,0x85,0x45,0xc7,0x5e,0xf0 + +# ATT: vdivnepbf16 %xmm24, %xmm23, %xmm22 +# INTEL: vdivnepbf16 xmm22, xmm23, xmm24 +0x62,0x85,0x45,0x00,0x5e,0xf0 + +# ATT: vdivnepbf16 %xmm24, %xmm23, %xmm22 {%k7} +# INTEL: vdivnepbf16 xmm22 {k7}, xmm23, xmm24 +0x62,0x85,0x45,0x07,0x5e,0xf0 + +# ATT: vdivnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +# INTEL: vdivnepbf16 xmm22 {k7} {z}, xmm23, xmm24 +0x62,0x85,0x45,0x87,0x5e,0xf0 + +# ATT: vdivnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vdivnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x45,0x40,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vdivnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 
{%k7} +# INTEL: vdivnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x45,0x47,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vdivnepbf16 (%rip){1to32}, %zmm23, %zmm22 +# INTEL: vdivnepbf16 zmm22, zmm23, word ptr [rip]{1to32} +0x62,0xe5,0x45,0x50,0x5e,0x35,0x00,0x00,0x00,0x00 + +# ATT: vdivnepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vdivnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe5,0x45,0x40,0x5e,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vdivnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vdivnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe5,0x45,0xc7,0x5e,0x71,0x7f + +# ATT: vdivnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +# INTEL: vdivnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +0x62,0xe5,0x45,0xd7,0x5e,0x72,0x80 + +# ATT: vdivnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vdivnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x45,0x20,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vdivnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +# INTEL: vdivnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x45,0x27,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vdivnepbf16 (%rip){1to16}, %ymm23, %ymm22 +# INTEL: vdivnepbf16 ymm22, ymm23, word ptr [rip]{1to16} +0x62,0xe5,0x45,0x30,0x5e,0x35,0x00,0x00,0x00,0x00 + +# ATT: vdivnepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vdivnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe5,0x45,0x20,0x5e,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vdivnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vdivnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe5,0x45,0xa7,0x5e,0x71,0x7f + +# ATT: vdivnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +# INTEL: vdivnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +0x62,0xe5,0x45,0xb7,0x5e,0x72,0x80 + +# ATT: vdivnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vdivnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] 
+0x62,0xa5,0x45,0x00,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vdivnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +# INTEL: vdivnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x45,0x07,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vdivnepbf16 (%rip){1to8}, %xmm23, %xmm22 +# INTEL: vdivnepbf16 xmm22, xmm23, word ptr [rip]{1to8} +0x62,0xe5,0x45,0x10,0x5e,0x35,0x00,0x00,0x00,0x00 + +# ATT: vdivnepbf16 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vdivnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe5,0x45,0x00,0x5e,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vdivnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +# INTEL: vdivnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe5,0x45,0x87,0x5e,0x71,0x7f + +# ATT: vdivnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +# INTEL: vdivnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +0x62,0xe5,0x45,0x97,0x5e,0x72,0x80 + +# ATT: vfmadd132nepbf16 %ymm24, %ymm23, %ymm22 +# INTEL: vfmadd132nepbf16 ymm22, ymm23, ymm24 +0x62,0x86,0x44,0x20,0x98,0xf0 + +# ATT: vfmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vfmadd132nepbf16 ymm22 {k7}, ymm23, ymm24 +0x62,0x86,0x44,0x27,0x98,0xf0 + +# ATT: vfmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +0x62,0x86,0x44,0xa7,0x98,0xf0 + +# ATT: vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 +# INTEL: vfmadd132nepbf16 zmm22, zmm23, zmm24 +0x62,0x86,0x44,0x40,0x98,0xf0 + +# ATT: vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +# INTEL: vfmadd132nepbf16 zmm22 {k7}, zmm23, zmm24 +0x62,0x86,0x44,0x47,0x98,0xf0 + +# ATT: vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +0x62,0x86,0x44,0xc7,0x98,0xf0 + +# ATT: vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 +# INTEL: vfmadd132nepbf16 xmm22, xmm23, xmm24 +0x62,0x86,0x44,0x00,0x98,0xf0 + +# ATT: vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +# INTEL: vfmadd132nepbf16 xmm22 {k7}, xmm23, xmm24 
+0x62,0x86,0x44,0x07,0x98,0xf0 + +# ATT: vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +0x62,0x86,0x44,0x87,0x98,0xf0 + +# ATT: vfmadd132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vfmadd132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x40,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfmadd132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +# INTEL: vfmadd132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x47,0x98,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfmadd132nepbf16 (%rip){1to32}, %zmm23, %zmm22 +# INTEL: vfmadd132nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +0x62,0xe6,0x44,0x50,0x98,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfmadd132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vfmadd132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe6,0x44,0x40,0x98,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfmadd132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe6,0x44,0xc7,0x98,0x71,0x7f + +# ATT: vfmadd132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +0x62,0xe6,0x44,0xd7,0x98,0x72,0x80 + +# ATT: vfmadd132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vfmadd132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x20,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfmadd132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +# INTEL: vfmadd132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x27,0x98,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfmadd132nepbf16 (%rip){1to16}, %ymm23, %ymm22 +# INTEL: vfmadd132nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +0x62,0xe6,0x44,0x30,0x98,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfmadd132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vfmadd132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] 
+0x62,0xe6,0x44,0x20,0x98,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfmadd132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe6,0x44,0xa7,0x98,0x71,0x7f + +# ATT: vfmadd132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +0x62,0xe6,0x44,0xb7,0x98,0x72,0x80 + +# ATT: vfmadd132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vfmadd132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x00,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfmadd132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +# INTEL: vfmadd132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x07,0x98,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfmadd132nepbf16 (%rip){1to8}, %xmm23, %xmm22 +# INTEL: vfmadd132nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +0x62,0xe6,0x44,0x10,0x98,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfmadd132nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vfmadd132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe6,0x44,0x00,0x98,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfmadd132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +# INTEL: vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe6,0x44,0x87,0x98,0x71,0x7f + +# ATT: vfmadd132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +0x62,0xe6,0x44,0x97,0x98,0x72,0x80 + +# ATT: vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 +# INTEL: vfmadd213nepbf16 ymm22, ymm23, ymm24 +0x62,0x86,0x44,0x20,0xa8,0xf0 + +# ATT: vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vfmadd213nepbf16 ymm22 {k7}, ymm23, ymm24 +0x62,0x86,0x44,0x27,0xa8,0xf0 + +# ATT: vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +0x62,0x86,0x44,0xa7,0xa8,0xf0 + +# ATT: vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 +# INTEL: 
vfmadd213nepbf16 zmm22, zmm23, zmm24 +0x62,0x86,0x44,0x40,0xa8,0xf0 + +# ATT: vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +# INTEL: vfmadd213nepbf16 zmm22 {k7}, zmm23, zmm24 +0x62,0x86,0x44,0x47,0xa8,0xf0 + +# ATT: vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +0x62,0x86,0x44,0xc7,0xa8,0xf0 + +# ATT: vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 +# INTEL: vfmadd213nepbf16 xmm22, xmm23, xmm24 +0x62,0x86,0x44,0x00,0xa8,0xf0 + +# ATT: vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +# INTEL: vfmadd213nepbf16 xmm22 {k7}, xmm23, xmm24 +0x62,0x86,0x44,0x07,0xa8,0xf0 + +# ATT: vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +0x62,0x86,0x44,0x87,0xa8,0xf0 + +# ATT: vfmadd213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vfmadd213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x40,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfmadd213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +# INTEL: vfmadd213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x47,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfmadd213nepbf16 (%rip){1to32}, %zmm23, %zmm22 +# INTEL: vfmadd213nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +0x62,0xe6,0x44,0x50,0xa8,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfmadd213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vfmadd213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe6,0x44,0x40,0xa8,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfmadd213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe6,0x44,0xc7,0xa8,0x71,0x7f + +# ATT: vfmadd213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +0x62,0xe6,0x44,0xd7,0xa8,0x72,0x80 + +# ATT: vfmadd213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vfmadd213nepbf16 ymm22, ymm23, ymmword ptr 
[rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x20,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfmadd213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +# INTEL: vfmadd213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x27,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfmadd213nepbf16 (%rip){1to16}, %ymm23, %ymm22 +# INTEL: vfmadd213nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +0x62,0xe6,0x44,0x30,0xa8,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfmadd213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vfmadd213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe6,0x44,0x20,0xa8,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfmadd213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe6,0x44,0xa7,0xa8,0x71,0x7f + +# ATT: vfmadd213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +0x62,0xe6,0x44,0xb7,0xa8,0x72,0x80 + +# ATT: vfmadd213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vfmadd213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x00,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfmadd213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +# INTEL: vfmadd213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x07,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfmadd213nepbf16 (%rip){1to8}, %xmm23, %xmm22 +# INTEL: vfmadd213nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +0x62,0xe6,0x44,0x10,0xa8,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfmadd213nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vfmadd213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe6,0x44,0x00,0xa8,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfmadd213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +# INTEL: vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe6,0x44,0x87,0xa8,0x71,0x7f + +# ATT: vfmadd213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +# INTEL: 
vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +0x62,0xe6,0x44,0x97,0xa8,0x72,0x80 + +# ATT: vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 +# INTEL: vfmadd231nepbf16 ymm22, ymm23, ymm24 +0x62,0x86,0x44,0x20,0xb8,0xf0 + +# ATT: vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vfmadd231nepbf16 ymm22 {k7}, ymm23, ymm24 +0x62,0x86,0x44,0x27,0xb8,0xf0 + +# ATT: vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +0x62,0x86,0x44,0xa7,0xb8,0xf0 + +# ATT: vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 +# INTEL: vfmadd231nepbf16 zmm22, zmm23, zmm24 +0x62,0x86,0x44,0x40,0xb8,0xf0 + +# ATT: vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +# INTEL: vfmadd231nepbf16 zmm22 {k7}, zmm23, zmm24 +0x62,0x86,0x44,0x47,0xb8,0xf0 + +# ATT: vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +0x62,0x86,0x44,0xc7,0xb8,0xf0 + +# ATT: vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 +# INTEL: vfmadd231nepbf16 xmm22, xmm23, xmm24 +0x62,0x86,0x44,0x00,0xb8,0xf0 + +# ATT: vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +# INTEL: vfmadd231nepbf16 xmm22 {k7}, xmm23, xmm24 +0x62,0x86,0x44,0x07,0xb8,0xf0 + +# ATT: vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +0x62,0x86,0x44,0x87,0xb8,0xf0 + +# ATT: vfmadd231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vfmadd231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x40,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfmadd231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +# INTEL: vfmadd231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x47,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfmadd231nepbf16 (%rip){1to32}, %zmm23, %zmm22 +# INTEL: vfmadd231nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +0x62,0xe6,0x44,0x50,0xb8,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfmadd231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vfmadd231nepbf16 
zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe6,0x44,0x40,0xb8,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfmadd231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe6,0x44,0xc7,0xb8,0x71,0x7f + +# ATT: vfmadd231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +0x62,0xe6,0x44,0xd7,0xb8,0x72,0x80 + +# ATT: vfmadd231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vfmadd231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x20,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfmadd231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +# INTEL: vfmadd231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x27,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfmadd231nepbf16 (%rip){1to16}, %ymm23, %ymm22 +# INTEL: vfmadd231nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +0x62,0xe6,0x44,0x30,0xb8,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfmadd231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vfmadd231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe6,0x44,0x20,0xb8,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfmadd231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe6,0x44,0xa7,0xb8,0x71,0x7f + +# ATT: vfmadd231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +0x62,0xe6,0x44,0xb7,0xb8,0x72,0x80 + +# ATT: vfmadd231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vfmadd231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x00,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfmadd231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +# INTEL: vfmadd231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x07,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfmadd231nepbf16 
(%rip){1to8}, %xmm23, %xmm22 +# INTEL: vfmadd231nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +0x62,0xe6,0x44,0x10,0xb8,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfmadd231nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vfmadd231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe6,0x44,0x00,0xb8,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfmadd231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +# INTEL: vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe6,0x44,0x87,0xb8,0x71,0x7f + +# ATT: vfmadd231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +0x62,0xe6,0x44,0x97,0xb8,0x72,0x80 + +# ATT: vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 +# INTEL: vfmsub132nepbf16 ymm22, ymm23, ymm24 +0x62,0x86,0x44,0x20,0x9a,0xf0 + +# ATT: vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vfmsub132nepbf16 ymm22 {k7}, ymm23, ymm24 +0x62,0x86,0x44,0x27,0x9a,0xf0 + +# ATT: vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +0x62,0x86,0x44,0xa7,0x9a,0xf0 + +# ATT: vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 +# INTEL: vfmsub132nepbf16 zmm22, zmm23, zmm24 +0x62,0x86,0x44,0x40,0x9a,0xf0 + +# ATT: vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +# INTEL: vfmsub132nepbf16 zmm22 {k7}, zmm23, zmm24 +0x62,0x86,0x44,0x47,0x9a,0xf0 + +# ATT: vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +0x62,0x86,0x44,0xc7,0x9a,0xf0 + +# ATT: vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 +# INTEL: vfmsub132nepbf16 xmm22, xmm23, xmm24 +0x62,0x86,0x44,0x00,0x9a,0xf0 + +# ATT: vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +# INTEL: vfmsub132nepbf16 xmm22 {k7}, xmm23, xmm24 +0x62,0x86,0x44,0x07,0x9a,0xf0 + +# ATT: vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +0x62,0x86,0x44,0x87,0x9a,0xf0 + +# ATT: vfmsub132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: 
vfmsub132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x40,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfmsub132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +# INTEL: vfmsub132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x47,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfmsub132nepbf16 (%rip){1to32}, %zmm23, %zmm22 +# INTEL: vfmsub132nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +0x62,0xe6,0x44,0x50,0x9a,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfmsub132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vfmsub132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe6,0x44,0x40,0x9a,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfmsub132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe6,0x44,0xc7,0x9a,0x71,0x7f + +# ATT: vfmsub132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +0x62,0xe6,0x44,0xd7,0x9a,0x72,0x80 + +# ATT: vfmsub132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vfmsub132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x20,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfmsub132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +# INTEL: vfmsub132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x27,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfmsub132nepbf16 (%rip){1to16}, %ymm23, %ymm22 +# INTEL: vfmsub132nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +0x62,0xe6,0x44,0x30,0x9a,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfmsub132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vfmsub132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe6,0x44,0x20,0x9a,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfmsub132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe6,0x44,0xa7,0x9a,0x71,0x7f + +# ATT: vfmsub132nepbf16 
-256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +0x62,0xe6,0x44,0xb7,0x9a,0x72,0x80 + +# ATT: vfmsub132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vfmsub132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x00,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfmsub132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +# INTEL: vfmsub132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x07,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfmsub132nepbf16 (%rip){1to8}, %xmm23, %xmm22 +# INTEL: vfmsub132nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +0x62,0xe6,0x44,0x10,0x9a,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfmsub132nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vfmsub132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe6,0x44,0x00,0x9a,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfmsub132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +# INTEL: vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe6,0x44,0x87,0x9a,0x71,0x7f + +# ATT: vfmsub132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +0x62,0xe6,0x44,0x97,0x9a,0x72,0x80 + +# ATT: vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 +# INTEL: vfmsub213nepbf16 ymm22, ymm23, ymm24 +0x62,0x86,0x44,0x20,0xaa,0xf0 + +# ATT: vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vfmsub213nepbf16 ymm22 {k7}, ymm23, ymm24 +0x62,0x86,0x44,0x27,0xaa,0xf0 + +# ATT: vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +0x62,0x86,0x44,0xa7,0xaa,0xf0 + +# ATT: vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 +# INTEL: vfmsub213nepbf16 zmm22, zmm23, zmm24 +0x62,0x86,0x44,0x40,0xaa,0xf0 + +# ATT: vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +# INTEL: vfmsub213nepbf16 zmm22 {k7}, zmm23, zmm24 +0x62,0x86,0x44,0x47,0xaa,0xf0 + +# ATT: vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 
{%k7} {z} +# INTEL: vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +0x62,0x86,0x44,0xc7,0xaa,0xf0 + +# ATT: vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 +# INTEL: vfmsub213nepbf16 xmm22, xmm23, xmm24 +0x62,0x86,0x44,0x00,0xaa,0xf0 + +# ATT: vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +# INTEL: vfmsub213nepbf16 xmm22 {k7}, xmm23, xmm24 +0x62,0x86,0x44,0x07,0xaa,0xf0 + +# ATT: vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +0x62,0x86,0x44,0x87,0xaa,0xf0 + +# ATT: vfmsub213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vfmsub213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x40,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfmsub213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +# INTEL: vfmsub213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x47,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfmsub213nepbf16 (%rip){1to32}, %zmm23, %zmm22 +# INTEL: vfmsub213nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +0x62,0xe6,0x44,0x50,0xaa,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfmsub213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vfmsub213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe6,0x44,0x40,0xaa,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfmsub213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe6,0x44,0xc7,0xaa,0x71,0x7f + +# ATT: vfmsub213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +0x62,0xe6,0x44,0xd7,0xaa,0x72,0x80 + +# ATT: vfmsub213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vfmsub213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x20,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfmsub213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +# INTEL: vfmsub213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] 
+0x62,0xc6,0x44,0x27,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfmsub213nepbf16 (%rip){1to16}, %ymm23, %ymm22 +# INTEL: vfmsub213nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +0x62,0xe6,0x44,0x30,0xaa,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfmsub213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vfmsub213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe6,0x44,0x20,0xaa,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfmsub213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe6,0x44,0xa7,0xaa,0x71,0x7f + +# ATT: vfmsub213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +0x62,0xe6,0x44,0xb7,0xaa,0x72,0x80 + +# ATT: vfmsub213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vfmsub213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x00,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfmsub213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +# INTEL: vfmsub213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x07,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfmsub213nepbf16 (%rip){1to8}, %xmm23, %xmm22 +# INTEL: vfmsub213nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +0x62,0xe6,0x44,0x10,0xaa,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfmsub213nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vfmsub213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe6,0x44,0x00,0xaa,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfmsub213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +# INTEL: vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe6,0x44,0x87,0xaa,0x71,0x7f + +# ATT: vfmsub213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +0x62,0xe6,0x44,0x97,0xaa,0x72,0x80 + +# ATT: vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 +# INTEL: vfmsub231nepbf16 ymm22, ymm23, ymm24 
+0x62,0x86,0x44,0x20,0xba,0xf0 + +# ATT: vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vfmsub231nepbf16 ymm22 {k7}, ymm23, ymm24 +0x62,0x86,0x44,0x27,0xba,0xf0 + +# ATT: vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +0x62,0x86,0x44,0xa7,0xba,0xf0 + +# ATT: vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 +# INTEL: vfmsub231nepbf16 zmm22, zmm23, zmm24 +0x62,0x86,0x44,0x40,0xba,0xf0 + +# ATT: vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +# INTEL: vfmsub231nepbf16 zmm22 {k7}, zmm23, zmm24 +0x62,0x86,0x44,0x47,0xba,0xf0 + +# ATT: vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +0x62,0x86,0x44,0xc7,0xba,0xf0 + +# ATT: vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 +# INTEL: vfmsub231nepbf16 xmm22, xmm23, xmm24 +0x62,0x86,0x44,0x00,0xba,0xf0 + +# ATT: vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +# INTEL: vfmsub231nepbf16 xmm22 {k7}, xmm23, xmm24 +0x62,0x86,0x44,0x07,0xba,0xf0 + +# ATT: vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +0x62,0x86,0x44,0x87,0xba,0xf0 + +# ATT: vfmsub231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vfmsub231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x40,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfmsub231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +# INTEL: vfmsub231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x47,0xba,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfmsub231nepbf16 (%rip){1to32}, %zmm23, %zmm22 +# INTEL: vfmsub231nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +0x62,0xe6,0x44,0x50,0xba,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfmsub231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vfmsub231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe6,0x44,0x40,0xba,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfmsub231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vfmsub231nepbf16 zmm22 {k7} 
{z}, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe6,0x44,0xc7,0xba,0x71,0x7f + +# ATT: vfmsub231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +0x62,0xe6,0x44,0xd7,0xba,0x72,0x80 + +# ATT: vfmsub231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vfmsub231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x20,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfmsub231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +# INTEL: vfmsub231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x27,0xba,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfmsub231nepbf16 (%rip){1to16}, %ymm23, %ymm22 +# INTEL: vfmsub231nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +0x62,0xe6,0x44,0x30,0xba,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfmsub231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vfmsub231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe6,0x44,0x20,0xba,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfmsub231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe6,0x44,0xa7,0xba,0x71,0x7f + +# ATT: vfmsub231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +0x62,0xe6,0x44,0xb7,0xba,0x72,0x80 + +# ATT: vfmsub231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vfmsub231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x00,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfmsub231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +# INTEL: vfmsub231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x07,0xba,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfmsub231nepbf16 (%rip){1to8}, %xmm23, %xmm22 +# INTEL: vfmsub231nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +0x62,0xe6,0x44,0x10,0xba,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfmsub231nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +# 
INTEL: vfmsub231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe6,0x44,0x00,0xba,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfmsub231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +# INTEL: vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe6,0x44,0x87,0xba,0x71,0x7f + +# ATT: vfmsub231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +0x62,0xe6,0x44,0x97,0xba,0x72,0x80 + +# ATT: vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 +# INTEL: vfnmadd132nepbf16 ymm22, ymm23, ymm24 +0x62,0x86,0x44,0x20,0x9c,0xf0 + +# ATT: vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vfnmadd132nepbf16 ymm22 {k7}, ymm23, ymm24 +0x62,0x86,0x44,0x27,0x9c,0xf0 + +# ATT: vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +0x62,0x86,0x44,0xa7,0x9c,0xf0 + +# ATT: vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 +# INTEL: vfnmadd132nepbf16 zmm22, zmm23, zmm24 +0x62,0x86,0x44,0x40,0x9c,0xf0 + +# ATT: vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +# INTEL: vfnmadd132nepbf16 zmm22 {k7}, zmm23, zmm24 +0x62,0x86,0x44,0x47,0x9c,0xf0 + +# ATT: vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +0x62,0x86,0x44,0xc7,0x9c,0xf0 + +# ATT: vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 +# INTEL: vfnmadd132nepbf16 xmm22, xmm23, xmm24 +0x62,0x86,0x44,0x00,0x9c,0xf0 + +# ATT: vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +# INTEL: vfnmadd132nepbf16 xmm22 {k7}, xmm23, xmm24 +0x62,0x86,0x44,0x07,0x9c,0xf0 + +# ATT: vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +0x62,0x86,0x44,0x87,0x9c,0xf0 + +# ATT: vfnmadd132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vfnmadd132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x40,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfnmadd132nepbf16 291(%r8,%rax,4), %zmm23, 
%zmm22 {%k7} +# INTEL: vfnmadd132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x47,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfnmadd132nepbf16 (%rip){1to32}, %zmm23, %zmm22 +# INTEL: vfnmadd132nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +0x62,0xe6,0x44,0x50,0x9c,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfnmadd132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vfnmadd132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe6,0x44,0x40,0x9c,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfnmadd132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe6,0x44,0xc7,0x9c,0x71,0x7f + +# ATT: vfnmadd132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +0x62,0xe6,0x44,0xd7,0x9c,0x72,0x80 + +# ATT: vfnmadd132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vfnmadd132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x20,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfnmadd132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +# INTEL: vfnmadd132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x27,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfnmadd132nepbf16 (%rip){1to16}, %ymm23, %ymm22 +# INTEL: vfnmadd132nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +0x62,0xe6,0x44,0x30,0x9c,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfnmadd132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vfnmadd132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe6,0x44,0x20,0x9c,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfnmadd132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe6,0x44,0xa7,0x9c,0x71,0x7f + +# ATT: vfnmadd132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +0x62,0xe6,0x44,0xb7,0x9c,0x72,0x80 + +# 
ATT: vfnmadd132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vfnmadd132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x00,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfnmadd132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +# INTEL: vfnmadd132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x07,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfnmadd132nepbf16 (%rip){1to8}, %xmm23, %xmm22 +# INTEL: vfnmadd132nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +0x62,0xe6,0x44,0x10,0x9c,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfnmadd132nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vfnmadd132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe6,0x44,0x00,0x9c,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfnmadd132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +# INTEL: vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe6,0x44,0x87,0x9c,0x71,0x7f + +# ATT: vfnmadd132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +0x62,0xe6,0x44,0x97,0x9c,0x72,0x80 + +# ATT: vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 +# INTEL: vfnmadd213nepbf16 ymm22, ymm23, ymm24 +0x62,0x86,0x44,0x20,0xac,0xf0 + +# ATT: vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vfnmadd213nepbf16 ymm22 {k7}, ymm23, ymm24 +0x62,0x86,0x44,0x27,0xac,0xf0 + +# ATT: vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +0x62,0x86,0x44,0xa7,0xac,0xf0 + +# ATT: vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 +# INTEL: vfnmadd213nepbf16 zmm22, zmm23, zmm24 +0x62,0x86,0x44,0x40,0xac,0xf0 + +# ATT: vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +# INTEL: vfnmadd213nepbf16 zmm22 {k7}, zmm23, zmm24 +0x62,0x86,0x44,0x47,0xac,0xf0 + +# ATT: vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +0x62,0x86,0x44,0xc7,0xac,0xf0 + +# ATT: vfnmadd213nepbf16 %xmm24, %xmm23, 
%xmm22 +# INTEL: vfnmadd213nepbf16 xmm22, xmm23, xmm24 +0x62,0x86,0x44,0x00,0xac,0xf0 + +# ATT: vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +# INTEL: vfnmadd213nepbf16 xmm22 {k7}, xmm23, xmm24 +0x62,0x86,0x44,0x07,0xac,0xf0 + +# ATT: vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +0x62,0x86,0x44,0x87,0xac,0xf0 + +# ATT: vfnmadd213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vfnmadd213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x40,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfnmadd213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +# INTEL: vfnmadd213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x47,0xac,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfnmadd213nepbf16 (%rip){1to32}, %zmm23, %zmm22 +# INTEL: vfnmadd213nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +0x62,0xe6,0x44,0x50,0xac,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfnmadd213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vfnmadd213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe6,0x44,0x40,0xac,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfnmadd213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe6,0x44,0xc7,0xac,0x71,0x7f + +# ATT: vfnmadd213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +0x62,0xe6,0x44,0xd7,0xac,0x72,0x80 + +# ATT: vfnmadd213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vfnmadd213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x20,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfnmadd213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +# INTEL: vfnmadd213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x27,0xac,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfnmadd213nepbf16 (%rip){1to16}, %ymm23, %ymm22 +# INTEL: vfnmadd213nepbf16 ymm22, 
ymm23, word ptr [rip]{1to16} +0x62,0xe6,0x44,0x30,0xac,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfnmadd213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vfnmadd213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe6,0x44,0x20,0xac,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfnmadd213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe6,0x44,0xa7,0xac,0x71,0x7f + +# ATT: vfnmadd213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +0x62,0xe6,0x44,0xb7,0xac,0x72,0x80 + +# ATT: vfnmadd213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vfnmadd213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x00,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfnmadd213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +# INTEL: vfnmadd213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x07,0xac,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfnmadd213nepbf16 (%rip){1to8}, %xmm23, %xmm22 +# INTEL: vfnmadd213nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +0x62,0xe6,0x44,0x10,0xac,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfnmadd213nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vfnmadd213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe6,0x44,0x00,0xac,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfnmadd213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +# INTEL: vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe6,0x44,0x87,0xac,0x71,0x7f + +# ATT: vfnmadd213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +0x62,0xe6,0x44,0x97,0xac,0x72,0x80 + +# ATT: vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 +# INTEL: vfnmadd231nepbf16 ymm22, ymm23, ymm24 +0x62,0x86,0x44,0x20,0xbc,0xf0 + +# ATT: vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vfnmadd231nepbf16 ymm22 {k7}, ymm23, ymm24 
+0x62,0x86,0x44,0x27,0xbc,0xf0 + +# ATT: vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +0x62,0x86,0x44,0xa7,0xbc,0xf0 + +# ATT: vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 +# INTEL: vfnmadd231nepbf16 zmm22, zmm23, zmm24 +0x62,0x86,0x44,0x40,0xbc,0xf0 + +# ATT: vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +# INTEL: vfnmadd231nepbf16 zmm22 {k7}, zmm23, zmm24 +0x62,0x86,0x44,0x47,0xbc,0xf0 + +# ATT: vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +0x62,0x86,0x44,0xc7,0xbc,0xf0 + +# ATT: vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 +# INTEL: vfnmadd231nepbf16 xmm22, xmm23, xmm24 +0x62,0x86,0x44,0x00,0xbc,0xf0 + +# ATT: vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +# INTEL: vfnmadd231nepbf16 xmm22 {k7}, xmm23, xmm24 +0x62,0x86,0x44,0x07,0xbc,0xf0 + +# ATT: vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +0x62,0x86,0x44,0x87,0xbc,0xf0 + +# ATT: vfnmadd231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vfnmadd231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x40,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfnmadd231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +# INTEL: vfnmadd231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x47,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfnmadd231nepbf16 (%rip){1to32}, %zmm23, %zmm22 +# INTEL: vfnmadd231nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +0x62,0xe6,0x44,0x50,0xbc,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfnmadd231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vfnmadd231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe6,0x44,0x40,0xbc,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfnmadd231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe6,0x44,0xc7,0xbc,0x71,0x7f + +# ATT: vfnmadd231nepbf16 
-256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +0x62,0xe6,0x44,0xd7,0xbc,0x72,0x80 + +# ATT: vfnmadd231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vfnmadd231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x20,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfnmadd231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +# INTEL: vfnmadd231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x27,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfnmadd231nepbf16 (%rip){1to16}, %ymm23, %ymm22 +# INTEL: vfnmadd231nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +0x62,0xe6,0x44,0x30,0xbc,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfnmadd231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vfnmadd231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe6,0x44,0x20,0xbc,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfnmadd231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe6,0x44,0xa7,0xbc,0x71,0x7f + +# ATT: vfnmadd231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +0x62,0xe6,0x44,0xb7,0xbc,0x72,0x80 + +# ATT: vfnmadd231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vfnmadd231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x00,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfnmadd231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +# INTEL: vfnmadd231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x07,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfnmadd231nepbf16 (%rip){1to8}, %xmm23, %xmm22 +# INTEL: vfnmadd231nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +0x62,0xe6,0x44,0x10,0xbc,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfnmadd231nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vfnmadd231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] 
+0x62,0xe6,0x44,0x00,0xbc,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfnmadd231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +# INTEL: vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe6,0x44,0x87,0xbc,0x71,0x7f + +# ATT: vfnmadd231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +0x62,0xe6,0x44,0x97,0xbc,0x72,0x80 + +# ATT: vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 +# INTEL: vfnmsub132nepbf16 ymm22, ymm23, ymm24 +0x62,0x86,0x44,0x20,0x9e,0xf0 + +# ATT: vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vfnmsub132nepbf16 ymm22 {k7}, ymm23, ymm24 +0x62,0x86,0x44,0x27,0x9e,0xf0 + +# ATT: vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +0x62,0x86,0x44,0xa7,0x9e,0xf0 + +# ATT: vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 +# INTEL: vfnmsub132nepbf16 zmm22, zmm23, zmm24 +0x62,0x86,0x44,0x40,0x9e,0xf0 + +# ATT: vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +# INTEL: vfnmsub132nepbf16 zmm22 {k7}, zmm23, zmm24 +0x62,0x86,0x44,0x47,0x9e,0xf0 + +# ATT: vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +0x62,0x86,0x44,0xc7,0x9e,0xf0 + +# ATT: vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 +# INTEL: vfnmsub132nepbf16 xmm22, xmm23, xmm24 +0x62,0x86,0x44,0x00,0x9e,0xf0 + +# ATT: vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +# INTEL: vfnmsub132nepbf16 xmm22 {k7}, xmm23, xmm24 +0x62,0x86,0x44,0x07,0x9e,0xf0 + +# ATT: vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +0x62,0x86,0x44,0x87,0x9e,0xf0 + +# ATT: vfnmsub132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vfnmsub132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x40,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfnmsub132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +# INTEL: vfnmsub132nepbf16 zmm22 {k7}, zmm23, 
zmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x47,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfnmsub132nepbf16 (%rip){1to32}, %zmm23, %zmm22 +# INTEL: vfnmsub132nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +0x62,0xe6,0x44,0x50,0x9e,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfnmsub132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vfnmsub132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe6,0x44,0x40,0x9e,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfnmsub132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe6,0x44,0xc7,0x9e,0x71,0x7f + +# ATT: vfnmsub132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +0x62,0xe6,0x44,0xd7,0x9e,0x72,0x80 + +# ATT: vfnmsub132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vfnmsub132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x20,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfnmsub132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +# INTEL: vfnmsub132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x27,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfnmsub132nepbf16 (%rip){1to16}, %ymm23, %ymm22 +# INTEL: vfnmsub132nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +0x62,0xe6,0x44,0x30,0x9e,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfnmsub132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vfnmsub132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe6,0x44,0x20,0x9e,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfnmsub132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe6,0x44,0xa7,0x9e,0x71,0x7f + +# ATT: vfnmsub132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +0x62,0xe6,0x44,0xb7,0x9e,0x72,0x80 + +# ATT: vfnmsub132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 
+# INTEL: vfnmsub132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x00,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfnmsub132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +# INTEL: vfnmsub132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x07,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfnmsub132nepbf16 (%rip){1to8}, %xmm23, %xmm22 +# INTEL: vfnmsub132nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +0x62,0xe6,0x44,0x10,0x9e,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfnmsub132nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vfnmsub132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe6,0x44,0x00,0x9e,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfnmsub132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +# INTEL: vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe6,0x44,0x87,0x9e,0x71,0x7f + +# ATT: vfnmsub132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +0x62,0xe6,0x44,0x97,0x9e,0x72,0x80 + +# ATT: vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 +# INTEL: vfnmsub213nepbf16 ymm22, ymm23, ymm24 +0x62,0x86,0x44,0x20,0xae,0xf0 + +# ATT: vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vfnmsub213nepbf16 ymm22 {k7}, ymm23, ymm24 +0x62,0x86,0x44,0x27,0xae,0xf0 + +# ATT: vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +0x62,0x86,0x44,0xa7,0xae,0xf0 + +# ATT: vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 +# INTEL: vfnmsub213nepbf16 zmm22, zmm23, zmm24 +0x62,0x86,0x44,0x40,0xae,0xf0 + +# ATT: vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +# INTEL: vfnmsub213nepbf16 zmm22 {k7}, zmm23, zmm24 +0x62,0x86,0x44,0x47,0xae,0xf0 + +# ATT: vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +0x62,0x86,0x44,0xc7,0xae,0xf0 + +# ATT: vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 +# INTEL: vfnmsub213nepbf16 xmm22, xmm23, xmm24 
+0x62,0x86,0x44,0x00,0xae,0xf0 + +# ATT: vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +# INTEL: vfnmsub213nepbf16 xmm22 {k7}, xmm23, xmm24 +0x62,0x86,0x44,0x07,0xae,0xf0 + +# ATT: vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +0x62,0x86,0x44,0x87,0xae,0xf0 + +# ATT: vfnmsub213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vfnmsub213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x40,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfnmsub213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +# INTEL: vfnmsub213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x47,0xae,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfnmsub213nepbf16 (%rip){1to32}, %zmm23, %zmm22 +# INTEL: vfnmsub213nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +0x62,0xe6,0x44,0x50,0xae,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfnmsub213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vfnmsub213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe6,0x44,0x40,0xae,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfnmsub213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe6,0x44,0xc7,0xae,0x71,0x7f + +# ATT: vfnmsub213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +0x62,0xe6,0x44,0xd7,0xae,0x72,0x80 + +# ATT: vfnmsub213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vfnmsub213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x20,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfnmsub213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +# INTEL: vfnmsub213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x27,0xae,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfnmsub213nepbf16 (%rip){1to16}, %ymm23, %ymm22 +# INTEL: vfnmsub213nepbf16 ymm22, ymm23, word ptr [rip]{1to16} 
+0x62,0xe6,0x44,0x30,0xae,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfnmsub213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vfnmsub213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe6,0x44,0x20,0xae,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfnmsub213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe6,0x44,0xa7,0xae,0x71,0x7f + +# ATT: vfnmsub213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +0x62,0xe6,0x44,0xb7,0xae,0x72,0x80 + +# ATT: vfnmsub213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vfnmsub213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x00,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfnmsub213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +# INTEL: vfnmsub213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x07,0xae,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfnmsub213nepbf16 (%rip){1to8}, %xmm23, %xmm22 +# INTEL: vfnmsub213nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +0x62,0xe6,0x44,0x10,0xae,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfnmsub213nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vfnmsub213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe6,0x44,0x00,0xae,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vfnmsub213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +# INTEL: vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe6,0x44,0x87,0xae,0x71,0x7f + +# ATT: vfnmsub213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +0x62,0xe6,0x44,0x97,0xae,0x72,0x80 + +# ATT: vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 +# INTEL: vfnmsub231nepbf16 ymm22, ymm23, ymm24 +0x62,0x86,0x44,0x20,0xbe,0xf0 + +# ATT: vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vfnmsub231nepbf16 ymm22 {k7}, ymm23, ymm24 +0x62,0x86,0x44,0x27,0xbe,0xf0 + +# ATT: 
vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +0x62,0x86,0x44,0xa7,0xbe,0xf0 + +# ATT: vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 +# INTEL: vfnmsub231nepbf16 zmm22, zmm23, zmm24 +0x62,0x86,0x44,0x40,0xbe,0xf0 + +# ATT: vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +# INTEL: vfnmsub231nepbf16 zmm22 {k7}, zmm23, zmm24 +0x62,0x86,0x44,0x47,0xbe,0xf0 + +# ATT: vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +# INTEL: vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +0x62,0x86,0x44,0xc7,0xbe,0xf0 + +# ATT: vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 +# INTEL: vfnmsub231nepbf16 xmm22, xmm23, xmm24 +0x62,0x86,0x44,0x00,0xbe,0xf0 + +# ATT: vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +# INTEL: vfnmsub231nepbf16 xmm22 {k7}, xmm23, xmm24 +0x62,0x86,0x44,0x07,0xbe,0xf0 + +# ATT: vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +0x62,0x86,0x44,0x87,0xbe,0xf0 + +# ATT: vfnmsub231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vfnmsub231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x40,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfnmsub231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +# INTEL: vfnmsub231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x47,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfnmsub231nepbf16 (%rip){1to32}, %zmm23, %zmm22 +# INTEL: vfnmsub231nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +0x62,0xe6,0x44,0x50,0xbe,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfnmsub231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vfnmsub231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe6,0x44,0x40,0xbe,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vfnmsub231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe6,0x44,0xc7,0xbe,0x71,0x7f + +# ATT: vfnmsub231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +# INTEL: 
vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +0x62,0xe6,0x44,0xd7,0xbe,0x72,0x80 + +# ATT: vfnmsub231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vfnmsub231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x20,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfnmsub231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +# INTEL: vfnmsub231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x27,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfnmsub231nepbf16 (%rip){1to16}, %ymm23, %ymm22 +# INTEL: vfnmsub231nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +0x62,0xe6,0x44,0x30,0xbe,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfnmsub231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vfnmsub231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe6,0x44,0x20,0xbe,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vfnmsub231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe6,0x44,0xa7,0xbe,0x71,0x7f + +# ATT: vfnmsub231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +# INTEL: vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +0x62,0xe6,0x44,0xb7,0xbe,0x72,0x80 + +# ATT: vfnmsub231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vfnmsub231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x00,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vfnmsub231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +# INTEL: vfnmsub231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x07,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vfnmsub231nepbf16 (%rip){1to8}, %xmm23, %xmm22 +# INTEL: vfnmsub231nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +0x62,0xe6,0x44,0x10,0xbe,0x35,0x00,0x00,0x00,0x00 + +# ATT: vfnmsub231nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vfnmsub231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe6,0x44,0x00,0xbe,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: 
vfnmsub231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +# INTEL: vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe6,0x44,0x87,0xbe,0x71,0x7f + +# ATT: vfnmsub231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +# INTEL: vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +0x62,0xe6,0x44,0x97,0xbe,0x72,0x80 + +# ATT: vfpclasspbf16 $123, %zmm23, %k5 +# INTEL: vfpclasspbf16 k5, zmm23, 123 +0x62,0xb3,0x7f,0x48,0x66,0xef,0x7b + +# ATT: vfpclasspbf16 $123, %zmm23, %k5 {%k7} +# INTEL: vfpclasspbf16 k5 {k7}, zmm23, 123 +0x62,0xb3,0x7f,0x4f,0x66,0xef,0x7b + +# ATT: vfpclasspbf16 $123, %ymm23, %k5 +# INTEL: vfpclasspbf16 k5, ymm23, 123 +0x62,0xb3,0x7f,0x28,0x66,0xef,0x7b + +# ATT: vfpclasspbf16 $123, %ymm23, %k5 {%k7} +# INTEL: vfpclasspbf16 k5 {k7}, ymm23, 123 +0x62,0xb3,0x7f,0x2f,0x66,0xef,0x7b + +# ATT: vfpclasspbf16 $123, %xmm23, %k5 +# INTEL: vfpclasspbf16 k5, xmm23, 123 +0x62,0xb3,0x7f,0x08,0x66,0xef,0x7b + +# ATT: vfpclasspbf16 $123, %xmm23, %k5 {%k7} +# INTEL: vfpclasspbf16 k5 {k7}, xmm23, 123 +0x62,0xb3,0x7f,0x0f,0x66,0xef,0x7b + +# ATT: vfpclasspbf16x $123, 268435456(%rbp,%r14,8), %k5 +# INTEL: vfpclasspbf16 k5, xmmword ptr [rbp + 8*r14 + 268435456], 123 +0x62,0xb3,0x7f,0x08,0x66,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b + +# ATT: vfpclasspbf16x $123, 291(%r8,%rax,4), %k5 {%k7} +# INTEL: vfpclasspbf16 k5 {k7}, xmmword ptr [r8 + 4*rax + 291], 123 +0x62,0xd3,0x7f,0x0f,0x66,0xac,0x80,0x23,0x01,0x00,0x00,0x7b + +# ATT: vfpclasspbf16 $123, (%rip){1to8}, %k5 +# INTEL: vfpclasspbf16 k5, word ptr [rip]{1to8}, 123 +0x62,0xf3,0x7f,0x18,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b + +# ATT: vfpclasspbf16x $123, -512(,%rbp,2), %k5 +# INTEL: vfpclasspbf16 k5, xmmword ptr [2*rbp - 512], 123 +0x62,0xf3,0x7f,0x08,0x66,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b + +# ATT: vfpclasspbf16x $123, 2032(%rcx), %k5 {%k7} +# INTEL: vfpclasspbf16 k5 {k7}, xmmword ptr [rcx + 2032], 123 +0x62,0xf3,0x7f,0x0f,0x66,0x69,0x7f,0x7b + +# ATT: vfpclasspbf16 $123, -256(%rdx){1to8}, 
%k5 {%k7} +# INTEL: vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to8}, 123 +0x62,0xf3,0x7f,0x1f,0x66,0x6a,0x80,0x7b + +# ATT: vfpclasspbf16 $123, (%rip){1to16}, %k5 +# INTEL: vfpclasspbf16 k5, word ptr [rip]{1to16}, 123 +0x62,0xf3,0x7f,0x38,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b + +# ATT: vfpclasspbf16y $123, -1024(,%rbp,2), %k5 +# INTEL: vfpclasspbf16 k5, ymmword ptr [2*rbp - 1024], 123 +0x62,0xf3,0x7f,0x28,0x66,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b + +# ATT: vfpclasspbf16y $123, 4064(%rcx), %k5 {%k7} +# INTEL: vfpclasspbf16 k5 {k7}, ymmword ptr [rcx + 4064], 123 +0x62,0xf3,0x7f,0x2f,0x66,0x69,0x7f,0x7b + +# ATT: vfpclasspbf16 $123, -256(%rdx){1to16}, %k5 {%k7} +# INTEL: vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to16}, 123 +0x62,0xf3,0x7f,0x3f,0x66,0x6a,0x80,0x7b + +# ATT: vfpclasspbf16 $123, (%rip){1to32}, %k5 +# INTEL: vfpclasspbf16 k5, word ptr [rip]{1to32}, 123 +0x62,0xf3,0x7f,0x58,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b + +# ATT: vfpclasspbf16z $123, -2048(,%rbp,2), %k5 +# INTEL: vfpclasspbf16 k5, zmmword ptr [2*rbp - 2048], 123 +0x62,0xf3,0x7f,0x48,0x66,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b + +# ATT: vfpclasspbf16z $123, 8128(%rcx), %k5 {%k7} +# INTEL: vfpclasspbf16 k5 {k7}, zmmword ptr [rcx + 8128], 123 +0x62,0xf3,0x7f,0x4f,0x66,0x69,0x7f,0x7b + +# ATT: vfpclasspbf16 $123, -256(%rdx){1to32}, %k5 {%k7} +# INTEL: vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to32}, 123 +0x62,0xf3,0x7f,0x5f,0x66,0x6a,0x80,0x7b + +# ATT: vgetexppbf16 %xmm23, %xmm22 +# INTEL: vgetexppbf16 xmm22, xmm23 +0x62,0xa5,0x7d,0x08,0x42,0xf7 + +# ATT: vgetexppbf16 %xmm23, %xmm22 {%k7} +# INTEL: vgetexppbf16 xmm22 {k7}, xmm23 +0x62,0xa5,0x7d,0x0f,0x42,0xf7 + +# ATT: vgetexppbf16 %xmm23, %xmm22 {%k7} {z} +# INTEL: vgetexppbf16 xmm22 {k7} {z}, xmm23 +0x62,0xa5,0x7d,0x8f,0x42,0xf7 + +# ATT: vgetexppbf16 %zmm23, %zmm22 +# INTEL: vgetexppbf16 zmm22, zmm23 +0x62,0xa5,0x7d,0x48,0x42,0xf7 + +# ATT: vgetexppbf16 %zmm23, %zmm22 {%k7} +# INTEL: vgetexppbf16 zmm22 {k7}, zmm23 +0x62,0xa5,0x7d,0x4f,0x42,0xf7 + +# 
ATT: vgetexppbf16 %zmm23, %zmm22 {%k7} {z} +# INTEL: vgetexppbf16 zmm22 {k7} {z}, zmm23 +0x62,0xa5,0x7d,0xcf,0x42,0xf7 + +# ATT: vgetexppbf16 %ymm23, %ymm22 +# INTEL: vgetexppbf16 ymm22, ymm23 +0x62,0xa5,0x7d,0x28,0x42,0xf7 + +# ATT: vgetexppbf16 %ymm23, %ymm22 {%k7} +# INTEL: vgetexppbf16 ymm22 {k7}, ymm23 +0x62,0xa5,0x7d,0x2f,0x42,0xf7 + +# ATT: vgetexppbf16 %ymm23, %ymm22 {%k7} {z} +# INTEL: vgetexppbf16 ymm22 {k7} {z}, ymm23 +0x62,0xa5,0x7d,0xaf,0x42,0xf7 + +# ATT: vgetexppbf16 268435456(%rbp,%r14,8), %xmm22 +# INTEL: vgetexppbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x7d,0x08,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vgetexppbf16 291(%r8,%rax,4), %xmm22 {%k7} +# INTEL: vgetexppbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x7d,0x0f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vgetexppbf16 (%rip){1to8}, %xmm22 +# INTEL: vgetexppbf16 xmm22, word ptr [rip]{1to8} +0x62,0xe5,0x7d,0x18,0x42,0x35,0x00,0x00,0x00,0x00 + +# ATT: vgetexppbf16 -512(,%rbp,2), %xmm22 +# INTEL: vgetexppbf16 xmm22, xmmword ptr [2*rbp - 512] +0x62,0xe5,0x7d,0x08,0x42,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vgetexppbf16 2032(%rcx), %xmm22 {%k7} {z} +# INTEL: vgetexppbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032] +0x62,0xe5,0x7d,0x8f,0x42,0x71,0x7f + +# ATT: vgetexppbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z} +# INTEL: vgetexppbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8} +0x62,0xe5,0x7d,0x9f,0x42,0x72,0x80 + +# ATT: vgetexppbf16 268435456(%rbp,%r14,8), %ymm22 +# INTEL: vgetexppbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x7d,0x28,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vgetexppbf16 291(%r8,%rax,4), %ymm22 {%k7} +# INTEL: vgetexppbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x7d,0x2f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vgetexppbf16 (%rip){1to16}, %ymm22 +# INTEL: vgetexppbf16 ymm22, word ptr [rip]{1to16} +0x62,0xe5,0x7d,0x38,0x42,0x35,0x00,0x00,0x00,0x00 + +# ATT: vgetexppbf16 -1024(,%rbp,2), %ymm22 +# INTEL: 
vgetexppbf16 ymm22, ymmword ptr [2*rbp - 1024] +0x62,0xe5,0x7d,0x28,0x42,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vgetexppbf16 4064(%rcx), %ymm22 {%k7} {z} +# INTEL: vgetexppbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064] +0x62,0xe5,0x7d,0xaf,0x42,0x71,0x7f + +# ATT: vgetexppbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z} +# INTEL: vgetexppbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16} +0x62,0xe5,0x7d,0xbf,0x42,0x72,0x80 + +# ATT: vgetexppbf16 268435456(%rbp,%r14,8), %zmm22 +# INTEL: vgetexppbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x7d,0x48,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vgetexppbf16 291(%r8,%rax,4), %zmm22 {%k7} +# INTEL: vgetexppbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x7d,0x4f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vgetexppbf16 (%rip){1to32}, %zmm22 +# INTEL: vgetexppbf16 zmm22, word ptr [rip]{1to32} +0x62,0xe5,0x7d,0x58,0x42,0x35,0x00,0x00,0x00,0x00 + +# ATT: vgetexppbf16 -2048(,%rbp,2), %zmm22 +# INTEL: vgetexppbf16 zmm22, zmmword ptr [2*rbp - 2048] +0x62,0xe5,0x7d,0x48,0x42,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vgetexppbf16 8128(%rcx), %zmm22 {%k7} {z} +# INTEL: vgetexppbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128] +0x62,0xe5,0x7d,0xcf,0x42,0x71,0x7f + +# ATT: vgetexppbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z} +# INTEL: vgetexppbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32} +0x62,0xe5,0x7d,0xdf,0x42,0x72,0x80 + +# ATT: vgetmantpbf16 $123, %zmm23, %zmm22 +# INTEL: vgetmantpbf16 zmm22, zmm23, 123 +0x62,0xa3,0x7f,0x48,0x26,0xf7,0x7b + +# ATT: vgetmantpbf16 $123, %zmm23, %zmm22 {%k7} +# INTEL: vgetmantpbf16 zmm22 {k7}, zmm23, 123 +0x62,0xa3,0x7f,0x4f,0x26,0xf7,0x7b + +# ATT: vgetmantpbf16 $123, %zmm23, %zmm22 {%k7} {z} +# INTEL: vgetmantpbf16 zmm22 {k7} {z}, zmm23, 123 +0x62,0xa3,0x7f,0xcf,0x26,0xf7,0x7b + +# ATT: vgetmantpbf16 $123, %ymm23, %ymm22 +# INTEL: vgetmantpbf16 ymm22, ymm23, 123 +0x62,0xa3,0x7f,0x28,0x26,0xf7,0x7b + +# ATT: vgetmantpbf16 $123, %ymm23, %ymm22 {%k7} +# INTEL: vgetmantpbf16 ymm22 {k7}, 
ymm23, 123 +0x62,0xa3,0x7f,0x2f,0x26,0xf7,0x7b + +# ATT: vgetmantpbf16 $123, %ymm23, %ymm22 {%k7} {z} +# INTEL: vgetmantpbf16 ymm22 {k7} {z}, ymm23, 123 +0x62,0xa3,0x7f,0xaf,0x26,0xf7,0x7b + +# ATT: vgetmantpbf16 $123, %xmm23, %xmm22 +# INTEL: vgetmantpbf16 xmm22, xmm23, 123 +0x62,0xa3,0x7f,0x08,0x26,0xf7,0x7b + +# ATT: vgetmantpbf16 $123, %xmm23, %xmm22 {%k7} +# INTEL: vgetmantpbf16 xmm22 {k7}, xmm23, 123 +0x62,0xa3,0x7f,0x0f,0x26,0xf7,0x7b + +# ATT: vgetmantpbf16 $123, %xmm23, %xmm22 {%k7} {z} +# INTEL: vgetmantpbf16 xmm22 {k7} {z}, xmm23, 123 +0x62,0xa3,0x7f,0x8f,0x26,0xf7,0x7b + +# ATT: vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %xmm22 +# INTEL: vgetmantpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123 +0x62,0xa3,0x7f,0x08,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b + +# ATT: vgetmantpbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7} +# INTEL: vgetmantpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123 +0x62,0xc3,0x7f,0x0f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b + +# ATT: vgetmantpbf16 $123, (%rip){1to8}, %xmm22 +# INTEL: vgetmantpbf16 xmm22, word ptr [rip]{1to8}, 123 +0x62,0xe3,0x7f,0x18,0x26,0x35,0x00,0x00,0x00,0x00,0x7b + +# ATT: vgetmantpbf16 $123, -512(,%rbp,2), %xmm22 +# INTEL: vgetmantpbf16 xmm22, xmmword ptr [2*rbp - 512], 123 +0x62,0xe3,0x7f,0x08,0x26,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b + +# ATT: vgetmantpbf16 $123, 2032(%rcx), %xmm22 {%k7} {z} +# INTEL: vgetmantpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123 +0x62,0xe3,0x7f,0x8f,0x26,0x71,0x7f,0x7b + +# ATT: vgetmantpbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z} +# INTEL: vgetmantpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123 +0x62,0xe3,0x7f,0x9f,0x26,0x72,0x80,0x7b + +# ATT: vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %ymm22 +# INTEL: vgetmantpbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123 +0x62,0xa3,0x7f,0x28,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b + +# ATT: vgetmantpbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7} +# INTEL: vgetmantpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123 
+0x62,0xc3,0x7f,0x2f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b + +# ATT: vgetmantpbf16 $123, (%rip){1to16}, %ymm22 +# INTEL: vgetmantpbf16 ymm22, word ptr [rip]{1to16}, 123 +0x62,0xe3,0x7f,0x38,0x26,0x35,0x00,0x00,0x00,0x00,0x7b + +# ATT: vgetmantpbf16 $123, -1024(,%rbp,2), %ymm22 +# INTEL: vgetmantpbf16 ymm22, ymmword ptr [2*rbp - 1024], 123 +0x62,0xe3,0x7f,0x28,0x26,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b + +# ATT: vgetmantpbf16 $123, 4064(%rcx), %ymm22 {%k7} {z} +# INTEL: vgetmantpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123 +0x62,0xe3,0x7f,0xaf,0x26,0x71,0x7f,0x7b + +# ATT: vgetmantpbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z} +# INTEL: vgetmantpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123 +0x62,0xe3,0x7f,0xbf,0x26,0x72,0x80,0x7b + +# ATT: vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %zmm22 +# INTEL: vgetmantpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123 +0x62,0xa3,0x7f,0x48,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b + +# ATT: vgetmantpbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7} +# INTEL: vgetmantpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123 +0x62,0xc3,0x7f,0x4f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b + +# ATT: vgetmantpbf16 $123, (%rip){1to32}, %zmm22 +# INTEL: vgetmantpbf16 zmm22, word ptr [rip]{1to32}, 123 +0x62,0xe3,0x7f,0x58,0x26,0x35,0x00,0x00,0x00,0x00,0x7b + +# ATT: vgetmantpbf16 $123, -2048(,%rbp,2), %zmm22 +# INTEL: vgetmantpbf16 zmm22, zmmword ptr [2*rbp - 2048], 123 +0x62,0xe3,0x7f,0x48,0x26,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b + +# ATT: vgetmantpbf16 $123, 8128(%rcx), %zmm22 {%k7} {z} +# INTEL: vgetmantpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123 +0x62,0xe3,0x7f,0xcf,0x26,0x71,0x7f,0x7b + +# ATT: vgetmantpbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z} +# INTEL: vgetmantpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123 +0x62,0xe3,0x7f,0xdf,0x26,0x72,0x80,0x7b + +# ATT: vmaxpbf16 %ymm24, %ymm23, %ymm22 +# INTEL: vmaxpbf16 ymm22, ymm23, ymm24 +0x62,0x85,0x45,0x20,0x5f,0xf0 + +# ATT: vmaxpbf16 %ymm24, %ymm23, %ymm22 {%k7} 
+# INTEL: vmaxpbf16 ymm22 {k7}, ymm23, ymm24 +0x62,0x85,0x45,0x27,0x5f,0xf0 + +# ATT: vmaxpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vmaxpbf16 ymm22 {k7} {z}, ymm23, ymm24 +0x62,0x85,0x45,0xa7,0x5f,0xf0 + +# ATT: vmaxpbf16 %zmm24, %zmm23, %zmm22 +# INTEL: vmaxpbf16 zmm22, zmm23, zmm24 +0x62,0x85,0x45,0x40,0x5f,0xf0 + +# ATT: vmaxpbf16 %zmm24, %zmm23, %zmm22 {%k7} +# INTEL: vmaxpbf16 zmm22 {k7}, zmm23, zmm24 +0x62,0x85,0x45,0x47,0x5f,0xf0 + +# ATT: vmaxpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +# INTEL: vmaxpbf16 zmm22 {k7} {z}, zmm23, zmm24 +0x62,0x85,0x45,0xc7,0x5f,0xf0 + +# ATT: vmaxpbf16 %xmm24, %xmm23, %xmm22 +# INTEL: vmaxpbf16 xmm22, xmm23, xmm24 +0x62,0x85,0x45,0x00,0x5f,0xf0 + +# ATT: vmaxpbf16 %xmm24, %xmm23, %xmm22 {%k7} +# INTEL: vmaxpbf16 xmm22 {k7}, xmm23, xmm24 +0x62,0x85,0x45,0x07,0x5f,0xf0 + +# ATT: vmaxpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +# INTEL: vmaxpbf16 xmm22 {k7} {z}, xmm23, xmm24 +0x62,0x85,0x45,0x87,0x5f,0xf0 + +# ATT: vmaxpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vmaxpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x45,0x40,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vmaxpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +# INTEL: vmaxpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x45,0x47,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vmaxpbf16 (%rip){1to32}, %zmm23, %zmm22 +# INTEL: vmaxpbf16 zmm22, zmm23, word ptr [rip]{1to32} +0x62,0xe5,0x45,0x50,0x5f,0x35,0x00,0x00,0x00,0x00 + +# ATT: vmaxpbf16 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vmaxpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe5,0x45,0x40,0x5f,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vmaxpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vmaxpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe5,0x45,0xc7,0x5f,0x71,0x7f + +# ATT: vmaxpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +# INTEL: vmaxpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +0x62,0xe5,0x45,0xd7,0x5f,0x72,0x80 + +# ATT: 
vmaxpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vmaxpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x45,0x20,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vmaxpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +# INTEL: vmaxpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x45,0x27,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vmaxpbf16 (%rip){1to16}, %ymm23, %ymm22 +# INTEL: vmaxpbf16 ymm22, ymm23, word ptr [rip]{1to16} +0x62,0xe5,0x45,0x30,0x5f,0x35,0x00,0x00,0x00,0x00 + +# ATT: vmaxpbf16 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vmaxpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe5,0x45,0x20,0x5f,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vmaxpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vmaxpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe5,0x45,0xa7,0x5f,0x71,0x7f + +# ATT: vmaxpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +# INTEL: vmaxpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +0x62,0xe5,0x45,0xb7,0x5f,0x72,0x80 + +# ATT: vmaxpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vmaxpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x45,0x00,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vmaxpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +# INTEL: vmaxpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x45,0x07,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vmaxpbf16 (%rip){1to8}, %xmm23, %xmm22 +# INTEL: vmaxpbf16 xmm22, xmm23, word ptr [rip]{1to8} +0x62,0xe5,0x45,0x10,0x5f,0x35,0x00,0x00,0x00,0x00 + +# ATT: vmaxpbf16 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vmaxpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe5,0x45,0x00,0x5f,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vmaxpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +# INTEL: vmaxpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe5,0x45,0x87,0x5f,0x71,0x7f + +# ATT: vmaxpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +# INTEL: vmaxpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 
256]{1to8} +0x62,0xe5,0x45,0x97,0x5f,0x72,0x80 + +# ATT: vminpbf16 %ymm24, %ymm23, %ymm22 +# INTEL: vminpbf16 ymm22, ymm23, ymm24 +0x62,0x85,0x45,0x20,0x5d,0xf0 + +# ATT: vminpbf16 %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vminpbf16 ymm22 {k7}, ymm23, ymm24 +0x62,0x85,0x45,0x27,0x5d,0xf0 + +# ATT: vminpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vminpbf16 ymm22 {k7} {z}, ymm23, ymm24 +0x62,0x85,0x45,0xa7,0x5d,0xf0 + +# ATT: vminpbf16 %zmm24, %zmm23, %zmm22 +# INTEL: vminpbf16 zmm22, zmm23, zmm24 +0x62,0x85,0x45,0x40,0x5d,0xf0 + +# ATT: vminpbf16 %zmm24, %zmm23, %zmm22 {%k7} +# INTEL: vminpbf16 zmm22 {k7}, zmm23, zmm24 +0x62,0x85,0x45,0x47,0x5d,0xf0 + +# ATT: vminpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +# INTEL: vminpbf16 zmm22 {k7} {z}, zmm23, zmm24 +0x62,0x85,0x45,0xc7,0x5d,0xf0 + +# ATT: vminpbf16 %xmm24, %xmm23, %xmm22 +# INTEL: vminpbf16 xmm22, xmm23, xmm24 +0x62,0x85,0x45,0x00,0x5d,0xf0 + +# ATT: vminpbf16 %xmm24, %xmm23, %xmm22 {%k7} +# INTEL: vminpbf16 xmm22 {k7}, xmm23, xmm24 +0x62,0x85,0x45,0x07,0x5d,0xf0 + +# ATT: vminpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +# INTEL: vminpbf16 xmm22 {k7} {z}, xmm23, xmm24 +0x62,0x85,0x45,0x87,0x5d,0xf0 + +# ATT: vminpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vminpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x45,0x40,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vminpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +# INTEL: vminpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x45,0x47,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vminpbf16 (%rip){1to32}, %zmm23, %zmm22 +# INTEL: vminpbf16 zmm22, zmm23, word ptr [rip]{1to32} +0x62,0xe5,0x45,0x50,0x5d,0x35,0x00,0x00,0x00,0x00 + +# ATT: vminpbf16 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vminpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe5,0x45,0x40,0x5d,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vminpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vminpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] 
+0x62,0xe5,0x45,0xc7,0x5d,0x71,0x7f + +# ATT: vminpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +# INTEL: vminpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +0x62,0xe5,0x45,0xd7,0x5d,0x72,0x80 + +# ATT: vminpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vminpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x45,0x20,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vminpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +# INTEL: vminpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x45,0x27,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vminpbf16 (%rip){1to16}, %ymm23, %ymm22 +# INTEL: vminpbf16 ymm22, ymm23, word ptr [rip]{1to16} +0x62,0xe5,0x45,0x30,0x5d,0x35,0x00,0x00,0x00,0x00 + +# ATT: vminpbf16 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vminpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe5,0x45,0x20,0x5d,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vminpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vminpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe5,0x45,0xa7,0x5d,0x71,0x7f + +# ATT: vminpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +# INTEL: vminpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +0x62,0xe5,0x45,0xb7,0x5d,0x72,0x80 + +# ATT: vminpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vminpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x45,0x00,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vminpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +# INTEL: vminpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x45,0x07,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vminpbf16 (%rip){1to8}, %xmm23, %xmm22 +# INTEL: vminpbf16 xmm22, xmm23, word ptr [rip]{1to8} +0x62,0xe5,0x45,0x10,0x5d,0x35,0x00,0x00,0x00,0x00 + +# ATT: vminpbf16 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vminpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe5,0x45,0x00,0x5d,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vminpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +# INTEL: 
vminpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe5,0x45,0x87,0x5d,0x71,0x7f + +# ATT: vminpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +# INTEL: vminpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +0x62,0xe5,0x45,0x97,0x5d,0x72,0x80 + +# ATT: vmulnepbf16 %ymm24, %ymm23, %ymm22 +# INTEL: vmulnepbf16 ymm22, ymm23, ymm24 +0x62,0x85,0x45,0x20,0x59,0xf0 + +# ATT: vmulnepbf16 %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vmulnepbf16 ymm22 {k7}, ymm23, ymm24 +0x62,0x85,0x45,0x27,0x59,0xf0 + +# ATT: vmulnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vmulnepbf16 ymm22 {k7} {z}, ymm23, ymm24 +0x62,0x85,0x45,0xa7,0x59,0xf0 + +# ATT: vmulnepbf16 %zmm24, %zmm23, %zmm22 +# INTEL: vmulnepbf16 zmm22, zmm23, zmm24 +0x62,0x85,0x45,0x40,0x59,0xf0 + +# ATT: vmulnepbf16 %zmm24, %zmm23, %zmm22 {%k7} +# INTEL: vmulnepbf16 zmm22 {k7}, zmm23, zmm24 +0x62,0x85,0x45,0x47,0x59,0xf0 + +# ATT: vmulnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +# INTEL: vmulnepbf16 zmm22 {k7} {z}, zmm23, zmm24 +0x62,0x85,0x45,0xc7,0x59,0xf0 + +# ATT: vmulnepbf16 %xmm24, %xmm23, %xmm22 +# INTEL: vmulnepbf16 xmm22, xmm23, xmm24 +0x62,0x85,0x45,0x00,0x59,0xf0 + +# ATT: vmulnepbf16 %xmm24, %xmm23, %xmm22 {%k7} +# INTEL: vmulnepbf16 xmm22 {k7}, xmm23, xmm24 +0x62,0x85,0x45,0x07,0x59,0xf0 + +# ATT: vmulnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +# INTEL: vmulnepbf16 xmm22 {k7} {z}, xmm23, xmm24 +0x62,0x85,0x45,0x87,0x59,0xf0 + +# ATT: vmulnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vmulnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x45,0x40,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vmulnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +# INTEL: vmulnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x45,0x47,0x59,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vmulnepbf16 (%rip){1to32}, %zmm23, %zmm22 +# INTEL: vmulnepbf16 zmm22, zmm23, word ptr [rip]{1to32} +0x62,0xe5,0x45,0x50,0x59,0x35,0x00,0x00,0x00,0x00 + +# ATT: vmulnepbf16 -2048(,%rbp,2), 
%zmm23, %zmm22 +# INTEL: vmulnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe5,0x45,0x40,0x59,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vmulnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vmulnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe5,0x45,0xc7,0x59,0x71,0x7f + +# ATT: vmulnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +# INTEL: vmulnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +0x62,0xe5,0x45,0xd7,0x59,0x72,0x80 + +# ATT: vmulnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vmulnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x45,0x20,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vmulnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +# INTEL: vmulnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x45,0x27,0x59,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vmulnepbf16 (%rip){1to16}, %ymm23, %ymm22 +# INTEL: vmulnepbf16 ymm22, ymm23, word ptr [rip]{1to16} +0x62,0xe5,0x45,0x30,0x59,0x35,0x00,0x00,0x00,0x00 + +# ATT: vmulnepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vmulnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe5,0x45,0x20,0x59,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vmulnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vmulnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe5,0x45,0xa7,0x59,0x71,0x7f + +# ATT: vmulnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +# INTEL: vmulnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +0x62,0xe5,0x45,0xb7,0x59,0x72,0x80 + +# ATT: vmulnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vmulnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x45,0x00,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vmulnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +# INTEL: vmulnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x45,0x07,0x59,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vmulnepbf16 (%rip){1to8}, %xmm23, %xmm22 +# INTEL: vmulnepbf16 xmm22, xmm23, word 
ptr [rip]{1to8} +0x62,0xe5,0x45,0x10,0x59,0x35,0x00,0x00,0x00,0x00 + +# ATT: vmulnepbf16 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vmulnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe5,0x45,0x00,0x59,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vmulnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +# INTEL: vmulnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe5,0x45,0x87,0x59,0x71,0x7f + +# ATT: vmulnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +# INTEL: vmulnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +0x62,0xe5,0x45,0x97,0x59,0x72,0x80 + +# ATT: vrcppbf16 %xmm23, %xmm22 +# INTEL: vrcppbf16 xmm22, xmm23 +0x62,0xa6,0x7c,0x08,0x4c,0xf7 + +# ATT: vrcppbf16 %xmm23, %xmm22 {%k7} +# INTEL: vrcppbf16 xmm22 {k7}, xmm23 +0x62,0xa6,0x7c,0x0f,0x4c,0xf7 + +# ATT: vrcppbf16 %xmm23, %xmm22 {%k7} {z} +# INTEL: vrcppbf16 xmm22 {k7} {z}, xmm23 +0x62,0xa6,0x7c,0x8f,0x4c,0xf7 + +# ATT: vrcppbf16 %zmm23, %zmm22 +# INTEL: vrcppbf16 zmm22, zmm23 +0x62,0xa6,0x7c,0x48,0x4c,0xf7 + +# ATT: vrcppbf16 %zmm23, %zmm22 {%k7} +# INTEL: vrcppbf16 zmm22 {k7}, zmm23 +0x62,0xa6,0x7c,0x4f,0x4c,0xf7 + +# ATT: vrcppbf16 %zmm23, %zmm22 {%k7} {z} +# INTEL: vrcppbf16 zmm22 {k7} {z}, zmm23 +0x62,0xa6,0x7c,0xcf,0x4c,0xf7 + +# ATT: vrcppbf16 %ymm23, %ymm22 +# INTEL: vrcppbf16 ymm22, ymm23 +0x62,0xa6,0x7c,0x28,0x4c,0xf7 + +# ATT: vrcppbf16 %ymm23, %ymm22 {%k7} +# INTEL: vrcppbf16 ymm22 {k7}, ymm23 +0x62,0xa6,0x7c,0x2f,0x4c,0xf7 + +# ATT: vrcppbf16 %ymm23, %ymm22 {%k7} {z} +# INTEL: vrcppbf16 ymm22 {k7} {z}, ymm23 +0x62,0xa6,0x7c,0xaf,0x4c,0xf7 + +# ATT: vrcppbf16 268435456(%rbp,%r14,8), %xmm22 +# INTEL: vrcppbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x7c,0x08,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vrcppbf16 291(%r8,%rax,4), %xmm22 {%k7} +# INTEL: vrcppbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x7c,0x0f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vrcppbf16 (%rip){1to8}, %xmm22 +# INTEL: vrcppbf16 xmm22, word ptr [rip]{1to8} 
+0x62,0xe6,0x7c,0x18,0x4c,0x35,0x00,0x00,0x00,0x00 + +# ATT: vrcppbf16 -512(,%rbp,2), %xmm22 +# INTEL: vrcppbf16 xmm22, xmmword ptr [2*rbp - 512] +0x62,0xe6,0x7c,0x08,0x4c,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vrcppbf16 2032(%rcx), %xmm22 {%k7} {z} +# INTEL: vrcppbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032] +0x62,0xe6,0x7c,0x8f,0x4c,0x71,0x7f + +# ATT: vrcppbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z} +# INTEL: vrcppbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8} +0x62,0xe6,0x7c,0x9f,0x4c,0x72,0x80 + +# ATT: vrcppbf16 268435456(%rbp,%r14,8), %ymm22 +# INTEL: vrcppbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x7c,0x28,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vrcppbf16 291(%r8,%rax,4), %ymm22 {%k7} +# INTEL: vrcppbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x7c,0x2f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vrcppbf16 (%rip){1to16}, %ymm22 +# INTEL: vrcppbf16 ymm22, word ptr [rip]{1to16} +0x62,0xe6,0x7c,0x38,0x4c,0x35,0x00,0x00,0x00,0x00 + +# ATT: vrcppbf16 -1024(,%rbp,2), %ymm22 +# INTEL: vrcppbf16 ymm22, ymmword ptr [2*rbp - 1024] +0x62,0xe6,0x7c,0x28,0x4c,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vrcppbf16 4064(%rcx), %ymm22 {%k7} {z} +# INTEL: vrcppbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064] +0x62,0xe6,0x7c,0xaf,0x4c,0x71,0x7f + +# ATT: vrcppbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z} +# INTEL: vrcppbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16} +0x62,0xe6,0x7c,0xbf,0x4c,0x72,0x80 + +# ATT: vrcppbf16 268435456(%rbp,%r14,8), %zmm22 +# INTEL: vrcppbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x7c,0x48,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vrcppbf16 291(%r8,%rax,4), %zmm22 {%k7} +# INTEL: vrcppbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x7c,0x4f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vrcppbf16 (%rip){1to32}, %zmm22 +# INTEL: vrcppbf16 zmm22, word ptr [rip]{1to32} +0x62,0xe6,0x7c,0x58,0x4c,0x35,0x00,0x00,0x00,0x00 + +# ATT: vrcppbf16 -2048(,%rbp,2), %zmm22 +# INTEL: vrcppbf16 zmm22, 
zmmword ptr [2*rbp - 2048] +0x62,0xe6,0x7c,0x48,0x4c,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vrcppbf16 8128(%rcx), %zmm22 {%k7} {z} +# INTEL: vrcppbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128] +0x62,0xe6,0x7c,0xcf,0x4c,0x71,0x7f + +# ATT: vrcppbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z} +# INTEL: vrcppbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32} +0x62,0xe6,0x7c,0xdf,0x4c,0x72,0x80 + +# ATT: vreducenepbf16 $123, %zmm23, %zmm22 +# INTEL: vreducenepbf16 zmm22, zmm23, 123 +0x62,0xa3,0x7f,0x48,0x56,0xf7,0x7b + +# ATT: vreducenepbf16 $123, %zmm23, %zmm22 {%k7} +# INTEL: vreducenepbf16 zmm22 {k7}, zmm23, 123 +0x62,0xa3,0x7f,0x4f,0x56,0xf7,0x7b + +# ATT: vreducenepbf16 $123, %zmm23, %zmm22 {%k7} {z} +# INTEL: vreducenepbf16 zmm22 {k7} {z}, zmm23, 123 +0x62,0xa3,0x7f,0xcf,0x56,0xf7,0x7b + +# ATT: vreducenepbf16 $123, %ymm23, %ymm22 +# INTEL: vreducenepbf16 ymm22, ymm23, 123 +0x62,0xa3,0x7f,0x28,0x56,0xf7,0x7b + +# ATT: vreducenepbf16 $123, %ymm23, %ymm22 {%k7} +# INTEL: vreducenepbf16 ymm22 {k7}, ymm23, 123 +0x62,0xa3,0x7f,0x2f,0x56,0xf7,0x7b + +# ATT: vreducenepbf16 $123, %ymm23, %ymm22 {%k7} {z} +# INTEL: vreducenepbf16 ymm22 {k7} {z}, ymm23, 123 +0x62,0xa3,0x7f,0xaf,0x56,0xf7,0x7b + +# ATT: vreducenepbf16 $123, %xmm23, %xmm22 +# INTEL: vreducenepbf16 xmm22, xmm23, 123 +0x62,0xa3,0x7f,0x08,0x56,0xf7,0x7b + +# ATT: vreducenepbf16 $123, %xmm23, %xmm22 {%k7} +# INTEL: vreducenepbf16 xmm22 {k7}, xmm23, 123 +0x62,0xa3,0x7f,0x0f,0x56,0xf7,0x7b + +# ATT: vreducenepbf16 $123, %xmm23, %xmm22 {%k7} {z} +# INTEL: vreducenepbf16 xmm22 {k7} {z}, xmm23, 123 +0x62,0xa3,0x7f,0x8f,0x56,0xf7,0x7b + +# ATT: vreducenepbf16 $123, 268435456(%rbp,%r14,8), %xmm22 +# INTEL: vreducenepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123 +0x62,0xa3,0x7f,0x08,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b + +# ATT: vreducenepbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7} +# INTEL: vreducenepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123 +0x62,0xc3,0x7f,0x0f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b + +# 
ATT: vreducenepbf16 $123, (%rip){1to8}, %xmm22 +# INTEL: vreducenepbf16 xmm22, word ptr [rip]{1to8}, 123 +0x62,0xe3,0x7f,0x18,0x56,0x35,0x00,0x00,0x00,0x00,0x7b + +# ATT: vreducenepbf16 $123, -512(,%rbp,2), %xmm22 +# INTEL: vreducenepbf16 xmm22, xmmword ptr [2*rbp - 512], 123 +0x62,0xe3,0x7f,0x08,0x56,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b + +# ATT: vreducenepbf16 $123, 2032(%rcx), %xmm22 {%k7} {z} +# INTEL: vreducenepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123 +0x62,0xe3,0x7f,0x8f,0x56,0x71,0x7f,0x7b + +# ATT: vreducenepbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z} +# INTEL: vreducenepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123 +0x62,0xe3,0x7f,0x9f,0x56,0x72,0x80,0x7b + +# ATT: vreducenepbf16 $123, 268435456(%rbp,%r14,8), %ymm22 +# INTEL: vreducenepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123 +0x62,0xa3,0x7f,0x28,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b + +# ATT: vreducenepbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7} +# INTEL: vreducenepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123 +0x62,0xc3,0x7f,0x2f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b + +# ATT: vreducenepbf16 $123, (%rip){1to16}, %ymm22 +# INTEL: vreducenepbf16 ymm22, word ptr [rip]{1to16}, 123 +0x62,0xe3,0x7f,0x38,0x56,0x35,0x00,0x00,0x00,0x00,0x7b + +# ATT: vreducenepbf16 $123, -1024(,%rbp,2), %ymm22 +# INTEL: vreducenepbf16 ymm22, ymmword ptr [2*rbp - 1024], 123 +0x62,0xe3,0x7f,0x28,0x56,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b + +# ATT: vreducenepbf16 $123, 4064(%rcx), %ymm22 {%k7} {z} +# INTEL: vreducenepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123 +0x62,0xe3,0x7f,0xaf,0x56,0x71,0x7f,0x7b + +# ATT: vreducenepbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z} +# INTEL: vreducenepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123 +0x62,0xe3,0x7f,0xbf,0x56,0x72,0x80,0x7b + +# ATT: vreducenepbf16 $123, 268435456(%rbp,%r14,8), %zmm22 +# INTEL: vreducenepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123 +0x62,0xa3,0x7f,0x48,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b + +# ATT: 
vreducenepbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7} +# INTEL: vreducenepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123 +0x62,0xc3,0x7f,0x4f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b + +# ATT: vreducenepbf16 $123, (%rip){1to32}, %zmm22 +# INTEL: vreducenepbf16 zmm22, word ptr [rip]{1to32}, 123 +0x62,0xe3,0x7f,0x58,0x56,0x35,0x00,0x00,0x00,0x00,0x7b + +# ATT: vreducenepbf16 $123, -2048(,%rbp,2), %zmm22 +# INTEL: vreducenepbf16 zmm22, zmmword ptr [2*rbp - 2048], 123 +0x62,0xe3,0x7f,0x48,0x56,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b + +# ATT: vreducenepbf16 $123, 8128(%rcx), %zmm22 {%k7} {z} +# INTEL: vreducenepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123 +0x62,0xe3,0x7f,0xcf,0x56,0x71,0x7f,0x7b + +# ATT: vreducenepbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z} +# INTEL: vreducenepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123 +0x62,0xe3,0x7f,0xdf,0x56,0x72,0x80,0x7b + +# ATT: vrndscalenepbf16 $123, %zmm23, %zmm22 +# INTEL: vrndscalenepbf16 zmm22, zmm23, 123 +0x62,0xa3,0x7f,0x48,0x08,0xf7,0x7b + +# ATT: vrndscalenepbf16 $123, %zmm23, %zmm22 {%k7} +# INTEL: vrndscalenepbf16 zmm22 {k7}, zmm23, 123 +0x62,0xa3,0x7f,0x4f,0x08,0xf7,0x7b + +# ATT: vrndscalenepbf16 $123, %zmm23, %zmm22 {%k7} {z} +# INTEL: vrndscalenepbf16 zmm22 {k7} {z}, zmm23, 123 +0x62,0xa3,0x7f,0xcf,0x08,0xf7,0x7b + +# ATT: vrndscalenepbf16 $123, %ymm23, %ymm22 +# INTEL: vrndscalenepbf16 ymm22, ymm23, 123 +0x62,0xa3,0x7f,0x28,0x08,0xf7,0x7b + +# ATT: vrndscalenepbf16 $123, %ymm23, %ymm22 {%k7} +# INTEL: vrndscalenepbf16 ymm22 {k7}, ymm23, 123 +0x62,0xa3,0x7f,0x2f,0x08,0xf7,0x7b + +# ATT: vrndscalenepbf16 $123, %ymm23, %ymm22 {%k7} {z} +# INTEL: vrndscalenepbf16 ymm22 {k7} {z}, ymm23, 123 +0x62,0xa3,0x7f,0xaf,0x08,0xf7,0x7b + +# ATT: vrndscalenepbf16 $123, %xmm23, %xmm22 +# INTEL: vrndscalenepbf16 xmm22, xmm23, 123 +0x62,0xa3,0x7f,0x08,0x08,0xf7,0x7b + +# ATT: vrndscalenepbf16 $123, %xmm23, %xmm22 {%k7} +# INTEL: vrndscalenepbf16 xmm22 {k7}, xmm23, 123 +0x62,0xa3,0x7f,0x0f,0x08,0xf7,0x7b + +# ATT: 
vrndscalenepbf16 $123, %xmm23, %xmm22 {%k7} {z} +# INTEL: vrndscalenepbf16 xmm22 {k7} {z}, xmm23, 123 +0x62,0xa3,0x7f,0x8f,0x08,0xf7,0x7b + +# ATT: vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %xmm22 +# INTEL: vrndscalenepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123 +0x62,0xa3,0x7f,0x08,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b + +# ATT: vrndscalenepbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7} +# INTEL: vrndscalenepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123 +0x62,0xc3,0x7f,0x0f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b + +# ATT: vrndscalenepbf16 $123, (%rip){1to8}, %xmm22 +# INTEL: vrndscalenepbf16 xmm22, word ptr [rip]{1to8}, 123 +0x62,0xe3,0x7f,0x18,0x08,0x35,0x00,0x00,0x00,0x00,0x7b + +# ATT: vrndscalenepbf16 $123, -512(,%rbp,2), %xmm22 +# INTEL: vrndscalenepbf16 xmm22, xmmword ptr [2*rbp - 512], 123 +0x62,0xe3,0x7f,0x08,0x08,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b + +# ATT: vrndscalenepbf16 $123, 2032(%rcx), %xmm22 {%k7} {z} +# INTEL: vrndscalenepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123 +0x62,0xe3,0x7f,0x8f,0x08,0x71,0x7f,0x7b + +# ATT: vrndscalenepbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z} +# INTEL: vrndscalenepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123 +0x62,0xe3,0x7f,0x9f,0x08,0x72,0x80,0x7b + +# ATT: vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %ymm22 +# INTEL: vrndscalenepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123 +0x62,0xa3,0x7f,0x28,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b + +# ATT: vrndscalenepbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7} +# INTEL: vrndscalenepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123 +0x62,0xc3,0x7f,0x2f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b + +# ATT: vrndscalenepbf16 $123, (%rip){1to16}, %ymm22 +# INTEL: vrndscalenepbf16 ymm22, word ptr [rip]{1to16}, 123 +0x62,0xe3,0x7f,0x38,0x08,0x35,0x00,0x00,0x00,0x00,0x7b + +# ATT: vrndscalenepbf16 $123, -1024(,%rbp,2), %ymm22 +# INTEL: vrndscalenepbf16 ymm22, ymmword ptr [2*rbp - 1024], 123 
+0x62,0xe3,0x7f,0x28,0x08,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b + +# ATT: vrndscalenepbf16 $123, 4064(%rcx), %ymm22 {%k7} {z} +# INTEL: vrndscalenepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123 +0x62,0xe3,0x7f,0xaf,0x08,0x71,0x7f,0x7b + +# ATT: vrndscalenepbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z} +# INTEL: vrndscalenepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123 +0x62,0xe3,0x7f,0xbf,0x08,0x72,0x80,0x7b + +# ATT: vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %zmm22 +# INTEL: vrndscalenepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123 +0x62,0xa3,0x7f,0x48,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b + +# ATT: vrndscalenepbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7} +# INTEL: vrndscalenepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123 +0x62,0xc3,0x7f,0x4f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b + +# ATT: vrndscalenepbf16 $123, (%rip){1to32}, %zmm22 +# INTEL: vrndscalenepbf16 zmm22, word ptr [rip]{1to32}, 123 +0x62,0xe3,0x7f,0x58,0x08,0x35,0x00,0x00,0x00,0x00,0x7b + +# ATT: vrndscalenepbf16 $123, -2048(,%rbp,2), %zmm22 +# INTEL: vrndscalenepbf16 zmm22, zmmword ptr [2*rbp - 2048], 123 +0x62,0xe3,0x7f,0x48,0x08,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b + +# ATT: vrndscalenepbf16 $123, 8128(%rcx), %zmm22 {%k7} {z} +# INTEL: vrndscalenepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123 +0x62,0xe3,0x7f,0xcf,0x08,0x71,0x7f,0x7b + +# ATT: vrndscalenepbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z} +# INTEL: vrndscalenepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123 +0x62,0xe3,0x7f,0xdf,0x08,0x72,0x80,0x7b + +# ATT: vrsqrtpbf16 %xmm23, %xmm22 +# INTEL: vrsqrtpbf16 xmm22, xmm23 +0x62,0xa6,0x7c,0x08,0x4e,0xf7 + +# ATT: vrsqrtpbf16 %xmm23, %xmm22 {%k7} +# INTEL: vrsqrtpbf16 xmm22 {k7}, xmm23 +0x62,0xa6,0x7c,0x0f,0x4e,0xf7 + +# ATT: vrsqrtpbf16 %xmm23, %xmm22 {%k7} {z} +# INTEL: vrsqrtpbf16 xmm22 {k7} {z}, xmm23 +0x62,0xa6,0x7c,0x8f,0x4e,0xf7 + +# ATT: vrsqrtpbf16 %zmm23, %zmm22 +# INTEL: vrsqrtpbf16 zmm22, zmm23 +0x62,0xa6,0x7c,0x48,0x4e,0xf7 + +# ATT: 
vrsqrtpbf16 %zmm23, %zmm22 {%k7} +# INTEL: vrsqrtpbf16 zmm22 {k7}, zmm23 +0x62,0xa6,0x7c,0x4f,0x4e,0xf7 + +# ATT: vrsqrtpbf16 %zmm23, %zmm22 {%k7} {z} +# INTEL: vrsqrtpbf16 zmm22 {k7} {z}, zmm23 +0x62,0xa6,0x7c,0xcf,0x4e,0xf7 + +# ATT: vrsqrtpbf16 %ymm23, %ymm22 +# INTEL: vrsqrtpbf16 ymm22, ymm23 +0x62,0xa6,0x7c,0x28,0x4e,0xf7 + +# ATT: vrsqrtpbf16 %ymm23, %ymm22 {%k7} +# INTEL: vrsqrtpbf16 ymm22 {k7}, ymm23 +0x62,0xa6,0x7c,0x2f,0x4e,0xf7 + +# ATT: vrsqrtpbf16 %ymm23, %ymm22 {%k7} {z} +# INTEL: vrsqrtpbf16 ymm22 {k7} {z}, ymm23 +0x62,0xa6,0x7c,0xaf,0x4e,0xf7 + +# ATT: vrsqrtpbf16 268435456(%rbp,%r14,8), %xmm22 +# INTEL: vrsqrtpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x7c,0x08,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vrsqrtpbf16 291(%r8,%rax,4), %xmm22 {%k7} +# INTEL: vrsqrtpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x7c,0x0f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vrsqrtpbf16 (%rip){1to8}, %xmm22 +# INTEL: vrsqrtpbf16 xmm22, word ptr [rip]{1to8} +0x62,0xe6,0x7c,0x18,0x4e,0x35,0x00,0x00,0x00,0x00 + +# ATT: vrsqrtpbf16 -512(,%rbp,2), %xmm22 +# INTEL: vrsqrtpbf16 xmm22, xmmword ptr [2*rbp - 512] +0x62,0xe6,0x7c,0x08,0x4e,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vrsqrtpbf16 2032(%rcx), %xmm22 {%k7} {z} +# INTEL: vrsqrtpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032] +0x62,0xe6,0x7c,0x8f,0x4e,0x71,0x7f + +# ATT: vrsqrtpbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z} +# INTEL: vrsqrtpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8} +0x62,0xe6,0x7c,0x9f,0x4e,0x72,0x80 + +# ATT: vrsqrtpbf16 268435456(%rbp,%r14,8), %ymm22 +# INTEL: vrsqrtpbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x7c,0x28,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vrsqrtpbf16 291(%r8,%rax,4), %ymm22 {%k7} +# INTEL: vrsqrtpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x7c,0x2f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vrsqrtpbf16 (%rip){1to16}, %ymm22 +# INTEL: vrsqrtpbf16 ymm22, word ptr [rip]{1to16} 
+0x62,0xe6,0x7c,0x38,0x4e,0x35,0x00,0x00,0x00,0x00 + +# ATT: vrsqrtpbf16 -1024(,%rbp,2), %ymm22 +# INTEL: vrsqrtpbf16 ymm22, ymmword ptr [2*rbp - 1024] +0x62,0xe6,0x7c,0x28,0x4e,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vrsqrtpbf16 4064(%rcx), %ymm22 {%k7} {z} +# INTEL: vrsqrtpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064] +0x62,0xe6,0x7c,0xaf,0x4e,0x71,0x7f + +# ATT: vrsqrtpbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z} +# INTEL: vrsqrtpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16} +0x62,0xe6,0x7c,0xbf,0x4e,0x72,0x80 + +# ATT: vrsqrtpbf16 268435456(%rbp,%r14,8), %zmm22 +# INTEL: vrsqrtpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x7c,0x48,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vrsqrtpbf16 291(%r8,%rax,4), %zmm22 {%k7} +# INTEL: vrsqrtpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x7c,0x4f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vrsqrtpbf16 (%rip){1to32}, %zmm22 +# INTEL: vrsqrtpbf16 zmm22, word ptr [rip]{1to32} +0x62,0xe6,0x7c,0x58,0x4e,0x35,0x00,0x00,0x00,0x00 + +# ATT: vrsqrtpbf16 -2048(,%rbp,2), %zmm22 +# INTEL: vrsqrtpbf16 zmm22, zmmword ptr [2*rbp - 2048] +0x62,0xe6,0x7c,0x48,0x4e,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vrsqrtpbf16 8128(%rcx), %zmm22 {%k7} {z} +# INTEL: vrsqrtpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128] +0x62,0xe6,0x7c,0xcf,0x4e,0x71,0x7f + +# ATT: vrsqrtpbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z} +# INTEL: vrsqrtpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32} +0x62,0xe6,0x7c,0xdf,0x4e,0x72,0x80 + +# ATT: vscalefpbf16 %ymm24, %ymm23, %ymm22 +# INTEL: vscalefpbf16 ymm22, ymm23, ymm24 +0x62,0x86,0x44,0x20,0x2c,0xf0 + +# ATT: vscalefpbf16 %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vscalefpbf16 ymm22 {k7}, ymm23, ymm24 +0x62,0x86,0x44,0x27,0x2c,0xf0 + +# ATT: vscalefpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vscalefpbf16 ymm22 {k7} {z}, ymm23, ymm24 +0x62,0x86,0x44,0xa7,0x2c,0xf0 + +# ATT: vscalefpbf16 %zmm24, %zmm23, %zmm22 +# INTEL: vscalefpbf16 zmm22, zmm23, zmm24 +0x62,0x86,0x44,0x40,0x2c,0xf0 + +# ATT: 
vscalefpbf16 %zmm24, %zmm23, %zmm22 {%k7} +# INTEL: vscalefpbf16 zmm22 {k7}, zmm23, zmm24 +0x62,0x86,0x44,0x47,0x2c,0xf0 + +# ATT: vscalefpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +# INTEL: vscalefpbf16 zmm22 {k7} {z}, zmm23, zmm24 +0x62,0x86,0x44,0xc7,0x2c,0xf0 + +# ATT: vscalefpbf16 %xmm24, %xmm23, %xmm22 +# INTEL: vscalefpbf16 xmm22, xmm23, xmm24 +0x62,0x86,0x44,0x00,0x2c,0xf0 + +# ATT: vscalefpbf16 %xmm24, %xmm23, %xmm22 {%k7} +# INTEL: vscalefpbf16 xmm22 {k7}, xmm23, xmm24 +0x62,0x86,0x44,0x07,0x2c,0xf0 + +# ATT: vscalefpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +# INTEL: vscalefpbf16 xmm22 {k7} {z}, xmm23, xmm24 +0x62,0x86,0x44,0x87,0x2c,0xf0 + +# ATT: vscalefpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vscalefpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x40,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vscalefpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +# INTEL: vscalefpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x47,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vscalefpbf16 (%rip){1to32}, %zmm23, %zmm22 +# INTEL: vscalefpbf16 zmm22, zmm23, word ptr [rip]{1to32} +0x62,0xe6,0x44,0x50,0x2c,0x35,0x00,0x00,0x00,0x00 + +# ATT: vscalefpbf16 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vscalefpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe6,0x44,0x40,0x2c,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vscalefpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vscalefpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe6,0x44,0xc7,0x2c,0x71,0x7f + +# ATT: vscalefpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +# INTEL: vscalefpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +0x62,0xe6,0x44,0xd7,0x2c,0x72,0x80 + +# ATT: vscalefpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vscalefpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x20,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vscalefpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +# INTEL: vscalefpbf16 ymm22 
{k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x27,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vscalefpbf16 (%rip){1to16}, %ymm23, %ymm22 +# INTEL: vscalefpbf16 ymm22, ymm23, word ptr [rip]{1to16} +0x62,0xe6,0x44,0x30,0x2c,0x35,0x00,0x00,0x00,0x00 + +# ATT: vscalefpbf16 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vscalefpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe6,0x44,0x20,0x2c,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vscalefpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vscalefpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe6,0x44,0xa7,0x2c,0x71,0x7f + +# ATT: vscalefpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +# INTEL: vscalefpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +0x62,0xe6,0x44,0xb7,0x2c,0x72,0x80 + +# ATT: vscalefpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vscalefpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa6,0x44,0x00,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vscalefpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +# INTEL: vscalefpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc6,0x44,0x07,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vscalefpbf16 (%rip){1to8}, %xmm23, %xmm22 +# INTEL: vscalefpbf16 xmm22, xmm23, word ptr [rip]{1to8} +0x62,0xe6,0x44,0x10,0x2c,0x35,0x00,0x00,0x00,0x00 + +# ATT: vscalefpbf16 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vscalefpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe6,0x44,0x00,0x2c,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vscalefpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +# INTEL: vscalefpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe6,0x44,0x87,0x2c,0x71,0x7f + +# ATT: vscalefpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +# INTEL: vscalefpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +0x62,0xe6,0x44,0x97,0x2c,0x72,0x80 + +# ATT: vsqrtnepbf16 %xmm23, %xmm22 +# INTEL: vsqrtnepbf16 xmm22, xmm23 +0x62,0xa5,0x7d,0x08,0x51,0xf7 + +# ATT: vsqrtnepbf16 %xmm23, %xmm22 {%k7} +# INTEL: 
vsqrtnepbf16 xmm22 {k7}, xmm23 +0x62,0xa5,0x7d,0x0f,0x51,0xf7 + +# ATT: vsqrtnepbf16 %xmm23, %xmm22 {%k7} {z} +# INTEL: vsqrtnepbf16 xmm22 {k7} {z}, xmm23 +0x62,0xa5,0x7d,0x8f,0x51,0xf7 + +# ATT: vsqrtnepbf16 %zmm23, %zmm22 +# INTEL: vsqrtnepbf16 zmm22, zmm23 +0x62,0xa5,0x7d,0x48,0x51,0xf7 + +# ATT: vsqrtnepbf16 %zmm23, %zmm22 {%k7} +# INTEL: vsqrtnepbf16 zmm22 {k7}, zmm23 +0x62,0xa5,0x7d,0x4f,0x51,0xf7 + +# ATT: vsqrtnepbf16 %zmm23, %zmm22 {%k7} {z} +# INTEL: vsqrtnepbf16 zmm22 {k7} {z}, zmm23 +0x62,0xa5,0x7d,0xcf,0x51,0xf7 + +# ATT: vsqrtnepbf16 %ymm23, %ymm22 +# INTEL: vsqrtnepbf16 ymm22, ymm23 +0x62,0xa5,0x7d,0x28,0x51,0xf7 + +# ATT: vsqrtnepbf16 %ymm23, %ymm22 {%k7} +# INTEL: vsqrtnepbf16 ymm22 {k7}, ymm23 +0x62,0xa5,0x7d,0x2f,0x51,0xf7 + +# ATT: vsqrtnepbf16 %ymm23, %ymm22 {%k7} {z} +# INTEL: vsqrtnepbf16 ymm22 {k7} {z}, ymm23 +0x62,0xa5,0x7d,0xaf,0x51,0xf7 + +# ATT: vsqrtnepbf16 268435456(%rbp,%r14,8), %xmm22 +# INTEL: vsqrtnepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x7d,0x08,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsqrtnepbf16 291(%r8,%rax,4), %xmm22 {%k7} +# INTEL: vsqrtnepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x7d,0x0f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsqrtnepbf16 (%rip){1to8}, %xmm22 +# INTEL: vsqrtnepbf16 xmm22, word ptr [rip]{1to8} +0x62,0xe5,0x7d,0x18,0x51,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsqrtnepbf16 -512(,%rbp,2), %xmm22 +# INTEL: vsqrtnepbf16 xmm22, xmmword ptr [2*rbp - 512] +0x62,0xe5,0x7d,0x08,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vsqrtnepbf16 2032(%rcx), %xmm22 {%k7} {z} +# INTEL: vsqrtnepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032] +0x62,0xe5,0x7d,0x8f,0x51,0x71,0x7f + +# ATT: vsqrtnepbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z} +# INTEL: vsqrtnepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8} +0x62,0xe5,0x7d,0x9f,0x51,0x72,0x80 + +# ATT: vsqrtnepbf16 268435456(%rbp,%r14,8), %ymm22 +# INTEL: vsqrtnepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456] 
+0x62,0xa5,0x7d,0x28,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsqrtnepbf16 291(%r8,%rax,4), %ymm22 {%k7} +# INTEL: vsqrtnepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x7d,0x2f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsqrtnepbf16 (%rip){1to16}, %ymm22 +# INTEL: vsqrtnepbf16 ymm22, word ptr [rip]{1to16} +0x62,0xe5,0x7d,0x38,0x51,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsqrtnepbf16 -1024(,%rbp,2), %ymm22 +# INTEL: vsqrtnepbf16 ymm22, ymmword ptr [2*rbp - 1024] +0x62,0xe5,0x7d,0x28,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vsqrtnepbf16 4064(%rcx), %ymm22 {%k7} {z} +# INTEL: vsqrtnepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064] +0x62,0xe5,0x7d,0xaf,0x51,0x71,0x7f + +# ATT: vsqrtnepbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z} +# INTEL: vsqrtnepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16} +0x62,0xe5,0x7d,0xbf,0x51,0x72,0x80 + +# ATT: vsqrtnepbf16 268435456(%rbp,%r14,8), %zmm22 +# INTEL: vsqrtnepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x7d,0x48,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsqrtnepbf16 291(%r8,%rax,4), %zmm22 {%k7} +# INTEL: vsqrtnepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x7d,0x4f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsqrtnepbf16 (%rip){1to32}, %zmm22 +# INTEL: vsqrtnepbf16 zmm22, word ptr [rip]{1to32} +0x62,0xe5,0x7d,0x58,0x51,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsqrtnepbf16 -2048(,%rbp,2), %zmm22 +# INTEL: vsqrtnepbf16 zmm22, zmmword ptr [2*rbp - 2048] +0x62,0xe5,0x7d,0x48,0x51,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vsqrtnepbf16 8128(%rcx), %zmm22 {%k7} {z} +# INTEL: vsqrtnepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128] +0x62,0xe5,0x7d,0xcf,0x51,0x71,0x7f + +# ATT: vsqrtnepbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z} +# INTEL: vsqrtnepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32} +0x62,0xe5,0x7d,0xdf,0x51,0x72,0x80 + +# ATT: vsubnepbf16 %ymm24, %ymm23, %ymm22 +# INTEL: vsubnepbf16 ymm22, ymm23, ymm24 +0x62,0x85,0x45,0x20,0x5c,0xf0 + +# ATT: vsubnepbf16 %ymm24, %ymm23, %ymm22 {%k7} +# 
INTEL: vsubnepbf16 ymm22 {k7}, ymm23, ymm24 +0x62,0x85,0x45,0x27,0x5c,0xf0 + +# ATT: vsubnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vsubnepbf16 ymm22 {k7} {z}, ymm23, ymm24 +0x62,0x85,0x45,0xa7,0x5c,0xf0 + +# ATT: vsubnepbf16 %zmm24, %zmm23, %zmm22 +# INTEL: vsubnepbf16 zmm22, zmm23, zmm24 +0x62,0x85,0x45,0x40,0x5c,0xf0 + +# ATT: vsubnepbf16 %zmm24, %zmm23, %zmm22 {%k7} +# INTEL: vsubnepbf16 zmm22 {k7}, zmm23, zmm24 +0x62,0x85,0x45,0x47,0x5c,0xf0 + +# ATT: vsubnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +# INTEL: vsubnepbf16 zmm22 {k7} {z}, zmm23, zmm24 +0x62,0x85,0x45,0xc7,0x5c,0xf0 + +# ATT: vsubnepbf16 %xmm24, %xmm23, %xmm22 +# INTEL: vsubnepbf16 xmm22, xmm23, xmm24 +0x62,0x85,0x45,0x00,0x5c,0xf0 + +# ATT: vsubnepbf16 %xmm24, %xmm23, %xmm22 {%k7} +# INTEL: vsubnepbf16 xmm22 {k7}, xmm23, xmm24 +0x62,0x85,0x45,0x07,0x5c,0xf0 + +# ATT: vsubnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +# INTEL: vsubnepbf16 xmm22 {k7} {z}, xmm23, xmm24 +0x62,0x85,0x45,0x87,0x5c,0xf0 + +# ATT: vsubnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vsubnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x45,0x40,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsubnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +# INTEL: vsubnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x45,0x47,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsubnepbf16 (%rip){1to32}, %zmm23, %zmm22 +# INTEL: vsubnepbf16 zmm22, zmm23, word ptr [rip]{1to32} +0x62,0xe5,0x45,0x50,0x5c,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsubnepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vsubnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe5,0x45,0x40,0x5c,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vsubnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vsubnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe5,0x45,0xc7,0x5c,0x71,0x7f + +# ATT: vsubnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +# INTEL: vsubnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} 
+0x62,0xe5,0x45,0xd7,0x5c,0x72,0x80 + +# ATT: vsubnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vsubnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x45,0x20,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsubnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +# INTEL: vsubnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x45,0x27,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsubnepbf16 (%rip){1to16}, %ymm23, %ymm22 +# INTEL: vsubnepbf16 ymm22, ymm23, word ptr [rip]{1to16} +0x62,0xe5,0x45,0x30,0x5c,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsubnepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vsubnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe5,0x45,0x20,0x5c,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vsubnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vsubnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe5,0x45,0xa7,0x5c,0x71,0x7f + +# ATT: vsubnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +# INTEL: vsubnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +0x62,0xe5,0x45,0xb7,0x5c,0x72,0x80 + +# ATT: vsubnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vsubnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x45,0x00,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsubnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +# INTEL: vsubnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc5,0x45,0x07,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsubnepbf16 (%rip){1to8}, %xmm23, %xmm22 +# INTEL: vsubnepbf16 xmm22, xmm23, word ptr [rip]{1to8} +0x62,0xe5,0x45,0x10,0x5c,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsubnepbf16 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vsubnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe5,0x45,0x00,0x5c,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vsubnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +# INTEL: vsubnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe5,0x45,0x87,0x5c,0x71,0x7f + +# ATT: vsubnepbf16 
-256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +# INTEL: vsubnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +0x62,0xe5,0x45,0x97,0x5c,0x72,0x80 + diff --git a/llvm/test/MC/X86/avx10.2-bf16-32-att.s b/llvm/test/MC/X86/avx10.2-bf16-32-att.s new file mode 100644 index 00000000000000..9f62743177c9bd --- /dev/null +++ b/llvm/test/MC/X86/avx10.2-bf16-32-att.s @@ -0,0 +1,3014 @@ +// RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s + +// CHECK: vaddnepbf16 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x58,0xd4] + vaddnepbf16 %ymm4, %ymm3, %ymm2 + +// CHECK: vaddnepbf16 %ymm4, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x58,0xd4] + vaddnepbf16 %ymm4, %ymm3, %ymm2 {%k7} + +// CHECK: vaddnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x58,0xd4] + vaddnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vaddnepbf16 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x58,0xd4] + vaddnepbf16 %zmm4, %zmm3, %zmm2 + +// CHECK: vaddnepbf16 %zmm4, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x58,0xd4] + vaddnepbf16 %zmm4, %zmm3, %zmm2 {%k7} + +// CHECK: vaddnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x58,0xd4] + vaddnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vaddnepbf16 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x58,0xd4] + vaddnepbf16 %xmm4, %xmm3, %xmm2 + +// CHECK: vaddnepbf16 %xmm4, %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x58,0xd4] + vaddnepbf16 %xmm4, %xmm3, %xmm2 {%k7} + +// CHECK: vaddnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x58,0xd4] + vaddnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vaddnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x58,0x94,0xf4,0x00,0x00,0x00,0x10] + vaddnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vaddnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +// CHECK: 
encoding: [0x62,0xf5,0x65,0x4f,0x58,0x94,0x87,0x23,0x01,0x00,0x00] + vaddnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} + +// CHECK: vaddnepbf16 (%eax){1to32}, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x58,0x10] + vaddnepbf16 (%eax){1to32}, %zmm3, %zmm2 + +// CHECK: vaddnepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x58,0x14,0x6d,0x00,0xf8,0xff,0xff] + vaddnepbf16 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vaddnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x58,0x51,0x7f] + vaddnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} + +// CHECK: vaddnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x58,0x52,0x80] + vaddnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vaddnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x58,0x94,0xf4,0x00,0x00,0x00,0x10] + vaddnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: vaddnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x58,0x94,0x87,0x23,0x01,0x00,0x00] + vaddnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} + +// CHECK: vaddnepbf16 (%eax){1to16}, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x58,0x10] + vaddnepbf16 (%eax){1to16}, %ymm3, %ymm2 + +// CHECK: vaddnepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x58,0x14,0x6d,0x00,0xfc,0xff,0xff] + vaddnepbf16 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: vaddnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x58,0x51,0x7f] + vaddnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} + +// CHECK: vaddnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x58,0x52,0x80] + vaddnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vaddnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x58,0x94,0xf4,0x00,0x00,0x00,0x10] + vaddnepbf16 
268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: vaddnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x58,0x94,0x87,0x23,0x01,0x00,0x00] + vaddnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} + +// CHECK: vaddnepbf16 (%eax){1to8}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x58,0x10] + vaddnepbf16 (%eax){1to8}, %xmm3, %xmm2 + +// CHECK: vaddnepbf16 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x58,0x14,0x6d,0x00,0xfe,0xff,0xff] + vaddnepbf16 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: vaddnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x58,0x51,0x7f] + vaddnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} + +// CHECK: vaddnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x58,0x52,0x80] + vaddnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vcmppbf16 $123, %ymm4, %ymm3, %k5 +// CHECK: encoding: [0x62,0xf3,0x67,0x28,0xc2,0xec,0x7b] + vcmppbf16 $123, %ymm4, %ymm3, %k5 + +// CHECK: vcmppbf16 $123, %ymm4, %ymm3, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x67,0x2f,0xc2,0xec,0x7b] + vcmppbf16 $123, %ymm4, %ymm3, %k5 {%k7} + +// CHECK: vcmppbf16 $123, %xmm4, %xmm3, %k5 +// CHECK: encoding: [0x62,0xf3,0x67,0x08,0xc2,0xec,0x7b] + vcmppbf16 $123, %xmm4, %xmm3, %k5 + +// CHECK: vcmppbf16 $123, %xmm4, %xmm3, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x67,0x0f,0xc2,0xec,0x7b] + vcmppbf16 $123, %xmm4, %xmm3, %k5 {%k7} + +// CHECK: vcmppbf16 $123, %zmm4, %zmm3, %k5 +// CHECK: encoding: [0x62,0xf3,0x67,0x48,0xc2,0xec,0x7b] + vcmppbf16 $123, %zmm4, %zmm3, %k5 + +// CHECK: vcmppbf16 $123, %zmm4, %zmm3, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x67,0x4f,0xc2,0xec,0x7b] + vcmppbf16 $123, %zmm4, %zmm3, %k5 {%k7} + +// CHECK: vcmppbf16 $123, 268435456(%esp,%esi,8), %zmm3, %k5 +// CHECK: encoding: [0x62,0xf3,0x67,0x48,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b] + vcmppbf16 $123, 268435456(%esp,%esi,8), %zmm3, %k5 + +// CHECK: vcmppbf16 
$123, 291(%edi,%eax,4), %zmm3, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x67,0x4f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b] + vcmppbf16 $123, 291(%edi,%eax,4), %zmm3, %k5 {%k7} + +// CHECK: vcmppbf16 $123, (%eax){1to32}, %zmm3, %k5 +// CHECK: encoding: [0x62,0xf3,0x67,0x58,0xc2,0x28,0x7b] + vcmppbf16 $123, (%eax){1to32}, %zmm3, %k5 + +// CHECK: vcmppbf16 $123, -2048(,%ebp,2), %zmm3, %k5 +// CHECK: encoding: [0x62,0xf3,0x67,0x48,0xc2,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vcmppbf16 $123, -2048(,%ebp,2), %zmm3, %k5 + +// CHECK: vcmppbf16 $123, 8128(%ecx), %zmm3, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x67,0x4f,0xc2,0x69,0x7f,0x7b] + vcmppbf16 $123, 8128(%ecx), %zmm3, %k5 {%k7} + +// CHECK: vcmppbf16 $123, -256(%edx){1to32}, %zmm3, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x67,0x5f,0xc2,0x6a,0x80,0x7b] + vcmppbf16 $123, -256(%edx){1to32}, %zmm3, %k5 {%k7} + +// CHECK: vcmppbf16 $123, 268435456(%esp,%esi,8), %xmm3, %k5 +// CHECK: encoding: [0x62,0xf3,0x67,0x08,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b] + vcmppbf16 $123, 268435456(%esp,%esi,8), %xmm3, %k5 + +// CHECK: vcmppbf16 $123, 291(%edi,%eax,4), %xmm3, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x67,0x0f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b] + vcmppbf16 $123, 291(%edi,%eax,4), %xmm3, %k5 {%k7} + +// CHECK: vcmppbf16 $123, (%eax){1to8}, %xmm3, %k5 +// CHECK: encoding: [0x62,0xf3,0x67,0x18,0xc2,0x28,0x7b] + vcmppbf16 $123, (%eax){1to8}, %xmm3, %k5 + +// CHECK: vcmppbf16 $123, -512(,%ebp,2), %xmm3, %k5 +// CHECK: encoding: [0x62,0xf3,0x67,0x08,0xc2,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vcmppbf16 $123, -512(,%ebp,2), %xmm3, %k5 + +// CHECK: vcmppbf16 $123, 2032(%ecx), %xmm3, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x67,0x0f,0xc2,0x69,0x7f,0x7b] + vcmppbf16 $123, 2032(%ecx), %xmm3, %k5 {%k7} + +// CHECK: vcmppbf16 $123, -256(%edx){1to8}, %xmm3, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x67,0x1f,0xc2,0x6a,0x80,0x7b] + vcmppbf16 $123, -256(%edx){1to8}, %xmm3, %k5 {%k7} + +// CHECK: vcmppbf16 $123, 
268435456(%esp,%esi,8), %ymm3, %k5 +// CHECK: encoding: [0x62,0xf3,0x67,0x28,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b] + vcmppbf16 $123, 268435456(%esp,%esi,8), %ymm3, %k5 + +// CHECK: vcmppbf16 $123, 291(%edi,%eax,4), %ymm3, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x67,0x2f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b] + vcmppbf16 $123, 291(%edi,%eax,4), %ymm3, %k5 {%k7} + +// CHECK: vcmppbf16 $123, (%eax){1to16}, %ymm3, %k5 +// CHECK: encoding: [0x62,0xf3,0x67,0x38,0xc2,0x28,0x7b] + vcmppbf16 $123, (%eax){1to16}, %ymm3, %k5 + +// CHECK: vcmppbf16 $123, -1024(,%ebp,2), %ymm3, %k5 +// CHECK: encoding: [0x62,0xf3,0x67,0x28,0xc2,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vcmppbf16 $123, -1024(,%ebp,2), %ymm3, %k5 + +// CHECK: vcmppbf16 $123, 4064(%ecx), %ymm3, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x67,0x2f,0xc2,0x69,0x7f,0x7b] + vcmppbf16 $123, 4064(%ecx), %ymm3, %k5 {%k7} + +// CHECK: vcmppbf16 $123, -256(%edx){1to16}, %ymm3, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x67,0x3f,0xc2,0x6a,0x80,0x7b] + vcmppbf16 $123, -256(%edx){1to16}, %ymm3, %k5 {%k7} + +// CHECK: vcomsbf16 %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xd3] + vcomsbf16 %xmm3, %xmm2 + +// CHECK: vcomsbf16 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10] + vcomsbf16 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vcomsbf16 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00] + vcomsbf16 291(%edi,%eax,4), %xmm2 + +// CHECK: vcomsbf16 (%eax), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x10] + vcomsbf16 (%eax), %xmm2 + +// CHECK: vcomsbf16 -64(,%ebp,2), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff] + vcomsbf16 -64(,%ebp,2), %xmm2 + +// CHECK: vcomsbf16 254(%ecx), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x51,0x7f] + vcomsbf16 254(%ecx), %xmm2 + +// CHECK: vcomsbf16 -256(%edx), %xmm2 +// CHECK: encoding: 
[0x62,0xf5,0x7d,0x08,0x2f,0x52,0x80] + vcomsbf16 -256(%edx), %xmm2 + +// CHECK: vdivnepbf16 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5e,0xd4] + vdivnepbf16 %ymm4, %ymm3, %ymm2 + +// CHECK: vdivnepbf16 %ymm4, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5e,0xd4] + vdivnepbf16 %ymm4, %ymm3, %ymm2 {%k7} + +// CHECK: vdivnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5e,0xd4] + vdivnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vdivnepbf16 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5e,0xd4] + vdivnepbf16 %zmm4, %zmm3, %zmm2 + +// CHECK: vdivnepbf16 %zmm4, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5e,0xd4] + vdivnepbf16 %zmm4, %zmm3, %zmm2 {%k7} + +// CHECK: vdivnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5e,0xd4] + vdivnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vdivnepbf16 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5e,0xd4] + vdivnepbf16 %xmm4, %xmm3, %xmm2 + +// CHECK: vdivnepbf16 %xmm4, %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5e,0xd4] + vdivnepbf16 %xmm4, %xmm3, %xmm2 {%k7} + +// CHECK: vdivnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5e,0xd4] + vdivnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vdivnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10] + vdivnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vdivnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00] + vdivnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} + +// CHECK: vdivnepbf16 (%eax){1to32}, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5e,0x10] + vdivnepbf16 (%eax){1to32}, %zmm3, %zmm2 + +// CHECK: vdivnepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: 
[0x62,0xf5,0x65,0x48,0x5e,0x14,0x6d,0x00,0xf8,0xff,0xff] + vdivnepbf16 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vdivnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5e,0x51,0x7f] + vdivnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} + +// CHECK: vdivnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5e,0x52,0x80] + vdivnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vdivnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10] + vdivnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: vdivnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00] + vdivnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} + +// CHECK: vdivnepbf16 (%eax){1to16}, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5e,0x10] + vdivnepbf16 (%eax){1to16}, %ymm3, %ymm2 + +// CHECK: vdivnepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5e,0x14,0x6d,0x00,0xfc,0xff,0xff] + vdivnepbf16 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: vdivnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5e,0x51,0x7f] + vdivnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} + +// CHECK: vdivnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5e,0x52,0x80] + vdivnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vdivnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10] + vdivnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: vdivnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00] + vdivnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} + +// CHECK: vdivnepbf16 (%eax){1to8}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5e,0x10] + 
vdivnepbf16 (%eax){1to8}, %xmm3, %xmm2 + +// CHECK: vdivnepbf16 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5e,0x14,0x6d,0x00,0xfe,0xff,0xff] + vdivnepbf16 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: vdivnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5e,0x51,0x7f] + vdivnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} + +// CHECK: vdivnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5e,0x52,0x80] + vdivnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x98,0xd4] + vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 + +// CHECK: vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x98,0xd4] + vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} + +// CHECK: vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x98,0xd4] + vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x98,0xd4] + vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 + +// CHECK: vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x98,0xd4] + vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} + +// CHECK: vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x98,0xd4] + vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x98,0xd4] + vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 + +// CHECK: vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x98,0xd4] + vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} + +// CHECK: vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x98,0xd4] + vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfmadd132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x98,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmadd132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vfmadd132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x98,0x94,0x87,0x23,0x01,0x00,0x00] + vfmadd132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} + +// CHECK: vfmadd132nepbf16 (%eax){1to32}, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x98,0x10] + vfmadd132nepbf16 (%eax){1to32}, %zmm3, %zmm2 + +// CHECK: vfmadd132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x98,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfmadd132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vfmadd132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x98,0x51,0x7f] + vfmadd132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfmadd132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x98,0x52,0x80] + vfmadd132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfmadd132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x98,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmadd132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: vfmadd132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x98,0x94,0x87,0x23,0x01,0x00,0x00] + vfmadd132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} + +// CHECK: vfmadd132nepbf16 (%eax){1to16}, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x98,0x10] + vfmadd132nepbf16 (%eax){1to16}, %ymm3, %ymm2 + +// CHECK: vfmadd132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x98,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfmadd132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: vfmadd132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x98,0x51,0x7f] + vfmadd132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfmadd132nepbf16 
-256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x98,0x52,0x80] + vfmadd132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfmadd132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x98,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmadd132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: vfmadd132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x98,0x94,0x87,0x23,0x01,0x00,0x00] + vfmadd132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} + +// CHECK: vfmadd132nepbf16 (%eax){1to8}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x98,0x10] + vfmadd132nepbf16 (%eax){1to8}, %xmm3, %xmm2 + +// CHECK: vfmadd132nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x98,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfmadd132nepbf16 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: vfmadd132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x98,0x51,0x7f] + vfmadd132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfmadd132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x98,0x52,0x80] + vfmadd132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xa8,0xd4] + vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 + +// CHECK: vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xa8,0xd4] + vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} + +// CHECK: vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xa8,0xd4] + vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfmadd213nepbf16 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xa8,0xd4] + vfmadd213nepbf16 %zmm4, %zmm3, %zmm2 + +// CHECK: vfmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xa8,0xd4] + vfmadd213nepbf16 
%zmm4, %zmm3, %zmm2 {%k7} + +// CHECK: vfmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xa8,0xd4] + vfmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xa8,0xd4] + vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 + +// CHECK: vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xa8,0xd4] + vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} + +// CHECK: vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xa8,0xd4] + vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfmadd213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmadd213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vfmadd213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00] + vfmadd213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} + +// CHECK: vfmadd213nepbf16 (%eax){1to32}, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xa8,0x10] + vfmadd213nepbf16 (%eax){1to32}, %zmm3, %zmm2 + +// CHECK: vfmadd213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xa8,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfmadd213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vfmadd213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xa8,0x51,0x7f] + vfmadd213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfmadd213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xa8,0x52,0x80] + vfmadd213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfmadd213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmadd213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: vfmadd213nepbf16 
291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00] + vfmadd213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} + +// CHECK: vfmadd213nepbf16 (%eax){1to16}, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xa8,0x10] + vfmadd213nepbf16 (%eax){1to16}, %ymm3, %ymm2 + +// CHECK: vfmadd213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xa8,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfmadd213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: vfmadd213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xa8,0x51,0x7f] + vfmadd213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfmadd213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xa8,0x52,0x80] + vfmadd213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfmadd213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmadd213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: vfmadd213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00] + vfmadd213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} + +// CHECK: vfmadd213nepbf16 (%eax){1to8}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xa8,0x10] + vfmadd213nepbf16 (%eax){1to8}, %xmm3, %xmm2 + +// CHECK: vfmadd213nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xa8,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfmadd213nepbf16 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: vfmadd213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xa8,0x51,0x7f] + vfmadd213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfmadd213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xa8,0x52,0x80] + vfmadd213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} + +// 
CHECK: vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xb8,0xd4] + vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 + +// CHECK: vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xb8,0xd4] + vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} + +// CHECK: vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xb8,0xd4] + vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xb8,0xd4] + vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 + +// CHECK: vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xb8,0xd4] + vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} + +// CHECK: vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xb8,0xd4] + vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xb8,0xd4] + vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 + +// CHECK: vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xb8,0xd4] + vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} + +// CHECK: vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xb8,0xd4] + vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfmadd231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmadd231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vfmadd231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00] + vfmadd231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} + +// CHECK: vfmadd231nepbf16 (%eax){1to32}, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xb8,0x10] + vfmadd231nepbf16 (%eax){1to32}, %zmm3, %zmm2 + +// CHECK: vfmadd231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: 
encoding: [0x62,0xf6,0x64,0x48,0xb8,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfmadd231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vfmadd231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xb8,0x51,0x7f] + vfmadd231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfmadd231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xb8,0x52,0x80] + vfmadd231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfmadd231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmadd231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: vfmadd231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00] + vfmadd231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} + +// CHECK: vfmadd231nepbf16 (%eax){1to16}, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xb8,0x10] + vfmadd231nepbf16 (%eax){1to16}, %ymm3, %ymm2 + +// CHECK: vfmadd231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xb8,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfmadd231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: vfmadd231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xb8,0x51,0x7f] + vfmadd231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfmadd231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xb8,0x52,0x80] + vfmadd231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfmadd231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmadd231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: vfmadd231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00] + vfmadd231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} + 
+// CHECK: vfmadd231nepbf16 (%eax){1to8}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xb8,0x10] + vfmadd231nepbf16 (%eax){1to8}, %xmm3, %xmm2 + +// CHECK: vfmadd231nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xb8,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfmadd231nepbf16 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: vfmadd231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xb8,0x51,0x7f] + vfmadd231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfmadd231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xb8,0x52,0x80] + vfmadd231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9a,0xd4] + vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 + +// CHECK: vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9a,0xd4] + vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} + +// CHECK: vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9a,0xd4] + vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9a,0xd4] + vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 + +// CHECK: vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9a,0xd4] + vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} + +// CHECK: vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9a,0xd4] + vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9a,0xd4] + vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 + +// CHECK: vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9a,0xd4] + vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} + +// CHECK: vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: 
[0x62,0xf6,0x64,0x8f,0x9a,0xd4] + vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfmsub132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmsub132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vfmsub132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00] + vfmsub132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} + +// CHECK: vfmsub132nepbf16 (%eax){1to32}, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x9a,0x10] + vfmsub132nepbf16 (%eax){1to32}, %zmm3, %zmm2 + +// CHECK: vfmsub132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9a,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfmsub132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vfmsub132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9a,0x51,0x7f] + vfmsub132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfmsub132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x9a,0x52,0x80] + vfmsub132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfmsub132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmsub132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: vfmsub132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00] + vfmsub132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} + +// CHECK: vfmsub132nepbf16 (%eax){1to16}, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x9a,0x10] + vfmsub132nepbf16 (%eax){1to16}, %ymm3, %ymm2 + +// CHECK: vfmsub132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9a,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfmsub132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: vfmsub132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} 
{z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9a,0x51,0x7f] + vfmsub132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfmsub132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x9a,0x52,0x80] + vfmsub132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfmsub132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmsub132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: vfmsub132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00] + vfmsub132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} + +// CHECK: vfmsub132nepbf16 (%eax){1to8}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x9a,0x10] + vfmsub132nepbf16 (%eax){1to8}, %xmm3, %xmm2 + +// CHECK: vfmsub132nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9a,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfmsub132nepbf16 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: vfmsub132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9a,0x51,0x7f] + vfmsub132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfmsub132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x9a,0x52,0x80] + vfmsub132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xaa,0xd4] + vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 + +// CHECK: vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xaa,0xd4] + vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} + +// CHECK: vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xaa,0xd4] + vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfmsub213nepbf16 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xaa,0xd4] + vfmsub213nepbf16 
%zmm4, %zmm3, %zmm2 + +// CHECK: vfmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xaa,0xd4] + vfmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} + +// CHECK: vfmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xaa,0xd4] + vfmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xaa,0xd4] + vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 + +// CHECK: vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xaa,0xd4] + vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} + +// CHECK: vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xaa,0xd4] + vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfmsub213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmsub213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vfmsub213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00] + vfmsub213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} + +// CHECK: vfmsub213nepbf16 (%eax){1to32}, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xaa,0x10] + vfmsub213nepbf16 (%eax){1to32}, %zmm3, %zmm2 + +// CHECK: vfmsub213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xaa,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfmsub213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vfmsub213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xaa,0x51,0x7f] + vfmsub213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfmsub213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xaa,0x52,0x80] + vfmsub213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfmsub213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: 
[0x62,0xf6,0x64,0x28,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmsub213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: vfmsub213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00] + vfmsub213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} + +// CHECK: vfmsub213nepbf16 (%eax){1to16}, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xaa,0x10] + vfmsub213nepbf16 (%eax){1to16}, %ymm3, %ymm2 + +// CHECK: vfmsub213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xaa,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfmsub213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: vfmsub213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xaa,0x51,0x7f] + vfmsub213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfmsub213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xaa,0x52,0x80] + vfmsub213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfmsub213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmsub213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: vfmsub213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00] + vfmsub213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} + +// CHECK: vfmsub213nepbf16 (%eax){1to8}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xaa,0x10] + vfmsub213nepbf16 (%eax){1to8}, %xmm3, %xmm2 + +// CHECK: vfmsub213nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xaa,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfmsub213nepbf16 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: vfmsub213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xaa,0x51,0x7f] + vfmsub213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfmsub213nepbf16 -256(%edx){1to8}, %xmm3, 
%xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xaa,0x52,0x80] + vfmsub213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xba,0xd4] + vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 + +// CHECK: vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xba,0xd4] + vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} + +// CHECK: vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xba,0xd4] + vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xba,0xd4] + vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 + +// CHECK: vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xba,0xd4] + vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} + +// CHECK: vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xba,0xd4] + vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xba,0xd4] + vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 + +// CHECK: vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xba,0xd4] + vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} + +// CHECK: vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xba,0xd4] + vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfmsub231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xba,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmsub231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vfmsub231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xba,0x94,0x87,0x23,0x01,0x00,0x00] + vfmsub231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} + +// CHECK: vfmsub231nepbf16 (%eax){1to32}, %zmm3, %zmm2 +// CHECK: encoding: 
[0x62,0xf6,0x64,0x58,0xba,0x10] + vfmsub231nepbf16 (%eax){1to32}, %zmm3, %zmm2 + +// CHECK: vfmsub231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xba,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfmsub231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vfmsub231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xba,0x51,0x7f] + vfmsub231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfmsub231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xba,0x52,0x80] + vfmsub231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfmsub231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xba,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmsub231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: vfmsub231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xba,0x94,0x87,0x23,0x01,0x00,0x00] + vfmsub231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} + +// CHECK: vfmsub231nepbf16 (%eax){1to16}, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xba,0x10] + vfmsub231nepbf16 (%eax){1to16}, %ymm3, %ymm2 + +// CHECK: vfmsub231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xba,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfmsub231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: vfmsub231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xba,0x51,0x7f] + vfmsub231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfmsub231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xba,0x52,0x80] + vfmsub231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfmsub231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xba,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmsub231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: vfmsub231nepbf16 291(%edi,%eax,4), 
%xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xba,0x94,0x87,0x23,0x01,0x00,0x00] + vfmsub231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} + +// CHECK: vfmsub231nepbf16 (%eax){1to8}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xba,0x10] + vfmsub231nepbf16 (%eax){1to8}, %xmm3, %xmm2 + +// CHECK: vfmsub231nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xba,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfmsub231nepbf16 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: vfmsub231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xba,0x51,0x7f] + vfmsub231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfmsub231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xba,0x52,0x80] + vfmsub231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9c,0xd4] + vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 + +// CHECK: vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9c,0xd4] + vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} + +// CHECK: vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9c,0xd4] + vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9c,0xd4] + vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 + +// CHECK: vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9c,0xd4] + vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} + +// CHECK: vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9c,0xd4] + vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9c,0xd4] + vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 + +// CHECK: vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +// CHECK: 
encoding: [0x62,0xf6,0x64,0x0f,0x9c,0xd4] + vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} + +// CHECK: vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9c,0xd4] + vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfnmadd132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmadd132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vfnmadd132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmadd132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} + +// CHECK: vfnmadd132nepbf16 (%eax){1to32}, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x9c,0x10] + vfnmadd132nepbf16 (%eax){1to32}, %zmm3, %zmm2 + +// CHECK: vfnmadd132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9c,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfnmadd132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vfnmadd132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9c,0x51,0x7f] + vfnmadd132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfnmadd132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x9c,0x52,0x80] + vfnmadd132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfnmadd132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmadd132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: vfnmadd132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmadd132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} + +// CHECK: vfnmadd132nepbf16 (%eax){1to16}, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x9c,0x10] + vfnmadd132nepbf16 (%eax){1to16}, %ymm3, %ymm2 + +// CHECK: vfnmadd132nepbf16 -1024(,%ebp,2), %ymm3, 
%ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9c,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfnmadd132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: vfnmadd132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9c,0x51,0x7f] + vfnmadd132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfnmadd132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x9c,0x52,0x80] + vfnmadd132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfnmadd132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmadd132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: vfnmadd132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmadd132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} + +// CHECK: vfnmadd132nepbf16 (%eax){1to8}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x9c,0x10] + vfnmadd132nepbf16 (%eax){1to8}, %xmm3, %xmm2 + +// CHECK: vfnmadd132nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9c,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfnmadd132nepbf16 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: vfnmadd132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9c,0x51,0x7f] + vfnmadd132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfnmadd132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x9c,0x52,0x80] + vfnmadd132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xac,0xd4] + vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 + +// CHECK: vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xac,0xd4] + vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} + +// CHECK: vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +// CHECK: 
encoding: [0x62,0xf6,0x64,0xaf,0xac,0xd4] + vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xac,0xd4] + vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 + +// CHECK: vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xac,0xd4] + vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} + +// CHECK: vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xac,0xd4] + vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xac,0xd4] + vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 + +// CHECK: vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xac,0xd4] + vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} + +// CHECK: vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xac,0xd4] + vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfnmadd213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xac,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmadd213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vfnmadd213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xac,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmadd213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} + +// CHECK: vfnmadd213nepbf16 (%eax){1to32}, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xac,0x10] + vfnmadd213nepbf16 (%eax){1to32}, %zmm3, %zmm2 + +// CHECK: vfnmadd213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xac,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfnmadd213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vfnmadd213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xac,0x51,0x7f] + vfnmadd213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfnmadd213nepbf16 
-256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xac,0x52,0x80] + vfnmadd213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfnmadd213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xac,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmadd213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: vfnmadd213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xac,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmadd213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} + +// CHECK: vfnmadd213nepbf16 (%eax){1to16}, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xac,0x10] + vfnmadd213nepbf16 (%eax){1to16}, %ymm3, %ymm2 + +// CHECK: vfnmadd213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xac,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfnmadd213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: vfnmadd213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xac,0x51,0x7f] + vfnmadd213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfnmadd213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xac,0x52,0x80] + vfnmadd213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfnmadd213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xac,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmadd213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: vfnmadd213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xac,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmadd213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} + +// CHECK: vfnmadd213nepbf16 (%eax){1to8}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xac,0x10] + vfnmadd213nepbf16 (%eax){1to8}, %xmm3, %xmm2 + +// CHECK: vfnmadd213nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xac,0x14,0x6d,0x00,0xfe,0xff,0xff] + 
vfnmadd213nepbf16 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: vfnmadd213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xac,0x51,0x7f] + vfnmadd213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfnmadd213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xac,0x52,0x80] + vfnmadd213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbc,0xd4] + vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 + +// CHECK: vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbc,0xd4] + vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} + +// CHECK: vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbc,0xd4] + vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbc,0xd4] + vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 + +// CHECK: vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbc,0xd4] + vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} + +// CHECK: vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbc,0xd4] + vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbc,0xd4] + vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 + +// CHECK: vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbc,0xd4] + vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} + +// CHECK: vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbc,0xd4] + vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfnmadd231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmadd231nepbf16 268435456(%esp,%esi,8), 
%zmm3, %zmm2 + +// CHECK: vfnmadd231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmadd231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} + +// CHECK: vfnmadd231nepbf16 (%eax){1to32}, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xbc,0x10] + vfnmadd231nepbf16 (%eax){1to32}, %zmm3, %zmm2 + +// CHECK: vfnmadd231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbc,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfnmadd231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vfnmadd231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbc,0x51,0x7f] + vfnmadd231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfnmadd231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xbc,0x52,0x80] + vfnmadd231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfnmadd231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmadd231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: vfnmadd231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmadd231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} + +// CHECK: vfnmadd231nepbf16 (%eax){1to16}, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xbc,0x10] + vfnmadd231nepbf16 (%eax){1to16}, %ymm3, %ymm2 + +// CHECK: vfnmadd231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbc,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfnmadd231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: vfnmadd231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbc,0x51,0x7f] + vfnmadd231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfnmadd231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xbc,0x52,0x80] + 
vfnmadd231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfnmadd231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmadd231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: vfnmadd231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmadd231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} + +// CHECK: vfnmadd231nepbf16 (%eax){1to8}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xbc,0x10] + vfnmadd231nepbf16 (%eax){1to8}, %xmm3, %xmm2 + +// CHECK: vfnmadd231nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbc,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfnmadd231nepbf16 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: vfnmadd231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbc,0x51,0x7f] + vfnmadd231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfnmadd231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xbc,0x52,0x80] + vfnmadd231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9e,0xd4] + vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 + +// CHECK: vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9e,0xd4] + vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} + +// CHECK: vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9e,0xd4] + vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9e,0xd4] + vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 + +// CHECK: vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9e,0xd4] + vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} + +// CHECK: vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} 
{z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9e,0xd4] + vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9e,0xd4] + vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 + +// CHECK: vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9e,0xd4] + vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} + +// CHECK: vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9e,0xd4] + vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfnmsub132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmsub132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vfnmsub132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmsub132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} + +// CHECK: vfnmsub132nepbf16 (%eax){1to32}, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x9e,0x10] + vfnmsub132nepbf16 (%eax){1to32}, %zmm3, %zmm2 + +// CHECK: vfnmsub132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9e,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfnmsub132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vfnmsub132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9e,0x51,0x7f] + vfnmsub132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfnmsub132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x9e,0x52,0x80] + vfnmsub132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfnmsub132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmsub132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: vfnmsub132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +// CHECK: encoding: 
[0x62,0xf6,0x64,0x2f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmsub132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} + +// CHECK: vfnmsub132nepbf16 (%eax){1to16}, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x9e,0x10] + vfnmsub132nepbf16 (%eax){1to16}, %ymm3, %ymm2 + +// CHECK: vfnmsub132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9e,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfnmsub132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: vfnmsub132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9e,0x51,0x7f] + vfnmsub132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfnmsub132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x9e,0x52,0x80] + vfnmsub132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfnmsub132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmsub132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: vfnmsub132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmsub132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} + +// CHECK: vfnmsub132nepbf16 (%eax){1to8}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x9e,0x10] + vfnmsub132nepbf16 (%eax){1to8}, %xmm3, %xmm2 + +// CHECK: vfnmsub132nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9e,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfnmsub132nepbf16 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: vfnmsub132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9e,0x51,0x7f] + vfnmsub132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfnmsub132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x9e,0x52,0x80] + vfnmsub132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfnmsub213nepbf16 %ymm4, %ymm3, 
%ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xae,0xd4] + vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2 + +// CHECK: vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xae,0xd4] + vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} + +// CHECK: vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xae,0xd4] + vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xae,0xd4] + vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 + +// CHECK: vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xae,0xd4] + vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} + +// CHECK: vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xae,0xd4] + vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xae,0xd4] + vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 + +// CHECK: vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xae,0xd4] + vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} + +// CHECK: vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xae,0xd4] + vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfnmsub213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xae,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmsub213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vfnmsub213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xae,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmsub213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} + +// CHECK: vfnmsub213nepbf16 (%eax){1to32}, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xae,0x10] + vfnmsub213nepbf16 (%eax){1to32}, %zmm3, %zmm2 + +// CHECK: vfnmsub213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: 
[0x62,0xf6,0x64,0x48,0xae,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfnmsub213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vfnmsub213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xae,0x51,0x7f] + vfnmsub213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfnmsub213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xae,0x52,0x80] + vfnmsub213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfnmsub213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xae,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmsub213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: vfnmsub213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xae,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmsub213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} + +// CHECK: vfnmsub213nepbf16 (%eax){1to16}, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xae,0x10] + vfnmsub213nepbf16 (%eax){1to16}, %ymm3, %ymm2 + +// CHECK: vfnmsub213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xae,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfnmsub213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: vfnmsub213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xae,0x51,0x7f] + vfnmsub213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfnmsub213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xae,0x52,0x80] + vfnmsub213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfnmsub213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xae,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmsub213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: vfnmsub213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xae,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmsub213nepbf16 291(%edi,%eax,4), %xmm3, 
%xmm2 {%k7} + +// CHECK: vfnmsub213nepbf16 (%eax){1to8}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xae,0x10] + vfnmsub213nepbf16 (%eax){1to8}, %xmm3, %xmm2 + +// CHECK: vfnmsub213nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xae,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfnmsub213nepbf16 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: vfnmsub213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xae,0x51,0x7f] + vfnmsub213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfnmsub213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xae,0x52,0x80] + vfnmsub213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbe,0xd4] + vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 + +// CHECK: vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbe,0xd4] + vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} + +// CHECK: vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbe,0xd4] + vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbe,0xd4] + vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 + +// CHECK: vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbe,0xd4] + vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} + +// CHECK: vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbe,0xd4] + vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbe,0xd4] + vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2 + +// CHECK: vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbe,0xd4] + vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} + +// CHECK: vfnmsub231nepbf16 %xmm4, %xmm3, 
%xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbe,0xd4] + vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfnmsub231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmsub231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vfnmsub231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmsub231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} + +// CHECK: vfnmsub231nepbf16 (%eax){1to32}, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xbe,0x10] + vfnmsub231nepbf16 (%eax){1to32}, %zmm3, %zmm2 + +// CHECK: vfnmsub231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbe,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfnmsub231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vfnmsub231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbe,0x51,0x7f] + vfnmsub231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfnmsub231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xbe,0x52,0x80] + vfnmsub231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vfnmsub231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmsub231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: vfnmsub231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmsub231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} + +// CHECK: vfnmsub231nepbf16 (%eax){1to16}, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xbe,0x10] + vfnmsub231nepbf16 (%eax){1to16}, %ymm3, %ymm2 + +// CHECK: vfnmsub231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbe,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfnmsub231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2 + 
+// CHECK: vfnmsub231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbe,0x51,0x7f] + vfnmsub231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfnmsub231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xbe,0x52,0x80] + vfnmsub231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vfnmsub231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmsub231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: vfnmsub231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmsub231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} + +// CHECK: vfnmsub231nepbf16 (%eax){1to8}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xbe,0x10] + vfnmsub231nepbf16 (%eax){1to8}, %xmm3, %xmm2 + +// CHECK: vfnmsub231nepbf16 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbe,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfnmsub231nepbf16 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: vfnmsub231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbe,0x51,0x7f] + vfnmsub231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfnmsub231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xbe,0x52,0x80] + vfnmsub231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vfpclasspbf16 $123, %zmm3, %k5 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x66,0xeb,0x7b] + vfpclasspbf16 $123, %zmm3, %k5 + +// CHECK: vfpclasspbf16 $123, %zmm3, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x66,0xeb,0x7b] + vfpclasspbf16 $123, %zmm3, %k5 {%k7} + +// CHECK: vfpclasspbf16 $123, %ymm3, %k5 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x66,0xeb,0x7b] + vfpclasspbf16 $123, %ymm3, %k5 + +// CHECK: vfpclasspbf16 $123, %ymm3, %k5 {%k7} +// CHECK: encoding: 
[0x62,0xf3,0x7f,0x2f,0x66,0xeb,0x7b] + vfpclasspbf16 $123, %ymm3, %k5 {%k7} + +// CHECK: vfpclasspbf16 $123, %xmm3, %k5 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0xeb,0x7b] + vfpclasspbf16 $123, %xmm3, %k5 + +// CHECK: vfpclasspbf16 $123, %xmm3, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0xeb,0x7b] + vfpclasspbf16 $123, %xmm3, %k5 {%k7} + +// CHECK: vfpclasspbf16x $123, 268435456(%esp,%esi,8), %k5 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b] + vfpclasspbf16x $123, 268435456(%esp,%esi,8), %k5 + +// CHECK: vfpclasspbf16x $123, 291(%edi,%eax,4), %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0xac,0x87,0x23,0x01,0x00,0x00,0x7b] + vfpclasspbf16x $123, 291(%edi,%eax,4), %k5 {%k7} + +// CHECK: vfpclasspbf16 $123, (%eax){1to8}, %k5 +// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x66,0x28,0x7b] + vfpclasspbf16 $123, (%eax){1to8}, %k5 + +// CHECK: vfpclasspbf16x $123, -512(,%ebp,2), %k5 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vfpclasspbf16x $123, -512(,%ebp,2), %k5 + +// CHECK: vfpclasspbf16x $123, 2032(%ecx), %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0x69,0x7f,0x7b] + vfpclasspbf16x $123, 2032(%ecx), %k5 {%k7} + +// CHECK: vfpclasspbf16 $123, -256(%edx){1to8}, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x1f,0x66,0x6a,0x80,0x7b] + vfpclasspbf16 $123, -256(%edx){1to8}, %k5 {%k7} + +// CHECK: vfpclasspbf16 $123, (%eax){1to16}, %k5 +// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x66,0x28,0x7b] + vfpclasspbf16 $123, (%eax){1to16}, %k5 + +// CHECK: vfpclasspbf16y $123, -1024(,%ebp,2), %k5 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x66,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vfpclasspbf16y $123, -1024(,%ebp,2), %k5 + +// CHECK: vfpclasspbf16y $123, 4064(%ecx), %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x66,0x69,0x7f,0x7b] + vfpclasspbf16y $123, 4064(%ecx), %k5 {%k7} + +// CHECK: vfpclasspbf16 $123, -256(%edx){1to16}, %k5 {%k7} +// CHECK: encoding: 
[0x62,0xf3,0x7f,0x3f,0x66,0x6a,0x80,0x7b] + vfpclasspbf16 $123, -256(%edx){1to16}, %k5 {%k7} + +// CHECK: vfpclasspbf16 $123, (%eax){1to32}, %k5 +// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x66,0x28,0x7b] + vfpclasspbf16 $123, (%eax){1to32}, %k5 + +// CHECK: vfpclasspbf16z $123, -2048(,%ebp,2), %k5 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x66,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vfpclasspbf16z $123, -2048(,%ebp,2), %k5 + +// CHECK: vfpclasspbf16z $123, 8128(%ecx), %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x66,0x69,0x7f,0x7b] + vfpclasspbf16z $123, 8128(%ecx), %k5 {%k7} + +// CHECK: vfpclasspbf16 $123, -256(%edx){1to32}, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x5f,0x66,0x6a,0x80,0x7b] + vfpclasspbf16 $123, -256(%edx){1to32}, %k5 {%k7} + +// CHECK: vgetexppbf16 %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0xd3] + vgetexppbf16 %xmm3, %xmm2 + +// CHECK: vgetexppbf16 %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x42,0xd3] + vgetexppbf16 %xmm3, %xmm2 {%k7} + +// CHECK: vgetexppbf16 %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x42,0xd3] + vgetexppbf16 %xmm3, %xmm2 {%k7} {z} + +// CHECK: vgetexppbf16 %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0xd3] + vgetexppbf16 %zmm3, %zmm2 + +// CHECK: vgetexppbf16 %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x42,0xd3] + vgetexppbf16 %zmm3, %zmm2 {%k7} + +// CHECK: vgetexppbf16 %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x42,0xd3] + vgetexppbf16 %zmm3, %zmm2 {%k7} {z} + +// CHECK: vgetexppbf16 %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0xd3] + vgetexppbf16 %ymm3, %ymm2 + +// CHECK: vgetexppbf16 %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x42,0xd3] + vgetexppbf16 %ymm3, %ymm2 {%k7} + +// CHECK: vgetexppbf16 %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x42,0xd3] + vgetexppbf16 %ymm3, %ymm2 {%k7} {z} + +// CHECK: vgetexppbf16 268435456(%esp,%esi,8), %xmm2 +// 
CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0x94,0xf4,0x00,0x00,0x00,0x10] + vgetexppbf16 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vgetexppbf16 291(%edi,%eax,4), %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x42,0x94,0x87,0x23,0x01,0x00,0x00] + vgetexppbf16 291(%edi,%eax,4), %xmm2 {%k7} + +// CHECK: vgetexppbf16 (%eax){1to8}, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x42,0x10] + vgetexppbf16 (%eax){1to8}, %xmm2 + +// CHECK: vgetexppbf16 -512(,%ebp,2), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0x14,0x6d,0x00,0xfe,0xff,0xff] + vgetexppbf16 -512(,%ebp,2), %xmm2 + +// CHECK: vgetexppbf16 2032(%ecx), %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x42,0x51,0x7f] + vgetexppbf16 2032(%ecx), %xmm2 {%k7} {z} + +// CHECK: vgetexppbf16 -256(%edx){1to8}, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x42,0x52,0x80] + vgetexppbf16 -256(%edx){1to8}, %xmm2 {%k7} {z} + +// CHECK: vgetexppbf16 268435456(%esp,%esi,8), %ymm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0x94,0xf4,0x00,0x00,0x00,0x10] + vgetexppbf16 268435456(%esp,%esi,8), %ymm2 + +// CHECK: vgetexppbf16 291(%edi,%eax,4), %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x42,0x94,0x87,0x23,0x01,0x00,0x00] + vgetexppbf16 291(%edi,%eax,4), %ymm2 {%k7} + +// CHECK: vgetexppbf16 (%eax){1to16}, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x42,0x10] + vgetexppbf16 (%eax){1to16}, %ymm2 + +// CHECK: vgetexppbf16 -1024(,%ebp,2), %ymm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0x14,0x6d,0x00,0xfc,0xff,0xff] + vgetexppbf16 -1024(,%ebp,2), %ymm2 + +// CHECK: vgetexppbf16 4064(%ecx), %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x42,0x51,0x7f] + vgetexppbf16 4064(%ecx), %ymm2 {%k7} {z} + +// CHECK: vgetexppbf16 -256(%edx){1to16}, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x42,0x52,0x80] + vgetexppbf16 -256(%edx){1to16}, %ymm2 {%k7} {z} + +// CHECK: vgetexppbf16 268435456(%esp,%esi,8), %zmm2 +// CHECK: encoding: 
[0x62,0xf5,0x7d,0x48,0x42,0x94,0xf4,0x00,0x00,0x00,0x10] + vgetexppbf16 268435456(%esp,%esi,8), %zmm2 + +// CHECK: vgetexppbf16 291(%edi,%eax,4), %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x42,0x94,0x87,0x23,0x01,0x00,0x00] + vgetexppbf16 291(%edi,%eax,4), %zmm2 {%k7} + +// CHECK: vgetexppbf16 (%eax){1to32}, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x42,0x10] + vgetexppbf16 (%eax){1to32}, %zmm2 + +// CHECK: vgetexppbf16 -2048(,%ebp,2), %zmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0x14,0x6d,0x00,0xf8,0xff,0xff] + vgetexppbf16 -2048(,%ebp,2), %zmm2 + +// CHECK: vgetexppbf16 8128(%ecx), %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x42,0x51,0x7f] + vgetexppbf16 8128(%ecx), %zmm2 {%k7} {z} + +// CHECK: vgetexppbf16 -256(%edx){1to32}, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x42,0x52,0x80] + vgetexppbf16 -256(%edx){1to32}, %zmm2 {%k7} {z} + +// CHECK: vgetmantpbf16 $123, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x26,0xd3,0x7b] + vgetmantpbf16 $123, %zmm3, %zmm2 + +// CHECK: vgetmantpbf16 $123, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x26,0xd3,0x7b] + vgetmantpbf16 $123, %zmm3, %zmm2 {%k7} + +// CHECK: vgetmantpbf16 $123, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x26,0xd3,0x7b] + vgetmantpbf16 $123, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vgetmantpbf16 $123, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x26,0xd3,0x7b] + vgetmantpbf16 $123, %ymm3, %ymm2 + +// CHECK: vgetmantpbf16 $123, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x26,0xd3,0x7b] + vgetmantpbf16 $123, %ymm3, %ymm2 {%k7} + +// CHECK: vgetmantpbf16 $123, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x26,0xd3,0x7b] + vgetmantpbf16 $123, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vgetmantpbf16 $123, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x26,0xd3,0x7b] + vgetmantpbf16 $123, %xmm3, %xmm2 + +// CHECK: vgetmantpbf16 $123, %xmm3, %xmm2 {%k7} +// 
CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x26,0xd3,0x7b] + vgetmantpbf16 $123, %xmm3, %xmm2 {%k7} + +// CHECK: vgetmantpbf16 $123, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x26,0xd3,0x7b] + vgetmantpbf16 $123, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vgetmantpbf16 $123, 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vgetmantpbf16 $123, 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vgetmantpbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vgetmantpbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7} + +// CHECK: vgetmantpbf16 $123, (%eax){1to8}, %xmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x26,0x10,0x7b] + vgetmantpbf16 $123, (%eax){1to8}, %xmm2 + +// CHECK: vgetmantpbf16 $123, -512(,%ebp,2), %xmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x26,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vgetmantpbf16 $123, -512(,%ebp,2), %xmm2 + +// CHECK: vgetmantpbf16 $123, 2032(%ecx), %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x26,0x51,0x7f,0x7b] + vgetmantpbf16 $123, 2032(%ecx), %xmm2 {%k7} {z} + +// CHECK: vgetmantpbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0x9f,0x26,0x52,0x80,0x7b] + vgetmantpbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z} + +// CHECK: vgetmantpbf16 $123, 268435456(%esp,%esi,8), %ymm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vgetmantpbf16 $123, 268435456(%esp,%esi,8), %ymm2 + +// CHECK: vgetmantpbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vgetmantpbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7} + +// CHECK: vgetmantpbf16 $123, (%eax){1to16}, %ymm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x26,0x10,0x7b] + vgetmantpbf16 $123, (%eax){1to16}, %ymm2 + +// CHECK: vgetmantpbf16 $123, -1024(,%ebp,2), %ymm2 +// CHECK: encoding: 
[0x62,0xf3,0x7f,0x28,0x26,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vgetmantpbf16 $123, -1024(,%ebp,2), %ymm2 + +// CHECK: vgetmantpbf16 $123, 4064(%ecx), %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x26,0x51,0x7f,0x7b] + vgetmantpbf16 $123, 4064(%ecx), %ymm2 {%k7} {z} + +// CHECK: vgetmantpbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0xbf,0x26,0x52,0x80,0x7b] + vgetmantpbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z} + +// CHECK: vgetmantpbf16 $123, 268435456(%esp,%esi,8), %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vgetmantpbf16 $123, 268435456(%esp,%esi,8), %zmm2 + +// CHECK: vgetmantpbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vgetmantpbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7} + +// CHECK: vgetmantpbf16 $123, (%eax){1to32}, %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x26,0x10,0x7b] + vgetmantpbf16 $123, (%eax){1to32}, %zmm2 + +// CHECK: vgetmantpbf16 $123, -2048(,%ebp,2), %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x26,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vgetmantpbf16 $123, -2048(,%ebp,2), %zmm2 + +// CHECK: vgetmantpbf16 $123, 8128(%ecx), %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x26,0x51,0x7f,0x7b] + vgetmantpbf16 $123, 8128(%ecx), %zmm2 {%k7} {z} + +// CHECK: vgetmantpbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0xdf,0x26,0x52,0x80,0x7b] + vgetmantpbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z} + +// CHECK: vmaxpbf16 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5f,0xd4] + vmaxpbf16 %ymm4, %ymm3, %ymm2 + +// CHECK: vmaxpbf16 %ymm4, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5f,0xd4] + vmaxpbf16 %ymm4, %ymm3, %ymm2 {%k7} + +// CHECK: vmaxpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5f,0xd4] + vmaxpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} + +// CHECK: 
vmaxpbf16 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5f,0xd4] + vmaxpbf16 %zmm4, %zmm3, %zmm2 + +// CHECK: vmaxpbf16 %zmm4, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5f,0xd4] + vmaxpbf16 %zmm4, %zmm3, %zmm2 {%k7} + +// CHECK: vmaxpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5f,0xd4] + vmaxpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vmaxpbf16 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5f,0xd4] + vmaxpbf16 %xmm4, %xmm3, %xmm2 + +// CHECK: vmaxpbf16 %xmm4, %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5f,0xd4] + vmaxpbf16 %xmm4, %xmm3, %xmm2 {%k7} + +// CHECK: vmaxpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5f,0xd4] + vmaxpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vmaxpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10] + vmaxpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vmaxpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00] + vmaxpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} + +// CHECK: vmaxpbf16 (%eax){1to32}, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5f,0x10] + vmaxpbf16 (%eax){1to32}, %zmm3, %zmm2 + +// CHECK: vmaxpbf16 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5f,0x14,0x6d,0x00,0xf8,0xff,0xff] + vmaxpbf16 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vmaxpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5f,0x51,0x7f] + vmaxpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} + +// CHECK: vmaxpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5f,0x52,0x80] + vmaxpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vmaxpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10] + vmaxpbf16 
268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: vmaxpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00] + vmaxpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} + +// CHECK: vmaxpbf16 (%eax){1to16}, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5f,0x10] + vmaxpbf16 (%eax){1to16}, %ymm3, %ymm2 + +// CHECK: vmaxpbf16 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5f,0x14,0x6d,0x00,0xfc,0xff,0xff] + vmaxpbf16 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: vmaxpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5f,0x51,0x7f] + vmaxpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} + +// CHECK: vmaxpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5f,0x52,0x80] + vmaxpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vmaxpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10] + vmaxpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: vmaxpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00] + vmaxpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} + +// CHECK: vmaxpbf16 (%eax){1to8}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5f,0x10] + vmaxpbf16 (%eax){1to8}, %xmm3, %xmm2 + +// CHECK: vmaxpbf16 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5f,0x14,0x6d,0x00,0xfe,0xff,0xff] + vmaxpbf16 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: vmaxpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5f,0x51,0x7f] + vmaxpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} + +// CHECK: vmaxpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5f,0x52,0x80] + vmaxpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vminpbf16 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5d,0xd4] + 
vminpbf16 %ymm4, %ymm3, %ymm2 + +// CHECK: vminpbf16 %ymm4, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5d,0xd4] + vminpbf16 %ymm4, %ymm3, %ymm2 {%k7} + +// CHECK: vminpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5d,0xd4] + vminpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vminpbf16 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5d,0xd4] + vminpbf16 %zmm4, %zmm3, %zmm2 + +// CHECK: vminpbf16 %zmm4, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5d,0xd4] + vminpbf16 %zmm4, %zmm3, %zmm2 {%k7} + +// CHECK: vminpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5d,0xd4] + vminpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vminpbf16 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5d,0xd4] + vminpbf16 %xmm4, %xmm3, %xmm2 + +// CHECK: vminpbf16 %xmm4, %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5d,0xd4] + vminpbf16 %xmm4, %xmm3, %xmm2 {%k7} + +// CHECK: vminpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5d,0xd4] + vminpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vminpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10] + vminpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vminpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00] + vminpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} + +// CHECK: vminpbf16 (%eax){1to32}, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5d,0x10] + vminpbf16 (%eax){1to32}, %zmm3, %zmm2 + +// CHECK: vminpbf16 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5d,0x14,0x6d,0x00,0xf8,0xff,0xff] + vminpbf16 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vminpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5d,0x51,0x7f] + vminpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} 
{z} + +// CHECK: vminpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5d,0x52,0x80] + vminpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vminpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10] + vminpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: vminpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00] + vminpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} + +// CHECK: vminpbf16 (%eax){1to16}, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5d,0x10] + vminpbf16 (%eax){1to16}, %ymm3, %ymm2 + +// CHECK: vminpbf16 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5d,0x14,0x6d,0x00,0xfc,0xff,0xff] + vminpbf16 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: vminpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5d,0x51,0x7f] + vminpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} + +// CHECK: vminpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5d,0x52,0x80] + vminpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vminpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10] + vminpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: vminpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00] + vminpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} + +// CHECK: vminpbf16 (%eax){1to8}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5d,0x10] + vminpbf16 (%eax){1to8}, %xmm3, %xmm2 + +// CHECK: vminpbf16 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5d,0x14,0x6d,0x00,0xfe,0xff,0xff] + vminpbf16 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: vminpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: 
[0x62,0xf5,0x65,0x8f,0x5d,0x51,0x7f] + vminpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} + +// CHECK: vminpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5d,0x52,0x80] + vminpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vmulnepbf16 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x59,0xd4] + vmulnepbf16 %ymm4, %ymm3, %ymm2 + +// CHECK: vmulnepbf16 %ymm4, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x59,0xd4] + vmulnepbf16 %ymm4, %ymm3, %ymm2 {%k7} + +// CHECK: vmulnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x59,0xd4] + vmulnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vmulnepbf16 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x59,0xd4] + vmulnepbf16 %zmm4, %zmm3, %zmm2 + +// CHECK: vmulnepbf16 %zmm4, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x59,0xd4] + vmulnepbf16 %zmm4, %zmm3, %zmm2 {%k7} + +// CHECK: vmulnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x59,0xd4] + vmulnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vmulnepbf16 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x59,0xd4] + vmulnepbf16 %xmm4, %xmm3, %xmm2 + +// CHECK: vmulnepbf16 %xmm4, %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x59,0xd4] + vmulnepbf16 %xmm4, %xmm3, %xmm2 {%k7} + +// CHECK: vmulnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x59,0xd4] + vmulnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vmulnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x59,0x94,0xf4,0x00,0x00,0x00,0x10] + vmulnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vmulnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x59,0x94,0x87,0x23,0x01,0x00,0x00] + vmulnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} + +// CHECK: vmulnepbf16 (%eax){1to32}, %zmm3, %zmm2 +// CHECK: encoding: 
[0x62,0xf5,0x65,0x58,0x59,0x10] + vmulnepbf16 (%eax){1to32}, %zmm3, %zmm2 + +// CHECK: vmulnepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x59,0x14,0x6d,0x00,0xf8,0xff,0xff] + vmulnepbf16 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vmulnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x59,0x51,0x7f] + vmulnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} + +// CHECK: vmulnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x59,0x52,0x80] + vmulnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vmulnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x59,0x94,0xf4,0x00,0x00,0x00,0x10] + vmulnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: vmulnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x59,0x94,0x87,0x23,0x01,0x00,0x00] + vmulnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} + +// CHECK: vmulnepbf16 (%eax){1to16}, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x59,0x10] + vmulnepbf16 (%eax){1to16}, %ymm3, %ymm2 + +// CHECK: vmulnepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x59,0x14,0x6d,0x00,0xfc,0xff,0xff] + vmulnepbf16 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: vmulnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x59,0x51,0x7f] + vmulnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} + +// CHECK: vmulnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x59,0x52,0x80] + vmulnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vmulnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x59,0x94,0xf4,0x00,0x00,0x00,0x10] + vmulnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: vmulnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x59,0x94,0x87,0x23,0x01,0x00,0x00] + vmulnepbf16 
291(%edi,%eax,4), %xmm3, %xmm2 {%k7} + +// CHECK: vmulnepbf16 (%eax){1to8}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x59,0x10] + vmulnepbf16 (%eax){1to8}, %xmm3, %xmm2 + +// CHECK: vmulnepbf16 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x59,0x14,0x6d,0x00,0xfe,0xff,0xff] + vmulnepbf16 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: vmulnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x59,0x51,0x7f] + vmulnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} + +// CHECK: vmulnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x59,0x52,0x80] + vmulnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vrcppbf16 %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4c,0xd3] + vrcppbf16 %xmm3, %xmm2 + +// CHECK: vrcppbf16 %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4c,0xd3] + vrcppbf16 %xmm3, %xmm2 {%k7} + +// CHECK: vrcppbf16 %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4c,0xd3] + vrcppbf16 %xmm3, %xmm2 {%k7} {z} + +// CHECK: vrcppbf16 %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4c,0xd3] + vrcppbf16 %zmm3, %zmm2 + +// CHECK: vrcppbf16 %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4c,0xd3] + vrcppbf16 %zmm3, %zmm2 {%k7} + +// CHECK: vrcppbf16 %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4c,0xd3] + vrcppbf16 %zmm3, %zmm2 {%k7} {z} + +// CHECK: vrcppbf16 %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4c,0xd3] + vrcppbf16 %ymm3, %ymm2 + +// CHECK: vrcppbf16 %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4c,0xd3] + vrcppbf16 %ymm3, %ymm2 {%k7} + +// CHECK: vrcppbf16 %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4c,0xd3] + vrcppbf16 %ymm3, %ymm2 {%k7} {z} + +// CHECK: vrcppbf16 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10] + vrcppbf16 268435456(%esp,%esi,8), %xmm2 + 
+// CHECK: vrcppbf16 291(%edi,%eax,4), %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00] + vrcppbf16 291(%edi,%eax,4), %xmm2 {%k7} + +// CHECK: vrcppbf16 (%eax){1to8}, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x18,0x4c,0x10] + vrcppbf16 (%eax){1to8}, %xmm2 + +// CHECK: vrcppbf16 -512(,%ebp,2), %xmm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4c,0x14,0x6d,0x00,0xfe,0xff,0xff] + vrcppbf16 -512(,%ebp,2), %xmm2 + +// CHECK: vrcppbf16 2032(%ecx), %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4c,0x51,0x7f] + vrcppbf16 2032(%ecx), %xmm2 {%k7} {z} + +// CHECK: vrcppbf16 -256(%edx){1to8}, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x7c,0x9f,0x4c,0x52,0x80] + vrcppbf16 -256(%edx){1to8}, %xmm2 {%k7} {z} + +// CHECK: vrcppbf16 268435456(%esp,%esi,8), %ymm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10] + vrcppbf16 268435456(%esp,%esi,8), %ymm2 + +// CHECK: vrcppbf16 291(%edi,%eax,4), %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00] + vrcppbf16 291(%edi,%eax,4), %ymm2 {%k7} + +// CHECK: vrcppbf16 (%eax){1to16}, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x38,0x4c,0x10] + vrcppbf16 (%eax){1to16}, %ymm2 + +// CHECK: vrcppbf16 -1024(,%ebp,2), %ymm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4c,0x14,0x6d,0x00,0xfc,0xff,0xff] + vrcppbf16 -1024(,%ebp,2), %ymm2 + +// CHECK: vrcppbf16 4064(%ecx), %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4c,0x51,0x7f] + vrcppbf16 4064(%ecx), %ymm2 {%k7} {z} + +// CHECK: vrcppbf16 -256(%edx){1to16}, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x7c,0xbf,0x4c,0x52,0x80] + vrcppbf16 -256(%edx){1to16}, %ymm2 {%k7} {z} + +// CHECK: vrcppbf16 268435456(%esp,%esi,8), %zmm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10] + vrcppbf16 268435456(%esp,%esi,8), %zmm2 + +// CHECK: vrcppbf16 291(%edi,%eax,4), %zmm2 {%k7} +// CHECK: encoding: 
[0x62,0xf6,0x7c,0x4f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00] + vrcppbf16 291(%edi,%eax,4), %zmm2 {%k7} + +// CHECK: vrcppbf16 (%eax){1to32}, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x58,0x4c,0x10] + vrcppbf16 (%eax){1to32}, %zmm2 + +// CHECK: vrcppbf16 -2048(,%ebp,2), %zmm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4c,0x14,0x6d,0x00,0xf8,0xff,0xff] + vrcppbf16 -2048(,%ebp,2), %zmm2 + +// CHECK: vrcppbf16 8128(%ecx), %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4c,0x51,0x7f] + vrcppbf16 8128(%ecx), %zmm2 {%k7} {z} + +// CHECK: vrcppbf16 -256(%edx){1to32}, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x7c,0xdf,0x4c,0x52,0x80] + vrcppbf16 -256(%edx){1to32}, %zmm2 {%k7} {z} + +// CHECK: vreducenepbf16 $123, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x56,0xd3,0x7b] + vreducenepbf16 $123, %zmm3, %zmm2 + +// CHECK: vreducenepbf16 $123, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x56,0xd3,0x7b] + vreducenepbf16 $123, %zmm3, %zmm2 {%k7} + +// CHECK: vreducenepbf16 $123, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x56,0xd3,0x7b] + vreducenepbf16 $123, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vreducenepbf16 $123, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x56,0xd3,0x7b] + vreducenepbf16 $123, %ymm3, %ymm2 + +// CHECK: vreducenepbf16 $123, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x56,0xd3,0x7b] + vreducenepbf16 $123, %ymm3, %ymm2 {%k7} + +// CHECK: vreducenepbf16 $123, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x56,0xd3,0x7b] + vreducenepbf16 $123, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vreducenepbf16 $123, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x56,0xd3,0x7b] + vreducenepbf16 $123, %xmm3, %xmm2 + +// CHECK: vreducenepbf16 $123, %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x56,0xd3,0x7b] + vreducenepbf16 $123, %xmm3, %xmm2 {%k7} + +// CHECK: vreducenepbf16 $123, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: 
[0x62,0xf3,0x7f,0x8f,0x56,0xd3,0x7b] + vreducenepbf16 $123, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vreducenepbf16 $123, 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vreducenepbf16 $123, 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vreducenepbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vreducenepbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7} + +// CHECK: vreducenepbf16 $123, (%eax){1to8}, %xmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x56,0x10,0x7b] + vreducenepbf16 $123, (%eax){1to8}, %xmm2 + +// CHECK: vreducenepbf16 $123, -512(,%ebp,2), %xmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x56,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vreducenepbf16 $123, -512(,%ebp,2), %xmm2 + +// CHECK: vreducenepbf16 $123, 2032(%ecx), %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x56,0x51,0x7f,0x7b] + vreducenepbf16 $123, 2032(%ecx), %xmm2 {%k7} {z} + +// CHECK: vreducenepbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0x9f,0x56,0x52,0x80,0x7b] + vreducenepbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z} + +// CHECK: vreducenepbf16 $123, 268435456(%esp,%esi,8), %ymm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vreducenepbf16 $123, 268435456(%esp,%esi,8), %ymm2 + +// CHECK: vreducenepbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vreducenepbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7} + +// CHECK: vreducenepbf16 $123, (%eax){1to16}, %ymm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x56,0x10,0x7b] + vreducenepbf16 $123, (%eax){1to16}, %ymm2 + +// CHECK: vreducenepbf16 $123, -1024(,%ebp,2), %ymm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x56,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vreducenepbf16 $123, -1024(,%ebp,2), %ymm2 + +// CHECK: vreducenepbf16 $123, 4064(%ecx), %ymm2 {%k7} {z} +// CHECK: 
encoding: [0x62,0xf3,0x7f,0xaf,0x56,0x51,0x7f,0x7b] + vreducenepbf16 $123, 4064(%ecx), %ymm2 {%k7} {z} + +// CHECK: vreducenepbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0xbf,0x56,0x52,0x80,0x7b] + vreducenepbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z} + +// CHECK: vreducenepbf16 $123, 268435456(%esp,%esi,8), %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vreducenepbf16 $123, 268435456(%esp,%esi,8), %zmm2 + +// CHECK: vreducenepbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vreducenepbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7} + +// CHECK: vreducenepbf16 $123, (%eax){1to32}, %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x56,0x10,0x7b] + vreducenepbf16 $123, (%eax){1to32}, %zmm2 + +// CHECK: vreducenepbf16 $123, -2048(,%ebp,2), %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x56,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vreducenepbf16 $123, -2048(,%ebp,2), %zmm2 + +// CHECK: vreducenepbf16 $123, 8128(%ecx), %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x56,0x51,0x7f,0x7b] + vreducenepbf16 $123, 8128(%ecx), %zmm2 {%k7} {z} + +// CHECK: vreducenepbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0xdf,0x56,0x52,0x80,0x7b] + vreducenepbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z} + +// CHECK: vrndscalenepbf16 $123, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x08,0xd3,0x7b] + vrndscalenepbf16 $123, %zmm3, %zmm2 + +// CHECK: vrndscalenepbf16 $123, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x08,0xd3,0x7b] + vrndscalenepbf16 $123, %zmm3, %zmm2 {%k7} + +// CHECK: vrndscalenepbf16 $123, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x08,0xd3,0x7b] + vrndscalenepbf16 $123, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vrndscalenepbf16 $123, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x08,0xd3,0x7b] + vrndscalenepbf16 $123, 
%ymm3, %ymm2 + +// CHECK: vrndscalenepbf16 $123, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x08,0xd3,0x7b] + vrndscalenepbf16 $123, %ymm3, %ymm2 {%k7} + +// CHECK: vrndscalenepbf16 $123, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x08,0xd3,0x7b] + vrndscalenepbf16 $123, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vrndscalenepbf16 $123, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x08,0xd3,0x7b] + vrndscalenepbf16 $123, %xmm3, %xmm2 + +// CHECK: vrndscalenepbf16 $123, %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x08,0xd3,0x7b] + vrndscalenepbf16 $123, %xmm3, %xmm2 {%k7} + +// CHECK: vrndscalenepbf16 $123, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x08,0xd3,0x7b] + vrndscalenepbf16 $123, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vrndscalenepbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vrndscalenepbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7} + +// CHECK: vrndscalenepbf16 $123, (%eax){1to8}, %xmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x08,0x10,0x7b] + vrndscalenepbf16 $123, (%eax){1to8}, %xmm2 + +// CHECK: vrndscalenepbf16 $123, -512(,%ebp,2), %xmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x08,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vrndscalenepbf16 $123, -512(,%ebp,2), %xmm2 + +// CHECK: vrndscalenepbf16 $123, 2032(%ecx), %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x08,0x51,0x7f,0x7b] + vrndscalenepbf16 $123, 2032(%ecx), %xmm2 {%k7} {z} + +// CHECK: vrndscalenepbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0x9f,0x08,0x52,0x80,0x7b] + vrndscalenepbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z} + +// CHECK: vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %ymm2 +// 
CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %ymm2 + +// CHECK: vrndscalenepbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vrndscalenepbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7} + +// CHECK: vrndscalenepbf16 $123, (%eax){1to16}, %ymm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x08,0x10,0x7b] + vrndscalenepbf16 $123, (%eax){1to16}, %ymm2 + +// CHECK: vrndscalenepbf16 $123, -1024(,%ebp,2), %ymm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x08,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vrndscalenepbf16 $123, -1024(,%ebp,2), %ymm2 + +// CHECK: vrndscalenepbf16 $123, 4064(%ecx), %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x08,0x51,0x7f,0x7b] + vrndscalenepbf16 $123, 4064(%ecx), %ymm2 {%k7} {z} + +// CHECK: vrndscalenepbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0xbf,0x08,0x52,0x80,0x7b] + vrndscalenepbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z} + +// CHECK: vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %zmm2 + +// CHECK: vrndscalenepbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vrndscalenepbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7} + +// CHECK: vrndscalenepbf16 $123, (%eax){1to32}, %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x08,0x10,0x7b] + vrndscalenepbf16 $123, (%eax){1to32}, %zmm2 + +// CHECK: vrndscalenepbf16 $123, -2048(,%ebp,2), %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x08,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vrndscalenepbf16 $123, -2048(,%ebp,2), %zmm2 + +// CHECK: vrndscalenepbf16 $123, 8128(%ecx), %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x08,0x51,0x7f,0x7b] + vrndscalenepbf16 $123, 8128(%ecx), %zmm2 {%k7} {z} + 
+// CHECK: vrndscalenepbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf3,0x7f,0xdf,0x08,0x52,0x80,0x7b] + vrndscalenepbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z} + +// CHECK: vrsqrtpbf16 %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4e,0xd3] + vrsqrtpbf16 %xmm3, %xmm2 + +// CHECK: vrsqrtpbf16 %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4e,0xd3] + vrsqrtpbf16 %xmm3, %xmm2 {%k7} + +// CHECK: vrsqrtpbf16 %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4e,0xd3] + vrsqrtpbf16 %xmm3, %xmm2 {%k7} {z} + +// CHECK: vrsqrtpbf16 %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4e,0xd3] + vrsqrtpbf16 %zmm3, %zmm2 + +// CHECK: vrsqrtpbf16 %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4e,0xd3] + vrsqrtpbf16 %zmm3, %zmm2 {%k7} + +// CHECK: vrsqrtpbf16 %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4e,0xd3] + vrsqrtpbf16 %zmm3, %zmm2 {%k7} {z} + +// CHECK: vrsqrtpbf16 %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4e,0xd3] + vrsqrtpbf16 %ymm3, %ymm2 + +// CHECK: vrsqrtpbf16 %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4e,0xd3] + vrsqrtpbf16 %ymm3, %ymm2 {%k7} + +// CHECK: vrsqrtpbf16 %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4e,0xd3] + vrsqrtpbf16 %ymm3, %ymm2 {%k7} {z} + +// CHECK: vrsqrtpbf16 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10] + vrsqrtpbf16 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vrsqrtpbf16 291(%edi,%eax,4), %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00] + vrsqrtpbf16 291(%edi,%eax,4), %xmm2 {%k7} + +// CHECK: vrsqrtpbf16 (%eax){1to8}, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x18,0x4e,0x10] + vrsqrtpbf16 (%eax){1to8}, %xmm2 + +// CHECK: vrsqrtpbf16 -512(,%ebp,2), %xmm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4e,0x14,0x6d,0x00,0xfe,0xff,0xff] + vrsqrtpbf16 -512(,%ebp,2), %xmm2 + 
+// CHECK: vrsqrtpbf16 2032(%ecx), %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4e,0x51,0x7f] + vrsqrtpbf16 2032(%ecx), %xmm2 {%k7} {z} + +// CHECK: vrsqrtpbf16 -256(%edx){1to8}, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x7c,0x9f,0x4e,0x52,0x80] + vrsqrtpbf16 -256(%edx){1to8}, %xmm2 {%k7} {z} + +// CHECK: vrsqrtpbf16 268435456(%esp,%esi,8), %ymm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10] + vrsqrtpbf16 268435456(%esp,%esi,8), %ymm2 + +// CHECK: vrsqrtpbf16 291(%edi,%eax,4), %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00] + vrsqrtpbf16 291(%edi,%eax,4), %ymm2 {%k7} + +// CHECK: vrsqrtpbf16 (%eax){1to16}, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x38,0x4e,0x10] + vrsqrtpbf16 (%eax){1to16}, %ymm2 + +// CHECK: vrsqrtpbf16 -1024(,%ebp,2), %ymm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4e,0x14,0x6d,0x00,0xfc,0xff,0xff] + vrsqrtpbf16 -1024(,%ebp,2), %ymm2 + +// CHECK: vrsqrtpbf16 4064(%ecx), %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4e,0x51,0x7f] + vrsqrtpbf16 4064(%ecx), %ymm2 {%k7} {z} + +// CHECK: vrsqrtpbf16 -256(%edx){1to16}, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x7c,0xbf,0x4e,0x52,0x80] + vrsqrtpbf16 -256(%edx){1to16}, %ymm2 {%k7} {z} + +// CHECK: vrsqrtpbf16 268435456(%esp,%esi,8), %zmm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10] + vrsqrtpbf16 268435456(%esp,%esi,8), %zmm2 + +// CHECK: vrsqrtpbf16 291(%edi,%eax,4), %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00] + vrsqrtpbf16 291(%edi,%eax,4), %zmm2 {%k7} + +// CHECK: vrsqrtpbf16 (%eax){1to32}, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x58,0x4e,0x10] + vrsqrtpbf16 (%eax){1to32}, %zmm2 + +// CHECK: vrsqrtpbf16 -2048(,%ebp,2), %zmm2 +// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4e,0x14,0x6d,0x00,0xf8,0xff,0xff] + vrsqrtpbf16 -2048(,%ebp,2), %zmm2 + +// CHECK: vrsqrtpbf16 8128(%ecx), %zmm2 {%k7} {z} +// CHECK: 
encoding: [0x62,0xf6,0x7c,0xcf,0x4e,0x51,0x7f] + vrsqrtpbf16 8128(%ecx), %zmm2 {%k7} {z} + +// CHECK: vrsqrtpbf16 -256(%edx){1to32}, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x7c,0xdf,0x4e,0x52,0x80] + vrsqrtpbf16 -256(%edx){1to32}, %zmm2 {%k7} {z} + +// CHECK: vscalefpbf16 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x2c,0xd4] + vscalefpbf16 %ymm4, %ymm3, %ymm2 + +// CHECK: vscalefpbf16 %ymm4, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x2c,0xd4] + vscalefpbf16 %ymm4, %ymm3, %ymm2 {%k7} + +// CHECK: vscalefpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x2c,0xd4] + vscalefpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vscalefpbf16 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x2c,0xd4] + vscalefpbf16 %zmm4, %zmm3, %zmm2 + +// CHECK: vscalefpbf16 %zmm4, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x2c,0xd4] + vscalefpbf16 %zmm4, %zmm3, %zmm2 {%k7} + +// CHECK: vscalefpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x2c,0xd4] + vscalefpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vscalefpbf16 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x2c,0xd4] + vscalefpbf16 %xmm4, %xmm3, %xmm2 + +// CHECK: vscalefpbf16 %xmm4, %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x2c,0xd4] + vscalefpbf16 %xmm4, %xmm3, %xmm2 {%k7} + +// CHECK: vscalefpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x2c,0xd4] + vscalefpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vscalefpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10] + vscalefpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vscalefpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00] + vscalefpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} + +// CHECK: vscalefpbf16 (%eax){1to32}, %zmm3, %zmm2 
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x2c,0x10] + vscalefpbf16 (%eax){1to32}, %zmm3, %zmm2 + +// CHECK: vscalefpbf16 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x2c,0x14,0x6d,0x00,0xf8,0xff,0xff] + vscalefpbf16 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vscalefpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x2c,0x51,0x7f] + vscalefpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} + +// CHECK: vscalefpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x2c,0x52,0x80] + vscalefpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vscalefpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10] + vscalefpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: vscalefpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00] + vscalefpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} + +// CHECK: vscalefpbf16 (%eax){1to16}, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x2c,0x10] + vscalefpbf16 (%eax){1to16}, %ymm3, %ymm2 + +// CHECK: vscalefpbf16 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x2c,0x14,0x6d,0x00,0xfc,0xff,0xff] + vscalefpbf16 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: vscalefpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x2c,0x51,0x7f] + vscalefpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} + +// CHECK: vscalefpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x2c,0x52,0x80] + vscalefpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vscalefpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10] + vscalefpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: vscalefpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +// CHECK: encoding: 
[0x62,0xf6,0x64,0x0f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00] + vscalefpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} + +// CHECK: vscalefpbf16 (%eax){1to8}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x2c,0x10] + vscalefpbf16 (%eax){1to8}, %xmm3, %xmm2 + +// CHECK: vscalefpbf16 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x2c,0x14,0x6d,0x00,0xfe,0xff,0xff] + vscalefpbf16 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: vscalefpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x2c,0x51,0x7f] + vscalefpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} + +// CHECK: vscalefpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x2c,0x52,0x80] + vscalefpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vsqrtnepbf16 %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x51,0xd3] + vsqrtnepbf16 %xmm3, %xmm2 + +// CHECK: vsqrtnepbf16 %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x51,0xd3] + vsqrtnepbf16 %xmm3, %xmm2 {%k7} + +// CHECK: vsqrtnepbf16 %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x51,0xd3] + vsqrtnepbf16 %xmm3, %xmm2 {%k7} {z} + +// CHECK: vsqrtnepbf16 %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x51,0xd3] + vsqrtnepbf16 %zmm3, %zmm2 + +// CHECK: vsqrtnepbf16 %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x51,0xd3] + vsqrtnepbf16 %zmm3, %zmm2 {%k7} + +// CHECK: vsqrtnepbf16 %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x51,0xd3] + vsqrtnepbf16 %zmm3, %zmm2 {%k7} {z} + +// CHECK: vsqrtnepbf16 %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x51,0xd3] + vsqrtnepbf16 %ymm3, %ymm2 + +// CHECK: vsqrtnepbf16 %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x51,0xd3] + vsqrtnepbf16 %ymm3, %ymm2 {%k7} + +// CHECK: vsqrtnepbf16 %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x51,0xd3] + vsqrtnepbf16 %ymm3, %ymm2 {%k7} {z} + +// CHECK: vsqrtnepbf16 
268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x51,0x94,0xf4,0x00,0x00,0x00,0x10] + vsqrtnepbf16 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vsqrtnepbf16 291(%edi,%eax,4), %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x51,0x94,0x87,0x23,0x01,0x00,0x00] + vsqrtnepbf16 291(%edi,%eax,4), %xmm2 {%k7} + +// CHECK: vsqrtnepbf16 (%eax){1to8}, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x51,0x10] + vsqrtnepbf16 (%eax){1to8}, %xmm2 + +// CHECK: vsqrtnepbf16 -512(,%ebp,2), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x51,0x14,0x6d,0x00,0xfe,0xff,0xff] + vsqrtnepbf16 -512(,%ebp,2), %xmm2 + +// CHECK: vsqrtnepbf16 2032(%ecx), %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x51,0x51,0x7f] + vsqrtnepbf16 2032(%ecx), %xmm2 {%k7} {z} + +// CHECK: vsqrtnepbf16 -256(%edx){1to8}, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x51,0x52,0x80] + vsqrtnepbf16 -256(%edx){1to8}, %xmm2 {%k7} {z} + +// CHECK: vsqrtnepbf16 268435456(%esp,%esi,8), %ymm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x51,0x94,0xf4,0x00,0x00,0x00,0x10] + vsqrtnepbf16 268435456(%esp,%esi,8), %ymm2 + +// CHECK: vsqrtnepbf16 291(%edi,%eax,4), %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x51,0x94,0x87,0x23,0x01,0x00,0x00] + vsqrtnepbf16 291(%edi,%eax,4), %ymm2 {%k7} + +// CHECK: vsqrtnepbf16 (%eax){1to16}, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x51,0x10] + vsqrtnepbf16 (%eax){1to16}, %ymm2 + +// CHECK: vsqrtnepbf16 -1024(,%ebp,2), %ymm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x51,0x14,0x6d,0x00,0xfc,0xff,0xff] + vsqrtnepbf16 -1024(,%ebp,2), %ymm2 + +// CHECK: vsqrtnepbf16 4064(%ecx), %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x51,0x51,0x7f] + vsqrtnepbf16 4064(%ecx), %ymm2 {%k7} {z} + +// CHECK: vsqrtnepbf16 -256(%edx){1to16}, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x51,0x52,0x80] + vsqrtnepbf16 -256(%edx){1to16}, %ymm2 {%k7} {z} + +// CHECK: vsqrtnepbf16 268435456(%esp,%esi,8), %zmm2 +// CHECK: 
encoding: [0x62,0xf5,0x7d,0x48,0x51,0x94,0xf4,0x00,0x00,0x00,0x10] + vsqrtnepbf16 268435456(%esp,%esi,8), %zmm2 + +// CHECK: vsqrtnepbf16 291(%edi,%eax,4), %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x51,0x94,0x87,0x23,0x01,0x00,0x00] + vsqrtnepbf16 291(%edi,%eax,4), %zmm2 {%k7} + +// CHECK: vsqrtnepbf16 (%eax){1to32}, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x51,0x10] + vsqrtnepbf16 (%eax){1to32}, %zmm2 + +// CHECK: vsqrtnepbf16 -2048(,%ebp,2), %zmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x51,0x14,0x6d,0x00,0xf8,0xff,0xff] + vsqrtnepbf16 -2048(,%ebp,2), %zmm2 + +// CHECK: vsqrtnepbf16 8128(%ecx), %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x51,0x51,0x7f] + vsqrtnepbf16 8128(%ecx), %zmm2 {%k7} {z} + +// CHECK: vsqrtnepbf16 -256(%edx){1to32}, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x51,0x52,0x80] + vsqrtnepbf16 -256(%edx){1to32}, %zmm2 {%k7} {z} + +// CHECK: vsubnepbf16 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5c,0xd4] + vsubnepbf16 %ymm4, %ymm3, %ymm2 + +// CHECK: vsubnepbf16 %ymm4, %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5c,0xd4] + vsubnepbf16 %ymm4, %ymm3, %ymm2 {%k7} + +// CHECK: vsubnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5c,0xd4] + vsubnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vsubnepbf16 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5c,0xd4] + vsubnepbf16 %zmm4, %zmm3, %zmm2 + +// CHECK: vsubnepbf16 %zmm4, %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5c,0xd4] + vsubnepbf16 %zmm4, %zmm3, %zmm2 {%k7} + +// CHECK: vsubnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5c,0xd4] + vsubnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vsubnepbf16 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5c,0xd4] + vsubnepbf16 %xmm4, %xmm3, %xmm2 + +// CHECK: vsubnepbf16 %xmm4, %xmm3, %xmm2 {%k7} +// CHECK: encoding: 
[0x62,0xf5,0x65,0x0f,0x5c,0xd4] + vsubnepbf16 %xmm4, %xmm3, %xmm2 {%k7} + +// CHECK: vsubnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5c,0xd4] + vsubnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z} + +// CHECK: vsubnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10] + vsubnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vsubnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00] + vsubnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} + +// CHECK: vsubnepbf16 (%eax){1to32}, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5c,0x10] + vsubnepbf16 (%eax){1to32}, %zmm3, %zmm2 + +// CHECK: vsubnepbf16 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5c,0x14,0x6d,0x00,0xf8,0xff,0xff] + vsubnepbf16 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vsubnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5c,0x51,0x7f] + vsubnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z} + +// CHECK: vsubnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5c,0x52,0x80] + vsubnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z} + +// CHECK: vsubnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10] + vsubnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: vsubnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00] + vsubnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} + +// CHECK: vsubnepbf16 (%eax){1to16}, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5c,0x10] + vsubnepbf16 (%eax){1to16}, %ymm3, %ymm2 + +// CHECK: vsubnepbf16 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5c,0x14,0x6d,0x00,0xfc,0xff,0xff] + vsubnepbf16 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: 
vsubnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5c,0x51,0x7f] + vsubnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z} + +// CHECK: vsubnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5c,0x52,0x80] + vsubnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z} + +// CHECK: vsubnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10] + vsubnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: vsubnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00] + vsubnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} + +// CHECK: vsubnepbf16 (%eax){1to8}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5c,0x10] + vsubnepbf16 (%eax){1to8}, %xmm3, %xmm2 + +// CHECK: vsubnepbf16 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5c,0x14,0x6d,0x00,0xfe,0xff,0xff] + vsubnepbf16 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: vsubnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5c,0x51,0x7f] + vsubnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z} + +// CHECK: vsubnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} +// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5c,0x52,0x80] + vsubnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z} + diff --git a/llvm/test/MC/X86/avx10.2-bf16-32-intel.s b/llvm/test/MC/X86/avx10.2-bf16-32-intel.s new file mode 100644 index 00000000000000..30c2cf45297bc0 --- /dev/null +++ b/llvm/test/MC/X86/avx10.2-bf16-32-intel.s @@ -0,0 +1,3014 @@ +// RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vaddnepbf16 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x58,0xd4] + vaddnepbf16 ymm2, ymm3, ymm4 + +// CHECK: vaddnepbf16 ymm2 {k7}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x58,0xd4] + vaddnepbf16 ymm2 {k7}, ymm3, ymm4 + +// 
CHECK: vaddnepbf16 ymm2 {k7} {z}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x58,0xd4] + vaddnepbf16 ymm2 {k7} {z}, ymm3, ymm4 + +// CHECK: vaddnepbf16 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x58,0xd4] + vaddnepbf16 zmm2, zmm3, zmm4 + +// CHECK: vaddnepbf16 zmm2 {k7}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x58,0xd4] + vaddnepbf16 zmm2 {k7}, zmm3, zmm4 + +// CHECK: vaddnepbf16 zmm2 {k7} {z}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x58,0xd4] + vaddnepbf16 zmm2 {k7} {z}, zmm3, zmm4 + +// CHECK: vaddnepbf16 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x58,0xd4] + vaddnepbf16 xmm2, xmm3, xmm4 + +// CHECK: vaddnepbf16 xmm2 {k7}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x58,0xd4] + vaddnepbf16 xmm2 {k7}, xmm3, xmm4 + +// CHECK: vaddnepbf16 xmm2 {k7} {z}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x58,0xd4] + vaddnepbf16 xmm2 {k7} {z}, xmm3, xmm4 + +// CHECK: vaddnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x58,0x94,0xf4,0x00,0x00,0x00,0x10] + vaddnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vaddnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x58,0x94,0x87,0x23,0x01,0x00,0x00] + vaddnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vaddnepbf16 zmm2, zmm3, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x58,0x10] + vaddnepbf16 zmm2, zmm3, word ptr [eax]{1to32} + +// CHECK: vaddnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x58,0x14,0x6d,0x00,0xf8,0xff,0xff] + vaddnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vaddnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x58,0x51,0x7f] + vaddnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vaddnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} 
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x58,0x52,0x80] + vaddnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} + +// CHECK: vaddnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x58,0x94,0xf4,0x00,0x00,0x00,0x10] + vaddnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vaddnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x58,0x94,0x87,0x23,0x01,0x00,0x00] + vaddnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vaddnepbf16 ymm2, ymm3, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x58,0x10] + vaddnepbf16 ymm2, ymm3, word ptr [eax]{1to16} + +// CHECK: vaddnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x58,0x14,0x6d,0x00,0xfc,0xff,0xff] + vaddnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: vaddnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x58,0x51,0x7f] + vaddnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: vaddnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x58,0x52,0x80] + vaddnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} + +// CHECK: vaddnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x58,0x94,0xf4,0x00,0x00,0x00,0x10] + vaddnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vaddnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x58,0x94,0x87,0x23,0x01,0x00,0x00] + vaddnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vaddnepbf16 xmm2, xmm3, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x58,0x10] + vaddnepbf16 xmm2, xmm3, word ptr [eax]{1to8} + +// CHECK: vaddnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: 
[0x62,0xf5,0x65,0x08,0x58,0x14,0x6d,0x00,0xfe,0xff,0xff] + vaddnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: vaddnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x58,0x51,0x7f] + vaddnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: vaddnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x58,0x52,0x80] + vaddnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} + +// CHECK: vcmppbf16 k5, ymm3, ymm4, 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x28,0xc2,0xec,0x7b] + vcmppbf16 k5, ymm3, ymm4, 123 + +// CHECK: vcmppbf16 k5 {k7}, ymm3, ymm4, 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x2f,0xc2,0xec,0x7b] + vcmppbf16 k5 {k7}, ymm3, ymm4, 123 + +// CHECK: vcmppbf16 k5, xmm3, xmm4, 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x08,0xc2,0xec,0x7b] + vcmppbf16 k5, xmm3, xmm4, 123 + +// CHECK: vcmppbf16 k5 {k7}, xmm3, xmm4, 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x0f,0xc2,0xec,0x7b] + vcmppbf16 k5 {k7}, xmm3, xmm4, 123 + +// CHECK: vcmppbf16 k5, zmm3, zmm4, 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x48,0xc2,0xec,0x7b] + vcmppbf16 k5, zmm3, zmm4, 123 + +// CHECK: vcmppbf16 k5 {k7}, zmm3, zmm4, 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x4f,0xc2,0xec,0x7b] + vcmppbf16 k5 {k7}, zmm3, zmm4, 123 + +// CHECK: vcmppbf16 k5, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x48,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b] + vcmppbf16 k5, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123 + +// CHECK: vcmppbf16 k5 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x4f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b] + vcmppbf16 k5 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123 + +// CHECK: vcmppbf16 k5, zmm3, word ptr [eax]{1to32}, 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x58,0xc2,0x28,0x7b] + vcmppbf16 k5, zmm3, word ptr [eax]{1to32}, 123 + +// CHECK: vcmppbf16 k5, zmm3, zmmword ptr [2*ebp - 2048], 123 
+// CHECK: encoding: [0x62,0xf3,0x67,0x48,0xc2,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vcmppbf16 k5, zmm3, zmmword ptr [2*ebp - 2048], 123 + +// CHECK: vcmppbf16 k5 {k7}, zmm3, zmmword ptr [ecx + 8128], 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x4f,0xc2,0x69,0x7f,0x7b] + vcmppbf16 k5 {k7}, zmm3, zmmword ptr [ecx + 8128], 123 + +// CHECK: vcmppbf16 k5 {k7}, zmm3, word ptr [edx - 256]{1to32}, 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x5f,0xc2,0x6a,0x80,0x7b] + vcmppbf16 k5 {k7}, zmm3, word ptr [edx - 256]{1to32}, 123 + +// CHECK: vcmppbf16 k5, xmm3, xmmword ptr [esp + 8*esi + 268435456], 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x08,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b] + vcmppbf16 k5, xmm3, xmmword ptr [esp + 8*esi + 268435456], 123 + +// CHECK: vcmppbf16 k5 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x0f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b] + vcmppbf16 k5 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123 + +// CHECK: vcmppbf16 k5, xmm3, word ptr [eax]{1to8}, 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x18,0xc2,0x28,0x7b] + vcmppbf16 k5, xmm3, word ptr [eax]{1to8}, 123 + +// CHECK: vcmppbf16 k5, xmm3, xmmword ptr [2*ebp - 512], 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x08,0xc2,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vcmppbf16 k5, xmm3, xmmword ptr [2*ebp - 512], 123 + +// CHECK: vcmppbf16 k5 {k7}, xmm3, xmmword ptr [ecx + 2032], 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x0f,0xc2,0x69,0x7f,0x7b] + vcmppbf16 k5 {k7}, xmm3, xmmword ptr [ecx + 2032], 123 + +// CHECK: vcmppbf16 k5 {k7}, xmm3, word ptr [edx - 256]{1to8}, 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x1f,0xc2,0x6a,0x80,0x7b] + vcmppbf16 k5 {k7}, xmm3, word ptr [edx - 256]{1to8}, 123 + +// CHECK: vcmppbf16 k5, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x28,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b] + vcmppbf16 k5, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123 + +// CHECK: vcmppbf16 k5 {k7}, ymm3, ymmword ptr [edi + 4*eax + 
291], 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x2f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b] + vcmppbf16 k5 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291], 123 + +// CHECK: vcmppbf16 k5, ymm3, word ptr [eax]{1to16}, 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x38,0xc2,0x28,0x7b] + vcmppbf16 k5, ymm3, word ptr [eax]{1to16}, 123 + +// CHECK: vcmppbf16 k5, ymm3, ymmword ptr [2*ebp - 1024], 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x28,0xc2,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vcmppbf16 k5, ymm3, ymmword ptr [2*ebp - 1024], 123 + +// CHECK: vcmppbf16 k5 {k7}, ymm3, ymmword ptr [ecx + 4064], 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x2f,0xc2,0x69,0x7f,0x7b] + vcmppbf16 k5 {k7}, ymm3, ymmword ptr [ecx + 4064], 123 + +// CHECK: vcmppbf16 k5 {k7}, ymm3, word ptr [edx - 256]{1to16}, 123 +// CHECK: encoding: [0x62,0xf3,0x67,0x3f,0xc2,0x6a,0x80,0x7b] + vcmppbf16 k5 {k7}, ymm3, word ptr [edx - 256]{1to16}, 123 + +// CHECK: vcomsbf16 xmm2, xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xd3] + vcomsbf16 xmm2, xmm3 + +// CHECK: vcomsbf16 xmm2, word ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10] + vcomsbf16 xmm2, word ptr [esp + 8*esi + 268435456] + +// CHECK: vcomsbf16 xmm2, word ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00] + vcomsbf16 xmm2, word ptr [edi + 4*eax + 291] + +// CHECK: vcomsbf16 xmm2, word ptr [eax] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x10] + vcomsbf16 xmm2, word ptr [eax] + +// CHECK: vcomsbf16 xmm2, word ptr [2*ebp - 64] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff] + vcomsbf16 xmm2, word ptr [2*ebp - 64] + +// CHECK: vcomsbf16 xmm2, word ptr [ecx + 254] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x51,0x7f] + vcomsbf16 xmm2, word ptr [ecx + 254] + +// CHECK: vcomsbf16 xmm2, word ptr [edx - 256] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x52,0x80] + vcomsbf16 xmm2, word ptr [edx - 256] + +// 
CHECK: vdivnepbf16 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5e,0xd4] + vdivnepbf16 ymm2, ymm3, ymm4 + +// CHECK: vdivnepbf16 ymm2 {k7}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5e,0xd4] + vdivnepbf16 ymm2 {k7}, ymm3, ymm4 + +// CHECK: vdivnepbf16 ymm2 {k7} {z}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5e,0xd4] + vdivnepbf16 ymm2 {k7} {z}, ymm3, ymm4 + +// CHECK: vdivnepbf16 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5e,0xd4] + vdivnepbf16 zmm2, zmm3, zmm4 + +// CHECK: vdivnepbf16 zmm2 {k7}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5e,0xd4] + vdivnepbf16 zmm2 {k7}, zmm3, zmm4 + +// CHECK: vdivnepbf16 zmm2 {k7} {z}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5e,0xd4] + vdivnepbf16 zmm2 {k7} {z}, zmm3, zmm4 + +// CHECK: vdivnepbf16 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5e,0xd4] + vdivnepbf16 xmm2, xmm3, xmm4 + +// CHECK: vdivnepbf16 xmm2 {k7}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5e,0xd4] + vdivnepbf16 xmm2 {k7}, xmm3, xmm4 + +// CHECK: vdivnepbf16 xmm2 {k7} {z}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5e,0xd4] + vdivnepbf16 xmm2 {k7} {z}, xmm3, xmm4 + +// CHECK: vdivnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10] + vdivnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vdivnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00] + vdivnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vdivnepbf16 zmm2, zmm3, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5e,0x10] + vdivnepbf16 zmm2, zmm3, word ptr [eax]{1to32} + +// CHECK: vdivnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5e,0x14,0x6d,0x00,0xf8,0xff,0xff] + vdivnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] 
+ +// CHECK: vdivnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5e,0x51,0x7f] + vdivnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vdivnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5e,0x52,0x80] + vdivnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} + +// CHECK: vdivnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10] + vdivnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vdivnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00] + vdivnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vdivnepbf16 ymm2, ymm3, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5e,0x10] + vdivnepbf16 ymm2, ymm3, word ptr [eax]{1to16} + +// CHECK: vdivnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5e,0x14,0x6d,0x00,0xfc,0xff,0xff] + vdivnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: vdivnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5e,0x51,0x7f] + vdivnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: vdivnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5e,0x52,0x80] + vdivnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} + +// CHECK: vdivnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10] + vdivnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vdivnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00] + vdivnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] + +// CHECK: 
vdivnepbf16 xmm2, xmm3, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5e,0x10] + vdivnepbf16 xmm2, xmm3, word ptr [eax]{1to8} + +// CHECK: vdivnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5e,0x14,0x6d,0x00,0xfe,0xff,0xff] + vdivnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: vdivnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5e,0x51,0x7f] + vdivnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: vdivnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5e,0x52,0x80] + vdivnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} + +// CHECK: vfmadd132nepbf16 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x98,0xd4] + vfmadd132nepbf16 ymm2, ymm3, ymm4 + +// CHECK: vfmadd132nepbf16 ymm2 {k7}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x98,0xd4] + vfmadd132nepbf16 ymm2 {k7}, ymm3, ymm4 + +// CHECK: vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x98,0xd4] + vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymm4 + +// CHECK: vfmadd132nepbf16 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x98,0xd4] + vfmadd132nepbf16 zmm2, zmm3, zmm4 + +// CHECK: vfmadd132nepbf16 zmm2 {k7}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x98,0xd4] + vfmadd132nepbf16 zmm2 {k7}, zmm3, zmm4 + +// CHECK: vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x98,0xd4] + vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmm4 + +// CHECK: vfmadd132nepbf16 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x98,0xd4] + vfmadd132nepbf16 xmm2, xmm3, xmm4 + +// CHECK: vfmadd132nepbf16 xmm2 {k7}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x98,0xd4] + vfmadd132nepbf16 xmm2 {k7}, xmm3, xmm4 + +// CHECK: vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x98,0xd4] + 
vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmm4 + +// CHECK: vfmadd132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x98,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmadd132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfmadd132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x98,0x94,0x87,0x23,0x01,0x00,0x00] + vfmadd132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vfmadd132nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x98,0x10] + vfmadd132nepbf16 zmm2, zmm3, word ptr [eax]{1to32} + +// CHECK: vfmadd132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x98,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfmadd132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x98,0x51,0x7f] + vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x98,0x52,0x80] + vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} + +// CHECK: vfmadd132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x98,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmadd132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfmadd132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x98,0x94,0x87,0x23,0x01,0x00,0x00] + vfmadd132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vfmadd132nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x98,0x10] + vfmadd132nepbf16 ymm2, ymm3, word ptr [eax]{1to16} + +// CHECK: vfmadd132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: 
[0x62,0xf6,0x64,0x28,0x98,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfmadd132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x98,0x51,0x7f] + vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x98,0x52,0x80] + vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} + +// CHECK: vfmadd132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x98,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmadd132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfmadd132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x98,0x94,0x87,0x23,0x01,0x00,0x00] + vfmadd132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vfmadd132nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x98,0x10] + vfmadd132nepbf16 xmm2, xmm3, word ptr [eax]{1to8} + +// CHECK: vfmadd132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x98,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfmadd132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x98,0x51,0x7f] + vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x98,0x52,0x80] + vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} + +// CHECK: vfmadd213nepbf16 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xa8,0xd4] + vfmadd213nepbf16 ymm2, ymm3, ymm4 + +// CHECK: vfmadd213nepbf16 ymm2 {k7}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xa8,0xd4] + 
vfmadd213nepbf16 ymm2 {k7}, ymm3, ymm4 + +// CHECK: vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xa8,0xd4] + vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymm4 + +// CHECK: vfmadd213nepbf16 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xa8,0xd4] + vfmadd213nepbf16 zmm2, zmm3, zmm4 + +// CHECK: vfmadd213nepbf16 zmm2 {k7}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xa8,0xd4] + vfmadd213nepbf16 zmm2 {k7}, zmm3, zmm4 + +// CHECK: vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xa8,0xd4] + vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmm4 + +// CHECK: vfmadd213nepbf16 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xa8,0xd4] + vfmadd213nepbf16 xmm2, xmm3, xmm4 + +// CHECK: vfmadd213nepbf16 xmm2 {k7}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xa8,0xd4] + vfmadd213nepbf16 xmm2 {k7}, xmm3, xmm4 + +// CHECK: vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xa8,0xd4] + vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmm4 + +// CHECK: vfmadd213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmadd213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfmadd213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00] + vfmadd213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vfmadd213nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xa8,0x10] + vfmadd213nepbf16 zmm2, zmm3, word ptr [eax]{1to32} + +// CHECK: vfmadd213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xa8,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfmadd213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: 
[0x62,0xf6,0x64,0xcf,0xa8,0x51,0x7f] + vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xa8,0x52,0x80] + vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} + +// CHECK: vfmadd213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmadd213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfmadd213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00] + vfmadd213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vfmadd213nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xa8,0x10] + vfmadd213nepbf16 ymm2, ymm3, word ptr [eax]{1to16} + +// CHECK: vfmadd213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xa8,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfmadd213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xa8,0x51,0x7f] + vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xa8,0x52,0x80] + vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} + +// CHECK: vfmadd213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmadd213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfmadd213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00] + vfmadd213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] + +// 
CHECK: vfmadd213nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xa8,0x10] + vfmadd213nepbf16 xmm2, xmm3, word ptr [eax]{1to8} + +// CHECK: vfmadd213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xa8,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfmadd213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xa8,0x51,0x7f] + vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xa8,0x52,0x80] + vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} + +// CHECK: vfmadd231nepbf16 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xb8,0xd4] + vfmadd231nepbf16 ymm2, ymm3, ymm4 + +// CHECK: vfmadd231nepbf16 ymm2 {k7}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xb8,0xd4] + vfmadd231nepbf16 ymm2 {k7}, ymm3, ymm4 + +// CHECK: vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xb8,0xd4] + vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymm4 + +// CHECK: vfmadd231nepbf16 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xb8,0xd4] + vfmadd231nepbf16 zmm2, zmm3, zmm4 + +// CHECK: vfmadd231nepbf16 zmm2 {k7}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xb8,0xd4] + vfmadd231nepbf16 zmm2 {k7}, zmm3, zmm4 + +// CHECK: vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xb8,0xd4] + vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmm4 + +// CHECK: vfmadd231nepbf16 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xb8,0xd4] + vfmadd231nepbf16 xmm2, xmm3, xmm4 + +// CHECK: vfmadd231nepbf16 xmm2 {k7}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xb8,0xd4] + vfmadd231nepbf16 xmm2 {k7}, xmm3, xmm4 + +// CHECK: vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +// CHECK: encoding: 
[0x62,0xf6,0x64,0x8f,0xb8,0xd4] + vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmm4 + +// CHECK: vfmadd231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmadd231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfmadd231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00] + vfmadd231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vfmadd231nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xb8,0x10] + vfmadd231nepbf16 zmm2, zmm3, word ptr [eax]{1to32} + +// CHECK: vfmadd231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xb8,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfmadd231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xb8,0x51,0x7f] + vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xb8,0x52,0x80] + vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} + +// CHECK: vfmadd231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmadd231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfmadd231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00] + vfmadd231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vfmadd231nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xb8,0x10] + vfmadd231nepbf16 ymm2, ymm3, word ptr [eax]{1to16} + +// CHECK: vfmadd231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] 
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xb8,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfmadd231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xb8,0x51,0x7f] + vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xb8,0x52,0x80] + vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} + +// CHECK: vfmadd231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmadd231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfmadd231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00] + vfmadd231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vfmadd231nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xb8,0x10] + vfmadd231nepbf16 xmm2, xmm3, word ptr [eax]{1to8} + +// CHECK: vfmadd231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xb8,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfmadd231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xb8,0x51,0x7f] + vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xb8,0x52,0x80] + vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} + +// CHECK: vfmsub132nepbf16 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9a,0xd4] + vfmsub132nepbf16 ymm2, ymm3, ymm4 + +// CHECK: vfmsub132nepbf16 ymm2 {k7}, ymm3, ymm4 +// CHECK: encoding: 
[0x62,0xf6,0x64,0x2f,0x9a,0xd4] + vfmsub132nepbf16 ymm2 {k7}, ymm3, ymm4 + +// CHECK: vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9a,0xd4] + vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymm4 + +// CHECK: vfmsub132nepbf16 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9a,0xd4] + vfmsub132nepbf16 zmm2, zmm3, zmm4 + +// CHECK: vfmsub132nepbf16 zmm2 {k7}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9a,0xd4] + vfmsub132nepbf16 zmm2 {k7}, zmm3, zmm4 + +// CHECK: vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9a,0xd4] + vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmm4 + +// CHECK: vfmsub132nepbf16 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9a,0xd4] + vfmsub132nepbf16 xmm2, xmm3, xmm4 + +// CHECK: vfmsub132nepbf16 xmm2 {k7}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9a,0xd4] + vfmsub132nepbf16 xmm2 {k7}, xmm3, xmm4 + +// CHECK: vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9a,0xd4] + vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmm4 + +// CHECK: vfmsub132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmsub132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfmsub132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00] + vfmsub132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vfmsub132nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x9a,0x10] + vfmsub132nepbf16 zmm2, zmm3, word ptr [eax]{1to32} + +// CHECK: vfmsub132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9a,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfmsub132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 
8128] +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9a,0x51,0x7f] + vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x9a,0x52,0x80] + vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} + +// CHECK: vfmsub132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmsub132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfmsub132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00] + vfmsub132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vfmsub132nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x9a,0x10] + vfmsub132nepbf16 ymm2, ymm3, word ptr [eax]{1to16} + +// CHECK: vfmsub132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9a,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfmsub132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9a,0x51,0x7f] + vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x9a,0x52,0x80] + vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} + +// CHECK: vfmsub132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmsub132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfmsub132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00] + vfmsub132nepbf16 xmm2 {k7}, xmm3, xmmword ptr 
[edi + 4*eax + 291] + +// CHECK: vfmsub132nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x9a,0x10] + vfmsub132nepbf16 xmm2, xmm3, word ptr [eax]{1to8} + +// CHECK: vfmsub132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9a,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfmsub132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9a,0x51,0x7f] + vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x9a,0x52,0x80] + vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} + +// CHECK: vfmsub213nepbf16 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xaa,0xd4] + vfmsub213nepbf16 ymm2, ymm3, ymm4 + +// CHECK: vfmsub213nepbf16 ymm2 {k7}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xaa,0xd4] + vfmsub213nepbf16 ymm2 {k7}, ymm3, ymm4 + +// CHECK: vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xaa,0xd4] + vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymm4 + +// CHECK: vfmsub213nepbf16 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xaa,0xd4] + vfmsub213nepbf16 zmm2, zmm3, zmm4 + +// CHECK: vfmsub213nepbf16 zmm2 {k7}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xaa,0xd4] + vfmsub213nepbf16 zmm2 {k7}, zmm3, zmm4 + +// CHECK: vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xaa,0xd4] + vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmm4 + +// CHECK: vfmsub213nepbf16 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xaa,0xd4] + vfmsub213nepbf16 xmm2, xmm3, xmm4 + +// CHECK: vfmsub213nepbf16 xmm2 {k7}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xaa,0xd4] + vfmsub213nepbf16 xmm2 {k7}, xmm3, xmm4 + +// CHECK: vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, 
xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xaa,0xd4] + vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmm4 + +// CHECK: vfmsub213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmsub213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfmsub213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00] + vfmsub213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vfmsub213nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xaa,0x10] + vfmsub213nepbf16 zmm2, zmm3, word ptr [eax]{1to32} + +// CHECK: vfmsub213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xaa,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfmsub213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xaa,0x51,0x7f] + vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xaa,0x52,0x80] + vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} + +// CHECK: vfmsub213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmsub213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfmsub213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00] + vfmsub213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vfmsub213nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xaa,0x10] + vfmsub213nepbf16 ymm2, ymm3, word ptr [eax]{1to16} + +// CHECK: vfmsub213nepbf16 ymm2, ymm3, 
ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xaa,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfmsub213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xaa,0x51,0x7f] + vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xaa,0x52,0x80] + vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} + +// CHECK: vfmsub213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmsub213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfmsub213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00] + vfmsub213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vfmsub213nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xaa,0x10] + vfmsub213nepbf16 xmm2, xmm3, word ptr [eax]{1to8} + +// CHECK: vfmsub213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xaa,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfmsub213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xaa,0x51,0x7f] + vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xaa,0x52,0x80] + vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} + +// CHECK: vfmsub231nepbf16 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xba,0xd4] + vfmsub231nepbf16 ymm2, ymm3, ymm4 + +// CHECK: vfmsub231nepbf16 ymm2 {k7}, ymm3, ymm4 +// CHECK: 
encoding: [0x62,0xf6,0x64,0x2f,0xba,0xd4] + vfmsub231nepbf16 ymm2 {k7}, ymm3, ymm4 + +// CHECK: vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xba,0xd4] + vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymm4 + +// CHECK: vfmsub231nepbf16 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xba,0xd4] + vfmsub231nepbf16 zmm2, zmm3, zmm4 + +// CHECK: vfmsub231nepbf16 zmm2 {k7}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xba,0xd4] + vfmsub231nepbf16 zmm2 {k7}, zmm3, zmm4 + +// CHECK: vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xba,0xd4] + vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmm4 + +// CHECK: vfmsub231nepbf16 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xba,0xd4] + vfmsub231nepbf16 xmm2, xmm3, xmm4 + +// CHECK: vfmsub231nepbf16 xmm2 {k7}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xba,0xd4] + vfmsub231nepbf16 xmm2 {k7}, xmm3, xmm4 + +// CHECK: vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xba,0xd4] + vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmm4 + +// CHECK: vfmsub231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xba,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmsub231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfmsub231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xba,0x94,0x87,0x23,0x01,0x00,0x00] + vfmsub231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vfmsub231nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xba,0x10] + vfmsub231nepbf16 zmm2, zmm3, word ptr [eax]{1to32} + +// CHECK: vfmsub231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xba,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfmsub231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr 
[ecx + 8128] +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xba,0x51,0x7f] + vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xba,0x52,0x80] + vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} + +// CHECK: vfmsub231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xba,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmsub231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfmsub231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xba,0x94,0x87,0x23,0x01,0x00,0x00] + vfmsub231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vfmsub231nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xba,0x10] + vfmsub231nepbf16 ymm2, ymm3, word ptr [eax]{1to16} + +// CHECK: vfmsub231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xba,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfmsub231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xba,0x51,0x7f] + vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xba,0x52,0x80] + vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} + +// CHECK: vfmsub231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xba,0x94,0xf4,0x00,0x00,0x00,0x10] + vfmsub231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfmsub231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xba,0x94,0x87,0x23,0x01,0x00,0x00] + vfmsub231nepbf16 xmm2 {k7}, xmm3, xmmword 
ptr [edi + 4*eax + 291] + +// CHECK: vfmsub231nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xba,0x10] + vfmsub231nepbf16 xmm2, xmm3, word ptr [eax]{1to8} + +// CHECK: vfmsub231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xba,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfmsub231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xba,0x51,0x7f] + vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xba,0x52,0x80] + vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} + +// CHECK: vfnmadd132nepbf16 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9c,0xd4] + vfnmadd132nepbf16 ymm2, ymm3, ymm4 + +// CHECK: vfnmadd132nepbf16 ymm2 {k7}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9c,0xd4] + vfnmadd132nepbf16 ymm2 {k7}, ymm3, ymm4 + +// CHECK: vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9c,0xd4] + vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymm4 + +// CHECK: vfnmadd132nepbf16 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9c,0xd4] + vfnmadd132nepbf16 zmm2, zmm3, zmm4 + +// CHECK: vfnmadd132nepbf16 zmm2 {k7}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9c,0xd4] + vfnmadd132nepbf16 zmm2 {k7}, zmm3, zmm4 + +// CHECK: vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9c,0xd4] + vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmm4 + +// CHECK: vfnmadd132nepbf16 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9c,0xd4] + vfnmadd132nepbf16 xmm2, xmm3, xmm4 + +// CHECK: vfnmadd132nepbf16 xmm2 {k7}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9c,0xd4] + vfnmadd132nepbf16 xmm2 {k7}, xmm3, xmm4 + +// CHECK: vfnmadd132nepbf16 
xmm2 {k7} {z}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9c,0xd4] + vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmm4 + +// CHECK: vfnmadd132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmadd132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfnmadd132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmadd132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vfnmadd132nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x9c,0x10] + vfnmadd132nepbf16 zmm2, zmm3, word ptr [eax]{1to32} + +// CHECK: vfnmadd132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9c,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfnmadd132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9c,0x51,0x7f] + vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x9c,0x52,0x80] + vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} + +// CHECK: vfnmadd132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmadd132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfnmadd132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmadd132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vfnmadd132nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x9c,0x10] + vfnmadd132nepbf16 ymm2, ymm3, word ptr [eax]{1to16} + 
+// CHECK: vfnmadd132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9c,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfnmadd132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9c,0x51,0x7f] + vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x9c,0x52,0x80] + vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} + +// CHECK: vfnmadd132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmadd132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfnmadd132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmadd132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vfnmadd132nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x9c,0x10] + vfnmadd132nepbf16 xmm2, xmm3, word ptr [eax]{1to8} + +// CHECK: vfnmadd132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9c,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfnmadd132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9c,0x51,0x7f] + vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x9c,0x52,0x80] + vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} + +// CHECK: vfnmadd213nepbf16 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xac,0xd4] + vfnmadd213nepbf16 ymm2, ymm3, ymm4 + +// 
CHECK: vfnmadd213nepbf16 ymm2 {k7}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xac,0xd4] + vfnmadd213nepbf16 ymm2 {k7}, ymm3, ymm4 + +// CHECK: vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xac,0xd4] + vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymm4 + +// CHECK: vfnmadd213nepbf16 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xac,0xd4] + vfnmadd213nepbf16 zmm2, zmm3, zmm4 + +// CHECK: vfnmadd213nepbf16 zmm2 {k7}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xac,0xd4] + vfnmadd213nepbf16 zmm2 {k7}, zmm3, zmm4 + +// CHECK: vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xac,0xd4] + vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmm4 + +// CHECK: vfnmadd213nepbf16 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xac,0xd4] + vfnmadd213nepbf16 xmm2, xmm3, xmm4 + +// CHECK: vfnmadd213nepbf16 xmm2 {k7}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xac,0xd4] + vfnmadd213nepbf16 xmm2 {k7}, xmm3, xmm4 + +// CHECK: vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xac,0xd4] + vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmm4 + +// CHECK: vfnmadd213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xac,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmadd213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfnmadd213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xac,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmadd213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vfnmadd213nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xac,0x10] + vfnmadd213nepbf16 zmm2, zmm3, word ptr [eax]{1to32} + +// CHECK: vfnmadd213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xac,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfnmadd213nepbf16 zmm2, zmm3, zmmword 
ptr [2*ebp - 2048] + +// CHECK: vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xac,0x51,0x7f] + vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xac,0x52,0x80] + vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} + +// CHECK: vfnmadd213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xac,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmadd213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfnmadd213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xac,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmadd213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vfnmadd213nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xac,0x10] + vfnmadd213nepbf16 ymm2, ymm3, word ptr [eax]{1to16} + +// CHECK: vfnmadd213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xac,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfnmadd213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xac,0x51,0x7f] + vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xac,0x52,0x80] + vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} + +// CHECK: vfnmadd213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xac,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmadd213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfnmadd213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: 
[0x62,0xf6,0x64,0x0f,0xac,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmadd213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vfnmadd213nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xac,0x10] + vfnmadd213nepbf16 xmm2, xmm3, word ptr [eax]{1to8} + +// CHECK: vfnmadd213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xac,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfnmadd213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xac,0x51,0x7f] + vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xac,0x52,0x80] + vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} + +// CHECK: vfnmadd231nepbf16 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbc,0xd4] + vfnmadd231nepbf16 ymm2, ymm3, ymm4 + +// CHECK: vfnmadd231nepbf16 ymm2 {k7}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbc,0xd4] + vfnmadd231nepbf16 ymm2 {k7}, ymm3, ymm4 + +// CHECK: vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbc,0xd4] + vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymm4 + +// CHECK: vfnmadd231nepbf16 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbc,0xd4] + vfnmadd231nepbf16 zmm2, zmm3, zmm4 + +// CHECK: vfnmadd231nepbf16 zmm2 {k7}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbc,0xd4] + vfnmadd231nepbf16 zmm2 {k7}, zmm3, zmm4 + +// CHECK: vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbc,0xd4] + vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmm4 + +// CHECK: vfnmadd231nepbf16 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbc,0xd4] + vfnmadd231nepbf16 xmm2, xmm3, xmm4 + +// CHECK: vfnmadd231nepbf16 xmm2 {k7}, xmm3, xmm4 +// CHECK: 
encoding: [0x62,0xf6,0x64,0x0f,0xbc,0xd4] + vfnmadd231nepbf16 xmm2 {k7}, xmm3, xmm4 + +// CHECK: vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbc,0xd4] + vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmm4 + +// CHECK: vfnmadd231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmadd231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfnmadd231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmadd231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vfnmadd231nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xbc,0x10] + vfnmadd231nepbf16 zmm2, zmm3, word ptr [eax]{1to32} + +// CHECK: vfnmadd231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbc,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfnmadd231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbc,0x51,0x7f] + vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xbc,0x52,0x80] + vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} + +// CHECK: vfnmadd231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmadd231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfnmadd231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmadd231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vfnmadd231nepbf16 ymm2, ymm3, word ptr 
[eax]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xbc,0x10] + vfnmadd231nepbf16 ymm2, ymm3, word ptr [eax]{1to16} + +// CHECK: vfnmadd231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbc,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfnmadd231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbc,0x51,0x7f] + vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xbc,0x52,0x80] + vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} + +// CHECK: vfnmadd231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmadd231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfnmadd231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmadd231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vfnmadd231nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xbc,0x10] + vfnmadd231nepbf16 xmm2, xmm3, word ptr [eax]{1to8} + +// CHECK: vfnmadd231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbc,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfnmadd231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbc,0x51,0x7f] + vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xbc,0x52,0x80] + vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} + +// CHECK: 
vfnmsub132nepbf16 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9e,0xd4] + vfnmsub132nepbf16 ymm2, ymm3, ymm4 + +// CHECK: vfnmsub132nepbf16 ymm2 {k7}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9e,0xd4] + vfnmsub132nepbf16 ymm2 {k7}, ymm3, ymm4 + +// CHECK: vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9e,0xd4] + vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymm4 + +// CHECK: vfnmsub132nepbf16 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9e,0xd4] + vfnmsub132nepbf16 zmm2, zmm3, zmm4 + +// CHECK: vfnmsub132nepbf16 zmm2 {k7}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9e,0xd4] + vfnmsub132nepbf16 zmm2 {k7}, zmm3, zmm4 + +// CHECK: vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9e,0xd4] + vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmm4 + +// CHECK: vfnmsub132nepbf16 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9e,0xd4] + vfnmsub132nepbf16 xmm2, xmm3, xmm4 + +// CHECK: vfnmsub132nepbf16 xmm2 {k7}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9e,0xd4] + vfnmsub132nepbf16 xmm2 {k7}, xmm3, xmm4 + +// CHECK: vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9e,0xd4] + vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmm4 + +// CHECK: vfnmsub132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmsub132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfnmsub132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmsub132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vfnmsub132nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x9e,0x10] + vfnmsub132nepbf16 zmm2, zmm3, word ptr [eax]{1to32} + +// CHECK: vfnmsub132nepbf16 zmm2, zmm3, zmmword ptr 
[2*ebp - 2048] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9e,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfnmsub132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9e,0x51,0x7f] + vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x9e,0x52,0x80] + vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} + +// CHECK: vfnmsub132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmsub132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfnmsub132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmsub132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vfnmsub132nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x9e,0x10] + vfnmsub132nepbf16 ymm2, ymm3, word ptr [eax]{1to16} + +// CHECK: vfnmsub132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9e,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfnmsub132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9e,0x51,0x7f] + vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x9e,0x52,0x80] + vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} + +// CHECK: vfnmsub132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmsub132nepbf16 xmm2, xmm3, 
xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfnmsub132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmsub132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vfnmsub132nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x9e,0x10] + vfnmsub132nepbf16 xmm2, xmm3, word ptr [eax]{1to8} + +// CHECK: vfnmsub132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9e,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfnmsub132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9e,0x51,0x7f] + vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x9e,0x52,0x80] + vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} + +// CHECK: vfnmsub213nepbf16 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xae,0xd4] + vfnmsub213nepbf16 ymm2, ymm3, ymm4 + +// CHECK: vfnmsub213nepbf16 ymm2 {k7}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xae,0xd4] + vfnmsub213nepbf16 ymm2 {k7}, ymm3, ymm4 + +// CHECK: vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xae,0xd4] + vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymm4 + +// CHECK: vfnmsub213nepbf16 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xae,0xd4] + vfnmsub213nepbf16 zmm2, zmm3, zmm4 + +// CHECK: vfnmsub213nepbf16 zmm2 {k7}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xae,0xd4] + vfnmsub213nepbf16 zmm2 {k7}, zmm3, zmm4 + +// CHECK: vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xae,0xd4] + vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmm4 + +// CHECK: vfnmsub213nepbf16 xmm2, xmm3, xmm4 +// CHECK: encoding: 
[0x62,0xf6,0x64,0x08,0xae,0xd4] + vfnmsub213nepbf16 xmm2, xmm3, xmm4 + +// CHECK: vfnmsub213nepbf16 xmm2 {k7}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xae,0xd4] + vfnmsub213nepbf16 xmm2 {k7}, xmm3, xmm4 + +// CHECK: vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xae,0xd4] + vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmm4 + +// CHECK: vfnmsub213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xae,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmsub213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfnmsub213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xae,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmsub213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vfnmsub213nepbf16 zmm2, zmm3, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xae,0x10] + vfnmsub213nepbf16 zmm2, zmm3, word ptr [eax]{1to32} + +// CHECK: vfnmsub213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xae,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfnmsub213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xae,0x51,0x7f] + vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xae,0x52,0x80] + vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} + +// CHECK: vfnmsub213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xae,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmsub213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfnmsub213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: 
[0x62,0xf6,0x64,0x2f,0xae,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmsub213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vfnmsub213nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xae,0x10] + vfnmsub213nepbf16 ymm2, ymm3, word ptr [eax]{1to16} + +// CHECK: vfnmsub213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xae,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfnmsub213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xae,0x51,0x7f] + vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xae,0x52,0x80] + vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} + +// CHECK: vfnmsub213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xae,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmsub213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfnmsub213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xae,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmsub213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vfnmsub213nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xae,0x10] + vfnmsub213nepbf16 xmm2, xmm3, word ptr [eax]{1to8} + +// CHECK: vfnmsub213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xae,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfnmsub213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xae,0x51,0x7f] + vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, 
word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xae,0x52,0x80] + vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} + +// CHECK: vfnmsub231nepbf16 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbe,0xd4] + vfnmsub231nepbf16 ymm2, ymm3, ymm4 + +// CHECK: vfnmsub231nepbf16 ymm2 {k7}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbe,0xd4] + vfnmsub231nepbf16 ymm2 {k7}, ymm3, ymm4 + +// CHECK: vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbe,0xd4] + vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymm4 + +// CHECK: vfnmsub231nepbf16 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbe,0xd4] + vfnmsub231nepbf16 zmm2, zmm3, zmm4 + +// CHECK: vfnmsub231nepbf16 zmm2 {k7}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbe,0xd4] + vfnmsub231nepbf16 zmm2 {k7}, zmm3, zmm4 + +// CHECK: vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbe,0xd4] + vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmm4 + +// CHECK: vfnmsub231nepbf16 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbe,0xd4] + vfnmsub231nepbf16 xmm2, xmm3, xmm4 + +// CHECK: vfnmsub231nepbf16 xmm2 {k7}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbe,0xd4] + vfnmsub231nepbf16 xmm2 {k7}, xmm3, xmm4 + +// CHECK: vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbe,0xd4] + vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmm4 + +// CHECK: vfnmsub231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmsub231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfnmsub231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmsub231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vfnmsub231nepbf16 zmm2, zmm3, word ptr [eax]{1to32} 
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xbe,0x10] + vfnmsub231nepbf16 zmm2, zmm3, word ptr [eax]{1to32} + +// CHECK: vfnmsub231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbe,0x14,0x6d,0x00,0xf8,0xff,0xff] + vfnmsub231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbe,0x51,0x7f] + vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xbe,0x52,0x80] + vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} + +// CHECK: vfnmsub231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmsub231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfnmsub231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmsub231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vfnmsub231nepbf16 ymm2, ymm3, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xbe,0x10] + vfnmsub231nepbf16 ymm2, ymm3, word ptr [eax]{1to16} + +// CHECK: vfnmsub231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbe,0x14,0x6d,0x00,0xfc,0xff,0xff] + vfnmsub231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbe,0x51,0x7f] + vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xbe,0x52,0x80] + vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} + +// CHECK: vfnmsub231nepbf16 
xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10] + vfnmsub231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vfnmsub231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00] + vfnmsub231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vfnmsub231nepbf16 xmm2, xmm3, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xbe,0x10] + vfnmsub231nepbf16 xmm2, xmm3, word ptr [eax]{1to8} + +// CHECK: vfnmsub231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbe,0x14,0x6d,0x00,0xfe,0xff,0xff] + vfnmsub231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbe,0x51,0x7f] + vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xbe,0x52,0x80] + vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} + +// CHECK: vfpclasspbf16 k5, zmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x66,0xeb,0x7b] + vfpclasspbf16 k5, zmm3, 123 + +// CHECK: vfpclasspbf16 k5 {k7}, zmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x66,0xeb,0x7b] + vfpclasspbf16 k5 {k7}, zmm3, 123 + +// CHECK: vfpclasspbf16 k5, ymm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x66,0xeb,0x7b] + vfpclasspbf16 k5, ymm3, 123 + +// CHECK: vfpclasspbf16 k5 {k7}, ymm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x66,0xeb,0x7b] + vfpclasspbf16 k5 {k7}, ymm3, 123 + +// CHECK: vfpclasspbf16 k5, xmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0xeb,0x7b] + vfpclasspbf16 k5, xmm3, 123 + +// CHECK: vfpclasspbf16 k5 {k7}, xmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0xeb,0x7b] + vfpclasspbf16 k5 {k7}, 
xmm3, 123 + +// CHECK: vfpclasspbf16 k5, xmmword ptr [esp + 8*esi + 268435456], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b] + vfpclasspbf16 k5, xmmword ptr [esp + 8*esi + 268435456], 123 + +// CHECK: vfpclasspbf16 k5 {k7}, xmmword ptr [edi + 4*eax + 291], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0xac,0x87,0x23,0x01,0x00,0x00,0x7b] + vfpclasspbf16 k5 {k7}, xmmword ptr [edi + 4*eax + 291], 123 + +// CHECK: vfpclasspbf16 k5, word ptr [eax]{1to8}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x66,0x28,0x7b] + vfpclasspbf16 k5, word ptr [eax]{1to8}, 123 + +// CHECK: vfpclasspbf16 k5, xmmword ptr [2*ebp - 512], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vfpclasspbf16 k5, xmmword ptr [2*ebp - 512], 123 + +// CHECK: vfpclasspbf16 k5 {k7}, xmmword ptr [ecx + 2032], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0x69,0x7f,0x7b] + vfpclasspbf16 k5 {k7}, xmmword ptr [ecx + 2032], 123 + +// CHECK: vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to8}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x1f,0x66,0x6a,0x80,0x7b] + vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to8}, 123 + +// CHECK: vfpclasspbf16 k5, word ptr [eax]{1to16}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x66,0x28,0x7b] + vfpclasspbf16 k5, word ptr [eax]{1to16}, 123 + +// CHECK: vfpclasspbf16 k5, ymmword ptr [2*ebp - 1024], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x66,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vfpclasspbf16 k5, ymmword ptr [2*ebp - 1024], 123 + +// CHECK: vfpclasspbf16 k5 {k7}, ymmword ptr [ecx + 4064], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x66,0x69,0x7f,0x7b] + vfpclasspbf16 k5 {k7}, ymmword ptr [ecx + 4064], 123 + +// CHECK: vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to16}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x3f,0x66,0x6a,0x80,0x7b] + vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to16}, 123 + +// CHECK: vfpclasspbf16 k5, word ptr [eax]{1to32}, 123 +// CHECK: encoding: 
[0x62,0xf3,0x7f,0x58,0x66,0x28,0x7b] + vfpclasspbf16 k5, word ptr [eax]{1to32}, 123 + +// CHECK: vfpclasspbf16 k5, zmmword ptr [2*ebp - 2048], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x66,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vfpclasspbf16 k5, zmmword ptr [2*ebp - 2048], 123 + +// CHECK: vfpclasspbf16 k5 {k7}, zmmword ptr [ecx + 8128], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x66,0x69,0x7f,0x7b] + vfpclasspbf16 k5 {k7}, zmmword ptr [ecx + 8128], 123 + +// CHECK: vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to32}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x5f,0x66,0x6a,0x80,0x7b] + vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to32}, 123 + +// CHECK: vgetexppbf16 xmm2, xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0xd3] + vgetexppbf16 xmm2, xmm3 + +// CHECK: vgetexppbf16 xmm2 {k7}, xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x42,0xd3] + vgetexppbf16 xmm2 {k7}, xmm3 + +// CHECK: vgetexppbf16 xmm2 {k7} {z}, xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x42,0xd3] + vgetexppbf16 xmm2 {k7} {z}, xmm3 + +// CHECK: vgetexppbf16 zmm2, zmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0xd3] + vgetexppbf16 zmm2, zmm3 + +// CHECK: vgetexppbf16 zmm2 {k7}, zmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x42,0xd3] + vgetexppbf16 zmm2 {k7}, zmm3 + +// CHECK: vgetexppbf16 zmm2 {k7} {z}, zmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x42,0xd3] + vgetexppbf16 zmm2 {k7} {z}, zmm3 + +// CHECK: vgetexppbf16 ymm2, ymm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0xd3] + vgetexppbf16 ymm2, ymm3 + +// CHECK: vgetexppbf16 ymm2 {k7}, ymm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x42,0xd3] + vgetexppbf16 ymm2 {k7}, ymm3 + +// CHECK: vgetexppbf16 ymm2 {k7} {z}, ymm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x42,0xd3] + vgetexppbf16 ymm2 {k7} {z}, ymm3 + +// CHECK: vgetexppbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0x94,0xf4,0x00,0x00,0x00,0x10] + vgetexppbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] + +// 
CHECK: vgetexppbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x42,0x94,0x87,0x23,0x01,0x00,0x00] + vgetexppbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vgetexppbf16 xmm2, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x42,0x10] + vgetexppbf16 xmm2, word ptr [eax]{1to8} + +// CHECK: vgetexppbf16 xmm2, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0x14,0x6d,0x00,0xfe,0xff,0xff] + vgetexppbf16 xmm2, xmmword ptr [2*ebp - 512] + +// CHECK: vgetexppbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x42,0x51,0x7f] + vgetexppbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032] + +// CHECK: vgetexppbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x42,0x52,0x80] + vgetexppbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8} + +// CHECK: vgetexppbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0x94,0xf4,0x00,0x00,0x00,0x10] + vgetexppbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vgetexppbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x42,0x94,0x87,0x23,0x01,0x00,0x00] + vgetexppbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vgetexppbf16 ymm2, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x42,0x10] + vgetexppbf16 ymm2, word ptr [eax]{1to16} + +// CHECK: vgetexppbf16 ymm2, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0x14,0x6d,0x00,0xfc,0xff,0xff] + vgetexppbf16 ymm2, ymmword ptr [2*ebp - 1024] + +// CHECK: vgetexppbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x42,0x51,0x7f] + vgetexppbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064] + +// CHECK: vgetexppbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x42,0x52,0x80] + vgetexppbf16 ymm2 {k7} {z}, word ptr [edx - 
256]{1to16} + +// CHECK: vgetexppbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0x94,0xf4,0x00,0x00,0x00,0x10] + vgetexppbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vgetexppbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x42,0x94,0x87,0x23,0x01,0x00,0x00] + vgetexppbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vgetexppbf16 zmm2, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x42,0x10] + vgetexppbf16 zmm2, word ptr [eax]{1to32} + +// CHECK: vgetexppbf16 zmm2, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0x14,0x6d,0x00,0xf8,0xff,0xff] + vgetexppbf16 zmm2, zmmword ptr [2*ebp - 2048] + +// CHECK: vgetexppbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x42,0x51,0x7f] + vgetexppbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128] + +// CHECK: vgetexppbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x42,0x52,0x80] + vgetexppbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32} + +// CHECK: vgetmantpbf16 zmm2, zmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x26,0xd3,0x7b] + vgetmantpbf16 zmm2, zmm3, 123 + +// CHECK: vgetmantpbf16 zmm2 {k7}, zmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x26,0xd3,0x7b] + vgetmantpbf16 zmm2 {k7}, zmm3, 123 + +// CHECK: vgetmantpbf16 zmm2 {k7} {z}, zmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x26,0xd3,0x7b] + vgetmantpbf16 zmm2 {k7} {z}, zmm3, 123 + +// CHECK: vgetmantpbf16 ymm2, ymm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x26,0xd3,0x7b] + vgetmantpbf16 ymm2, ymm3, 123 + +// CHECK: vgetmantpbf16 ymm2 {k7}, ymm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x26,0xd3,0x7b] + vgetmantpbf16 ymm2 {k7}, ymm3, 123 + +// CHECK: vgetmantpbf16 ymm2 {k7} {z}, ymm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x26,0xd3,0x7b] + vgetmantpbf16 ymm2 {k7} {z}, ymm3, 123 + +// CHECK: 
vgetmantpbf16 xmm2, xmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x26,0xd3,0x7b] + vgetmantpbf16 xmm2, xmm3, 123 + +// CHECK: vgetmantpbf16 xmm2 {k7}, xmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x26,0xd3,0x7b] + vgetmantpbf16 xmm2 {k7}, xmm3, 123 + +// CHECK: vgetmantpbf16 xmm2 {k7} {z}, xmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x26,0xd3,0x7b] + vgetmantpbf16 xmm2 {k7} {z}, xmm3, 123 + +// CHECK: vgetmantpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vgetmantpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123 + +// CHECK: vgetmantpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vgetmantpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123 + +// CHECK: vgetmantpbf16 xmm2, word ptr [eax]{1to8}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x26,0x10,0x7b] + vgetmantpbf16 xmm2, word ptr [eax]{1to8}, 123 + +// CHECK: vgetmantpbf16 xmm2, xmmword ptr [2*ebp - 512], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x26,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vgetmantpbf16 xmm2, xmmword ptr [2*ebp - 512], 123 + +// CHECK: vgetmantpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x26,0x51,0x7f,0x7b] + vgetmantpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123 + +// CHECK: vgetmantpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x9f,0x26,0x52,0x80,0x7b] + vgetmantpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123 + +// CHECK: vgetmantpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vgetmantpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123 + +// CHECK: vgetmantpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123 +// CHECK: encoding: 
[0x62,0xf3,0x7f,0x2f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vgetmantpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123 + +// CHECK: vgetmantpbf16 ymm2, word ptr [eax]{1to16}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x26,0x10,0x7b] + vgetmantpbf16 ymm2, word ptr [eax]{1to16}, 123 + +// CHECK: vgetmantpbf16 ymm2, ymmword ptr [2*ebp - 1024], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x26,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vgetmantpbf16 ymm2, ymmword ptr [2*ebp - 1024], 123 + +// CHECK: vgetmantpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x26,0x51,0x7f,0x7b] + vgetmantpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123 + +// CHECK: vgetmantpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0xbf,0x26,0x52,0x80,0x7b] + vgetmantpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123 + +// CHECK: vgetmantpbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vgetmantpbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123 + +// CHECK: vgetmantpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vgetmantpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123 + +// CHECK: vgetmantpbf16 zmm2, word ptr [eax]{1to32}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x26,0x10,0x7b] + vgetmantpbf16 zmm2, word ptr [eax]{1to32}, 123 + +// CHECK: vgetmantpbf16 zmm2, zmmword ptr [2*ebp - 2048], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x26,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vgetmantpbf16 zmm2, zmmword ptr [2*ebp - 2048], 123 + +// CHECK: vgetmantpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x26,0x51,0x7f,0x7b] + vgetmantpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123 + +// CHECK: vgetmantpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123 +// CHECK: encoding: 
[0x62,0xf3,0x7f,0xdf,0x26,0x52,0x80,0x7b] + vgetmantpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123 + +// CHECK: vmaxpbf16 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5f,0xd4] + vmaxpbf16 ymm2, ymm3, ymm4 + +// CHECK: vmaxpbf16 ymm2 {k7}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5f,0xd4] + vmaxpbf16 ymm2 {k7}, ymm3, ymm4 + +// CHECK: vmaxpbf16 ymm2 {k7} {z}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5f,0xd4] + vmaxpbf16 ymm2 {k7} {z}, ymm3, ymm4 + +// CHECK: vmaxpbf16 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5f,0xd4] + vmaxpbf16 zmm2, zmm3, zmm4 + +// CHECK: vmaxpbf16 zmm2 {k7}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5f,0xd4] + vmaxpbf16 zmm2 {k7}, zmm3, zmm4 + +// CHECK: vmaxpbf16 zmm2 {k7} {z}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5f,0xd4] + vmaxpbf16 zmm2 {k7} {z}, zmm3, zmm4 + +// CHECK: vmaxpbf16 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5f,0xd4] + vmaxpbf16 xmm2, xmm3, xmm4 + +// CHECK: vmaxpbf16 xmm2 {k7}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5f,0xd4] + vmaxpbf16 xmm2 {k7}, xmm3, xmm4 + +// CHECK: vmaxpbf16 xmm2 {k7} {z}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5f,0xd4] + vmaxpbf16 xmm2 {k7} {z}, xmm3, xmm4 + +// CHECK: vmaxpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10] + vmaxpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vmaxpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00] + vmaxpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vmaxpbf16 zmm2, zmm3, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5f,0x10] + vmaxpbf16 zmm2, zmm3, word ptr [eax]{1to32} + +// CHECK: vmaxpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: 
[0x62,0xf5,0x65,0x48,0x5f,0x14,0x6d,0x00,0xf8,0xff,0xff] + vmaxpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vmaxpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5f,0x51,0x7f] + vmaxpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vmaxpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5f,0x52,0x80] + vmaxpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} + +// CHECK: vmaxpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10] + vmaxpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vmaxpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00] + vmaxpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vmaxpbf16 ymm2, ymm3, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5f,0x10] + vmaxpbf16 ymm2, ymm3, word ptr [eax]{1to16} + +// CHECK: vmaxpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5f,0x14,0x6d,0x00,0xfc,0xff,0xff] + vmaxpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: vmaxpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5f,0x51,0x7f] + vmaxpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: vmaxpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5f,0x52,0x80] + vmaxpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} + +// CHECK: vmaxpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10] + vmaxpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vmaxpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00] + 
vmaxpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vmaxpbf16 xmm2, xmm3, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5f,0x10] + vmaxpbf16 xmm2, xmm3, word ptr [eax]{1to8} + +// CHECK: vmaxpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5f,0x14,0x6d,0x00,0xfe,0xff,0xff] + vmaxpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: vmaxpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5f,0x51,0x7f] + vmaxpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: vmaxpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5f,0x52,0x80] + vmaxpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} + +// CHECK: vminpbf16 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5d,0xd4] + vminpbf16 ymm2, ymm3, ymm4 + +// CHECK: vminpbf16 ymm2 {k7}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5d,0xd4] + vminpbf16 ymm2 {k7}, ymm3, ymm4 + +// CHECK: vminpbf16 ymm2 {k7} {z}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5d,0xd4] + vminpbf16 ymm2 {k7} {z}, ymm3, ymm4 + +// CHECK: vminpbf16 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5d,0xd4] + vminpbf16 zmm2, zmm3, zmm4 + +// CHECK: vminpbf16 zmm2 {k7}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5d,0xd4] + vminpbf16 zmm2 {k7}, zmm3, zmm4 + +// CHECK: vminpbf16 zmm2 {k7} {z}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5d,0xd4] + vminpbf16 zmm2 {k7} {z}, zmm3, zmm4 + +// CHECK: vminpbf16 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5d,0xd4] + vminpbf16 xmm2, xmm3, xmm4 + +// CHECK: vminpbf16 xmm2 {k7}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5d,0xd4] + vminpbf16 xmm2 {k7}, xmm3, xmm4 + +// CHECK: vminpbf16 xmm2 {k7} {z}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5d,0xd4] + vminpbf16 xmm2 {k7} {z}, xmm3, xmm4 + +// CHECK: vminpbf16 zmm2, zmm3, 
zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10] + vminpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vminpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00] + vminpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vminpbf16 zmm2, zmm3, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5d,0x10] + vminpbf16 zmm2, zmm3, word ptr [eax]{1to32} + +// CHECK: vminpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5d,0x14,0x6d,0x00,0xf8,0xff,0xff] + vminpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vminpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5d,0x51,0x7f] + vminpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vminpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5d,0x52,0x80] + vminpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} + +// CHECK: vminpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10] + vminpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vminpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00] + vminpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vminpbf16 ymm2, ymm3, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5d,0x10] + vminpbf16 ymm2, ymm3, word ptr [eax]{1to16} + +// CHECK: vminpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5d,0x14,0x6d,0x00,0xfc,0xff,0xff] + vminpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: vminpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: 
[0x62,0xf5,0x65,0xaf,0x5d,0x51,0x7f] + vminpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: vminpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5d,0x52,0x80] + vminpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} + +// CHECK: vminpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10] + vminpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vminpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00] + vminpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vminpbf16 xmm2, xmm3, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5d,0x10] + vminpbf16 xmm2, xmm3, word ptr [eax]{1to8} + +// CHECK: vminpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5d,0x14,0x6d,0x00,0xfe,0xff,0xff] + vminpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: vminpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5d,0x51,0x7f] + vminpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: vminpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5d,0x52,0x80] + vminpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} + +// CHECK: vmulnepbf16 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x59,0xd4] + vmulnepbf16 ymm2, ymm3, ymm4 + +// CHECK: vmulnepbf16 ymm2 {k7}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x59,0xd4] + vmulnepbf16 ymm2 {k7}, ymm3, ymm4 + +// CHECK: vmulnepbf16 ymm2 {k7} {z}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x59,0xd4] + vmulnepbf16 ymm2 {k7} {z}, ymm3, ymm4 + +// CHECK: vmulnepbf16 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x59,0xd4] + vmulnepbf16 zmm2, zmm3, zmm4 + +// CHECK: vmulnepbf16 zmm2 {k7}, 
zmm3, zmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x59,0xd4] + vmulnepbf16 zmm2 {k7}, zmm3, zmm4 + +// CHECK: vmulnepbf16 zmm2 {k7} {z}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x59,0xd4] + vmulnepbf16 zmm2 {k7} {z}, zmm3, zmm4 + +// CHECK: vmulnepbf16 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x59,0xd4] + vmulnepbf16 xmm2, xmm3, xmm4 + +// CHECK: vmulnepbf16 xmm2 {k7}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x59,0xd4] + vmulnepbf16 xmm2 {k7}, xmm3, xmm4 + +// CHECK: vmulnepbf16 xmm2 {k7} {z}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x59,0xd4] + vmulnepbf16 xmm2 {k7} {z}, xmm3, xmm4 + +// CHECK: vmulnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x59,0x94,0xf4,0x00,0x00,0x00,0x10] + vmulnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vmulnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x59,0x94,0x87,0x23,0x01,0x00,0x00] + vmulnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vmulnepbf16 zmm2, zmm3, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x59,0x10] + vmulnepbf16 zmm2, zmm3, word ptr [eax]{1to32} + +// CHECK: vmulnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x59,0x14,0x6d,0x00,0xf8,0xff,0xff] + vmulnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vmulnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x59,0x51,0x7f] + vmulnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vmulnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x59,0x52,0x80] + vmulnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} + +// CHECK: vmulnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x59,0x94,0xf4,0x00,0x00,0x00,0x10] + vmulnepbf16 ymm2, ymm3, 
ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vmulnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x59,0x94,0x87,0x23,0x01,0x00,0x00] + vmulnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vmulnepbf16 ymm2, ymm3, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x59,0x10] + vmulnepbf16 ymm2, ymm3, word ptr [eax]{1to16} + +// CHECK: vmulnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x59,0x14,0x6d,0x00,0xfc,0xff,0xff] + vmulnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: vmulnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x59,0x51,0x7f] + vmulnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: vmulnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x59,0x52,0x80] + vmulnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} + +// CHECK: vmulnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x59,0x94,0xf4,0x00,0x00,0x00,0x10] + vmulnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vmulnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x59,0x94,0x87,0x23,0x01,0x00,0x00] + vmulnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vmulnepbf16 xmm2, xmm3, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x59,0x10] + vmulnepbf16 xmm2, xmm3, word ptr [eax]{1to8} + +// CHECK: vmulnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x59,0x14,0x6d,0x00,0xfe,0xff,0xff] + vmulnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: vmulnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x59,0x51,0x7f] + vmulnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: vmulnepbf16 xmm2 {k7} 
{z}, xmm3, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x59,0x52,0x80] + vmulnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} + +// CHECK: vrcppbf16 xmm2, xmm3 +// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4c,0xd3] + vrcppbf16 xmm2, xmm3 + +// CHECK: vrcppbf16 xmm2 {k7}, xmm3 +// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4c,0xd3] + vrcppbf16 xmm2 {k7}, xmm3 + +// CHECK: vrcppbf16 xmm2 {k7} {z}, xmm3 +// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4c,0xd3] + vrcppbf16 xmm2 {k7} {z}, xmm3 + +// CHECK: vrcppbf16 zmm2, zmm3 +// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4c,0xd3] + vrcppbf16 zmm2, zmm3 + +// CHECK: vrcppbf16 zmm2 {k7}, zmm3 +// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4c,0xd3] + vrcppbf16 zmm2 {k7}, zmm3 + +// CHECK: vrcppbf16 zmm2 {k7} {z}, zmm3 +// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4c,0xd3] + vrcppbf16 zmm2 {k7} {z}, zmm3 + +// CHECK: vrcppbf16 ymm2, ymm3 +// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4c,0xd3] + vrcppbf16 ymm2, ymm3 + +// CHECK: vrcppbf16 ymm2 {k7}, ymm3 +// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4c,0xd3] + vrcppbf16 ymm2 {k7}, ymm3 + +// CHECK: vrcppbf16 ymm2 {k7} {z}, ymm3 +// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4c,0xd3] + vrcppbf16 ymm2 {k7} {z}, ymm3 + +// CHECK: vrcppbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10] + vrcppbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vrcppbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00] + vrcppbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vrcppbf16 xmm2, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf6,0x7c,0x18,0x4c,0x10] + vrcppbf16 xmm2, word ptr [eax]{1to8} + +// CHECK: vrcppbf16 xmm2, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4c,0x14,0x6d,0x00,0xfe,0xff,0xff] + vrcppbf16 xmm2, xmmword ptr [2*ebp - 512] + +// CHECK: vrcppbf16 xmm2 {k7} {z}, xmmword 
ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4c,0x51,0x7f] + vrcppbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032] + +// CHECK: vrcppbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf6,0x7c,0x9f,0x4c,0x52,0x80] + vrcppbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8} + +// CHECK: vrcppbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10] + vrcppbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vrcppbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00] + vrcppbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vrcppbf16 ymm2, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf6,0x7c,0x38,0x4c,0x10] + vrcppbf16 ymm2, word ptr [eax]{1to16} + +// CHECK: vrcppbf16 ymm2, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4c,0x14,0x6d,0x00,0xfc,0xff,0xff] + vrcppbf16 ymm2, ymmword ptr [2*ebp - 1024] + +// CHECK: vrcppbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4c,0x51,0x7f] + vrcppbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064] + +// CHECK: vrcppbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf6,0x7c,0xbf,0x4c,0x52,0x80] + vrcppbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16} + +// CHECK: vrcppbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10] + vrcppbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vrcppbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00] + vrcppbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vrcppbf16 zmm2, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf6,0x7c,0x58,0x4c,0x10] + vrcppbf16 zmm2, word ptr [eax]{1to32} + +// CHECK: vrcppbf16 zmm2, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: 
[0x62,0xf6,0x7c,0x48,0x4c,0x14,0x6d,0x00,0xf8,0xff,0xff] + vrcppbf16 zmm2, zmmword ptr [2*ebp - 2048] + +// CHECK: vrcppbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4c,0x51,0x7f] + vrcppbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128] + +// CHECK: vrcppbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf6,0x7c,0xdf,0x4c,0x52,0x80] + vrcppbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32} + +// CHECK: vreducenepbf16 zmm2, zmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x56,0xd3,0x7b] + vreducenepbf16 zmm2, zmm3, 123 + +// CHECK: vreducenepbf16 zmm2 {k7}, zmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x56,0xd3,0x7b] + vreducenepbf16 zmm2 {k7}, zmm3, 123 + +// CHECK: vreducenepbf16 zmm2 {k7} {z}, zmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x56,0xd3,0x7b] + vreducenepbf16 zmm2 {k7} {z}, zmm3, 123 + +// CHECK: vreducenepbf16 ymm2, ymm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x56,0xd3,0x7b] + vreducenepbf16 ymm2, ymm3, 123 + +// CHECK: vreducenepbf16 ymm2 {k7}, ymm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x56,0xd3,0x7b] + vreducenepbf16 ymm2 {k7}, ymm3, 123 + +// CHECK: vreducenepbf16 ymm2 {k7} {z}, ymm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x56,0xd3,0x7b] + vreducenepbf16 ymm2 {k7} {z}, ymm3, 123 + +// CHECK: vreducenepbf16 xmm2, xmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x56,0xd3,0x7b] + vreducenepbf16 xmm2, xmm3, 123 + +// CHECK: vreducenepbf16 xmm2 {k7}, xmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x56,0xd3,0x7b] + vreducenepbf16 xmm2 {k7}, xmm3, 123 + +// CHECK: vreducenepbf16 xmm2 {k7} {z}, xmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x56,0xd3,0x7b] + vreducenepbf16 xmm2 {k7} {z}, xmm3, 123 + +// CHECK: vreducenepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vreducenepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123 + +// CHECK: 
vreducenepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vreducenepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123 + +// CHECK: vreducenepbf16 xmm2, word ptr [eax]{1to8}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x56,0x10,0x7b] + vreducenepbf16 xmm2, word ptr [eax]{1to8}, 123 + +// CHECK: vreducenepbf16 xmm2, xmmword ptr [2*ebp - 512], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x56,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vreducenepbf16 xmm2, xmmword ptr [2*ebp - 512], 123 + +// CHECK: vreducenepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x56,0x51,0x7f,0x7b] + vreducenepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123 + +// CHECK: vreducenepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x9f,0x56,0x52,0x80,0x7b] + vreducenepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123 + +// CHECK: vreducenepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vreducenepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123 + +// CHECK: vreducenepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vreducenepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123 + +// CHECK: vreducenepbf16 ymm2, word ptr [eax]{1to16}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x56,0x10,0x7b] + vreducenepbf16 ymm2, word ptr [eax]{1to16}, 123 + +// CHECK: vreducenepbf16 ymm2, ymmword ptr [2*ebp - 1024], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x56,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vreducenepbf16 ymm2, ymmword ptr [2*ebp - 1024], 123 + +// CHECK: vreducenepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x56,0x51,0x7f,0x7b] + vreducenepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 
123 + +// CHECK: vreducenepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0xbf,0x56,0x52,0x80,0x7b] + vreducenepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123 + +// CHECK: vreducenepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vreducenepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123 + +// CHECK: vreducenepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vreducenepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123 + +// CHECK: vreducenepbf16 zmm2, word ptr [eax]{1to32}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x56,0x10,0x7b] + vreducenepbf16 zmm2, word ptr [eax]{1to32}, 123 + +// CHECK: vreducenepbf16 zmm2, zmmword ptr [2*ebp - 2048], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x56,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vreducenepbf16 zmm2, zmmword ptr [2*ebp - 2048], 123 + +// CHECK: vreducenepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x56,0x51,0x7f,0x7b] + vreducenepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123 + +// CHECK: vreducenepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0xdf,0x56,0x52,0x80,0x7b] + vreducenepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123 + +// CHECK: vrndscalenepbf16 zmm2, zmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x08,0xd3,0x7b] + vrndscalenepbf16 zmm2, zmm3, 123 + +// CHECK: vrndscalenepbf16 zmm2 {k7}, zmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x08,0xd3,0x7b] + vrndscalenepbf16 zmm2 {k7}, zmm3, 123 + +// CHECK: vrndscalenepbf16 zmm2 {k7} {z}, zmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x08,0xd3,0x7b] + vrndscalenepbf16 zmm2 {k7} {z}, zmm3, 123 + +// CHECK: vrndscalenepbf16 ymm2, ymm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x08,0xd3,0x7b] + 
vrndscalenepbf16 ymm2, ymm3, 123 + +// CHECK: vrndscalenepbf16 ymm2 {k7}, ymm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x08,0xd3,0x7b] + vrndscalenepbf16 ymm2 {k7}, ymm3, 123 + +// CHECK: vrndscalenepbf16 ymm2 {k7} {z}, ymm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x08,0xd3,0x7b] + vrndscalenepbf16 ymm2 {k7} {z}, ymm3, 123 + +// CHECK: vrndscalenepbf16 xmm2, xmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x08,0xd3,0x7b] + vrndscalenepbf16 xmm2, xmm3, 123 + +// CHECK: vrndscalenepbf16 xmm2 {k7}, xmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x08,0xd3,0x7b] + vrndscalenepbf16 xmm2 {k7}, xmm3, 123 + +// CHECK: vrndscalenepbf16 xmm2 {k7} {z}, xmm3, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x08,0xd3,0x7b] + vrndscalenepbf16 xmm2 {k7} {z}, xmm3, 123 + +// CHECK: vrndscalenepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vrndscalenepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123 + +// CHECK: vrndscalenepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vrndscalenepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123 + +// CHECK: vrndscalenepbf16 xmm2, word ptr [eax]{1to8}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x08,0x10,0x7b] + vrndscalenepbf16 xmm2, word ptr [eax]{1to8}, 123 + +// CHECK: vrndscalenepbf16 xmm2, xmmword ptr [2*ebp - 512], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x08,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vrndscalenepbf16 xmm2, xmmword ptr [2*ebp - 512], 123 + +// CHECK: vrndscalenepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x08,0x51,0x7f,0x7b] + vrndscalenepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123 + +// CHECK: vrndscalenepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x9f,0x08,0x52,0x80,0x7b] + vrndscalenepbf16 xmm2 {k7} {z}, word 
ptr [edx - 256]{1to8}, 123 + +// CHECK: vrndscalenepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vrndscalenepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123 + +// CHECK: vrndscalenepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vrndscalenepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123 + +// CHECK: vrndscalenepbf16 ymm2, word ptr [eax]{1to16}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x08,0x10,0x7b] + vrndscalenepbf16 ymm2, word ptr [eax]{1to16}, 123 + +// CHECK: vrndscalenepbf16 ymm2, ymmword ptr [2*ebp - 1024], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x08,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vrndscalenepbf16 ymm2, ymmword ptr [2*ebp - 1024], 123 + +// CHECK: vrndscalenepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x08,0x51,0x7f,0x7b] + vrndscalenepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123 + +// CHECK: vrndscalenepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0xbf,0x08,0x52,0x80,0x7b] + vrndscalenepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123 + +// CHECK: vrndscalenepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vrndscalenepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123 + +// CHECK: vrndscalenepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vrndscalenepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123 + +// CHECK: vrndscalenepbf16 zmm2, word ptr [eax]{1to32}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x08,0x10,0x7b] + vrndscalenepbf16 zmm2, word ptr [eax]{1to32}, 123 + +// CHECK: vrndscalenepbf16 zmm2, zmmword ptr [2*ebp - 2048], 123 +// CHECK: encoding: 
[0x62,0xf3,0x7f,0x48,0x08,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vrndscalenepbf16 zmm2, zmmword ptr [2*ebp - 2048], 123 + +// CHECK: vrndscalenepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x08,0x51,0x7f,0x7b] + vrndscalenepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123 + +// CHECK: vrndscalenepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0xdf,0x08,0x52,0x80,0x7b] + vrndscalenepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123 + +// CHECK: vrsqrtpbf16 xmm2, xmm3 +// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4e,0xd3] + vrsqrtpbf16 xmm2, xmm3 + +// CHECK: vrsqrtpbf16 xmm2 {k7}, xmm3 +// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4e,0xd3] + vrsqrtpbf16 xmm2 {k7}, xmm3 + +// CHECK: vrsqrtpbf16 xmm2 {k7} {z}, xmm3 +// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4e,0xd3] + vrsqrtpbf16 xmm2 {k7} {z}, xmm3 + +// CHECK: vrsqrtpbf16 zmm2, zmm3 +// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4e,0xd3] + vrsqrtpbf16 zmm2, zmm3 + +// CHECK: vrsqrtpbf16 zmm2 {k7}, zmm3 +// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4e,0xd3] + vrsqrtpbf16 zmm2 {k7}, zmm3 + +// CHECK: vrsqrtpbf16 zmm2 {k7} {z}, zmm3 +// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4e,0xd3] + vrsqrtpbf16 zmm2 {k7} {z}, zmm3 + +// CHECK: vrsqrtpbf16 ymm2, ymm3 +// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4e,0xd3] + vrsqrtpbf16 ymm2, ymm3 + +// CHECK: vrsqrtpbf16 ymm2 {k7}, ymm3 +// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4e,0xd3] + vrsqrtpbf16 ymm2 {k7}, ymm3 + +// CHECK: vrsqrtpbf16 ymm2 {k7} {z}, ymm3 +// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4e,0xd3] + vrsqrtpbf16 ymm2 {k7} {z}, ymm3 + +// CHECK: vrsqrtpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10] + vrsqrtpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vrsqrtpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00] + 
vrsqrtpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vrsqrtpbf16 xmm2, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf6,0x7c,0x18,0x4e,0x10] + vrsqrtpbf16 xmm2, word ptr [eax]{1to8} + +// CHECK: vrsqrtpbf16 xmm2, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4e,0x14,0x6d,0x00,0xfe,0xff,0xff] + vrsqrtpbf16 xmm2, xmmword ptr [2*ebp - 512] + +// CHECK: vrsqrtpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4e,0x51,0x7f] + vrsqrtpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032] + +// CHECK: vrsqrtpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf6,0x7c,0x9f,0x4e,0x52,0x80] + vrsqrtpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8} + +// CHECK: vrsqrtpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10] + vrsqrtpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vrsqrtpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00] + vrsqrtpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vrsqrtpbf16 ymm2, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf6,0x7c,0x38,0x4e,0x10] + vrsqrtpbf16 ymm2, word ptr [eax]{1to16} + +// CHECK: vrsqrtpbf16 ymm2, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4e,0x14,0x6d,0x00,0xfc,0xff,0xff] + vrsqrtpbf16 ymm2, ymmword ptr [2*ebp - 1024] + +// CHECK: vrsqrtpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4e,0x51,0x7f] + vrsqrtpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064] + +// CHECK: vrsqrtpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf6,0x7c,0xbf,0x4e,0x52,0x80] + vrsqrtpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16} + +// CHECK: vrsqrtpbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10] + vrsqrtpbf16 
zmm2, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vrsqrtpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00] + vrsqrtpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vrsqrtpbf16 zmm2, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf6,0x7c,0x58,0x4e,0x10] + vrsqrtpbf16 zmm2, word ptr [eax]{1to32} + +// CHECK: vrsqrtpbf16 zmm2, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4e,0x14,0x6d,0x00,0xf8,0xff,0xff] + vrsqrtpbf16 zmm2, zmmword ptr [2*ebp - 2048] + +// CHECK: vrsqrtpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4e,0x51,0x7f] + vrsqrtpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128] + +// CHECK: vrsqrtpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf6,0x7c,0xdf,0x4e,0x52,0x80] + vrsqrtpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32} + +// CHECK: vscalefpbf16 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x2c,0xd4] + vscalefpbf16 ymm2, ymm3, ymm4 + +// CHECK: vscalefpbf16 ymm2 {k7}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x2c,0xd4] + vscalefpbf16 ymm2 {k7}, ymm3, ymm4 + +// CHECK: vscalefpbf16 ymm2 {k7} {z}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x2c,0xd4] + vscalefpbf16 ymm2 {k7} {z}, ymm3, ymm4 + +// CHECK: vscalefpbf16 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x2c,0xd4] + vscalefpbf16 zmm2, zmm3, zmm4 + +// CHECK: vscalefpbf16 zmm2 {k7}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x2c,0xd4] + vscalefpbf16 zmm2 {k7}, zmm3, zmm4 + +// CHECK: vscalefpbf16 zmm2 {k7} {z}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x2c,0xd4] + vscalefpbf16 zmm2 {k7} {z}, zmm3, zmm4 + +// CHECK: vscalefpbf16 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x2c,0xd4] + vscalefpbf16 xmm2, xmm3, xmm4 + +// CHECK: vscalefpbf16 xmm2 {k7}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x2c,0xd4] + 
vscalefpbf16 xmm2 {k7}, xmm3, xmm4 + +// CHECK: vscalefpbf16 xmm2 {k7} {z}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x2c,0xd4] + vscalefpbf16 xmm2 {k7} {z}, xmm3, xmm4 + +// CHECK: vscalefpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10] + vscalefpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vscalefpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00] + vscalefpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vscalefpbf16 zmm2, zmm3, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x2c,0x10] + vscalefpbf16 zmm2, zmm3, word ptr [eax]{1to32} + +// CHECK: vscalefpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x2c,0x14,0x6d,0x00,0xf8,0xff,0xff] + vscalefpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vscalefpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x2c,0x51,0x7f] + vscalefpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vscalefpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x2c,0x52,0x80] + vscalefpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} + +// CHECK: vscalefpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10] + vscalefpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vscalefpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00] + vscalefpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vscalefpbf16 ymm2, ymm3, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x2c,0x10] + vscalefpbf16 ymm2, ymm3, word ptr [eax]{1to16} + +// CHECK: vscalefpbf16 ymm2, ymm3, 
ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x2c,0x14,0x6d,0x00,0xfc,0xff,0xff] + vscalefpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: vscalefpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x2c,0x51,0x7f] + vscalefpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: vscalefpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x2c,0x52,0x80] + vscalefpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} + +// CHECK: vscalefpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10] + vscalefpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vscalefpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00] + vscalefpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vscalefpbf16 xmm2, xmm3, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x2c,0x10] + vscalefpbf16 xmm2, xmm3, word ptr [eax]{1to8} + +// CHECK: vscalefpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x2c,0x14,0x6d,0x00,0xfe,0xff,0xff] + vscalefpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: vscalefpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x2c,0x51,0x7f] + vscalefpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: vscalefpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x2c,0x52,0x80] + vscalefpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} + +// CHECK: vsqrtnepbf16 xmm2, xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x51,0xd3] + vsqrtnepbf16 xmm2, xmm3 + +// CHECK: vsqrtnepbf16 xmm2 {k7}, xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x51,0xd3] + vsqrtnepbf16 xmm2 {k7}, xmm3 + +// CHECK: vsqrtnepbf16 xmm2 
{k7} {z}, xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x51,0xd3] + vsqrtnepbf16 xmm2 {k7} {z}, xmm3 + +// CHECK: vsqrtnepbf16 zmm2, zmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x51,0xd3] + vsqrtnepbf16 zmm2, zmm3 + +// CHECK: vsqrtnepbf16 zmm2 {k7}, zmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x51,0xd3] + vsqrtnepbf16 zmm2 {k7}, zmm3 + +// CHECK: vsqrtnepbf16 zmm2 {k7} {z}, zmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x51,0xd3] + vsqrtnepbf16 zmm2 {k7} {z}, zmm3 + +// CHECK: vsqrtnepbf16 ymm2, ymm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x51,0xd3] + vsqrtnepbf16 ymm2, ymm3 + +// CHECK: vsqrtnepbf16 ymm2 {k7}, ymm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x51,0xd3] + vsqrtnepbf16 ymm2 {k7}, ymm3 + +// CHECK: vsqrtnepbf16 ymm2 {k7} {z}, ymm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x51,0xd3] + vsqrtnepbf16 ymm2 {k7} {z}, ymm3 + +// CHECK: vsqrtnepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x51,0x94,0xf4,0x00,0x00,0x00,0x10] + vsqrtnepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vsqrtnepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x51,0x94,0x87,0x23,0x01,0x00,0x00] + vsqrtnepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vsqrtnepbf16 xmm2, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x51,0x10] + vsqrtnepbf16 xmm2, word ptr [eax]{1to8} + +// CHECK: vsqrtnepbf16 xmm2, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x51,0x14,0x6d,0x00,0xfe,0xff,0xff] + vsqrtnepbf16 xmm2, xmmword ptr [2*ebp - 512] + +// CHECK: vsqrtnepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x51,0x51,0x7f] + vsqrtnepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032] + +// CHECK: vsqrtnepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x51,0x52,0x80] + vsqrtnepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8} + +// CHECK: vsqrtnepbf16 ymm2, ymmword 
ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x51,0x94,0xf4,0x00,0x00,0x00,0x10] + vsqrtnepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vsqrtnepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x51,0x94,0x87,0x23,0x01,0x00,0x00] + vsqrtnepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vsqrtnepbf16 ymm2, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x51,0x10] + vsqrtnepbf16 ymm2, word ptr [eax]{1to16} + +// CHECK: vsqrtnepbf16 ymm2, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x51,0x14,0x6d,0x00,0xfc,0xff,0xff] + vsqrtnepbf16 ymm2, ymmword ptr [2*ebp - 1024] + +// CHECK: vsqrtnepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x51,0x51,0x7f] + vsqrtnepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064] + +// CHECK: vsqrtnepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x51,0x52,0x80] + vsqrtnepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16} + +// CHECK: vsqrtnepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x51,0x94,0xf4,0x00,0x00,0x00,0x10] + vsqrtnepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vsqrtnepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x51,0x94,0x87,0x23,0x01,0x00,0x00] + vsqrtnepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vsqrtnepbf16 zmm2, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x51,0x10] + vsqrtnepbf16 zmm2, word ptr [eax]{1to32} + +// CHECK: vsqrtnepbf16 zmm2, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x51,0x14,0x6d,0x00,0xf8,0xff,0xff] + vsqrtnepbf16 zmm2, zmmword ptr [2*ebp - 2048] + +// CHECK: vsqrtnepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x51,0x51,0x7f] + vsqrtnepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128] + 
+// CHECK: vsqrtnepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x51,0x52,0x80] + vsqrtnepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32} + +// CHECK: vsubnepbf16 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5c,0xd4] + vsubnepbf16 ymm2, ymm3, ymm4 + +// CHECK: vsubnepbf16 ymm2 {k7}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5c,0xd4] + vsubnepbf16 ymm2 {k7}, ymm3, ymm4 + +// CHECK: vsubnepbf16 ymm2 {k7} {z}, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5c,0xd4] + vsubnepbf16 ymm2 {k7} {z}, ymm3, ymm4 + +// CHECK: vsubnepbf16 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5c,0xd4] + vsubnepbf16 zmm2, zmm3, zmm4 + +// CHECK: vsubnepbf16 zmm2 {k7}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5c,0xd4] + vsubnepbf16 zmm2 {k7}, zmm3, zmm4 + +// CHECK: vsubnepbf16 zmm2 {k7} {z}, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5c,0xd4] + vsubnepbf16 zmm2 {k7} {z}, zmm3, zmm4 + +// CHECK: vsubnepbf16 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5c,0xd4] + vsubnepbf16 xmm2, xmm3, xmm4 + +// CHECK: vsubnepbf16 xmm2 {k7}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5c,0xd4] + vsubnepbf16 xmm2 {k7}, xmm3, xmm4 + +// CHECK: vsubnepbf16 xmm2 {k7} {z}, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5c,0xd4] + vsubnepbf16 xmm2 {k7} {z}, xmm3, xmm4 + +// CHECK: vsubnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10] + vsubnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vsubnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00] + vsubnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vsubnepbf16 zmm2, zmm3, word ptr [eax]{1to32} +// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5c,0x10] + vsubnepbf16 zmm2, zmm3, word ptr [eax]{1to32} + +// 
CHECK: vsubnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5c,0x14,0x6d,0x00,0xf8,0xff,0xff] + vsubnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vsubnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5c,0x51,0x7f] + vsubnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vsubnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} +// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5c,0x52,0x80] + vsubnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32} + +// CHECK: vsubnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10] + vsubnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vsubnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00] + vsubnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vsubnepbf16 ymm2, ymm3, word ptr [eax]{1to16} +// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5c,0x10] + vsubnepbf16 ymm2, ymm3, word ptr [eax]{1to16} + +// CHECK: vsubnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5c,0x14,0x6d,0x00,0xfc,0xff,0xff] + vsubnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: vsubnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5c,0x51,0x7f] + vsubnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: vsubnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} +// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5c,0x52,0x80] + vsubnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16} + +// CHECK: vsubnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10] + vsubnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vsubnepbf16 xmm2 {k7}, 
xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00] + vsubnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vsubnepbf16 xmm2, xmm3, word ptr [eax]{1to8} +// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5c,0x10] + vsubnepbf16 xmm2, xmm3, word ptr [eax]{1to8} + +// CHECK: vsubnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5c,0x14,0x6d,0x00,0xfe,0xff,0xff] + vsubnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: vsubnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5c,0x51,0x7f] + vsubnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: vsubnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} +// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5c,0x52,0x80] + vsubnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8} + diff --git a/llvm/test/MC/X86/avx10.2-bf16-64-att.s b/llvm/test/MC/X86/avx10.2-bf16-64-att.s new file mode 100644 index 00000000000000..85d99cfe0a7043 --- /dev/null +++ b/llvm/test/MC/X86/avx10.2-bf16-64-att.s @@ -0,0 +1,3014 @@ +// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s + +// CHECK: vaddnepbf16 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x85,0x45,0x20,0x58,0xf0] + vaddnepbf16 %ymm24, %ymm23, %ymm22 + +// CHECK: vaddnepbf16 %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x85,0x45,0x27,0x58,0xf0] + vaddnepbf16 %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vaddnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x58,0xf0] + vaddnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vaddnepbf16 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x85,0x45,0x40,0x58,0xf0] + vaddnepbf16 %zmm24, %zmm23, %zmm22 + +// CHECK: vaddnepbf16 %zmm24, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0x85,0x45,0x47,0x58,0xf0] + vaddnepbf16 %zmm24, %zmm23, %zmm22 {%k7} + +// CHECK: vaddnepbf16 %zmm24, %zmm23, %zmm22 {%k7} 
{z} +// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x58,0xf0] + vaddnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vaddnepbf16 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x85,0x45,0x00,0x58,0xf0] + vaddnepbf16 %xmm24, %xmm23, %xmm22 + +// CHECK: vaddnepbf16 %xmm24, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0x85,0x45,0x07,0x58,0xf0] + vaddnepbf16 %xmm24, %xmm23, %xmm22 {%k7} + +// CHECK: vaddnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x85,0x45,0x87,0x58,0xf0] + vaddnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vaddnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10] + vaddnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vaddnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x58,0xb4,0x80,0x23,0x01,0x00,0x00] + vaddnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} + +// CHECK: vaddnepbf16 (%rip){1to32}, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x58,0x35,0x00,0x00,0x00,0x00] + vaddnepbf16 (%rip){1to32}, %zmm23, %zmm22 + +// CHECK: vaddnepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x58,0x34,0x6d,0x00,0xf8,0xff,0xff] + vaddnepbf16 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vaddnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x58,0x71,0x7f] + vaddnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} + +// CHECK: vaddnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x58,0x72,0x80] + vaddnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vaddnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10] + vaddnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vaddnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x58,0xb4,0x80,0x23,0x01,0x00,0x00] + vaddnepbf16 
291(%r8,%rax,4), %ymm23, %ymm22 {%k7} + +// CHECK: vaddnepbf16 (%rip){1to16}, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x58,0x35,0x00,0x00,0x00,0x00] + vaddnepbf16 (%rip){1to16}, %ymm23, %ymm22 + +// CHECK: vaddnepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x58,0x34,0x6d,0x00,0xfc,0xff,0xff] + vaddnepbf16 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vaddnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x58,0x71,0x7f] + vaddnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vaddnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x58,0x72,0x80] + vaddnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vaddnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10] + vaddnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vaddnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x58,0xb4,0x80,0x23,0x01,0x00,0x00] + vaddnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} + +// CHECK: vaddnepbf16 (%rip){1to8}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x58,0x35,0x00,0x00,0x00,0x00] + vaddnepbf16 (%rip){1to8}, %xmm23, %xmm22 + +// CHECK: vaddnepbf16 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x58,0x34,0x6d,0x00,0xfe,0xff,0xff] + vaddnepbf16 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vaddnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x58,0x71,0x7f] + vaddnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vaddnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x58,0x72,0x80] + vaddnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vcmppbf16 $123, %ymm24, %ymm23, %k5 +// CHECK: encoding: [0x62,0x93,0x47,0x20,0xc2,0xe8,0x7b] + vcmppbf16 $123, %ymm24, %ymm23, %k5 + +// CHECK: 
vcmppbf16 $123, %ymm24, %ymm23, %k5 {%k7} +// CHECK: encoding: [0x62,0x93,0x47,0x27,0xc2,0xe8,0x7b] + vcmppbf16 $123, %ymm24, %ymm23, %k5 {%k7} + +// CHECK: vcmppbf16 $123, %xmm24, %xmm23, %k5 +// CHECK: encoding: [0x62,0x93,0x47,0x00,0xc2,0xe8,0x7b] + vcmppbf16 $123, %xmm24, %xmm23, %k5 + +// CHECK: vcmppbf16 $123, %xmm24, %xmm23, %k5 {%k7} +// CHECK: encoding: [0x62,0x93,0x47,0x07,0xc2,0xe8,0x7b] + vcmppbf16 $123, %xmm24, %xmm23, %k5 {%k7} + +// CHECK: vcmppbf16 $123, %zmm24, %zmm23, %k5 +// CHECK: encoding: [0x62,0x93,0x47,0x40,0xc2,0xe8,0x7b] + vcmppbf16 $123, %zmm24, %zmm23, %k5 + +// CHECK: vcmppbf16 $123, %zmm24, %zmm23, %k5 {%k7} +// CHECK: encoding: [0x62,0x93,0x47,0x47,0xc2,0xe8,0x7b] + vcmppbf16 $123, %zmm24, %zmm23, %k5 {%k7} + +// CHECK: vcmppbf16 $123, 268435456(%rbp,%r14,8), %zmm23, %k5 +// CHECK: encoding: [0x62,0xb3,0x47,0x40,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b] + vcmppbf16 $123, 268435456(%rbp,%r14,8), %zmm23, %k5 + +// CHECK: vcmppbf16 $123, 291(%r8,%rax,4), %zmm23, %k5 {%k7} +// CHECK: encoding: [0x62,0xd3,0x47,0x47,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b] + vcmppbf16 $123, 291(%r8,%rax,4), %zmm23, %k5 {%k7} + +// CHECK: vcmppbf16 $123, (%rip){1to32}, %zmm23, %k5 +// CHECK: encoding: [0x62,0xf3,0x47,0x50,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b] + vcmppbf16 $123, (%rip){1to32}, %zmm23, %k5 + +// CHECK: vcmppbf16 $123, -2048(,%rbp,2), %zmm23, %k5 +// CHECK: encoding: [0x62,0xf3,0x47,0x40,0xc2,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vcmppbf16 $123, -2048(,%rbp,2), %zmm23, %k5 + +// CHECK: vcmppbf16 $123, 8128(%rcx), %zmm23, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x47,0x47,0xc2,0x69,0x7f,0x7b] + vcmppbf16 $123, 8128(%rcx), %zmm23, %k5 {%k7} + +// CHECK: vcmppbf16 $123, -256(%rdx){1to32}, %zmm23, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x47,0x57,0xc2,0x6a,0x80,0x7b] + vcmppbf16 $123, -256(%rdx){1to32}, %zmm23, %k5 {%k7} + +// CHECK: vcmppbf16 $123, 268435456(%rbp,%r14,8), %xmm23, %k5 +// CHECK: encoding: 
[0x62,0xb3,0x47,0x00,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b] + vcmppbf16 $123, 268435456(%rbp,%r14,8), %xmm23, %k5 + +// CHECK: vcmppbf16 $123, 291(%r8,%rax,4), %xmm23, %k5 {%k7} +// CHECK: encoding: [0x62,0xd3,0x47,0x07,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b] + vcmppbf16 $123, 291(%r8,%rax,4), %xmm23, %k5 {%k7} + +// CHECK: vcmppbf16 $123, (%rip){1to8}, %xmm23, %k5 +// CHECK: encoding: [0x62,0xf3,0x47,0x10,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b] + vcmppbf16 $123, (%rip){1to8}, %xmm23, %k5 + +// CHECK: vcmppbf16 $123, -512(,%rbp,2), %xmm23, %k5 +// CHECK: encoding: [0x62,0xf3,0x47,0x00,0xc2,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vcmppbf16 $123, -512(,%rbp,2), %xmm23, %k5 + +// CHECK: vcmppbf16 $123, 2032(%rcx), %xmm23, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x47,0x07,0xc2,0x69,0x7f,0x7b] + vcmppbf16 $123, 2032(%rcx), %xmm23, %k5 {%k7} + +// CHECK: vcmppbf16 $123, -256(%rdx){1to8}, %xmm23, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x47,0x17,0xc2,0x6a,0x80,0x7b] + vcmppbf16 $123, -256(%rdx){1to8}, %xmm23, %k5 {%k7} + +// CHECK: vcmppbf16 $123, 268435456(%rbp,%r14,8), %ymm23, %k5 +// CHECK: encoding: [0x62,0xb3,0x47,0x20,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b] + vcmppbf16 $123, 268435456(%rbp,%r14,8), %ymm23, %k5 + +// CHECK: vcmppbf16 $123, 291(%r8,%rax,4), %ymm23, %k5 {%k7} +// CHECK: encoding: [0x62,0xd3,0x47,0x27,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b] + vcmppbf16 $123, 291(%r8,%rax,4), %ymm23, %k5 {%k7} + +// CHECK: vcmppbf16 $123, (%rip){1to16}, %ymm23, %k5 +// CHECK: encoding: [0x62,0xf3,0x47,0x30,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b] + vcmppbf16 $123, (%rip){1to16}, %ymm23, %k5 + +// CHECK: vcmppbf16 $123, -1024(,%rbp,2), %ymm23, %k5 +// CHECK: encoding: [0x62,0xf3,0x47,0x20,0xc2,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vcmppbf16 $123, -1024(,%rbp,2), %ymm23, %k5 + +// CHECK: vcmppbf16 $123, 4064(%rcx), %ymm23, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x47,0x27,0xc2,0x69,0x7f,0x7b] + vcmppbf16 $123, 4064(%rcx), %ymm23, %k5 {%k7} + +// CHECK: vcmppbf16 $123, 
-256(%rdx){1to16}, %ymm23, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x47,0x37,0xc2,0x6a,0x80,0x7b] + vcmppbf16 $123, -256(%rdx){1to16}, %ymm23, %k5 {%k7} + +// CHECK: vcomsbf16 %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x2f,0xf7] + vcomsbf16 %xmm23, %xmm22 + +// CHECK: vcomsbf16 268435456(%rbp,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10] + vcomsbf16 268435456(%rbp,%r14,8), %xmm22 + +// CHECK: vcomsbf16 291(%r8,%rax,4), %xmm22 +// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00] + vcomsbf16 291(%r8,%rax,4), %xmm22 + +// CHECK: vcomsbf16 (%rip), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x35,0x00,0x00,0x00,0x00] + vcomsbf16 (%rip), %xmm22 + +// CHECK: vcomsbf16 -64(,%rbp,2), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff] + vcomsbf16 -64(,%rbp,2), %xmm22 + +// CHECK: vcomsbf16 254(%rcx), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x71,0x7f] + vcomsbf16 254(%rcx), %xmm22 + +// CHECK: vcomsbf16 -256(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x72,0x80] + vcomsbf16 -256(%rdx), %xmm22 + +// CHECK: vdivnepbf16 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5e,0xf0] + vdivnepbf16 %ymm24, %ymm23, %ymm22 + +// CHECK: vdivnepbf16 %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5e,0xf0] + vdivnepbf16 %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vdivnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5e,0xf0] + vdivnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vdivnepbf16 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5e,0xf0] + vdivnepbf16 %zmm24, %zmm23, %zmm22 + +// CHECK: vdivnepbf16 %zmm24, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5e,0xf0] + vdivnepbf16 %zmm24, %zmm23, %zmm22 {%k7} + +// CHECK: vdivnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: 
[0x62,0x85,0x45,0xc7,0x5e,0xf0] + vdivnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vdivnepbf16 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5e,0xf0] + vdivnepbf16 %xmm24, %xmm23, %xmm22 + +// CHECK: vdivnepbf16 %xmm24, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5e,0xf0] + vdivnepbf16 %xmm24, %xmm23, %xmm22 {%k7} + +// CHECK: vdivnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5e,0xf0] + vdivnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vdivnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vdivnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vdivnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00] + vdivnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} + +// CHECK: vdivnepbf16 (%rip){1to32}, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5e,0x35,0x00,0x00,0x00,0x00] + vdivnepbf16 (%rip){1to32}, %zmm23, %zmm22 + +// CHECK: vdivnepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5e,0x34,0x6d,0x00,0xf8,0xff,0xff] + vdivnepbf16 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vdivnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5e,0x71,0x7f] + vdivnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} + +// CHECK: vdivnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5e,0x72,0x80] + vdivnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vdivnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vdivnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vdivnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00] + vdivnepbf16 291(%r8,%rax,4), %ymm23, 
%ymm22 {%k7} + +// CHECK: vdivnepbf16 (%rip){1to16}, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5e,0x35,0x00,0x00,0x00,0x00] + vdivnepbf16 (%rip){1to16}, %ymm23, %ymm22 + +// CHECK: vdivnepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5e,0x34,0x6d,0x00,0xfc,0xff,0xff] + vdivnepbf16 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vdivnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5e,0x71,0x7f] + vdivnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vdivnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5e,0x72,0x80] + vdivnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vdivnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vdivnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vdivnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00] + vdivnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} + +// CHECK: vdivnepbf16 (%rip){1to8}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5e,0x35,0x00,0x00,0x00,0x00] + vdivnepbf16 (%rip){1to8}, %xmm23, %xmm22 + +// CHECK: vdivnepbf16 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5e,0x34,0x6d,0x00,0xfe,0xff,0xff] + vdivnepbf16 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vdivnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5e,0x71,0x7f] + vdivnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vdivnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5e,0x72,0x80] + vdivnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfmadd132nepbf16 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0x98,0xf0] + vfmadd132nepbf16 %ymm24, %ymm23, %ymm22 + +// CHECK: vfmadd132nepbf16 %ymm24, 
%ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x27,0x98,0xf0] + vfmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vfmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x98,0xf0] + vfmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0x98,0xf0] + vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 + +// CHECK: vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x47,0x98,0xf0] + vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} + +// CHECK: vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x98,0xf0] + vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0x98,0xf0] + vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 + +// CHECK: vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x07,0x98,0xf0] + vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} + +// CHECK: vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0x87,0x98,0xf0] + vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfmadd132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmadd132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vfmadd132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x98,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmadd132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} + +// CHECK: vfmadd132nepbf16 (%rip){1to32}, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x98,0x35,0x00,0x00,0x00,0x00] + vfmadd132nepbf16 (%rip){1to32}, %zmm23, %zmm22 + +// CHECK: vfmadd132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x98,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfmadd132nepbf16 
-2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vfmadd132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x98,0x71,0x7f] + vfmadd132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfmadd132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x98,0x72,0x80] + vfmadd132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfmadd132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmadd132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vfmadd132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x98,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmadd132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} + +// CHECK: vfmadd132nepbf16 (%rip){1to16}, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x98,0x35,0x00,0x00,0x00,0x00] + vfmadd132nepbf16 (%rip){1to16}, %ymm23, %ymm22 + +// CHECK: vfmadd132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x98,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfmadd132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vfmadd132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x98,0x71,0x7f] + vfmadd132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfmadd132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x98,0x72,0x80] + vfmadd132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfmadd132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmadd132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vfmadd132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x98,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmadd132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} + +// CHECK: vfmadd132nepbf16 
(%rip){1to8}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x98,0x35,0x00,0x00,0x00,0x00] + vfmadd132nepbf16 (%rip){1to8}, %xmm23, %xmm22 + +// CHECK: vfmadd132nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x98,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfmadd132nepbf16 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vfmadd132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x98,0x71,0x7f] + vfmadd132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfmadd132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x98,0x72,0x80] + vfmadd132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0xa8,0xf0] + vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 + +// CHECK: vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x27,0xa8,0xf0] + vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xa8,0xf0] + vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0xa8,0xf0] + vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 + +// CHECK: vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x47,0xa8,0xf0] + vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} + +// CHECK: vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xa8,0xf0] + vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0xa8,0xf0] + vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 + +// CHECK: vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x07,0xa8,0xf0] + vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} + +// CHECK: 
vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0x87,0xa8,0xf0] + vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfmadd213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmadd213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vfmadd213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmadd213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} + +// CHECK: vfmadd213nepbf16 (%rip){1to32}, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xa8,0x35,0x00,0x00,0x00,0x00] + vfmadd213nepbf16 (%rip){1to32}, %zmm23, %zmm22 + +// CHECK: vfmadd213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xa8,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfmadd213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vfmadd213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xa8,0x71,0x7f] + vfmadd213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfmadd213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xa8,0x72,0x80] + vfmadd213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfmadd213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmadd213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vfmadd213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmadd213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} + +// CHECK: vfmadd213nepbf16 (%rip){1to16}, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xa8,0x35,0x00,0x00,0x00,0x00] + vfmadd213nepbf16 (%rip){1to16}, %ymm23, %ymm22 + +// CHECK: vfmadd213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: 
[0x62,0xe6,0x44,0x20,0xa8,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfmadd213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vfmadd213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xa8,0x71,0x7f] + vfmadd213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfmadd213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xa8,0x72,0x80] + vfmadd213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfmadd213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmadd213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vfmadd213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmadd213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} + +// CHECK: vfmadd213nepbf16 (%rip){1to8}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xa8,0x35,0x00,0x00,0x00,0x00] + vfmadd213nepbf16 (%rip){1to8}, %xmm23, %xmm22 + +// CHECK: vfmadd213nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xa8,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfmadd213nepbf16 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vfmadd213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xa8,0x71,0x7f] + vfmadd213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfmadd213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xa8,0x72,0x80] + vfmadd213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0xb8,0xf0] + vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 + +// CHECK: vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x27,0xb8,0xf0] + vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} 
{z} +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xb8,0xf0] + vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0xb8,0xf0] + vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 + +// CHECK: vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x47,0xb8,0xf0] + vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} + +// CHECK: vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xb8,0xf0] + vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0xb8,0xf0] + vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 + +// CHECK: vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x07,0xb8,0xf0] + vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} + +// CHECK: vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0x87,0xb8,0xf0] + vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfmadd231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmadd231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vfmadd231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmadd231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} + +// CHECK: vfmadd231nepbf16 (%rip){1to32}, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xb8,0x35,0x00,0x00,0x00,0x00] + vfmadd231nepbf16 (%rip){1to32}, %zmm23, %zmm22 + +// CHECK: vfmadd231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xb8,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfmadd231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vfmadd231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xb8,0x71,0x7f] + vfmadd231nepbf16 8128(%rcx), 
%zmm23, %zmm22 {%k7} {z} + +// CHECK: vfmadd231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xb8,0x72,0x80] + vfmadd231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfmadd231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmadd231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vfmadd231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmadd231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} + +// CHECK: vfmadd231nepbf16 (%rip){1to16}, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xb8,0x35,0x00,0x00,0x00,0x00] + vfmadd231nepbf16 (%rip){1to16}, %ymm23, %ymm22 + +// CHECK: vfmadd231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xb8,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfmadd231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vfmadd231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xb8,0x71,0x7f] + vfmadd231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfmadd231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xb8,0x72,0x80] + vfmadd231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfmadd231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmadd231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vfmadd231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmadd231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} + +// CHECK: vfmadd231nepbf16 (%rip){1to8}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xb8,0x35,0x00,0x00,0x00,0x00] + vfmadd231nepbf16 (%rip){1to8}, %xmm23, %xmm22 + +// CHECK: vfmadd231nepbf16 
-512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xb8,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfmadd231nepbf16 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vfmadd231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xb8,0x71,0x7f] + vfmadd231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfmadd231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xb8,0x72,0x80] + vfmadd231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0x9a,0xf0] + vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 + +// CHECK: vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x27,0x9a,0xf0] + vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x9a,0xf0] + vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0x9a,0xf0] + vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 + +// CHECK: vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x47,0x9a,0xf0] + vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} + +// CHECK: vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x9a,0xf0] + vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0x9a,0xf0] + vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 + +// CHECK: vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x07,0x9a,0xf0] + vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} + +// CHECK: vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0x87,0x9a,0xf0] + vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfmsub132nepbf16 
268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmsub132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vfmsub132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmsub132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} + +// CHECK: vfmsub132nepbf16 (%rip){1to32}, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x9a,0x35,0x00,0x00,0x00,0x00] + vfmsub132nepbf16 (%rip){1to32}, %zmm23, %zmm22 + +// CHECK: vfmsub132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x9a,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfmsub132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vfmsub132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x9a,0x71,0x7f] + vfmsub132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfmsub132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x9a,0x72,0x80] + vfmsub132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfmsub132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmsub132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vfmsub132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmsub132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} + +// CHECK: vfmsub132nepbf16 (%rip){1to16}, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x9a,0x35,0x00,0x00,0x00,0x00] + vfmsub132nepbf16 (%rip){1to16}, %ymm23, %ymm22 + +// CHECK: vfmsub132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x9a,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfmsub132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vfmsub132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: 
[0x62,0xe6,0x44,0xa7,0x9a,0x71,0x7f] + vfmsub132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfmsub132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x9a,0x72,0x80] + vfmsub132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfmsub132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmsub132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vfmsub132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmsub132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} + +// CHECK: vfmsub132nepbf16 (%rip){1to8}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x9a,0x35,0x00,0x00,0x00,0x00] + vfmsub132nepbf16 (%rip){1to8}, %xmm23, %xmm22 + +// CHECK: vfmsub132nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x9a,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfmsub132nepbf16 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vfmsub132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x9a,0x71,0x7f] + vfmsub132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfmsub132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x9a,0x72,0x80] + vfmsub132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0xaa,0xf0] + vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 + +// CHECK: vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x27,0xaa,0xf0] + vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xaa,0xf0] + vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: 
[0x62,0x86,0x44,0x40,0xaa,0xf0] + vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 + +// CHECK: vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x47,0xaa,0xf0] + vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} + +// CHECK: vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xaa,0xf0] + vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0xaa,0xf0] + vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 + +// CHECK: vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x07,0xaa,0xf0] + vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} + +// CHECK: vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0x87,0xaa,0xf0] + vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfmsub213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmsub213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vfmsub213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmsub213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} + +// CHECK: vfmsub213nepbf16 (%rip){1to32}, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xaa,0x35,0x00,0x00,0x00,0x00] + vfmsub213nepbf16 (%rip){1to32}, %zmm23, %zmm22 + +// CHECK: vfmsub213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xaa,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfmsub213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vfmsub213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xaa,0x71,0x7f] + vfmsub213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfmsub213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xaa,0x72,0x80] + vfmsub213nepbf16 
-256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfmsub213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmsub213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vfmsub213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmsub213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} + +// CHECK: vfmsub213nepbf16 (%rip){1to16}, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xaa,0x35,0x00,0x00,0x00,0x00] + vfmsub213nepbf16 (%rip){1to16}, %ymm23, %ymm22 + +// CHECK: vfmsub213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xaa,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfmsub213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vfmsub213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xaa,0x71,0x7f] + vfmsub213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfmsub213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xaa,0x72,0x80] + vfmsub213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfmsub213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmsub213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vfmsub213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmsub213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} + +// CHECK: vfmsub213nepbf16 (%rip){1to8}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xaa,0x35,0x00,0x00,0x00,0x00] + vfmsub213nepbf16 (%rip){1to8}, %xmm23, %xmm22 + +// CHECK: vfmsub213nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xaa,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfmsub213nepbf16 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: 
vfmsub213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xaa,0x71,0x7f] + vfmsub213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfmsub213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xaa,0x72,0x80] + vfmsub213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0xba,0xf0] + vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 + +// CHECK: vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x27,0xba,0xf0] + vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xba,0xf0] + vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0xba,0xf0] + vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 + +// CHECK: vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x47,0xba,0xf0] + vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} + +// CHECK: vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xba,0xf0] + vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0xba,0xf0] + vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 + +// CHECK: vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x07,0xba,0xf0] + vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} + +// CHECK: vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0x87,0xba,0xf0] + vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfmsub231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmsub231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// 
CHECK: vfmsub231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xba,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmsub231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} + +// CHECK: vfmsub231nepbf16 (%rip){1to32}, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xba,0x35,0x00,0x00,0x00,0x00] + vfmsub231nepbf16 (%rip){1to32}, %zmm23, %zmm22 + +// CHECK: vfmsub231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xba,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfmsub231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vfmsub231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xba,0x71,0x7f] + vfmsub231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfmsub231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xba,0x72,0x80] + vfmsub231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfmsub231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmsub231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vfmsub231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xba,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmsub231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} + +// CHECK: vfmsub231nepbf16 (%rip){1to16}, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xba,0x35,0x00,0x00,0x00,0x00] + vfmsub231nepbf16 (%rip){1to16}, %ymm23, %ymm22 + +// CHECK: vfmsub231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xba,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfmsub231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vfmsub231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xba,0x71,0x7f] + vfmsub231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfmsub231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: 
[0x62,0xe6,0x44,0xb7,0xba,0x72,0x80] + vfmsub231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfmsub231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmsub231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vfmsub231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xba,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmsub231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} + +// CHECK: vfmsub231nepbf16 (%rip){1to8}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xba,0x35,0x00,0x00,0x00,0x00] + vfmsub231nepbf16 (%rip){1to8}, %xmm23, %xmm22 + +// CHECK: vfmsub231nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xba,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfmsub231nepbf16 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vfmsub231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xba,0x71,0x7f] + vfmsub231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfmsub231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xba,0x72,0x80] + vfmsub231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0x9c,0xf0] + vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 + +// CHECK: vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x27,0x9c,0xf0] + vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x9c,0xf0] + vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0x9c,0xf0] + vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 + +// CHECK: vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x47,0x9c,0xf0] + 
vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} + +// CHECK: vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x9c,0xf0] + vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0x9c,0xf0] + vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 + +// CHECK: vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x07,0x9c,0xf0] + vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} + +// CHECK: vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0x87,0x9c,0xf0] + vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfnmadd132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmadd132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vfnmadd132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmadd132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} + +// CHECK: vfnmadd132nepbf16 (%rip){1to32}, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x9c,0x35,0x00,0x00,0x00,0x00] + vfnmadd132nepbf16 (%rip){1to32}, %zmm23, %zmm22 + +// CHECK: vfnmadd132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x9c,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfnmadd132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vfnmadd132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x9c,0x71,0x7f] + vfnmadd132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfnmadd132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x9c,0x72,0x80] + vfnmadd132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfnmadd132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: 
[0x62,0xa6,0x44,0x20,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmadd132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vfnmadd132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmadd132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} + +// CHECK: vfnmadd132nepbf16 (%rip){1to16}, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x9c,0x35,0x00,0x00,0x00,0x00] + vfnmadd132nepbf16 (%rip){1to16}, %ymm23, %ymm22 + +// CHECK: vfnmadd132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x9c,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfnmadd132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vfnmadd132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x9c,0x71,0x7f] + vfnmadd132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfnmadd132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x9c,0x72,0x80] + vfnmadd132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfnmadd132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmadd132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vfnmadd132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmadd132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} + +// CHECK: vfnmadd132nepbf16 (%rip){1to8}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x9c,0x35,0x00,0x00,0x00,0x00] + vfnmadd132nepbf16 (%rip){1to8}, %xmm23, %xmm22 + +// CHECK: vfnmadd132nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x9c,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfnmadd132nepbf16 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vfnmadd132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x9c,0x71,0x7f] + 
vfnmadd132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfnmadd132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x9c,0x72,0x80] + vfnmadd132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0xac,0xf0] + vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 + +// CHECK: vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x27,0xac,0xf0] + vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xac,0xf0] + vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0xac,0xf0] + vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 + +// CHECK: vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x47,0xac,0xf0] + vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} + +// CHECK: vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xac,0xf0] + vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0xac,0xf0] + vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22 + +// CHECK: vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x07,0xac,0xf0] + vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} + +// CHECK: vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0x87,0xac,0xf0] + vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfnmadd213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmadd213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vfnmadd213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +// CHECK: encoding: 
[0x62,0xc6,0x44,0x47,0xac,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmadd213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} + +// CHECK: vfnmadd213nepbf16 (%rip){1to32}, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xac,0x35,0x00,0x00,0x00,0x00] + vfnmadd213nepbf16 (%rip){1to32}, %zmm23, %zmm22 + +// CHECK: vfnmadd213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xac,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfnmadd213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vfnmadd213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xac,0x71,0x7f] + vfnmadd213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfnmadd213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xac,0x72,0x80] + vfnmadd213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfnmadd213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmadd213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vfnmadd213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xac,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmadd213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} + +// CHECK: vfnmadd213nepbf16 (%rip){1to16}, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xac,0x35,0x00,0x00,0x00,0x00] + vfnmadd213nepbf16 (%rip){1to16}, %ymm23, %ymm22 + +// CHECK: vfnmadd213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xac,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfnmadd213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vfnmadd213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xac,0x71,0x7f] + vfnmadd213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfnmadd213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xac,0x72,0x80] + vfnmadd213nepbf16 
-256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfnmadd213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmadd213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vfnmadd213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xac,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmadd213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} + +// CHECK: vfnmadd213nepbf16 (%rip){1to8}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xac,0x35,0x00,0x00,0x00,0x00] + vfnmadd213nepbf16 (%rip){1to8}, %xmm23, %xmm22 + +// CHECK: vfnmadd213nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xac,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfnmadd213nepbf16 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vfnmadd213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xac,0x71,0x7f] + vfnmadd213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfnmadd213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xac,0x72,0x80] + vfnmadd213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0xbc,0xf0] + vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 + +// CHECK: vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x27,0xbc,0xf0] + vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xbc,0xf0] + vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0xbc,0xf0] + vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 + +// CHECK: vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x47,0xbc,0xf0] + vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} 
+ +// CHECK: vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xbc,0xf0] + vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0xbc,0xf0] + vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 + +// CHECK: vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x07,0xbc,0xf0] + vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} + +// CHECK: vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0x87,0xbc,0xf0] + vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfnmadd231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmadd231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vfnmadd231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmadd231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} + +// CHECK: vfnmadd231nepbf16 (%rip){1to32}, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xbc,0x35,0x00,0x00,0x00,0x00] + vfnmadd231nepbf16 (%rip){1to32}, %zmm23, %zmm22 + +// CHECK: vfnmadd231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xbc,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfnmadd231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vfnmadd231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xbc,0x71,0x7f] + vfnmadd231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfnmadd231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xbc,0x72,0x80] + vfnmadd231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfnmadd231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmadd231nepbf16 
268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vfnmadd231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmadd231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} + +// CHECK: vfnmadd231nepbf16 (%rip){1to16}, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xbc,0x35,0x00,0x00,0x00,0x00] + vfnmadd231nepbf16 (%rip){1to16}, %ymm23, %ymm22 + +// CHECK: vfnmadd231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xbc,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfnmadd231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vfnmadd231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xbc,0x71,0x7f] + vfnmadd231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfnmadd231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xbc,0x72,0x80] + vfnmadd231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfnmadd231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmadd231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vfnmadd231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmadd231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} + +// CHECK: vfnmadd231nepbf16 (%rip){1to8}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xbc,0x35,0x00,0x00,0x00,0x00] + vfnmadd231nepbf16 (%rip){1to8}, %xmm23, %xmm22 + +// CHECK: vfnmadd231nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xbc,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfnmadd231nepbf16 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vfnmadd231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xbc,0x71,0x7f] + vfnmadd231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfnmadd231nepbf16 
-256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xbc,0x72,0x80] + vfnmadd231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0x9e,0xf0] + vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 + +// CHECK: vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x27,0x9e,0xf0] + vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x9e,0xf0] + vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0x9e,0xf0] + vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 + +// CHECK: vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x47,0x9e,0xf0] + vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} + +// CHECK: vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x9e,0xf0] + vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0x9e,0xf0] + vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 + +// CHECK: vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x07,0x9e,0xf0] + vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} + +// CHECK: vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0x87,0x9e,0xf0] + vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfnmsub132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmsub132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vfnmsub132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmsub132nepbf16 
291(%r8,%rax,4), %zmm23, %zmm22 {%k7} + +// CHECK: vfnmsub132nepbf16 (%rip){1to32}, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x9e,0x35,0x00,0x00,0x00,0x00] + vfnmsub132nepbf16 (%rip){1to32}, %zmm23, %zmm22 + +// CHECK: vfnmsub132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x9e,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfnmsub132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vfnmsub132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x9e,0x71,0x7f] + vfnmsub132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfnmsub132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x9e,0x72,0x80] + vfnmsub132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfnmsub132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmsub132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vfnmsub132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmsub132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} + +// CHECK: vfnmsub132nepbf16 (%rip){1to16}, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x9e,0x35,0x00,0x00,0x00,0x00] + vfnmsub132nepbf16 (%rip){1to16}, %ymm23, %ymm22 + +// CHECK: vfnmsub132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x9e,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfnmsub132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vfnmsub132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x9e,0x71,0x7f] + vfnmsub132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfnmsub132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x9e,0x72,0x80] + vfnmsub132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfnmsub132nepbf16 
268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmsub132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vfnmsub132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmsub132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} + +// CHECK: vfnmsub132nepbf16 (%rip){1to8}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x9e,0x35,0x00,0x00,0x00,0x00] + vfnmsub132nepbf16 (%rip){1to8}, %xmm23, %xmm22 + +// CHECK: vfnmsub132nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x9e,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfnmsub132nepbf16 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vfnmsub132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x9e,0x71,0x7f] + vfnmsub132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfnmsub132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x9e,0x72,0x80] + vfnmsub132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0xae,0xf0] + vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 + +// CHECK: vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x27,0xae,0xf0] + vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xae,0xf0] + vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0xae,0xf0] + vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 + +// CHECK: vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x47,0xae,0xf0] + vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} + +// CHECK: vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +// CHECK: 
encoding: [0x62,0x86,0x44,0xc7,0xae,0xf0] + vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0xae,0xf0] + vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 + +// CHECK: vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x07,0xae,0xf0] + vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} + +// CHECK: vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0x87,0xae,0xf0] + vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfnmsub213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmsub213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vfnmsub213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xae,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmsub213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} + +// CHECK: vfnmsub213nepbf16 (%rip){1to32}, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xae,0x35,0x00,0x00,0x00,0x00] + vfnmsub213nepbf16 (%rip){1to32}, %zmm23, %zmm22 + +// CHECK: vfnmsub213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xae,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfnmsub213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vfnmsub213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xae,0x71,0x7f] + vfnmsub213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfnmsub213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xae,0x72,0x80] + vfnmsub213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfnmsub213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmsub213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vfnmsub213nepbf16 291(%r8,%rax,4), 
%ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xae,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmsub213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} + +// CHECK: vfnmsub213nepbf16 (%rip){1to16}, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xae,0x35,0x00,0x00,0x00,0x00] + vfnmsub213nepbf16 (%rip){1to16}, %ymm23, %ymm22 + +// CHECK: vfnmsub213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xae,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfnmsub213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vfnmsub213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xae,0x71,0x7f] + vfnmsub213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfnmsub213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xae,0x72,0x80] + vfnmsub213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfnmsub213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmsub213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vfnmsub213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xae,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmsub213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} + +// CHECK: vfnmsub213nepbf16 (%rip){1to8}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xae,0x35,0x00,0x00,0x00,0x00] + vfnmsub213nepbf16 (%rip){1to8}, %xmm23, %xmm22 + +// CHECK: vfnmsub213nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xae,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfnmsub213nepbf16 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vfnmsub213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xae,0x71,0x7f] + vfnmsub213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfnmsub213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: 
[0x62,0xe6,0x44,0x97,0xae,0x72,0x80] + vfnmsub213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0xbe,0xf0] + vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 + +// CHECK: vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x27,0xbe,0xf0] + vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xbe,0xf0] + vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0xbe,0xf0] + vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 + +// CHECK: vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x47,0xbe,0xf0] + vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} + +// CHECK: vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xbe,0xf0] + vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0xbe,0xf0] + vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 + +// CHECK: vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x07,0xbe,0xf0] + vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} + +// CHECK: vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0x87,0xbe,0xf0] + vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfnmsub231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmsub231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vfnmsub231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmsub231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} + +// CHECK: vfnmsub231nepbf16 
(%rip){1to32}, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xbe,0x35,0x00,0x00,0x00,0x00] + vfnmsub231nepbf16 (%rip){1to32}, %zmm23, %zmm22 + +// CHECK: vfnmsub231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xbe,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfnmsub231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vfnmsub231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xbe,0x71,0x7f] + vfnmsub231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfnmsub231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xbe,0x72,0x80] + vfnmsub231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vfnmsub231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmsub231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vfnmsub231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmsub231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} + +// CHECK: vfnmsub231nepbf16 (%rip){1to16}, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xbe,0x35,0x00,0x00,0x00,0x00] + vfnmsub231nepbf16 (%rip){1to16}, %ymm23, %ymm22 + +// CHECK: vfnmsub231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xbe,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfnmsub231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vfnmsub231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xbe,0x71,0x7f] + vfnmsub231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfnmsub231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xbe,0x72,0x80] + vfnmsub231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vfnmsub231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: 
[0x62,0xa6,0x44,0x00,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmsub231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vfnmsub231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmsub231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} + +// CHECK: vfnmsub231nepbf16 (%rip){1to8}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xbe,0x35,0x00,0x00,0x00,0x00] + vfnmsub231nepbf16 (%rip){1to8}, %xmm23, %xmm22 + +// CHECK: vfnmsub231nepbf16 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xbe,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfnmsub231nepbf16 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vfnmsub231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xbe,0x71,0x7f] + vfnmsub231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfnmsub231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xbe,0x72,0x80] + vfnmsub231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vfpclasspbf16 $123, %zmm23, %k5 +// CHECK: encoding: [0x62,0xb3,0x7f,0x48,0x66,0xef,0x7b] + vfpclasspbf16 $123, %zmm23, %k5 + +// CHECK: vfpclasspbf16 $123, %zmm23, %k5 {%k7} +// CHECK: encoding: [0x62,0xb3,0x7f,0x4f,0x66,0xef,0x7b] + vfpclasspbf16 $123, %zmm23, %k5 {%k7} + +// CHECK: vfpclasspbf16 $123, %ymm23, %k5 +// CHECK: encoding: [0x62,0xb3,0x7f,0x28,0x66,0xef,0x7b] + vfpclasspbf16 $123, %ymm23, %k5 + +// CHECK: vfpclasspbf16 $123, %ymm23, %k5 {%k7} +// CHECK: encoding: [0x62,0xb3,0x7f,0x2f,0x66,0xef,0x7b] + vfpclasspbf16 $123, %ymm23, %k5 {%k7} + +// CHECK: vfpclasspbf16 $123, %xmm23, %k5 +// CHECK: encoding: [0x62,0xb3,0x7f,0x08,0x66,0xef,0x7b] + vfpclasspbf16 $123, %xmm23, %k5 + +// CHECK: vfpclasspbf16 $123, %xmm23, %k5 {%k7} +// CHECK: encoding: [0x62,0xb3,0x7f,0x0f,0x66,0xef,0x7b] + vfpclasspbf16 $123, %xmm23, %k5 {%k7} + +// CHECK: vfpclasspbf16x $123, 268435456(%rbp,%r14,8), %k5 +// 
CHECK: encoding: [0x62,0xb3,0x7f,0x08,0x66,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b] + vfpclasspbf16x $123, 268435456(%rbp,%r14,8), %k5 + +// CHECK: vfpclasspbf16x $123, 291(%r8,%rax,4), %k5 {%k7} +// CHECK: encoding: [0x62,0xd3,0x7f,0x0f,0x66,0xac,0x80,0x23,0x01,0x00,0x00,0x7b] + vfpclasspbf16x $123, 291(%r8,%rax,4), %k5 {%k7} + +// CHECK: vfpclasspbf16 $123, (%rip){1to8}, %k5 +// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b] + vfpclasspbf16 $123, (%rip){1to8}, %k5 + +// CHECK: vfpclasspbf16x $123, -512(,%rbp,2), %k5 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vfpclasspbf16x $123, -512(,%rbp,2), %k5 + +// CHECK: vfpclasspbf16x $123, 2032(%rcx), %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0x69,0x7f,0x7b] + vfpclasspbf16x $123, 2032(%rcx), %k5 {%k7} + +// CHECK: vfpclasspbf16 $123, -256(%rdx){1to8}, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x1f,0x66,0x6a,0x80,0x7b] + vfpclasspbf16 $123, -256(%rdx){1to8}, %k5 {%k7} + +// CHECK: vfpclasspbf16 $123, (%rip){1to16}, %k5 +// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b] + vfpclasspbf16 $123, (%rip){1to16}, %k5 + +// CHECK: vfpclasspbf16y $123, -1024(,%rbp,2), %k5 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x66,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vfpclasspbf16y $123, -1024(,%rbp,2), %k5 + +// CHECK: vfpclasspbf16y $123, 4064(%rcx), %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x66,0x69,0x7f,0x7b] + vfpclasspbf16y $123, 4064(%rcx), %k5 {%k7} + +// CHECK: vfpclasspbf16 $123, -256(%rdx){1to16}, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x3f,0x66,0x6a,0x80,0x7b] + vfpclasspbf16 $123, -256(%rdx){1to16}, %k5 {%k7} + +// CHECK: vfpclasspbf16 $123, (%rip){1to32}, %k5 +// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b] + vfpclasspbf16 $123, (%rip){1to32}, %k5 + +// CHECK: vfpclasspbf16z $123, -2048(,%rbp,2), %k5 +// CHECK: encoding: 
[0x62,0xf3,0x7f,0x48,0x66,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vfpclasspbf16z $123, -2048(,%rbp,2), %k5 + +// CHECK: vfpclasspbf16z $123, 8128(%rcx), %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x66,0x69,0x7f,0x7b] + vfpclasspbf16z $123, 8128(%rcx), %k5 {%k7} + +// CHECK: vfpclasspbf16 $123, -256(%rdx){1to32}, %k5 {%k7} +// CHECK: encoding: [0x62,0xf3,0x7f,0x5f,0x66,0x6a,0x80,0x7b] + vfpclasspbf16 $123, -256(%rdx){1to32}, %k5 {%k7} + +// CHECK: vgetexppbf16 %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x42,0xf7] + vgetexppbf16 %xmm23, %xmm22 + +// CHECK: vgetexppbf16 %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xa5,0x7d,0x0f,0x42,0xf7] + vgetexppbf16 %xmm23, %xmm22 {%k7} + +// CHECK: vgetexppbf16 %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa5,0x7d,0x8f,0x42,0xf7] + vgetexppbf16 %xmm23, %xmm22 {%k7} {z} + +// CHECK: vgetexppbf16 %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x42,0xf7] + vgetexppbf16 %zmm23, %zmm22 + +// CHECK: vgetexppbf16 %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xa5,0x7d,0x4f,0x42,0xf7] + vgetexppbf16 %zmm23, %zmm22 {%k7} + +// CHECK: vgetexppbf16 %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa5,0x7d,0xcf,0x42,0xf7] + vgetexppbf16 %zmm23, %zmm22 {%k7} {z} + +// CHECK: vgetexppbf16 %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x42,0xf7] + vgetexppbf16 %ymm23, %ymm22 + +// CHECK: vgetexppbf16 %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xa5,0x7d,0x2f,0x42,0xf7] + vgetexppbf16 %ymm23, %ymm22 {%k7} + +// CHECK: vgetexppbf16 %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa5,0x7d,0xaf,0x42,0xf7] + vgetexppbf16 %ymm23, %ymm22 {%k7} {z} + +// CHECK: vgetexppbf16 268435456(%rbp,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10] + vgetexppbf16 268435456(%rbp,%r14,8), %xmm22 + +// CHECK: vgetexppbf16 291(%r8,%rax,4), %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x7d,0x0f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00] + vgetexppbf16 
291(%r8,%rax,4), %xmm22 {%k7} + +// CHECK: vgetexppbf16 (%rip){1to8}, %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x18,0x42,0x35,0x00,0x00,0x00,0x00] + vgetexppbf16 (%rip){1to8}, %xmm22 + +// CHECK: vgetexppbf16 -512(,%rbp,2), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x42,0x34,0x6d,0x00,0xfe,0xff,0xff] + vgetexppbf16 -512(,%rbp,2), %xmm22 + +// CHECK: vgetexppbf16 2032(%rcx), %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x7d,0x8f,0x42,0x71,0x7f] + vgetexppbf16 2032(%rcx), %xmm22 {%k7} {z} + +// CHECK: vgetexppbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x7d,0x9f,0x42,0x72,0x80] + vgetexppbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z} + +// CHECK: vgetexppbf16 268435456(%rbp,%r14,8), %ymm22 +// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10] + vgetexppbf16 268435456(%rbp,%r14,8), %ymm22 + +// CHECK: vgetexppbf16 291(%r8,%rax,4), %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x7d,0x2f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00] + vgetexppbf16 291(%r8,%rax,4), %ymm22 {%k7} + +// CHECK: vgetexppbf16 (%rip){1to16}, %ymm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x38,0x42,0x35,0x00,0x00,0x00,0x00] + vgetexppbf16 (%rip){1to16}, %ymm22 + +// CHECK: vgetexppbf16 -1024(,%rbp,2), %ymm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x28,0x42,0x34,0x6d,0x00,0xfc,0xff,0xff] + vgetexppbf16 -1024(,%rbp,2), %ymm22 + +// CHECK: vgetexppbf16 4064(%rcx), %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x7d,0xaf,0x42,0x71,0x7f] + vgetexppbf16 4064(%rcx), %ymm22 {%k7} {z} + +// CHECK: vgetexppbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x7d,0xbf,0x42,0x72,0x80] + vgetexppbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z} + +// CHECK: vgetexppbf16 268435456(%rbp,%r14,8), %zmm22 +// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10] + vgetexppbf16 268435456(%rbp,%r14,8), %zmm22 + +// CHECK: vgetexppbf16 291(%r8,%rax,4), %zmm22 {%k7} +// CHECK: encoding: 
[0x62,0xc5,0x7d,0x4f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00] + vgetexppbf16 291(%r8,%rax,4), %zmm22 {%k7} + +// CHECK: vgetexppbf16 (%rip){1to32}, %zmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x58,0x42,0x35,0x00,0x00,0x00,0x00] + vgetexppbf16 (%rip){1to32}, %zmm22 + +// CHECK: vgetexppbf16 -2048(,%rbp,2), %zmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x48,0x42,0x34,0x6d,0x00,0xf8,0xff,0xff] + vgetexppbf16 -2048(,%rbp,2), %zmm22 + +// CHECK: vgetexppbf16 8128(%rcx), %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x7d,0xcf,0x42,0x71,0x7f] + vgetexppbf16 8128(%rcx), %zmm22 {%k7} {z} + +// CHECK: vgetexppbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x7d,0xdf,0x42,0x72,0x80] + vgetexppbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z} + +// CHECK: vgetmantpbf16 $123, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x26,0xf7,0x7b] + vgetmantpbf16 $123, %zmm23, %zmm22 + +// CHECK: vgetmantpbf16 $123, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xa3,0x7f,0x4f,0x26,0xf7,0x7b] + vgetmantpbf16 $123, %zmm23, %zmm22 {%k7} + +// CHECK: vgetmantpbf16 $123, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa3,0x7f,0xcf,0x26,0xf7,0x7b] + vgetmantpbf16 $123, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vgetmantpbf16 $123, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x26,0xf7,0x7b] + vgetmantpbf16 $123, %ymm23, %ymm22 + +// CHECK: vgetmantpbf16 $123, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xa3,0x7f,0x2f,0x26,0xf7,0x7b] + vgetmantpbf16 $123, %ymm23, %ymm22 {%k7} + +// CHECK: vgetmantpbf16 $123, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa3,0x7f,0xaf,0x26,0xf7,0x7b] + vgetmantpbf16 $123, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vgetmantpbf16 $123, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x26,0xf7,0x7b] + vgetmantpbf16 $123, %xmm23, %xmm22 + +// CHECK: vgetmantpbf16 $123, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xa3,0x7f,0x0f,0x26,0xf7,0x7b] + vgetmantpbf16 $123, %xmm23, %xmm22 {%k7} + +// CHECK: vgetmantpbf16 
$123, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa3,0x7f,0x8f,0x26,0xf7,0x7b] + vgetmantpbf16 $123, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %xmm22 + +// CHECK: vgetmantpbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc3,0x7f,0x0f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + vgetmantpbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7} + +// CHECK: vgetmantpbf16 $123, (%rip){1to8}, %xmm22 +// CHECK: encoding: [0x62,0xe3,0x7f,0x18,0x26,0x35,0x00,0x00,0x00,0x00,0x7b] + vgetmantpbf16 $123, (%rip){1to8}, %xmm22 + +// CHECK: vgetmantpbf16 $123, -512(,%rbp,2), %xmm22 +// CHECK: encoding: [0x62,0xe3,0x7f,0x08,0x26,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vgetmantpbf16 $123, -512(,%rbp,2), %xmm22 + +// CHECK: vgetmantpbf16 $123, 2032(%rcx), %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x7f,0x8f,0x26,0x71,0x7f,0x7b] + vgetmantpbf16 $123, 2032(%rcx), %xmm22 {%k7} {z} + +// CHECK: vgetmantpbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x7f,0x9f,0x26,0x72,0x80,0x7b] + vgetmantpbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z} + +// CHECK: vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %ymm22 +// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %ymm22 + +// CHECK: vgetmantpbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc3,0x7f,0x2f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + vgetmantpbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7} + +// CHECK: vgetmantpbf16 $123, (%rip){1to16}, %ymm22 +// CHECK: encoding: [0x62,0xe3,0x7f,0x38,0x26,0x35,0x00,0x00,0x00,0x00,0x7b] + vgetmantpbf16 $123, (%rip){1to16}, %ymm22 + +// CHECK: vgetmantpbf16 $123, -1024(,%rbp,2), %ymm22 +// CHECK: encoding: [0x62,0xe3,0x7f,0x28,0x26,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vgetmantpbf16 $123, 
-1024(,%rbp,2), %ymm22 + +// CHECK: vgetmantpbf16 $123, 4064(%rcx), %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x7f,0xaf,0x26,0x71,0x7f,0x7b] + vgetmantpbf16 $123, 4064(%rcx), %ymm22 {%k7} {z} + +// CHECK: vgetmantpbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x7f,0xbf,0x26,0x72,0x80,0x7b] + vgetmantpbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z} + +// CHECK: vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %zmm22 +// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %zmm22 + +// CHECK: vgetmantpbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc3,0x7f,0x4f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + vgetmantpbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7} + +// CHECK: vgetmantpbf16 $123, (%rip){1to32}, %zmm22 +// CHECK: encoding: [0x62,0xe3,0x7f,0x58,0x26,0x35,0x00,0x00,0x00,0x00,0x7b] + vgetmantpbf16 $123, (%rip){1to32}, %zmm22 + +// CHECK: vgetmantpbf16 $123, -2048(,%rbp,2), %zmm22 +// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x26,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vgetmantpbf16 $123, -2048(,%rbp,2), %zmm22 + +// CHECK: vgetmantpbf16 $123, 8128(%rcx), %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x7f,0xcf,0x26,0x71,0x7f,0x7b] + vgetmantpbf16 $123, 8128(%rcx), %zmm22 {%k7} {z} + +// CHECK: vgetmantpbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x7f,0xdf,0x26,0x72,0x80,0x7b] + vgetmantpbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z} + +// CHECK: vmaxpbf16 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5f,0xf0] + vmaxpbf16 %ymm24, %ymm23, %ymm22 + +// CHECK: vmaxpbf16 %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5f,0xf0] + vmaxpbf16 %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vmaxpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5f,0xf0] + vmaxpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vmaxpbf16 %zmm24, %zmm23, %zmm22 +// 
CHECK: encoding: [0x62,0x85,0x45,0x40,0x5f,0xf0] + vmaxpbf16 %zmm24, %zmm23, %zmm22 + +// CHECK: vmaxpbf16 %zmm24, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5f,0xf0] + vmaxpbf16 %zmm24, %zmm23, %zmm22 {%k7} + +// CHECK: vmaxpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5f,0xf0] + vmaxpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vmaxpbf16 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5f,0xf0] + vmaxpbf16 %xmm24, %xmm23, %xmm22 + +// CHECK: vmaxpbf16 %xmm24, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5f,0xf0] + vmaxpbf16 %xmm24, %xmm23, %xmm22 {%k7} + +// CHECK: vmaxpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5f,0xf0] + vmaxpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vmaxpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmaxpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vmaxpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00] + vmaxpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} + +// CHECK: vmaxpbf16 (%rip){1to32}, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5f,0x35,0x00,0x00,0x00,0x00] + vmaxpbf16 (%rip){1to32}, %zmm23, %zmm22 + +// CHECK: vmaxpbf16 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5f,0x34,0x6d,0x00,0xf8,0xff,0xff] + vmaxpbf16 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vmaxpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5f,0x71,0x7f] + vmaxpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} + +// CHECK: vmaxpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5f,0x72,0x80] + vmaxpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vmaxpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: 
[0x62,0xa5,0x45,0x20,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmaxpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vmaxpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00] + vmaxpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} + +// CHECK: vmaxpbf16 (%rip){1to16}, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5f,0x35,0x00,0x00,0x00,0x00] + vmaxpbf16 (%rip){1to16}, %ymm23, %ymm22 + +// CHECK: vmaxpbf16 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5f,0x34,0x6d,0x00,0xfc,0xff,0xff] + vmaxpbf16 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vmaxpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5f,0x71,0x7f] + vmaxpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vmaxpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5f,0x72,0x80] + vmaxpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vmaxpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmaxpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vmaxpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00] + vmaxpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} + +// CHECK: vmaxpbf16 (%rip){1to8}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5f,0x35,0x00,0x00,0x00,0x00] + vmaxpbf16 (%rip){1to8}, %xmm23, %xmm22 + +// CHECK: vmaxpbf16 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5f,0x34,0x6d,0x00,0xfe,0xff,0xff] + vmaxpbf16 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vmaxpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5f,0x71,0x7f] + vmaxpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vmaxpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5f,0x72,0x80] + 
vmaxpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vminpbf16 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5d,0xf0] + vminpbf16 %ymm24, %ymm23, %ymm22 + +// CHECK: vminpbf16 %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5d,0xf0] + vminpbf16 %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vminpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5d,0xf0] + vminpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vminpbf16 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5d,0xf0] + vminpbf16 %zmm24, %zmm23, %zmm22 + +// CHECK: vminpbf16 %zmm24, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5d,0xf0] + vminpbf16 %zmm24, %zmm23, %zmm22 {%k7} + +// CHECK: vminpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5d,0xf0] + vminpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vminpbf16 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5d,0xf0] + vminpbf16 %xmm24, %xmm23, %xmm22 + +// CHECK: vminpbf16 %xmm24, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5d,0xf0] + vminpbf16 %xmm24, %xmm23, %xmm22 {%k7} + +// CHECK: vminpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5d,0xf0] + vminpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vminpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10] + vminpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vminpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00] + vminpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} + +// CHECK: vminpbf16 (%rip){1to32}, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5d,0x35,0x00,0x00,0x00,0x00] + vminpbf16 (%rip){1to32}, %zmm23, %zmm22 + +// CHECK: vminpbf16 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: 
[0x62,0xe5,0x45,0x40,0x5d,0x34,0x6d,0x00,0xf8,0xff,0xff] + vminpbf16 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vminpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5d,0x71,0x7f] + vminpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} + +// CHECK: vminpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5d,0x72,0x80] + vminpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vminpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10] + vminpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vminpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00] + vminpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} + +// CHECK: vminpbf16 (%rip){1to16}, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5d,0x35,0x00,0x00,0x00,0x00] + vminpbf16 (%rip){1to16}, %ymm23, %ymm22 + +// CHECK: vminpbf16 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5d,0x34,0x6d,0x00,0xfc,0xff,0xff] + vminpbf16 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vminpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5d,0x71,0x7f] + vminpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vminpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5d,0x72,0x80] + vminpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vminpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10] + vminpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vminpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00] + vminpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} + +// CHECK: vminpbf16 (%rip){1to8}, %xmm23, %xmm22 +// CHECK: encoding: 
[0x62,0xe5,0x45,0x10,0x5d,0x35,0x00,0x00,0x00,0x00] + vminpbf16 (%rip){1to8}, %xmm23, %xmm22 + +// CHECK: vminpbf16 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5d,0x34,0x6d,0x00,0xfe,0xff,0xff] + vminpbf16 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vminpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5d,0x71,0x7f] + vminpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vminpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5d,0x72,0x80] + vminpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vmulnepbf16 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x85,0x45,0x20,0x59,0xf0] + vmulnepbf16 %ymm24, %ymm23, %ymm22 + +// CHECK: vmulnepbf16 %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x85,0x45,0x27,0x59,0xf0] + vmulnepbf16 %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vmulnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x59,0xf0] + vmulnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vmulnepbf16 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x85,0x45,0x40,0x59,0xf0] + vmulnepbf16 %zmm24, %zmm23, %zmm22 + +// CHECK: vmulnepbf16 %zmm24, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0x85,0x45,0x47,0x59,0xf0] + vmulnepbf16 %zmm24, %zmm23, %zmm22 {%k7} + +// CHECK: vmulnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x59,0xf0] + vmulnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vmulnepbf16 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x85,0x45,0x00,0x59,0xf0] + vmulnepbf16 %xmm24, %xmm23, %xmm22 + +// CHECK: vmulnepbf16 %xmm24, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0x85,0x45,0x07,0x59,0xf0] + vmulnepbf16 %xmm24, %xmm23, %xmm22 {%k7} + +// CHECK: vmulnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x85,0x45,0x87,0x59,0xf0] + vmulnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vmulnepbf16 268435456(%rbp,%r14,8), 
%zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmulnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vmulnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x59,0xb4,0x80,0x23,0x01,0x00,0x00] + vmulnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} + +// CHECK: vmulnepbf16 (%rip){1to32}, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x59,0x35,0x00,0x00,0x00,0x00] + vmulnepbf16 (%rip){1to32}, %zmm23, %zmm22 + +// CHECK: vmulnepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x59,0x34,0x6d,0x00,0xf8,0xff,0xff] + vmulnepbf16 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vmulnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x59,0x71,0x7f] + vmulnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} + +// CHECK: vmulnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x59,0x72,0x80] + vmulnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vmulnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmulnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vmulnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x59,0xb4,0x80,0x23,0x01,0x00,0x00] + vmulnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} + +// CHECK: vmulnepbf16 (%rip){1to16}, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x59,0x35,0x00,0x00,0x00,0x00] + vmulnepbf16 (%rip){1to16}, %ymm23, %ymm22 + +// CHECK: vmulnepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x59,0x34,0x6d,0x00,0xfc,0xff,0xff] + vmulnepbf16 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vmulnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x59,0x71,0x7f] + vmulnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vmulnepbf16 -256(%rdx){1to16}, 
%ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x59,0x72,0x80] + vmulnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vmulnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmulnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vmulnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x59,0xb4,0x80,0x23,0x01,0x00,0x00] + vmulnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} + +// CHECK: vmulnepbf16 (%rip){1to8}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x59,0x35,0x00,0x00,0x00,0x00] + vmulnepbf16 (%rip){1to8}, %xmm23, %xmm22 + +// CHECK: vmulnepbf16 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x59,0x34,0x6d,0x00,0xfe,0xff,0xff] + vmulnepbf16 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vmulnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x59,0x71,0x7f] + vmulnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vmulnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x59,0x72,0x80] + vmulnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vrcppbf16 %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4c,0xf7] + vrcppbf16 %xmm23, %xmm22 + +// CHECK: vrcppbf16 %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xa6,0x7c,0x0f,0x4c,0xf7] + vrcppbf16 %xmm23, %xmm22 {%k7} + +// CHECK: vrcppbf16 %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa6,0x7c,0x8f,0x4c,0xf7] + vrcppbf16 %xmm23, %xmm22 {%k7} {z} + +// CHECK: vrcppbf16 %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4c,0xf7] + vrcppbf16 %zmm23, %zmm22 + +// CHECK: vrcppbf16 %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xa6,0x7c,0x4f,0x4c,0xf7] + vrcppbf16 %zmm23, %zmm22 {%k7} + +// CHECK: vrcppbf16 %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa6,0x7c,0xcf,0x4c,0xf7] + vrcppbf16 %zmm23, %zmm22 {%k7} 
{z} + +// CHECK: vrcppbf16 %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4c,0xf7] + vrcppbf16 %ymm23, %ymm22 + +// CHECK: vrcppbf16 %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xa6,0x7c,0x2f,0x4c,0xf7] + vrcppbf16 %ymm23, %ymm22 {%k7} + +// CHECK: vrcppbf16 %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa6,0x7c,0xaf,0x4c,0xf7] + vrcppbf16 %ymm23, %ymm22 {%k7} {z} + +// CHECK: vrcppbf16 268435456(%rbp,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vrcppbf16 268435456(%rbp,%r14,8), %xmm22 + +// CHECK: vrcppbf16 291(%r8,%rax,4), %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x7c,0x0f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00] + vrcppbf16 291(%r8,%rax,4), %xmm22 {%k7} + +// CHECK: vrcppbf16 (%rip){1to8}, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x7c,0x18,0x4c,0x35,0x00,0x00,0x00,0x00] + vrcppbf16 (%rip){1to8}, %xmm22 + +// CHECK: vrcppbf16 -512(,%rbp,2), %xmm22 +// CHECK: encoding: [0x62,0xe6,0x7c,0x08,0x4c,0x34,0x6d,0x00,0xfe,0xff,0xff] + vrcppbf16 -512(,%rbp,2), %xmm22 + +// CHECK: vrcppbf16 2032(%rcx), %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x7c,0x8f,0x4c,0x71,0x7f] + vrcppbf16 2032(%rcx), %xmm22 {%k7} {z} + +// CHECK: vrcppbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x7c,0x9f,0x4c,0x72,0x80] + vrcppbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z} + +// CHECK: vrcppbf16 268435456(%rbp,%r14,8), %ymm22 +// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vrcppbf16 268435456(%rbp,%r14,8), %ymm22 + +// CHECK: vrcppbf16 291(%r8,%rax,4), %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x7c,0x2f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00] + vrcppbf16 291(%r8,%rax,4), %ymm22 {%k7} + +// CHECK: vrcppbf16 (%rip){1to16}, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x7c,0x38,0x4c,0x35,0x00,0x00,0x00,0x00] + vrcppbf16 (%rip){1to16}, %ymm22 + +// CHECK: vrcppbf16 -1024(,%rbp,2), %ymm22 +// CHECK: encoding: [0x62,0xe6,0x7c,0x28,0x4c,0x34,0x6d,0x00,0xfc,0xff,0xff] + vrcppbf16 
-1024(,%rbp,2), %ymm22 + +// CHECK: vrcppbf16 4064(%rcx), %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x7c,0xaf,0x4c,0x71,0x7f] + vrcppbf16 4064(%rcx), %ymm22 {%k7} {z} + +// CHECK: vrcppbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x7c,0xbf,0x4c,0x72,0x80] + vrcppbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z} + +// CHECK: vrcppbf16 268435456(%rbp,%r14,8), %zmm22 +// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vrcppbf16 268435456(%rbp,%r14,8), %zmm22 + +// CHECK: vrcppbf16 291(%r8,%rax,4), %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x7c,0x4f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00] + vrcppbf16 291(%r8,%rax,4), %zmm22 {%k7} + +// CHECK: vrcppbf16 (%rip){1to32}, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x7c,0x58,0x4c,0x35,0x00,0x00,0x00,0x00] + vrcppbf16 (%rip){1to32}, %zmm22 + +// CHECK: vrcppbf16 -2048(,%rbp,2), %zmm22 +// CHECK: encoding: [0x62,0xe6,0x7c,0x48,0x4c,0x34,0x6d,0x00,0xf8,0xff,0xff] + vrcppbf16 -2048(,%rbp,2), %zmm22 + +// CHECK: vrcppbf16 8128(%rcx), %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x7c,0xcf,0x4c,0x71,0x7f] + vrcppbf16 8128(%rcx), %zmm22 {%k7} {z} + +// CHECK: vrcppbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x7c,0xdf,0x4c,0x72,0x80] + vrcppbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z} + +// CHECK: vreducenepbf16 $123, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x56,0xf7,0x7b] + vreducenepbf16 $123, %zmm23, %zmm22 + +// CHECK: vreducenepbf16 $123, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xa3,0x7f,0x4f,0x56,0xf7,0x7b] + vreducenepbf16 $123, %zmm23, %zmm22 {%k7} + +// CHECK: vreducenepbf16 $123, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa3,0x7f,0xcf,0x56,0xf7,0x7b] + vreducenepbf16 $123, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vreducenepbf16 $123, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x56,0xf7,0x7b] + vreducenepbf16 $123, %ymm23, %ymm22 + +// CHECK: vreducenepbf16 $123, %ymm23, %ymm22 {%k7} +// CHECK: 
encoding: [0x62,0xa3,0x7f,0x2f,0x56,0xf7,0x7b] + vreducenepbf16 $123, %ymm23, %ymm22 {%k7} + +// CHECK: vreducenepbf16 $123, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa3,0x7f,0xaf,0x56,0xf7,0x7b] + vreducenepbf16 $123, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vreducenepbf16 $123, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x56,0xf7,0x7b] + vreducenepbf16 $123, %xmm23, %xmm22 + +// CHECK: vreducenepbf16 $123, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xa3,0x7f,0x0f,0x56,0xf7,0x7b] + vreducenepbf16 $123, %xmm23, %xmm22 {%k7} + +// CHECK: vreducenepbf16 $123, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa3,0x7f,0x8f,0x56,0xf7,0x7b] + vreducenepbf16 $123, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vreducenepbf16 $123, 268435456(%rbp,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vreducenepbf16 $123, 268435456(%rbp,%r14,8), %xmm22 + +// CHECK: vreducenepbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc3,0x7f,0x0f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + vreducenepbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7} + +// CHECK: vreducenepbf16 $123, (%rip){1to8}, %xmm22 +// CHECK: encoding: [0x62,0xe3,0x7f,0x18,0x56,0x35,0x00,0x00,0x00,0x00,0x7b] + vreducenepbf16 $123, (%rip){1to8}, %xmm22 + +// CHECK: vreducenepbf16 $123, -512(,%rbp,2), %xmm22 +// CHECK: encoding: [0x62,0xe3,0x7f,0x08,0x56,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vreducenepbf16 $123, -512(,%rbp,2), %xmm22 + +// CHECK: vreducenepbf16 $123, 2032(%rcx), %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x7f,0x8f,0x56,0x71,0x7f,0x7b] + vreducenepbf16 $123, 2032(%rcx), %xmm22 {%k7} {z} + +// CHECK: vreducenepbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x7f,0x9f,0x56,0x72,0x80,0x7b] + vreducenepbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z} + +// CHECK: vreducenepbf16 $123, 268435456(%rbp,%r14,8), %ymm22 +// CHECK: encoding: 
[0x62,0xa3,0x7f,0x28,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vreducenepbf16 $123, 268435456(%rbp,%r14,8), %ymm22 + +// CHECK: vreducenepbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc3,0x7f,0x2f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + vreducenepbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7} + +// CHECK: vreducenepbf16 $123, (%rip){1to16}, %ymm22 +// CHECK: encoding: [0x62,0xe3,0x7f,0x38,0x56,0x35,0x00,0x00,0x00,0x00,0x7b] + vreducenepbf16 $123, (%rip){1to16}, %ymm22 + +// CHECK: vreducenepbf16 $123, -1024(,%rbp,2), %ymm22 +// CHECK: encoding: [0x62,0xe3,0x7f,0x28,0x56,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vreducenepbf16 $123, -1024(,%rbp,2), %ymm22 + +// CHECK: vreducenepbf16 $123, 4064(%rcx), %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x7f,0xaf,0x56,0x71,0x7f,0x7b] + vreducenepbf16 $123, 4064(%rcx), %ymm22 {%k7} {z} + +// CHECK: vreducenepbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x7f,0xbf,0x56,0x72,0x80,0x7b] + vreducenepbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z} + +// CHECK: vreducenepbf16 $123, 268435456(%rbp,%r14,8), %zmm22 +// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vreducenepbf16 $123, 268435456(%rbp,%r14,8), %zmm22 + +// CHECK: vreducenepbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc3,0x7f,0x4f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + vreducenepbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7} + +// CHECK: vreducenepbf16 $123, (%rip){1to32}, %zmm22 +// CHECK: encoding: [0x62,0xe3,0x7f,0x58,0x56,0x35,0x00,0x00,0x00,0x00,0x7b] + vreducenepbf16 $123, (%rip){1to32}, %zmm22 + +// CHECK: vreducenepbf16 $123, -2048(,%rbp,2), %zmm22 +// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x56,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vreducenepbf16 $123, -2048(,%rbp,2), %zmm22 + +// CHECK: vreducenepbf16 $123, 8128(%rcx), %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x7f,0xcf,0x56,0x71,0x7f,0x7b] + vreducenepbf16 $123, 8128(%rcx), %zmm22 {%k7} {z} + 
+// CHECK: vreducenepbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x7f,0xdf,0x56,0x72,0x80,0x7b] + vreducenepbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z} + +// CHECK: vrndscalenepbf16 $123, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x08,0xf7,0x7b] + vrndscalenepbf16 $123, %zmm23, %zmm22 + +// CHECK: vrndscalenepbf16 $123, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xa3,0x7f,0x4f,0x08,0xf7,0x7b] + vrndscalenepbf16 $123, %zmm23, %zmm22 {%k7} + +// CHECK: vrndscalenepbf16 $123, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa3,0x7f,0xcf,0x08,0xf7,0x7b] + vrndscalenepbf16 $123, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vrndscalenepbf16 $123, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x08,0xf7,0x7b] + vrndscalenepbf16 $123, %ymm23, %ymm22 + +// CHECK: vrndscalenepbf16 $123, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xa3,0x7f,0x2f,0x08,0xf7,0x7b] + vrndscalenepbf16 $123, %ymm23, %ymm22 {%k7} + +// CHECK: vrndscalenepbf16 $123, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa3,0x7f,0xaf,0x08,0xf7,0x7b] + vrndscalenepbf16 $123, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vrndscalenepbf16 $123, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x08,0xf7,0x7b] + vrndscalenepbf16 $123, %xmm23, %xmm22 + +// CHECK: vrndscalenepbf16 $123, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xa3,0x7f,0x0f,0x08,0xf7,0x7b] + vrndscalenepbf16 $123, %xmm23, %xmm22 {%k7} + +// CHECK: vrndscalenepbf16 $123, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa3,0x7f,0x8f,0x08,0xf7,0x7b] + vrndscalenepbf16 $123, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %xmm22 + +// CHECK: vrndscalenepbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc3,0x7f,0x0f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + 
vrndscalenepbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7} + +// CHECK: vrndscalenepbf16 $123, (%rip){1to8}, %xmm22 +// CHECK: encoding: [0x62,0xe3,0x7f,0x18,0x08,0x35,0x00,0x00,0x00,0x00,0x7b] + vrndscalenepbf16 $123, (%rip){1to8}, %xmm22 + +// CHECK: vrndscalenepbf16 $123, -512(,%rbp,2), %xmm22 +// CHECK: encoding: [0x62,0xe3,0x7f,0x08,0x08,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vrndscalenepbf16 $123, -512(,%rbp,2), %xmm22 + +// CHECK: vrndscalenepbf16 $123, 2032(%rcx), %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x7f,0x8f,0x08,0x71,0x7f,0x7b] + vrndscalenepbf16 $123, 2032(%rcx), %xmm22 {%k7} {z} + +// CHECK: vrndscalenepbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x7f,0x9f,0x08,0x72,0x80,0x7b] + vrndscalenepbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z} + +// CHECK: vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %ymm22 +// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %ymm22 + +// CHECK: vrndscalenepbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc3,0x7f,0x2f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + vrndscalenepbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7} + +// CHECK: vrndscalenepbf16 $123, (%rip){1to16}, %ymm22 +// CHECK: encoding: [0x62,0xe3,0x7f,0x38,0x08,0x35,0x00,0x00,0x00,0x00,0x7b] + vrndscalenepbf16 $123, (%rip){1to16}, %ymm22 + +// CHECK: vrndscalenepbf16 $123, -1024(,%rbp,2), %ymm22 +// CHECK: encoding: [0x62,0xe3,0x7f,0x28,0x08,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vrndscalenepbf16 $123, -1024(,%rbp,2), %ymm22 + +// CHECK: vrndscalenepbf16 $123, 4064(%rcx), %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x7f,0xaf,0x08,0x71,0x7f,0x7b] + vrndscalenepbf16 $123, 4064(%rcx), %ymm22 {%k7} {z} + +// CHECK: vrndscalenepbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x7f,0xbf,0x08,0x72,0x80,0x7b] + vrndscalenepbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z} + +// CHECK: vrndscalenepbf16 $123, 
268435456(%rbp,%r14,8), %zmm22 +// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %zmm22 + +// CHECK: vrndscalenepbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc3,0x7f,0x4f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + vrndscalenepbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7} + +// CHECK: vrndscalenepbf16 $123, (%rip){1to32}, %zmm22 +// CHECK: encoding: [0x62,0xe3,0x7f,0x58,0x08,0x35,0x00,0x00,0x00,0x00,0x7b] + vrndscalenepbf16 $123, (%rip){1to32}, %zmm22 + +// CHECK: vrndscalenepbf16 $123, -2048(,%rbp,2), %zmm22 +// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x08,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vrndscalenepbf16 $123, -2048(,%rbp,2), %zmm22 + +// CHECK: vrndscalenepbf16 $123, 8128(%rcx), %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x7f,0xcf,0x08,0x71,0x7f,0x7b] + vrndscalenepbf16 $123, 8128(%rcx), %zmm22 {%k7} {z} + +// CHECK: vrndscalenepbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x7f,0xdf,0x08,0x72,0x80,0x7b] + vrndscalenepbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z} + +// CHECK: vrsqrtpbf16 %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4e,0xf7] + vrsqrtpbf16 %xmm23, %xmm22 + +// CHECK: vrsqrtpbf16 %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xa6,0x7c,0x0f,0x4e,0xf7] + vrsqrtpbf16 %xmm23, %xmm22 {%k7} + +// CHECK: vrsqrtpbf16 %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa6,0x7c,0x8f,0x4e,0xf7] + vrsqrtpbf16 %xmm23, %xmm22 {%k7} {z} + +// CHECK: vrsqrtpbf16 %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4e,0xf7] + vrsqrtpbf16 %zmm23, %zmm22 + +// CHECK: vrsqrtpbf16 %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xa6,0x7c,0x4f,0x4e,0xf7] + vrsqrtpbf16 %zmm23, %zmm22 {%k7} + +// CHECK: vrsqrtpbf16 %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa6,0x7c,0xcf,0x4e,0xf7] + vrsqrtpbf16 %zmm23, %zmm22 {%k7} {z} + +// CHECK: vrsqrtpbf16 %ymm23, %ymm22 +// CHECK: encoding: 
[0x62,0xa6,0x7c,0x28,0x4e,0xf7] + vrsqrtpbf16 %ymm23, %ymm22 + +// CHECK: vrsqrtpbf16 %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xa6,0x7c,0x2f,0x4e,0xf7] + vrsqrtpbf16 %ymm23, %ymm22 {%k7} + +// CHECK: vrsqrtpbf16 %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa6,0x7c,0xaf,0x4e,0xf7] + vrsqrtpbf16 %ymm23, %ymm22 {%k7} {z} + +// CHECK: vrsqrtpbf16 268435456(%rbp,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vrsqrtpbf16 268435456(%rbp,%r14,8), %xmm22 + +// CHECK: vrsqrtpbf16 291(%r8,%rax,4), %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x7c,0x0f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00] + vrsqrtpbf16 291(%r8,%rax,4), %xmm22 {%k7} + +// CHECK: vrsqrtpbf16 (%rip){1to8}, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x7c,0x18,0x4e,0x35,0x00,0x00,0x00,0x00] + vrsqrtpbf16 (%rip){1to8}, %xmm22 + +// CHECK: vrsqrtpbf16 -512(,%rbp,2), %xmm22 +// CHECK: encoding: [0x62,0xe6,0x7c,0x08,0x4e,0x34,0x6d,0x00,0xfe,0xff,0xff] + vrsqrtpbf16 -512(,%rbp,2), %xmm22 + +// CHECK: vrsqrtpbf16 2032(%rcx), %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x7c,0x8f,0x4e,0x71,0x7f] + vrsqrtpbf16 2032(%rcx), %xmm22 {%k7} {z} + +// CHECK: vrsqrtpbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x7c,0x9f,0x4e,0x72,0x80] + vrsqrtpbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z} + +// CHECK: vrsqrtpbf16 268435456(%rbp,%r14,8), %ymm22 +// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vrsqrtpbf16 268435456(%rbp,%r14,8), %ymm22 + +// CHECK: vrsqrtpbf16 291(%r8,%rax,4), %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x7c,0x2f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00] + vrsqrtpbf16 291(%r8,%rax,4), %ymm22 {%k7} + +// CHECK: vrsqrtpbf16 (%rip){1to16}, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x7c,0x38,0x4e,0x35,0x00,0x00,0x00,0x00] + vrsqrtpbf16 (%rip){1to16}, %ymm22 + +// CHECK: vrsqrtpbf16 -1024(,%rbp,2), %ymm22 +// CHECK: encoding: [0x62,0xe6,0x7c,0x28,0x4e,0x34,0x6d,0x00,0xfc,0xff,0xff] + vrsqrtpbf16 -1024(,%rbp,2), 
%ymm22 + +// CHECK: vrsqrtpbf16 4064(%rcx), %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x7c,0xaf,0x4e,0x71,0x7f] + vrsqrtpbf16 4064(%rcx), %ymm22 {%k7} {z} + +// CHECK: vrsqrtpbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x7c,0xbf,0x4e,0x72,0x80] + vrsqrtpbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z} + +// CHECK: vrsqrtpbf16 268435456(%rbp,%r14,8), %zmm22 +// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vrsqrtpbf16 268435456(%rbp,%r14,8), %zmm22 + +// CHECK: vrsqrtpbf16 291(%r8,%rax,4), %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x7c,0x4f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00] + vrsqrtpbf16 291(%r8,%rax,4), %zmm22 {%k7} + +// CHECK: vrsqrtpbf16 (%rip){1to32}, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x7c,0x58,0x4e,0x35,0x00,0x00,0x00,0x00] + vrsqrtpbf16 (%rip){1to32}, %zmm22 + +// CHECK: vrsqrtpbf16 -2048(,%rbp,2), %zmm22 +// CHECK: encoding: [0x62,0xe6,0x7c,0x48,0x4e,0x34,0x6d,0x00,0xf8,0xff,0xff] + vrsqrtpbf16 -2048(,%rbp,2), %zmm22 + +// CHECK: vrsqrtpbf16 8128(%rcx), %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x7c,0xcf,0x4e,0x71,0x7f] + vrsqrtpbf16 8128(%rcx), %zmm22 {%k7} {z} + +// CHECK: vrsqrtpbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x7c,0xdf,0x4e,0x72,0x80] + vrsqrtpbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z} + +// CHECK: vscalefpbf16 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0x2c,0xf0] + vscalefpbf16 %ymm24, %ymm23, %ymm22 + +// CHECK: vscalefpbf16 %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x27,0x2c,0xf0] + vscalefpbf16 %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vscalefpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x2c,0xf0] + vscalefpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vscalefpbf16 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0x2c,0xf0] + vscalefpbf16 %zmm24, %zmm23, %zmm22 + +// CHECK: vscalefpbf16 %zmm24, %zmm23, %zmm22 {%k7} +// CHECK: encoding: 
[0x62,0x86,0x44,0x47,0x2c,0xf0] + vscalefpbf16 %zmm24, %zmm23, %zmm22 {%k7} + +// CHECK: vscalefpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x2c,0xf0] + vscalefpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vscalefpbf16 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0x2c,0xf0] + vscalefpbf16 %xmm24, %xmm23, %xmm22 + +// CHECK: vscalefpbf16 %xmm24, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0x86,0x44,0x07,0x2c,0xf0] + vscalefpbf16 %xmm24, %xmm23, %xmm22 {%k7} + +// CHECK: vscalefpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x86,0x44,0x87,0x2c,0xf0] + vscalefpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vscalefpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vscalefpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vscalefpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00] + vscalefpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} + +// CHECK: vscalefpbf16 (%rip){1to32}, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x2c,0x35,0x00,0x00,0x00,0x00] + vscalefpbf16 (%rip){1to32}, %zmm23, %zmm22 + +// CHECK: vscalefpbf16 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x2c,0x34,0x6d,0x00,0xf8,0xff,0xff] + vscalefpbf16 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vscalefpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x2c,0x71,0x7f] + vscalefpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} + +// CHECK: vscalefpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x2c,0x72,0x80] + vscalefpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vscalefpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vscalefpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// 
CHECK: vscalefpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00] + vscalefpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} + +// CHECK: vscalefpbf16 (%rip){1to16}, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x2c,0x35,0x00,0x00,0x00,0x00] + vscalefpbf16 (%rip){1to16}, %ymm23, %ymm22 + +// CHECK: vscalefpbf16 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x2c,0x34,0x6d,0x00,0xfc,0xff,0xff] + vscalefpbf16 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vscalefpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x2c,0x71,0x7f] + vscalefpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vscalefpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x2c,0x72,0x80] + vscalefpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vscalefpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vscalefpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vscalefpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00] + vscalefpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} + +// CHECK: vscalefpbf16 (%rip){1to8}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x2c,0x35,0x00,0x00,0x00,0x00] + vscalefpbf16 (%rip){1to8}, %xmm23, %xmm22 + +// CHECK: vscalefpbf16 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x2c,0x34,0x6d,0x00,0xfe,0xff,0xff] + vscalefpbf16 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vscalefpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x2c,0x71,0x7f] + vscalefpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vscalefpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x2c,0x72,0x80] + vscalefpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 
{%k7} {z} + +// CHECK: vsqrtnepbf16 %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x51,0xf7] + vsqrtnepbf16 %xmm23, %xmm22 + +// CHECK: vsqrtnepbf16 %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xa5,0x7d,0x0f,0x51,0xf7] + vsqrtnepbf16 %xmm23, %xmm22 {%k7} + +// CHECK: vsqrtnepbf16 %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa5,0x7d,0x8f,0x51,0xf7] + vsqrtnepbf16 %xmm23, %xmm22 {%k7} {z} + +// CHECK: vsqrtnepbf16 %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x51,0xf7] + vsqrtnepbf16 %zmm23, %zmm22 + +// CHECK: vsqrtnepbf16 %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xa5,0x7d,0x4f,0x51,0xf7] + vsqrtnepbf16 %zmm23, %zmm22 {%k7} + +// CHECK: vsqrtnepbf16 %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa5,0x7d,0xcf,0x51,0xf7] + vsqrtnepbf16 %zmm23, %zmm22 {%k7} {z} + +// CHECK: vsqrtnepbf16 %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x51,0xf7] + vsqrtnepbf16 %ymm23, %ymm22 + +// CHECK: vsqrtnepbf16 %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xa5,0x7d,0x2f,0x51,0xf7] + vsqrtnepbf16 %ymm23, %ymm22 {%k7} + +// CHECK: vsqrtnepbf16 %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa5,0x7d,0xaf,0x51,0xf7] + vsqrtnepbf16 %ymm23, %ymm22 {%k7} {z} + +// CHECK: vsqrtnepbf16 268435456(%rbp,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsqrtnepbf16 268435456(%rbp,%r14,8), %xmm22 + +// CHECK: vsqrtnepbf16 291(%r8,%rax,4), %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x7d,0x0f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00] + vsqrtnepbf16 291(%r8,%rax,4), %xmm22 {%k7} + +// CHECK: vsqrtnepbf16 (%rip){1to8}, %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x18,0x51,0x35,0x00,0x00,0x00,0x00] + vsqrtnepbf16 (%rip){1to8}, %xmm22 + +// CHECK: vsqrtnepbf16 -512(,%rbp,2), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff] + vsqrtnepbf16 -512(,%rbp,2), %xmm22 + +// CHECK: vsqrtnepbf16 2032(%rcx), %xmm22 {%k7} {z} +// CHECK: encoding: 
[0x62,0xe5,0x7d,0x8f,0x51,0x71,0x7f] + vsqrtnepbf16 2032(%rcx), %xmm22 {%k7} {z} + +// CHECK: vsqrtnepbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x7d,0x9f,0x51,0x72,0x80] + vsqrtnepbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z} + +// CHECK: vsqrtnepbf16 268435456(%rbp,%r14,8), %ymm22 +// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsqrtnepbf16 268435456(%rbp,%r14,8), %ymm22 + +// CHECK: vsqrtnepbf16 291(%r8,%rax,4), %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x7d,0x2f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00] + vsqrtnepbf16 291(%r8,%rax,4), %ymm22 {%k7} + +// CHECK: vsqrtnepbf16 (%rip){1to16}, %ymm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x38,0x51,0x35,0x00,0x00,0x00,0x00] + vsqrtnepbf16 (%rip){1to16}, %ymm22 + +// CHECK: vsqrtnepbf16 -1024(,%rbp,2), %ymm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x28,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff] + vsqrtnepbf16 -1024(,%rbp,2), %ymm22 + +// CHECK: vsqrtnepbf16 4064(%rcx), %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x7d,0xaf,0x51,0x71,0x7f] + vsqrtnepbf16 4064(%rcx), %ymm22 {%k7} {z} + +// CHECK: vsqrtnepbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x7d,0xbf,0x51,0x72,0x80] + vsqrtnepbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z} + +// CHECK: vsqrtnepbf16 268435456(%rbp,%r14,8), %zmm22 +// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsqrtnepbf16 268435456(%rbp,%r14,8), %zmm22 + +// CHECK: vsqrtnepbf16 291(%r8,%rax,4), %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x7d,0x4f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00] + vsqrtnepbf16 291(%r8,%rax,4), %zmm22 {%k7} + +// CHECK: vsqrtnepbf16 (%rip){1to32}, %zmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x58,0x51,0x35,0x00,0x00,0x00,0x00] + vsqrtnepbf16 (%rip){1to32}, %zmm22 + +// CHECK: vsqrtnepbf16 -2048(,%rbp,2), %zmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x48,0x51,0x34,0x6d,0x00,0xf8,0xff,0xff] + vsqrtnepbf16 -2048(,%rbp,2), %zmm22 + +// CHECK: vsqrtnepbf16 8128(%rcx), %zmm22 {%k7} {z} 
+// CHECK: encoding: [0x62,0xe5,0x7d,0xcf,0x51,0x71,0x7f] + vsqrtnepbf16 8128(%rcx), %zmm22 {%k7} {z} + +// CHECK: vsqrtnepbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x7d,0xdf,0x51,0x72,0x80] + vsqrtnepbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z} + +// CHECK: vsubnepbf16 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5c,0xf0] + vsubnepbf16 %ymm24, %ymm23, %ymm22 + +// CHECK: vsubnepbf16 %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5c,0xf0] + vsubnepbf16 %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vsubnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5c,0xf0] + vsubnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vsubnepbf16 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5c,0xf0] + vsubnepbf16 %zmm24, %zmm23, %zmm22 + +// CHECK: vsubnepbf16 %zmm24, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5c,0xf0] + vsubnepbf16 %zmm24, %zmm23, %zmm22 {%k7} + +// CHECK: vsubnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5c,0xf0] + vsubnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vsubnepbf16 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5c,0xf0] + vsubnepbf16 %xmm24, %xmm23, %xmm22 + +// CHECK: vsubnepbf16 %xmm24, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5c,0xf0] + vsubnepbf16 %xmm24, %xmm23, %xmm22 {%k7} + +// CHECK: vsubnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5c,0xf0] + vsubnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vsubnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsubnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vsubnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00] + vsubnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} 
+ +// CHECK: vsubnepbf16 (%rip){1to32}, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5c,0x35,0x00,0x00,0x00,0x00] + vsubnepbf16 (%rip){1to32}, %zmm23, %zmm22 + +// CHECK: vsubnepbf16 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5c,0x34,0x6d,0x00,0xf8,0xff,0xff] + vsubnepbf16 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vsubnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5c,0x71,0x7f] + vsubnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z} + +// CHECK: vsubnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5c,0x72,0x80] + vsubnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vsubnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsubnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vsubnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00] + vsubnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} + +// CHECK: vsubnepbf16 (%rip){1to16}, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5c,0x35,0x00,0x00,0x00,0x00] + vsubnepbf16 (%rip){1to16}, %ymm23, %ymm22 + +// CHECK: vsubnepbf16 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5c,0x34,0x6d,0x00,0xfc,0xff,0xff] + vsubnepbf16 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vsubnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5c,0x71,0x7f] + vsubnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vsubnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5c,0x72,0x80] + vsubnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vsubnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsubnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22 + 
+// CHECK: vsubnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00] + vsubnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} + +// CHECK: vsubnepbf16 (%rip){1to8}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5c,0x35,0x00,0x00,0x00,0x00] + vsubnepbf16 (%rip){1to8}, %xmm23, %xmm22 + +// CHECK: vsubnepbf16 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5c,0x34,0x6d,0x00,0xfe,0xff,0xff] + vsubnepbf16 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vsubnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5c,0x71,0x7f] + vsubnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vsubnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5c,0x72,0x80] + vsubnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z} + diff --git a/llvm/test/MC/X86/avx10.2-bf16-64-intel.s b/llvm/test/MC/X86/avx10.2-bf16-64-intel.s new file mode 100644 index 00000000000000..5f3dc45ba77458 --- /dev/null +++ b/llvm/test/MC/X86/avx10.2-bf16-64-intel.s @@ -0,0 +1,3014 @@ +// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vaddnepbf16 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x85,0x45,0x20,0x58,0xf0] + vaddnepbf16 ymm22, ymm23, ymm24 + +// CHECK: vaddnepbf16 ymm22 {k7}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x85,0x45,0x27,0x58,0xf0] + vaddnepbf16 ymm22 {k7}, ymm23, ymm24 + +// CHECK: vaddnepbf16 ymm22 {k7} {z}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x58,0xf0] + vaddnepbf16 ymm22 {k7} {z}, ymm23, ymm24 + +// CHECK: vaddnepbf16 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x40,0x58,0xf0] + vaddnepbf16 zmm22, zmm23, zmm24 + +// CHECK: vaddnepbf16 zmm22 {k7}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x47,0x58,0xf0] + vaddnepbf16 zmm22 {k7}, zmm23, zmm24 + +// CHECK: vaddnepbf16 zmm22 {k7} {z}, zmm23, zmm24 +// 
CHECK: encoding: [0x62,0x85,0x45,0xc7,0x58,0xf0] + vaddnepbf16 zmm22 {k7} {z}, zmm23, zmm24 + +// CHECK: vaddnepbf16 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x00,0x58,0xf0] + vaddnepbf16 xmm22, xmm23, xmm24 + +// CHECK: vaddnepbf16 xmm22 {k7}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x07,0x58,0xf0] + vaddnepbf16 xmm22 {k7}, xmm23, xmm24 + +// CHECK: vaddnepbf16 xmm22 {k7} {z}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x87,0x58,0xf0] + vaddnepbf16 xmm22 {k7} {z}, xmm23, xmm24 + +// CHECK: vaddnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10] + vaddnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vaddnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x58,0xb4,0x80,0x23,0x01,0x00,0x00] + vaddnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vaddnepbf16 zmm22, zmm23, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x58,0x35,0x00,0x00,0x00,0x00] + vaddnepbf16 zmm22, zmm23, word ptr [rip]{1to32} + +// CHECK: vaddnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x58,0x34,0x6d,0x00,0xf8,0xff,0xff] + vaddnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vaddnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x58,0x71,0x7f] + vaddnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vaddnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x58,0x72,0x80] + vaddnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} + +// CHECK: vaddnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10] + vaddnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vaddnepbf16 ymm22 {k7}, ymm23, 
ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x58,0xb4,0x80,0x23,0x01,0x00,0x00] + vaddnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vaddnepbf16 ymm22, ymm23, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x58,0x35,0x00,0x00,0x00,0x00] + vaddnepbf16 ymm22, ymm23, word ptr [rip]{1to16} + +// CHECK: vaddnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x58,0x34,0x6d,0x00,0xfc,0xff,0xff] + vaddnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vaddnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x58,0x71,0x7f] + vaddnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vaddnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x58,0x72,0x80] + vaddnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} + +// CHECK: vaddnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10] + vaddnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vaddnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x58,0xb4,0x80,0x23,0x01,0x00,0x00] + vaddnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vaddnepbf16 xmm22, xmm23, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x58,0x35,0x00,0x00,0x00,0x00] + vaddnepbf16 xmm22, xmm23, word ptr [rip]{1to8} + +// CHECK: vaddnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x58,0x34,0x6d,0x00,0xfe,0xff,0xff] + vaddnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vaddnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x58,0x71,0x7f] + vaddnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vaddnepbf16 xmm22 {k7} {z}, 
xmm23, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x58,0x72,0x80] + vaddnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} + +// CHECK: vcmppbf16 k5, ymm23, ymm24, 123 +// CHECK: encoding: [0x62,0x93,0x47,0x20,0xc2,0xe8,0x7b] + vcmppbf16 k5, ymm23, ymm24, 123 + +// CHECK: vcmppbf16 k5 {k7}, ymm23, ymm24, 123 +// CHECK: encoding: [0x62,0x93,0x47,0x27,0xc2,0xe8,0x7b] + vcmppbf16 k5 {k7}, ymm23, ymm24, 123 + +// CHECK: vcmppbf16 k5, xmm23, xmm24, 123 +// CHECK: encoding: [0x62,0x93,0x47,0x00,0xc2,0xe8,0x7b] + vcmppbf16 k5, xmm23, xmm24, 123 + +// CHECK: vcmppbf16 k5 {k7}, xmm23, xmm24, 123 +// CHECK: encoding: [0x62,0x93,0x47,0x07,0xc2,0xe8,0x7b] + vcmppbf16 k5 {k7}, xmm23, xmm24, 123 + +// CHECK: vcmppbf16 k5, zmm23, zmm24, 123 +// CHECK: encoding: [0x62,0x93,0x47,0x40,0xc2,0xe8,0x7b] + vcmppbf16 k5, zmm23, zmm24, 123 + +// CHECK: vcmppbf16 k5 {k7}, zmm23, zmm24, 123 +// CHECK: encoding: [0x62,0x93,0x47,0x47,0xc2,0xe8,0x7b] + vcmppbf16 k5 {k7}, zmm23, zmm24, 123 + +// CHECK: vcmppbf16 k5, zmm23, zmmword ptr [rbp + 8*r14 + 268435456], 123 +// CHECK: encoding: [0x62,0xb3,0x47,0x40,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b] + vcmppbf16 k5, zmm23, zmmword ptr [rbp + 8*r14 + 268435456], 123 + +// CHECK: vcmppbf16 k5 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291], 123 +// CHECK: encoding: [0x62,0xd3,0x47,0x47,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b] + vcmppbf16 k5 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291], 123 + +// CHECK: vcmppbf16 k5, zmm23, word ptr [rip]{1to32}, 123 +// CHECK: encoding: [0x62,0xf3,0x47,0x50,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b] + vcmppbf16 k5, zmm23, word ptr [rip]{1to32}, 123 + +// CHECK: vcmppbf16 k5, zmm23, zmmword ptr [2*rbp - 2048], 123 +// CHECK: encoding: [0x62,0xf3,0x47,0x40,0xc2,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vcmppbf16 k5, zmm23, zmmword ptr [2*rbp - 2048], 123 + +// CHECK: vcmppbf16 k5 {k7}, zmm23, zmmword ptr [rcx + 8128], 123 +// CHECK: encoding: [0x62,0xf3,0x47,0x47,0xc2,0x69,0x7f,0x7b] + vcmppbf16 k5 
{k7}, zmm23, zmmword ptr [rcx + 8128], 123 + +// CHECK: vcmppbf16 k5 {k7}, zmm23, word ptr [rdx - 256]{1to32}, 123 +// CHECK: encoding: [0x62,0xf3,0x47,0x57,0xc2,0x6a,0x80,0x7b] + vcmppbf16 k5 {k7}, zmm23, word ptr [rdx - 256]{1to32}, 123 + +// CHECK: vcmppbf16 k5, xmm23, xmmword ptr [rbp + 8*r14 + 268435456], 123 +// CHECK: encoding: [0x62,0xb3,0x47,0x00,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b] + vcmppbf16 k5, xmm23, xmmword ptr [rbp + 8*r14 + 268435456], 123 + +// CHECK: vcmppbf16 k5 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291], 123 +// CHECK: encoding: [0x62,0xd3,0x47,0x07,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b] + vcmppbf16 k5 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291], 123 + +// CHECK: vcmppbf16 k5, xmm23, word ptr [rip]{1to8}, 123 +// CHECK: encoding: [0x62,0xf3,0x47,0x10,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b] + vcmppbf16 k5, xmm23, word ptr [rip]{1to8}, 123 + +// CHECK: vcmppbf16 k5, xmm23, xmmword ptr [2*rbp - 512], 123 +// CHECK: encoding: [0x62,0xf3,0x47,0x00,0xc2,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vcmppbf16 k5, xmm23, xmmword ptr [2*rbp - 512], 123 + +// CHECK: vcmppbf16 k5 {k7}, xmm23, xmmword ptr [rcx + 2032], 123 +// CHECK: encoding: [0x62,0xf3,0x47,0x07,0xc2,0x69,0x7f,0x7b] + vcmppbf16 k5 {k7}, xmm23, xmmword ptr [rcx + 2032], 123 + +// CHECK: vcmppbf16 k5 {k7}, xmm23, word ptr [rdx - 256]{1to8}, 123 +// CHECK: encoding: [0x62,0xf3,0x47,0x17,0xc2,0x6a,0x80,0x7b] + vcmppbf16 k5 {k7}, xmm23, word ptr [rdx - 256]{1to8}, 123 + +// CHECK: vcmppbf16 k5, ymm23, ymmword ptr [rbp + 8*r14 + 268435456], 123 +// CHECK: encoding: [0x62,0xb3,0x47,0x20,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b] + vcmppbf16 k5, ymm23, ymmword ptr [rbp + 8*r14 + 268435456], 123 + +// CHECK: vcmppbf16 k5 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291], 123 +// CHECK: encoding: [0x62,0xd3,0x47,0x27,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b] + vcmppbf16 k5 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291], 123 + +// CHECK: vcmppbf16 k5, ymm23, word ptr [rip]{1to16}, 123 +// CHECK: encoding: 
[0x62,0xf3,0x47,0x30,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b] + vcmppbf16 k5, ymm23, word ptr [rip]{1to16}, 123 + +// CHECK: vcmppbf16 k5, ymm23, ymmword ptr [2*rbp - 1024], 123 +// CHECK: encoding: [0x62,0xf3,0x47,0x20,0xc2,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vcmppbf16 k5, ymm23, ymmword ptr [2*rbp - 1024], 123 + +// CHECK: vcmppbf16 k5 {k7}, ymm23, ymmword ptr [rcx + 4064], 123 +// CHECK: encoding: [0x62,0xf3,0x47,0x27,0xc2,0x69,0x7f,0x7b] + vcmppbf16 k5 {k7}, ymm23, ymmword ptr [rcx + 4064], 123 + +// CHECK: vcmppbf16 k5 {k7}, ymm23, word ptr [rdx - 256]{1to16}, 123 +// CHECK: encoding: [0x62,0xf3,0x47,0x37,0xc2,0x6a,0x80,0x7b] + vcmppbf16 k5 {k7}, ymm23, word ptr [rdx - 256]{1to16}, 123 + +// CHECK: vcomsbf16 xmm22, xmm23 +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x2f,0xf7] + vcomsbf16 xmm22, xmm23 + +// CHECK: vcomsbf16 xmm22, word ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10] + vcomsbf16 xmm22, word ptr [rbp + 8*r14 + 268435456] + +// CHECK: vcomsbf16 xmm22, word ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00] + vcomsbf16 xmm22, word ptr [r8 + 4*rax + 291] + +// CHECK: vcomsbf16 xmm22, word ptr [rip] +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x35,0x00,0x00,0x00,0x00] + vcomsbf16 xmm22, word ptr [rip] + +// CHECK: vcomsbf16 xmm22, word ptr [2*rbp - 64] +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff] + vcomsbf16 xmm22, word ptr [2*rbp - 64] + +// CHECK: vcomsbf16 xmm22, word ptr [rcx + 254] +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x71,0x7f] + vcomsbf16 xmm22, word ptr [rcx + 254] + +// CHECK: vcomsbf16 xmm22, word ptr [rdx - 256] +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x72,0x80] + vcomsbf16 xmm22, word ptr [rdx - 256] + +// CHECK: vdivnepbf16 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5e,0xf0] + vdivnepbf16 ymm22, ymm23, ymm24 + +// CHECK: vdivnepbf16 ymm22 {k7}, ymm23, ymm24 +// 
CHECK: encoding: [0x62,0x85,0x45,0x27,0x5e,0xf0] + vdivnepbf16 ymm22 {k7}, ymm23, ymm24 + +// CHECK: vdivnepbf16 ymm22 {k7} {z}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5e,0xf0] + vdivnepbf16 ymm22 {k7} {z}, ymm23, ymm24 + +// CHECK: vdivnepbf16 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5e,0xf0] + vdivnepbf16 zmm22, zmm23, zmm24 + +// CHECK: vdivnepbf16 zmm22 {k7}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5e,0xf0] + vdivnepbf16 zmm22 {k7}, zmm23, zmm24 + +// CHECK: vdivnepbf16 zmm22 {k7} {z}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5e,0xf0] + vdivnepbf16 zmm22 {k7} {z}, zmm23, zmm24 + +// CHECK: vdivnepbf16 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5e,0xf0] + vdivnepbf16 xmm22, xmm23, xmm24 + +// CHECK: vdivnepbf16 xmm22 {k7}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5e,0xf0] + vdivnepbf16 xmm22 {k7}, xmm23, xmm24 + +// CHECK: vdivnepbf16 xmm22 {k7} {z}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5e,0xf0] + vdivnepbf16 xmm22 {k7} {z}, xmm23, xmm24 + +// CHECK: vdivnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vdivnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vdivnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00] + vdivnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vdivnepbf16 zmm22, zmm23, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5e,0x35,0x00,0x00,0x00,0x00] + vdivnepbf16 zmm22, zmm23, word ptr [rip]{1to32} + +// CHECK: vdivnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5e,0x34,0x6d,0x00,0xf8,0xff,0xff] + vdivnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vdivnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +// CHECK: 
encoding: [0x62,0xe5,0x45,0xc7,0x5e,0x71,0x7f] + vdivnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vdivnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5e,0x72,0x80] + vdivnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} + +// CHECK: vdivnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vdivnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vdivnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00] + vdivnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vdivnepbf16 ymm22, ymm23, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5e,0x35,0x00,0x00,0x00,0x00] + vdivnepbf16 ymm22, ymm23, word ptr [rip]{1to16} + +// CHECK: vdivnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5e,0x34,0x6d,0x00,0xfc,0xff,0xff] + vdivnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vdivnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5e,0x71,0x7f] + vdivnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vdivnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5e,0x72,0x80] + vdivnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} + +// CHECK: vdivnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vdivnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vdivnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00] + vdivnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vdivnepbf16 xmm22, xmm23, 
word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5e,0x35,0x00,0x00,0x00,0x00] + vdivnepbf16 xmm22, xmm23, word ptr [rip]{1to8} + +// CHECK: vdivnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5e,0x34,0x6d,0x00,0xfe,0xff,0xff] + vdivnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vdivnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5e,0x71,0x7f] + vdivnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vdivnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5e,0x72,0x80] + vdivnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} + +// CHECK: vfmadd132nepbf16 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0x98,0xf0] + vfmadd132nepbf16 ymm22, ymm23, ymm24 + +// CHECK: vfmadd132nepbf16 ymm22 {k7}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x27,0x98,0xf0] + vfmadd132nepbf16 ymm22 {k7}, ymm23, ymm24 + +// CHECK: vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x98,0xf0] + vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymm24 + +// CHECK: vfmadd132nepbf16 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0x98,0xf0] + vfmadd132nepbf16 zmm22, zmm23, zmm24 + +// CHECK: vfmadd132nepbf16 zmm22 {k7}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x47,0x98,0xf0] + vfmadd132nepbf16 zmm22 {k7}, zmm23, zmm24 + +// CHECK: vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x98,0xf0] + vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmm24 + +// CHECK: vfmadd132nepbf16 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0x98,0xf0] + vfmadd132nepbf16 xmm22, xmm23, xmm24 + +// CHECK: vfmadd132nepbf16 xmm22 {k7}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x07,0x98,0xf0] + vfmadd132nepbf16 xmm22 {k7}, xmm23, xmm24 + +// CHECK: vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +// 
CHECK: encoding: [0x62,0x86,0x44,0x87,0x98,0xf0] + vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmm24 + +// CHECK: vfmadd132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmadd132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfmadd132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x98,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmadd132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfmadd132nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x98,0x35,0x00,0x00,0x00,0x00] + vfmadd132nepbf16 zmm22, zmm23, word ptr [rip]{1to32} + +// CHECK: vfmadd132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x98,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfmadd132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x98,0x71,0x7f] + vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x98,0x72,0x80] + vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} + +// CHECK: vfmadd132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmadd132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfmadd132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x98,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmadd132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vfmadd132nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x98,0x35,0x00,0x00,0x00,0x00] + vfmadd132nepbf16 ymm22, ymm23, 
word ptr [rip]{1to16} + +// CHECK: vfmadd132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x98,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfmadd132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x98,0x71,0x7f] + vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x98,0x72,0x80] + vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} + +// CHECK: vfmadd132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmadd132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfmadd132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x98,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmadd132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfmadd132nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x98,0x35,0x00,0x00,0x00,0x00] + vfmadd132nepbf16 xmm22, xmm23, word ptr [rip]{1to8} + +// CHECK: vfmadd132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x98,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfmadd132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x98,0x71,0x7f] + vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x98,0x72,0x80] + vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} + +// CHECK: vfmadd213nepbf16 ymm22, ymm23, ymm24 +// CHECK: encoding: 
[0x62,0x86,0x44,0x20,0xa8,0xf0] + vfmadd213nepbf16 ymm22, ymm23, ymm24 + +// CHECK: vfmadd213nepbf16 ymm22 {k7}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x27,0xa8,0xf0] + vfmadd213nepbf16 ymm22 {k7}, ymm23, ymm24 + +// CHECK: vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xa8,0xf0] + vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymm24 + +// CHECK: vfmadd213nepbf16 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0xa8,0xf0] + vfmadd213nepbf16 zmm22, zmm23, zmm24 + +// CHECK: vfmadd213nepbf16 zmm22 {k7}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x47,0xa8,0xf0] + vfmadd213nepbf16 zmm22 {k7}, zmm23, zmm24 + +// CHECK: vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xa8,0xf0] + vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmm24 + +// CHECK: vfmadd213nepbf16 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0xa8,0xf0] + vfmadd213nepbf16 xmm22, xmm23, xmm24 + +// CHECK: vfmadd213nepbf16 xmm22 {k7}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x07,0xa8,0xf0] + vfmadd213nepbf16 xmm22 {k7}, xmm23, xmm24 + +// CHECK: vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x87,0xa8,0xf0] + vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmm24 + +// CHECK: vfmadd213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmadd213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfmadd213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmadd213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfmadd213nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xa8,0x35,0x00,0x00,0x00,0x00] + vfmadd213nepbf16 zmm22, zmm23, word ptr [rip]{1to32} + +// CHECK: vfmadd213nepbf16 zmm22, zmm23, zmmword ptr 
[2*rbp - 2048] +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xa8,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfmadd213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xa8,0x71,0x7f] + vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xa8,0x72,0x80] + vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} + +// CHECK: vfmadd213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmadd213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfmadd213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmadd213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vfmadd213nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xa8,0x35,0x00,0x00,0x00,0x00] + vfmadd213nepbf16 ymm22, ymm23, word ptr [rip]{1to16} + +// CHECK: vfmadd213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xa8,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfmadd213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xa8,0x71,0x7f] + vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xa8,0x72,0x80] + vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} + +// CHECK: vfmadd213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10] + 
vfmadd213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfmadd213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmadd213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfmadd213nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xa8,0x35,0x00,0x00,0x00,0x00] + vfmadd213nepbf16 xmm22, xmm23, word ptr [rip]{1to8} + +// CHECK: vfmadd213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xa8,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfmadd213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xa8,0x71,0x7f] + vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xa8,0x72,0x80] + vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} + +// CHECK: vfmadd231nepbf16 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0xb8,0xf0] + vfmadd231nepbf16 ymm22, ymm23, ymm24 + +// CHECK: vfmadd231nepbf16 ymm22 {k7}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x27,0xb8,0xf0] + vfmadd231nepbf16 ymm22 {k7}, ymm23, ymm24 + +// CHECK: vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xb8,0xf0] + vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymm24 + +// CHECK: vfmadd231nepbf16 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0xb8,0xf0] + vfmadd231nepbf16 zmm22, zmm23, zmm24 + +// CHECK: vfmadd231nepbf16 zmm22 {k7}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x47,0xb8,0xf0] + vfmadd231nepbf16 zmm22 {k7}, zmm23, zmm24 + +// CHECK: vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xb8,0xf0] + vfmadd231nepbf16 zmm22 {k7} {z}, 
zmm23, zmm24 + +// CHECK: vfmadd231nepbf16 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0xb8,0xf0] + vfmadd231nepbf16 xmm22, xmm23, xmm24 + +// CHECK: vfmadd231nepbf16 xmm22 {k7}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x07,0xb8,0xf0] + vfmadd231nepbf16 xmm22 {k7}, xmm23, xmm24 + +// CHECK: vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x87,0xb8,0xf0] + vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmm24 + +// CHECK: vfmadd231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmadd231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfmadd231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmadd231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfmadd231nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xb8,0x35,0x00,0x00,0x00,0x00] + vfmadd231nepbf16 zmm22, zmm23, word ptr [rip]{1to32} + +// CHECK: vfmadd231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xb8,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfmadd231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xb8,0x71,0x7f] + vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xb8,0x72,0x80] + vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} + +// CHECK: vfmadd231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmadd231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: 
vfmadd231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmadd231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vfmadd231nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xb8,0x35,0x00,0x00,0x00,0x00] + vfmadd231nepbf16 ymm22, ymm23, word ptr [rip]{1to16} + +// CHECK: vfmadd231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xb8,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfmadd231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xb8,0x71,0x7f] + vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xb8,0x72,0x80] + vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} + +// CHECK: vfmadd231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmadd231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfmadd231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmadd231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfmadd231nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xb8,0x35,0x00,0x00,0x00,0x00] + vfmadd231nepbf16 xmm22, xmm23, word ptr [rip]{1to8} + +// CHECK: vfmadd231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xb8,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfmadd231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: 
[0x62,0xe6,0x44,0x87,0xb8,0x71,0x7f] + vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xb8,0x72,0x80] + vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} + +// CHECK: vfmsub132nepbf16 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0x9a,0xf0] + vfmsub132nepbf16 ymm22, ymm23, ymm24 + +// CHECK: vfmsub132nepbf16 ymm22 {k7}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x27,0x9a,0xf0] + vfmsub132nepbf16 ymm22 {k7}, ymm23, ymm24 + +// CHECK: vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x9a,0xf0] + vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymm24 + +// CHECK: vfmsub132nepbf16 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0x9a,0xf0] + vfmsub132nepbf16 zmm22, zmm23, zmm24 + +// CHECK: vfmsub132nepbf16 zmm22 {k7}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x47,0x9a,0xf0] + vfmsub132nepbf16 zmm22 {k7}, zmm23, zmm24 + +// CHECK: vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x9a,0xf0] + vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmm24 + +// CHECK: vfmsub132nepbf16 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0x9a,0xf0] + vfmsub132nepbf16 xmm22, xmm23, xmm24 + +// CHECK: vfmsub132nepbf16 xmm22 {k7}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x07,0x9a,0xf0] + vfmsub132nepbf16 xmm22 {k7}, xmm23, xmm24 + +// CHECK: vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x87,0x9a,0xf0] + vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmm24 + +// CHECK: vfmsub132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmsub132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfmsub132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: 
encoding: [0x62,0xc6,0x44,0x47,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmsub132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfmsub132nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x9a,0x35,0x00,0x00,0x00,0x00] + vfmsub132nepbf16 zmm22, zmm23, word ptr [rip]{1to32} + +// CHECK: vfmsub132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x9a,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfmsub132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x9a,0x71,0x7f] + vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x9a,0x72,0x80] + vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} + +// CHECK: vfmsub132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmsub132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfmsub132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmsub132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vfmsub132nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x9a,0x35,0x00,0x00,0x00,0x00] + vfmsub132nepbf16 ymm22, ymm23, word ptr [rip]{1to16} + +// CHECK: vfmsub132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x9a,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfmsub132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x9a,0x71,0x7f] + vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymmword 
ptr [rcx + 4064] + +// CHECK: vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x9a,0x72,0x80] + vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} + +// CHECK: vfmsub132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmsub132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfmsub132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmsub132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfmsub132nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x9a,0x35,0x00,0x00,0x00,0x00] + vfmsub132nepbf16 xmm22, xmm23, word ptr [rip]{1to8} + +// CHECK: vfmsub132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x9a,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfmsub132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x9a,0x71,0x7f] + vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x9a,0x72,0x80] + vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} + +// CHECK: vfmsub213nepbf16 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0xaa,0xf0] + vfmsub213nepbf16 ymm22, ymm23, ymm24 + +// CHECK: vfmsub213nepbf16 ymm22 {k7}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x27,0xaa,0xf0] + vfmsub213nepbf16 ymm22 {k7}, ymm23, ymm24 + +// CHECK: vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xaa,0xf0] + vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymm24 + +// CHECK: vfmsub213nepbf16 zmm22, zmm23, zmm24 
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xaa,0xf0] + vfmsub213nepbf16 zmm22, zmm23, zmm24 + +// CHECK: vfmsub213nepbf16 zmm22 {k7}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x47,0xaa,0xf0] + vfmsub213nepbf16 zmm22 {k7}, zmm23, zmm24 + +// CHECK: vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xaa,0xf0] + vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmm24 + +// CHECK: vfmsub213nepbf16 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0xaa,0xf0] + vfmsub213nepbf16 xmm22, xmm23, xmm24 + +// CHECK: vfmsub213nepbf16 xmm22 {k7}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x07,0xaa,0xf0] + vfmsub213nepbf16 xmm22 {k7}, xmm23, xmm24 + +// CHECK: vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x87,0xaa,0xf0] + vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmm24 + +// CHECK: vfmsub213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmsub213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfmsub213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmsub213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfmsub213nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xaa,0x35,0x00,0x00,0x00,0x00] + vfmsub213nepbf16 zmm22, zmm23, word ptr [rip]{1to32} + +// CHECK: vfmsub213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xaa,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfmsub213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xaa,0x71,0x7f] + vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 
256]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xaa,0x72,0x80] + vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} + +// CHECK: vfmsub213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmsub213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfmsub213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmsub213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vfmsub213nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xaa,0x35,0x00,0x00,0x00,0x00] + vfmsub213nepbf16 ymm22, ymm23, word ptr [rip]{1to16} + +// CHECK: vfmsub213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xaa,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfmsub213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xaa,0x71,0x7f] + vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xaa,0x72,0x80] + vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} + +// CHECK: vfmsub213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmsub213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfmsub213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmsub213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfmsub213nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +// CHECK: encoding: 
[0x62,0xe6,0x44,0x10,0xaa,0x35,0x00,0x00,0x00,0x00] + vfmsub213nepbf16 xmm22, xmm23, word ptr [rip]{1to8} + +// CHECK: vfmsub213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xaa,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfmsub213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xaa,0x71,0x7f] + vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xaa,0x72,0x80] + vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} + +// CHECK: vfmsub231nepbf16 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0xba,0xf0] + vfmsub231nepbf16 ymm22, ymm23, ymm24 + +// CHECK: vfmsub231nepbf16 ymm22 {k7}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x27,0xba,0xf0] + vfmsub231nepbf16 ymm22 {k7}, ymm23, ymm24 + +// CHECK: vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xba,0xf0] + vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymm24 + +// CHECK: vfmsub231nepbf16 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0xba,0xf0] + vfmsub231nepbf16 zmm22, zmm23, zmm24 + +// CHECK: vfmsub231nepbf16 zmm22 {k7}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x47,0xba,0xf0] + vfmsub231nepbf16 zmm22 {k7}, zmm23, zmm24 + +// CHECK: vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xba,0xf0] + vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmm24 + +// CHECK: vfmsub231nepbf16 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0xba,0xf0] + vfmsub231nepbf16 xmm22, xmm23, xmm24 + +// CHECK: vfmsub231nepbf16 xmm22 {k7}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x07,0xba,0xf0] + vfmsub231nepbf16 xmm22 {k7}, xmm23, xmm24 + +// CHECK: vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +// CHECK: 
encoding: [0x62,0x86,0x44,0x87,0xba,0xf0] + vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmm24 + +// CHECK: vfmsub231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmsub231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfmsub231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xba,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmsub231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfmsub231nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xba,0x35,0x00,0x00,0x00,0x00] + vfmsub231nepbf16 zmm22, zmm23, word ptr [rip]{1to32} + +// CHECK: vfmsub231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xba,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfmsub231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xba,0x71,0x7f] + vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xba,0x72,0x80] + vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} + +// CHECK: vfmsub231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmsub231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfmsub231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xba,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmsub231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vfmsub231nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xba,0x35,0x00,0x00,0x00,0x00] + vfmsub231nepbf16 ymm22, ymm23, word 
ptr [rip]{1to16} + +// CHECK: vfmsub231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xba,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfmsub231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xba,0x71,0x7f] + vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xba,0x72,0x80] + vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} + +// CHECK: vfmsub231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfmsub231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfmsub231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xba,0xb4,0x80,0x23,0x01,0x00,0x00] + vfmsub231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfmsub231nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xba,0x35,0x00,0x00,0x00,0x00] + vfmsub231nepbf16 xmm22, xmm23, word ptr [rip]{1to8} + +// CHECK: vfmsub231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xba,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfmsub231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xba,0x71,0x7f] + vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xba,0x72,0x80] + vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} + +// CHECK: vfnmadd132nepbf16 ymm22, ymm23, ymm24 +// CHECK: encoding: 
[0x62,0x86,0x44,0x20,0x9c,0xf0] + vfnmadd132nepbf16 ymm22, ymm23, ymm24 + +// CHECK: vfnmadd132nepbf16 ymm22 {k7}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x27,0x9c,0xf0] + vfnmadd132nepbf16 ymm22 {k7}, ymm23, ymm24 + +// CHECK: vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x9c,0xf0] + vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymm24 + +// CHECK: vfnmadd132nepbf16 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0x9c,0xf0] + vfnmadd132nepbf16 zmm22, zmm23, zmm24 + +// CHECK: vfnmadd132nepbf16 zmm22 {k7}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x47,0x9c,0xf0] + vfnmadd132nepbf16 zmm22 {k7}, zmm23, zmm24 + +// CHECK: vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x9c,0xf0] + vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmm24 + +// CHECK: vfnmadd132nepbf16 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0x9c,0xf0] + vfnmadd132nepbf16 xmm22, xmm23, xmm24 + +// CHECK: vfnmadd132nepbf16 xmm22 {k7}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x07,0x9c,0xf0] + vfnmadd132nepbf16 xmm22 {k7}, xmm23, xmm24 + +// CHECK: vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x87,0x9c,0xf0] + vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmm24 + +// CHECK: vfnmadd132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmadd132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfnmadd132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmadd132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfnmadd132nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x9c,0x35,0x00,0x00,0x00,0x00] + vfnmadd132nepbf16 zmm22, zmm23, word ptr [rip]{1to32} + +// CHECK: vfnmadd132nepbf16 
zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x9c,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfnmadd132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x9c,0x71,0x7f] + vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x9c,0x72,0x80] + vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} + +// CHECK: vfnmadd132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmadd132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfnmadd132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmadd132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vfnmadd132nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x9c,0x35,0x00,0x00,0x00,0x00] + vfnmadd132nepbf16 ymm22, ymm23, word ptr [rip]{1to16} + +// CHECK: vfnmadd132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x9c,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfnmadd132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x9c,0x71,0x7f] + vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x9c,0x72,0x80] + vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} + +// CHECK: vfnmadd132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: 
[0x62,0xa6,0x44,0x00,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmadd132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfnmadd132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmadd132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfnmadd132nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x9c,0x35,0x00,0x00,0x00,0x00] + vfnmadd132nepbf16 xmm22, xmm23, word ptr [rip]{1to8} + +// CHECK: vfnmadd132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x9c,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfnmadd132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x9c,0x71,0x7f] + vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x9c,0x72,0x80] + vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} + +// CHECK: vfnmadd213nepbf16 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0xac,0xf0] + vfnmadd213nepbf16 ymm22, ymm23, ymm24 + +// CHECK: vfnmadd213nepbf16 ymm22 {k7}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x27,0xac,0xf0] + vfnmadd213nepbf16 ymm22 {k7}, ymm23, ymm24 + +// CHECK: vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xac,0xf0] + vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymm24 + +// CHECK: vfnmadd213nepbf16 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0xac,0xf0] + vfnmadd213nepbf16 zmm22, zmm23, zmm24 + +// CHECK: vfnmadd213nepbf16 zmm22 {k7}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x47,0xac,0xf0] + vfnmadd213nepbf16 zmm22 {k7}, zmm23, zmm24 + +// CHECK: vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +// 
CHECK: encoding: [0x62,0x86,0x44,0xc7,0xac,0xf0] + vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmm24 + +// CHECK: vfnmadd213nepbf16 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0xac,0xf0] + vfnmadd213nepbf16 xmm22, xmm23, xmm24 + +// CHECK: vfnmadd213nepbf16 xmm22 {k7}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x07,0xac,0xf0] + vfnmadd213nepbf16 xmm22 {k7}, xmm23, xmm24 + +// CHECK: vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x87,0xac,0xf0] + vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmm24 + +// CHECK: vfnmadd213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmadd213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfnmadd213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xac,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmadd213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfnmadd213nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xac,0x35,0x00,0x00,0x00,0x00] + vfnmadd213nepbf16 zmm22, zmm23, word ptr [rip]{1to32} + +// CHECK: vfnmadd213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xac,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfnmadd213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xac,0x71,0x7f] + vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xac,0x72,0x80] + vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} + +// CHECK: vfnmadd213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: 
[0x62,0xa6,0x44,0x20,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmadd213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfnmadd213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xac,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmadd213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vfnmadd213nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xac,0x35,0x00,0x00,0x00,0x00] + vfnmadd213nepbf16 ymm22, ymm23, word ptr [rip]{1to16} + +// CHECK: vfnmadd213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xac,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfnmadd213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xac,0x71,0x7f] + vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xac,0x72,0x80] + vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} + +// CHECK: vfnmadd213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmadd213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfnmadd213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xac,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmadd213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfnmadd213nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xac,0x35,0x00,0x00,0x00,0x00] + vfnmadd213nepbf16 xmm22, xmm23, word ptr [rip]{1to8} + +// CHECK: vfnmadd213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xac,0x34,0x6d,0x00,0xfe,0xff,0xff] + 
vfnmadd213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xac,0x71,0x7f] + vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xac,0x72,0x80] + vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} + +// CHECK: vfnmadd231nepbf16 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0xbc,0xf0] + vfnmadd231nepbf16 ymm22, ymm23, ymm24 + +// CHECK: vfnmadd231nepbf16 ymm22 {k7}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x27,0xbc,0xf0] + vfnmadd231nepbf16 ymm22 {k7}, ymm23, ymm24 + +// CHECK: vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xbc,0xf0] + vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymm24 + +// CHECK: vfnmadd231nepbf16 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0xbc,0xf0] + vfnmadd231nepbf16 zmm22, zmm23, zmm24 + +// CHECK: vfnmadd231nepbf16 zmm22 {k7}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x47,0xbc,0xf0] + vfnmadd231nepbf16 zmm22 {k7}, zmm23, zmm24 + +// CHECK: vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xbc,0xf0] + vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmm24 + +// CHECK: vfnmadd231nepbf16 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0xbc,0xf0] + vfnmadd231nepbf16 xmm22, xmm23, xmm24 + +// CHECK: vfnmadd231nepbf16 xmm22 {k7}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x07,0xbc,0xf0] + vfnmadd231nepbf16 xmm22 {k7}, xmm23, xmm24 + +// CHECK: vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x87,0xbc,0xf0] + vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmm24 + +// CHECK: vfnmadd231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: 
[0x62,0xa6,0x44,0x40,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmadd231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfnmadd231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmadd231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfnmadd231nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xbc,0x35,0x00,0x00,0x00,0x00] + vfnmadd231nepbf16 zmm22, zmm23, word ptr [rip]{1to32} + +// CHECK: vfnmadd231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xbc,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfnmadd231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xbc,0x71,0x7f] + vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xbc,0x72,0x80] + vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} + +// CHECK: vfnmadd231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmadd231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfnmadd231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmadd231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vfnmadd231nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xbc,0x35,0x00,0x00,0x00,0x00] + vfnmadd231nepbf16 ymm22, ymm23, word ptr [rip]{1to16} + +// CHECK: vfnmadd231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xbc,0x34,0x6d,0x00,0xfc,0xff,0xff] + 
vfnmadd231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xbc,0x71,0x7f] + vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xbc,0x72,0x80] + vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} + +// CHECK: vfnmadd231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmadd231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfnmadd231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmadd231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfnmadd231nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xbc,0x35,0x00,0x00,0x00,0x00] + vfnmadd231nepbf16 xmm22, xmm23, word ptr [rip]{1to8} + +// CHECK: vfnmadd231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xbc,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfnmadd231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xbc,0x71,0x7f] + vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xbc,0x72,0x80] + vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} + +// CHECK: vfnmsub132nepbf16 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0x9e,0xf0] + vfnmsub132nepbf16 ymm22, ymm23, ymm24 + +// CHECK: vfnmsub132nepbf16 ymm22 {k7}, ymm23, ymm24 +// CHECK: encoding: 
[0x62,0x86,0x44,0x27,0x9e,0xf0] + vfnmsub132nepbf16 ymm22 {k7}, ymm23, ymm24 + +// CHECK: vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x9e,0xf0] + vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymm24 + +// CHECK: vfnmsub132nepbf16 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0x9e,0xf0] + vfnmsub132nepbf16 zmm22, zmm23, zmm24 + +// CHECK: vfnmsub132nepbf16 zmm22 {k7}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x47,0x9e,0xf0] + vfnmsub132nepbf16 zmm22 {k7}, zmm23, zmm24 + +// CHECK: vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x9e,0xf0] + vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmm24 + +// CHECK: vfnmsub132nepbf16 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0x9e,0xf0] + vfnmsub132nepbf16 xmm22, xmm23, xmm24 + +// CHECK: vfnmsub132nepbf16 xmm22 {k7}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x07,0x9e,0xf0] + vfnmsub132nepbf16 xmm22 {k7}, xmm23, xmm24 + +// CHECK: vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x87,0x9e,0xf0] + vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmm24 + +// CHECK: vfnmsub132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmsub132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfnmsub132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmsub132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfnmsub132nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x9e,0x35,0x00,0x00,0x00,0x00] + vfnmsub132nepbf16 zmm22, zmm23, word ptr [rip]{1to32} + +// CHECK: vfnmsub132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x9e,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfnmsub132nepbf16 zmm22, 
zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x9e,0x71,0x7f] + vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x9e,0x72,0x80] + vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} + +// CHECK: vfnmsub132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmsub132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfnmsub132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmsub132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vfnmsub132nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x9e,0x35,0x00,0x00,0x00,0x00] + vfnmsub132nepbf16 ymm22, ymm23, word ptr [rip]{1to16} + +// CHECK: vfnmsub132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x9e,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfnmsub132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x9e,0x71,0x7f] + vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x9e,0x72,0x80] + vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} + +// CHECK: vfnmsub132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmsub132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfnmsub132nepbf16 
xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmsub132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfnmsub132nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x9e,0x35,0x00,0x00,0x00,0x00] + vfnmsub132nepbf16 xmm22, xmm23, word ptr [rip]{1to8} + +// CHECK: vfnmsub132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x9e,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfnmsub132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x9e,0x71,0x7f] + vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x9e,0x72,0x80] + vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} + +// CHECK: vfnmsub213nepbf16 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0xae,0xf0] + vfnmsub213nepbf16 ymm22, ymm23, ymm24 + +// CHECK: vfnmsub213nepbf16 ymm22 {k7}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x27,0xae,0xf0] + vfnmsub213nepbf16 ymm22 {k7}, ymm23, ymm24 + +// CHECK: vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xae,0xf0] + vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymm24 + +// CHECK: vfnmsub213nepbf16 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0xae,0xf0] + vfnmsub213nepbf16 zmm22, zmm23, zmm24 + +// CHECK: vfnmsub213nepbf16 zmm22 {k7}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x47,0xae,0xf0] + vfnmsub213nepbf16 zmm22 {k7}, zmm23, zmm24 + +// CHECK: vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xae,0xf0] + vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmm24 + +// CHECK: vfnmsub213nepbf16 xmm22, xmm23, xmm24 +// CHECK: 
encoding: [0x62,0x86,0x44,0x00,0xae,0xf0] + vfnmsub213nepbf16 xmm22, xmm23, xmm24 + +// CHECK: vfnmsub213nepbf16 xmm22 {k7}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x07,0xae,0xf0] + vfnmsub213nepbf16 xmm22 {k7}, xmm23, xmm24 + +// CHECK: vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x87,0xae,0xf0] + vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmm24 + +// CHECK: vfnmsub213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmsub213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfnmsub213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xae,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmsub213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfnmsub213nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xae,0x35,0x00,0x00,0x00,0x00] + vfnmsub213nepbf16 zmm22, zmm23, word ptr [rip]{1to32} + +// CHECK: vfnmsub213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xae,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfnmsub213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xae,0x71,0x7f] + vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xae,0x72,0x80] + vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} + +// CHECK: vfnmsub213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmsub213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfnmsub213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] 
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xae,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmsub213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vfnmsub213nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xae,0x35,0x00,0x00,0x00,0x00] + vfnmsub213nepbf16 ymm22, ymm23, word ptr [rip]{1to16} + +// CHECK: vfnmsub213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xae,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfnmsub213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xae,0x71,0x7f] + vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xae,0x72,0x80] + vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} + +// CHECK: vfnmsub213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmsub213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfnmsub213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xae,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmsub213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfnmsub213nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xae,0x35,0x00,0x00,0x00,0x00] + vfnmsub213nepbf16 xmm22, xmm23, word ptr [rip]{1to8} + +// CHECK: vfnmsub213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xae,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfnmsub213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xae,0x71,0x7f] + vfnmsub213nepbf16 xmm22 
{k7} {z}, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xae,0x72,0x80] + vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} + +// CHECK: vfnmsub231nepbf16 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0xbe,0xf0] + vfnmsub231nepbf16 ymm22, ymm23, ymm24 + +// CHECK: vfnmsub231nepbf16 ymm22 {k7}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x27,0xbe,0xf0] + vfnmsub231nepbf16 ymm22 {k7}, ymm23, ymm24 + +// CHECK: vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xbe,0xf0] + vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymm24 + +// CHECK: vfnmsub231nepbf16 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0xbe,0xf0] + vfnmsub231nepbf16 zmm22, zmm23, zmm24 + +// CHECK: vfnmsub231nepbf16 zmm22 {k7}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x47,0xbe,0xf0] + vfnmsub231nepbf16 zmm22 {k7}, zmm23, zmm24 + +// CHECK: vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xbe,0xf0] + vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmm24 + +// CHECK: vfnmsub231nepbf16 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0xbe,0xf0] + vfnmsub231nepbf16 xmm22, xmm23, xmm24 + +// CHECK: vfnmsub231nepbf16 xmm22 {k7}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x07,0xbe,0xf0] + vfnmsub231nepbf16 xmm22 {k7}, xmm23, xmm24 + +// CHECK: vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x87,0xbe,0xf0] + vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmm24 + +// CHECK: vfnmsub231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmsub231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfnmsub231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: 
[0x62,0xc6,0x44,0x47,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmsub231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfnmsub231nepbf16 zmm22, zmm23, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xbe,0x35,0x00,0x00,0x00,0x00] + vfnmsub231nepbf16 zmm22, zmm23, word ptr [rip]{1to32} + +// CHECK: vfnmsub231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xbe,0x34,0x6d,0x00,0xf8,0xff,0xff] + vfnmsub231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xbe,0x71,0x7f] + vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xbe,0x72,0x80] + vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} + +// CHECK: vfnmsub231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmsub231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfnmsub231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmsub231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vfnmsub231nepbf16 ymm22, ymm23, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xbe,0x35,0x00,0x00,0x00,0x00] + vfnmsub231nepbf16 ymm22, ymm23, word ptr [rip]{1to16} + +// CHECK: vfnmsub231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xbe,0x34,0x6d,0x00,0xfc,0xff,0xff] + vfnmsub231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xbe,0x71,0x7f] + vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, 
ymmword ptr [rcx + 4064] + +// CHECK: vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xbe,0x72,0x80] + vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} + +// CHECK: vfnmsub231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10] + vfnmsub231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vfnmsub231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00] + vfnmsub231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vfnmsub231nepbf16 xmm22, xmm23, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xbe,0x35,0x00,0x00,0x00,0x00] + vfnmsub231nepbf16 xmm22, xmm23, word ptr [rip]{1to8} + +// CHECK: vfnmsub231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xbe,0x34,0x6d,0x00,0xfe,0xff,0xff] + vfnmsub231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xbe,0x71,0x7f] + vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xbe,0x72,0x80] + vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} + +// CHECK: vfpclasspbf16 k5, zmm23, 123 +// CHECK: encoding: [0x62,0xb3,0x7f,0x48,0x66,0xef,0x7b] + vfpclasspbf16 k5, zmm23, 123 + +// CHECK: vfpclasspbf16 k5 {k7}, zmm23, 123 +// CHECK: encoding: [0x62,0xb3,0x7f,0x4f,0x66,0xef,0x7b] + vfpclasspbf16 k5 {k7}, zmm23, 123 + +// CHECK: vfpclasspbf16 k5, ymm23, 123 +// CHECK: encoding: [0x62,0xb3,0x7f,0x28,0x66,0xef,0x7b] + vfpclasspbf16 k5, ymm23, 123 + +// CHECK: vfpclasspbf16 k5 {k7}, ymm23, 123 +// CHECK: encoding: 
[0x62,0xb3,0x7f,0x2f,0x66,0xef,0x7b] + vfpclasspbf16 k5 {k7}, ymm23, 123 + +// CHECK: vfpclasspbf16 k5, xmm23, 123 +// CHECK: encoding: [0x62,0xb3,0x7f,0x08,0x66,0xef,0x7b] + vfpclasspbf16 k5, xmm23, 123 + +// CHECK: vfpclasspbf16 k5 {k7}, xmm23, 123 +// CHECK: encoding: [0x62,0xb3,0x7f,0x0f,0x66,0xef,0x7b] + vfpclasspbf16 k5 {k7}, xmm23, 123 + +// CHECK: vfpclasspbf16 k5, xmmword ptr [rbp + 8*r14 + 268435456], 123 +// CHECK: encoding: [0x62,0xb3,0x7f,0x08,0x66,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b] + vfpclasspbf16 k5, xmmword ptr [rbp + 8*r14 + 268435456], 123 + +// CHECK: vfpclasspbf16 k5 {k7}, xmmword ptr [r8 + 4*rax + 291], 123 +// CHECK: encoding: [0x62,0xd3,0x7f,0x0f,0x66,0xac,0x80,0x23,0x01,0x00,0x00,0x7b] + vfpclasspbf16 k5 {k7}, xmmword ptr [r8 + 4*rax + 291], 123 + +// CHECK: vfpclasspbf16 k5, word ptr [rip]{1to8}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b] + vfpclasspbf16 k5, word ptr [rip]{1to8}, 123 + +// CHECK: vfpclasspbf16 k5, xmmword ptr [2*rbp - 512], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vfpclasspbf16 k5, xmmword ptr [2*rbp - 512], 123 + +// CHECK: vfpclasspbf16 k5 {k7}, xmmword ptr [rcx + 2032], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0x69,0x7f,0x7b] + vfpclasspbf16 k5 {k7}, xmmword ptr [rcx + 2032], 123 + +// CHECK: vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to8}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x1f,0x66,0x6a,0x80,0x7b] + vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to8}, 123 + +// CHECK: vfpclasspbf16 k5, word ptr [rip]{1to16}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b] + vfpclasspbf16 k5, word ptr [rip]{1to16}, 123 + +// CHECK: vfpclasspbf16 k5, ymmword ptr [2*rbp - 1024], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x66,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vfpclasspbf16 k5, ymmword ptr [2*rbp - 1024], 123 + +// CHECK: vfpclasspbf16 k5 {k7}, ymmword ptr [rcx + 4064], 123 +// CHECK: encoding: 
[0x62,0xf3,0x7f,0x2f,0x66,0x69,0x7f,0x7b] + vfpclasspbf16 k5 {k7}, ymmword ptr [rcx + 4064], 123 + +// CHECK: vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to16}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x3f,0x66,0x6a,0x80,0x7b] + vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to16}, 123 + +// CHECK: vfpclasspbf16 k5, word ptr [rip]{1to32}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b] + vfpclasspbf16 k5, word ptr [rip]{1to32}, 123 + +// CHECK: vfpclasspbf16 k5, zmmword ptr [2*rbp - 2048], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x66,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vfpclasspbf16 k5, zmmword ptr [2*rbp - 2048], 123 + +// CHECK: vfpclasspbf16 k5 {k7}, zmmword ptr [rcx + 8128], 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x66,0x69,0x7f,0x7b] + vfpclasspbf16 k5 {k7}, zmmword ptr [rcx + 8128], 123 + +// CHECK: vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to32}, 123 +// CHECK: encoding: [0x62,0xf3,0x7f,0x5f,0x66,0x6a,0x80,0x7b] + vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to32}, 123 + +// CHECK: vgetexppbf16 xmm22, xmm23 +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x42,0xf7] + vgetexppbf16 xmm22, xmm23 + +// CHECK: vgetexppbf16 xmm22 {k7}, xmm23 +// CHECK: encoding: [0x62,0xa5,0x7d,0x0f,0x42,0xf7] + vgetexppbf16 xmm22 {k7}, xmm23 + +// CHECK: vgetexppbf16 xmm22 {k7} {z}, xmm23 +// CHECK: encoding: [0x62,0xa5,0x7d,0x8f,0x42,0xf7] + vgetexppbf16 xmm22 {k7} {z}, xmm23 + +// CHECK: vgetexppbf16 zmm22, zmm23 +// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x42,0xf7] + vgetexppbf16 zmm22, zmm23 + +// CHECK: vgetexppbf16 zmm22 {k7}, zmm23 +// CHECK: encoding: [0x62,0xa5,0x7d,0x4f,0x42,0xf7] + vgetexppbf16 zmm22 {k7}, zmm23 + +// CHECK: vgetexppbf16 zmm22 {k7} {z}, zmm23 +// CHECK: encoding: [0x62,0xa5,0x7d,0xcf,0x42,0xf7] + vgetexppbf16 zmm22 {k7} {z}, zmm23 + +// CHECK: vgetexppbf16 ymm22, ymm23 +// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x42,0xf7] + vgetexppbf16 ymm22, ymm23 + +// CHECK: vgetexppbf16 ymm22 {k7}, ymm23 +// CHECK: 
encoding: [0x62,0xa5,0x7d,0x2f,0x42,0xf7] + vgetexppbf16 ymm22 {k7}, ymm23 + +// CHECK: vgetexppbf16 ymm22 {k7} {z}, ymm23 +// CHECK: encoding: [0x62,0xa5,0x7d,0xaf,0x42,0xf7] + vgetexppbf16 ymm22 {k7} {z}, ymm23 + +// CHECK: vgetexppbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10] + vgetexppbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vgetexppbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x7d,0x0f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00] + vgetexppbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vgetexppbf16 xmm22, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe5,0x7d,0x18,0x42,0x35,0x00,0x00,0x00,0x00] + vgetexppbf16 xmm22, word ptr [rip]{1to8} + +// CHECK: vgetexppbf16 xmm22, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x42,0x34,0x6d,0x00,0xfe,0xff,0xff] + vgetexppbf16 xmm22, xmmword ptr [2*rbp - 512] + +// CHECK: vgetexppbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe5,0x7d,0x8f,0x42,0x71,0x7f] + vgetexppbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032] + +// CHECK: vgetexppbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe5,0x7d,0x9f,0x42,0x72,0x80] + vgetexppbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8} + +// CHECK: vgetexppbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10] + vgetexppbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vgetexppbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x7d,0x2f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00] + vgetexppbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vgetexppbf16 ymm22, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe5,0x7d,0x38,0x42,0x35,0x00,0x00,0x00,0x00] + vgetexppbf16 ymm22, word ptr [rip]{1to16} + +// CHECK: vgetexppbf16 ymm22, ymmword ptr [2*rbp - 1024] +// 
CHECK: encoding: [0x62,0xe5,0x7d,0x28,0x42,0x34,0x6d,0x00,0xfc,0xff,0xff] + vgetexppbf16 ymm22, ymmword ptr [2*rbp - 1024] + +// CHECK: vgetexppbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe5,0x7d,0xaf,0x42,0x71,0x7f] + vgetexppbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064] + +// CHECK: vgetexppbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe5,0x7d,0xbf,0x42,0x72,0x80] + vgetexppbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16} + +// CHECK: vgetexppbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10] + vgetexppbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vgetexppbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x7d,0x4f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00] + vgetexppbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vgetexppbf16 zmm22, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe5,0x7d,0x58,0x42,0x35,0x00,0x00,0x00,0x00] + vgetexppbf16 zmm22, word ptr [rip]{1to32} + +// CHECK: vgetexppbf16 zmm22, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe5,0x7d,0x48,0x42,0x34,0x6d,0x00,0xf8,0xff,0xff] + vgetexppbf16 zmm22, zmmword ptr [2*rbp - 2048] + +// CHECK: vgetexppbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe5,0x7d,0xcf,0x42,0x71,0x7f] + vgetexppbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128] + +// CHECK: vgetexppbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe5,0x7d,0xdf,0x42,0x72,0x80] + vgetexppbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32} + +// CHECK: vgetmantpbf16 zmm22, zmm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x26,0xf7,0x7b] + vgetmantpbf16 zmm22, zmm23, 123 + +// CHECK: vgetmantpbf16 zmm22 {k7}, zmm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x4f,0x26,0xf7,0x7b] + vgetmantpbf16 zmm22 {k7}, zmm23, 123 + +// CHECK: vgetmantpbf16 zmm22 {k7} {z}, zmm23, 123 +// CHECK: encoding: 
[0x62,0xa3,0x7f,0xcf,0x26,0xf7,0x7b] + vgetmantpbf16 zmm22 {k7} {z}, zmm23, 123 + +// CHECK: vgetmantpbf16 ymm22, ymm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x26,0xf7,0x7b] + vgetmantpbf16 ymm22, ymm23, 123 + +// CHECK: vgetmantpbf16 ymm22 {k7}, ymm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x2f,0x26,0xf7,0x7b] + vgetmantpbf16 ymm22 {k7}, ymm23, 123 + +// CHECK: vgetmantpbf16 ymm22 {k7} {z}, ymm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0xaf,0x26,0xf7,0x7b] + vgetmantpbf16 ymm22 {k7} {z}, ymm23, 123 + +// CHECK: vgetmantpbf16 xmm22, xmm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x26,0xf7,0x7b] + vgetmantpbf16 xmm22, xmm23, 123 + +// CHECK: vgetmantpbf16 xmm22 {k7}, xmm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x0f,0x26,0xf7,0x7b] + vgetmantpbf16 xmm22 {k7}, xmm23, 123 + +// CHECK: vgetmantpbf16 xmm22 {k7} {z}, xmm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x8f,0x26,0xf7,0x7b] + vgetmantpbf16 xmm22 {k7} {z}, xmm23, 123 + +// CHECK: vgetmantpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vgetmantpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123 + +// CHECK: vgetmantpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123 +// CHECK: encoding: [0x62,0xc3,0x7f,0x0f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + vgetmantpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123 + +// CHECK: vgetmantpbf16 xmm22, word ptr [rip]{1to8}, 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x18,0x26,0x35,0x00,0x00,0x00,0x00,0x7b] + vgetmantpbf16 xmm22, word ptr [rip]{1to8}, 123 + +// CHECK: vgetmantpbf16 xmm22, xmmword ptr [2*rbp - 512], 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x08,0x26,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vgetmantpbf16 xmm22, xmmword ptr [2*rbp - 512], 123 + +// CHECK: vgetmantpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x8f,0x26,0x71,0x7f,0x7b] + vgetmantpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123 + +// 
CHECK: vgetmantpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x9f,0x26,0x72,0x80,0x7b] + vgetmantpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123 + +// CHECK: vgetmantpbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vgetmantpbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123 + +// CHECK: vgetmantpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123 +// CHECK: encoding: [0x62,0xc3,0x7f,0x2f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + vgetmantpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123 + +// CHECK: vgetmantpbf16 ymm22, word ptr [rip]{1to16}, 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x38,0x26,0x35,0x00,0x00,0x00,0x00,0x7b] + vgetmantpbf16 ymm22, word ptr [rip]{1to16}, 123 + +// CHECK: vgetmantpbf16 ymm22, ymmword ptr [2*rbp - 1024], 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x28,0x26,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vgetmantpbf16 ymm22, ymmword ptr [2*rbp - 1024], 123 + +// CHECK: vgetmantpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0xaf,0x26,0x71,0x7f,0x7b] + vgetmantpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123 + +// CHECK: vgetmantpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0xbf,0x26,0x72,0x80,0x7b] + vgetmantpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123 + +// CHECK: vgetmantpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vgetmantpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123 + +// CHECK: vgetmantpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123 +// CHECK: encoding: [0x62,0xc3,0x7f,0x4f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + vgetmantpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123 + +// CHECK: vgetmantpbf16 zmm22, word ptr [rip]{1to32}, 123 +// CHECK: encoding: 
[0x62,0xe3,0x7f,0x58,0x26,0x35,0x00,0x00,0x00,0x00,0x7b] + vgetmantpbf16 zmm22, word ptr [rip]{1to32}, 123 + +// CHECK: vgetmantpbf16 zmm22, zmmword ptr [2*rbp - 2048], 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x26,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vgetmantpbf16 zmm22, zmmword ptr [2*rbp - 2048], 123 + +// CHECK: vgetmantpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0xcf,0x26,0x71,0x7f,0x7b] + vgetmantpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123 + +// CHECK: vgetmantpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0xdf,0x26,0x72,0x80,0x7b] + vgetmantpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123 + +// CHECK: vmaxpbf16 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5f,0xf0] + vmaxpbf16 ymm22, ymm23, ymm24 + +// CHECK: vmaxpbf16 ymm22 {k7}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5f,0xf0] + vmaxpbf16 ymm22 {k7}, ymm23, ymm24 + +// CHECK: vmaxpbf16 ymm22 {k7} {z}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5f,0xf0] + vmaxpbf16 ymm22 {k7} {z}, ymm23, ymm24 + +// CHECK: vmaxpbf16 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5f,0xf0] + vmaxpbf16 zmm22, zmm23, zmm24 + +// CHECK: vmaxpbf16 zmm22 {k7}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5f,0xf0] + vmaxpbf16 zmm22 {k7}, zmm23, zmm24 + +// CHECK: vmaxpbf16 zmm22 {k7} {z}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5f,0xf0] + vmaxpbf16 zmm22 {k7} {z}, zmm23, zmm24 + +// CHECK: vmaxpbf16 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5f,0xf0] + vmaxpbf16 xmm22, xmm23, xmm24 + +// CHECK: vmaxpbf16 xmm22 {k7}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5f,0xf0] + vmaxpbf16 xmm22 {k7}, xmm23, xmm24 + +// CHECK: vmaxpbf16 xmm22 {k7} {z}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5f,0xf0] + vmaxpbf16 xmm22 {k7} {z}, xmm23, xmm24 + +// CHECK: vmaxpbf16 zmm22, zmm23, zmmword ptr 
[rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmaxpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vmaxpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00] + vmaxpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vmaxpbf16 zmm22, zmm23, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5f,0x35,0x00,0x00,0x00,0x00] + vmaxpbf16 zmm22, zmm23, word ptr [rip]{1to32} + +// CHECK: vmaxpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5f,0x34,0x6d,0x00,0xf8,0xff,0xff] + vmaxpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vmaxpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5f,0x71,0x7f] + vmaxpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vmaxpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5f,0x72,0x80] + vmaxpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} + +// CHECK: vmaxpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmaxpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vmaxpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00] + vmaxpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vmaxpbf16 ymm22, ymm23, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5f,0x35,0x00,0x00,0x00,0x00] + vmaxpbf16 ymm22, ymm23, word ptr [rip]{1to16} + +// CHECK: vmaxpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5f,0x34,0x6d,0x00,0xfc,0xff,0xff] + vmaxpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vmaxpbf16 ymm22 {k7} {z}, ymm23, ymmword 
ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5f,0x71,0x7f] + vmaxpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vmaxpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5f,0x72,0x80] + vmaxpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} + +// CHECK: vmaxpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmaxpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vmaxpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00] + vmaxpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vmaxpbf16 xmm22, xmm23, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5f,0x35,0x00,0x00,0x00,0x00] + vmaxpbf16 xmm22, xmm23, word ptr [rip]{1to8} + +// CHECK: vmaxpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5f,0x34,0x6d,0x00,0xfe,0xff,0xff] + vmaxpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vmaxpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5f,0x71,0x7f] + vmaxpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vmaxpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5f,0x72,0x80] + vmaxpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} + +// CHECK: vminpbf16 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5d,0xf0] + vminpbf16 ymm22, ymm23, ymm24 + +// CHECK: vminpbf16 ymm22 {k7}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5d,0xf0] + vminpbf16 ymm22 {k7}, ymm23, ymm24 + +// CHECK: vminpbf16 ymm22 {k7} {z}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5d,0xf0] + vminpbf16 ymm22 {k7} {z}, ymm23, ymm24 + +// CHECK: vminpbf16 zmm22, zmm23, zmm24 +// CHECK: encoding: 
[0x62,0x85,0x45,0x40,0x5d,0xf0] + vminpbf16 zmm22, zmm23, zmm24 + +// CHECK: vminpbf16 zmm22 {k7}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5d,0xf0] + vminpbf16 zmm22 {k7}, zmm23, zmm24 + +// CHECK: vminpbf16 zmm22 {k7} {z}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5d,0xf0] + vminpbf16 zmm22 {k7} {z}, zmm23, zmm24 + +// CHECK: vminpbf16 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5d,0xf0] + vminpbf16 xmm22, xmm23, xmm24 + +// CHECK: vminpbf16 xmm22 {k7}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5d,0xf0] + vminpbf16 xmm22 {k7}, xmm23, xmm24 + +// CHECK: vminpbf16 xmm22 {k7} {z}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5d,0xf0] + vminpbf16 xmm22 {k7} {z}, xmm23, xmm24 + +// CHECK: vminpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10] + vminpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vminpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00] + vminpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vminpbf16 zmm22, zmm23, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5d,0x35,0x00,0x00,0x00,0x00] + vminpbf16 zmm22, zmm23, word ptr [rip]{1to32} + +// CHECK: vminpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5d,0x34,0x6d,0x00,0xf8,0xff,0xff] + vminpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vminpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5d,0x71,0x7f] + vminpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vminpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5d,0x72,0x80] + vminpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} + +// CHECK: vminpbf16 ymm22, ymm23, ymmword ptr 
[rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10] + vminpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vminpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00] + vminpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vminpbf16 ymm22, ymm23, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5d,0x35,0x00,0x00,0x00,0x00] + vminpbf16 ymm22, ymm23, word ptr [rip]{1to16} + +// CHECK: vminpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5d,0x34,0x6d,0x00,0xfc,0xff,0xff] + vminpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vminpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5d,0x71,0x7f] + vminpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vminpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5d,0x72,0x80] + vminpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} + +// CHECK: vminpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10] + vminpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vminpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00] + vminpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vminpbf16 xmm22, xmm23, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5d,0x35,0x00,0x00,0x00,0x00] + vminpbf16 xmm22, xmm23, word ptr [rip]{1to8} + +// CHECK: vminpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5d,0x34,0x6d,0x00,0xfe,0xff,0xff] + vminpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vminpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr 
[rcx + 2032] +// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5d,0x71,0x7f] + vminpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vminpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5d,0x72,0x80] + vminpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} + +// CHECK: vmulnepbf16 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x85,0x45,0x20,0x59,0xf0] + vmulnepbf16 ymm22, ymm23, ymm24 + +// CHECK: vmulnepbf16 ymm22 {k7}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x85,0x45,0x27,0x59,0xf0] + vmulnepbf16 ymm22 {k7}, ymm23, ymm24 + +// CHECK: vmulnepbf16 ymm22 {k7} {z}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x59,0xf0] + vmulnepbf16 ymm22 {k7} {z}, ymm23, ymm24 + +// CHECK: vmulnepbf16 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x40,0x59,0xf0] + vmulnepbf16 zmm22, zmm23, zmm24 + +// CHECK: vmulnepbf16 zmm22 {k7}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x47,0x59,0xf0] + vmulnepbf16 zmm22 {k7}, zmm23, zmm24 + +// CHECK: vmulnepbf16 zmm22 {k7} {z}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x59,0xf0] + vmulnepbf16 zmm22 {k7} {z}, zmm23, zmm24 + +// CHECK: vmulnepbf16 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x00,0x59,0xf0] + vmulnepbf16 xmm22, xmm23, xmm24 + +// CHECK: vmulnepbf16 xmm22 {k7}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x07,0x59,0xf0] + vmulnepbf16 xmm22 {k7}, xmm23, xmm24 + +// CHECK: vmulnepbf16 xmm22 {k7} {z}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x87,0x59,0xf0] + vmulnepbf16 xmm22 {k7} {z}, xmm23, xmm24 + +// CHECK: vmulnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmulnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vmulnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x59,0xb4,0x80,0x23,0x01,0x00,0x00] + vmulnepbf16 zmm22 {k7}, 
zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vmulnepbf16 zmm22, zmm23, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x59,0x35,0x00,0x00,0x00,0x00] + vmulnepbf16 zmm22, zmm23, word ptr [rip]{1to32} + +// CHECK: vmulnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x59,0x34,0x6d,0x00,0xf8,0xff,0xff] + vmulnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vmulnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x59,0x71,0x7f] + vmulnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vmulnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x59,0x72,0x80] + vmulnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} + +// CHECK: vmulnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmulnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vmulnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x59,0xb4,0x80,0x23,0x01,0x00,0x00] + vmulnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vmulnepbf16 ymm22, ymm23, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x59,0x35,0x00,0x00,0x00,0x00] + vmulnepbf16 ymm22, ymm23, word ptr [rip]{1to16} + +// CHECK: vmulnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x59,0x34,0x6d,0x00,0xfc,0xff,0xff] + vmulnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vmulnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x59,0x71,0x7f] + vmulnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vmulnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x59,0x72,0x80] + vmulnepbf16 ymm22 {k7} {z}, ymm23, 
word ptr [rdx - 256]{1to16} + +// CHECK: vmulnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmulnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vmulnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x59,0xb4,0x80,0x23,0x01,0x00,0x00] + vmulnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vmulnepbf16 xmm22, xmm23, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x59,0x35,0x00,0x00,0x00,0x00] + vmulnepbf16 xmm22, xmm23, word ptr [rip]{1to8} + +// CHECK: vmulnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x59,0x34,0x6d,0x00,0xfe,0xff,0xff] + vmulnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vmulnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x59,0x71,0x7f] + vmulnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vmulnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x59,0x72,0x80] + vmulnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} + +// CHECK: vrcppbf16 xmm22, xmm23 +// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4c,0xf7] + vrcppbf16 xmm22, xmm23 + +// CHECK: vrcppbf16 xmm22 {k7}, xmm23 +// CHECK: encoding: [0x62,0xa6,0x7c,0x0f,0x4c,0xf7] + vrcppbf16 xmm22 {k7}, xmm23 + +// CHECK: vrcppbf16 xmm22 {k7} {z}, xmm23 +// CHECK: encoding: [0x62,0xa6,0x7c,0x8f,0x4c,0xf7] + vrcppbf16 xmm22 {k7} {z}, xmm23 + +// CHECK: vrcppbf16 zmm22, zmm23 +// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4c,0xf7] + vrcppbf16 zmm22, zmm23 + +// CHECK: vrcppbf16 zmm22 {k7}, zmm23 +// CHECK: encoding: [0x62,0xa6,0x7c,0x4f,0x4c,0xf7] + vrcppbf16 zmm22 {k7}, zmm23 + +// CHECK: vrcppbf16 zmm22 {k7} {z}, zmm23 +// CHECK: encoding: [0x62,0xa6,0x7c,0xcf,0x4c,0xf7] + vrcppbf16 zmm22 {k7} {z}, zmm23 + +// CHECK: vrcppbf16 
ymm22, ymm23 +// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4c,0xf7] + vrcppbf16 ymm22, ymm23 + +// CHECK: vrcppbf16 ymm22 {k7}, ymm23 +// CHECK: encoding: [0x62,0xa6,0x7c,0x2f,0x4c,0xf7] + vrcppbf16 ymm22 {k7}, ymm23 + +// CHECK: vrcppbf16 ymm22 {k7} {z}, ymm23 +// CHECK: encoding: [0x62,0xa6,0x7c,0xaf,0x4c,0xf7] + vrcppbf16 ymm22 {k7} {z}, ymm23 + +// CHECK: vrcppbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vrcppbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vrcppbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x7c,0x0f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00] + vrcppbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vrcppbf16 xmm22, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe6,0x7c,0x18,0x4c,0x35,0x00,0x00,0x00,0x00] + vrcppbf16 xmm22, word ptr [rip]{1to8} + +// CHECK: vrcppbf16 xmm22, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe6,0x7c,0x08,0x4c,0x34,0x6d,0x00,0xfe,0xff,0xff] + vrcppbf16 xmm22, xmmword ptr [2*rbp - 512] + +// CHECK: vrcppbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe6,0x7c,0x8f,0x4c,0x71,0x7f] + vrcppbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032] + +// CHECK: vrcppbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe6,0x7c,0x9f,0x4c,0x72,0x80] + vrcppbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8} + +// CHECK: vrcppbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vrcppbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vrcppbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x7c,0x2f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00] + vrcppbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vrcppbf16 ymm22, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe6,0x7c,0x38,0x4c,0x35,0x00,0x00,0x00,0x00] + vrcppbf16 ymm22, word 
ptr [rip]{1to16} + +// CHECK: vrcppbf16 ymm22, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe6,0x7c,0x28,0x4c,0x34,0x6d,0x00,0xfc,0xff,0xff] + vrcppbf16 ymm22, ymmword ptr [2*rbp - 1024] + +// CHECK: vrcppbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe6,0x7c,0xaf,0x4c,0x71,0x7f] + vrcppbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064] + +// CHECK: vrcppbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe6,0x7c,0xbf,0x4c,0x72,0x80] + vrcppbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16} + +// CHECK: vrcppbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vrcppbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vrcppbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x7c,0x4f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00] + vrcppbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vrcppbf16 zmm22, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe6,0x7c,0x58,0x4c,0x35,0x00,0x00,0x00,0x00] + vrcppbf16 zmm22, word ptr [rip]{1to32} + +// CHECK: vrcppbf16 zmm22, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe6,0x7c,0x48,0x4c,0x34,0x6d,0x00,0xf8,0xff,0xff] + vrcppbf16 zmm22, zmmword ptr [2*rbp - 2048] + +// CHECK: vrcppbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe6,0x7c,0xcf,0x4c,0x71,0x7f] + vrcppbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128] + +// CHECK: vrcppbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe6,0x7c,0xdf,0x4c,0x72,0x80] + vrcppbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32} + +// CHECK: vreducenepbf16 zmm22, zmm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x56,0xf7,0x7b] + vreducenepbf16 zmm22, zmm23, 123 + +// CHECK: vreducenepbf16 zmm22 {k7}, zmm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x4f,0x56,0xf7,0x7b] + vreducenepbf16 zmm22 {k7}, zmm23, 123 + +// CHECK: vreducenepbf16 zmm22 {k7} {z}, zmm23, 
123 +// CHECK: encoding: [0x62,0xa3,0x7f,0xcf,0x56,0xf7,0x7b] + vreducenepbf16 zmm22 {k7} {z}, zmm23, 123 + +// CHECK: vreducenepbf16 ymm22, ymm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x56,0xf7,0x7b] + vreducenepbf16 ymm22, ymm23, 123 + +// CHECK: vreducenepbf16 ymm22 {k7}, ymm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x2f,0x56,0xf7,0x7b] + vreducenepbf16 ymm22 {k7}, ymm23, 123 + +// CHECK: vreducenepbf16 ymm22 {k7} {z}, ymm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0xaf,0x56,0xf7,0x7b] + vreducenepbf16 ymm22 {k7} {z}, ymm23, 123 + +// CHECK: vreducenepbf16 xmm22, xmm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x56,0xf7,0x7b] + vreducenepbf16 xmm22, xmm23, 123 + +// CHECK: vreducenepbf16 xmm22 {k7}, xmm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x0f,0x56,0xf7,0x7b] + vreducenepbf16 xmm22 {k7}, xmm23, 123 + +// CHECK: vreducenepbf16 xmm22 {k7} {z}, xmm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x8f,0x56,0xf7,0x7b] + vreducenepbf16 xmm22 {k7} {z}, xmm23, 123 + +// CHECK: vreducenepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vreducenepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123 + +// CHECK: vreducenepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123 +// CHECK: encoding: [0x62,0xc3,0x7f,0x0f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + vreducenepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123 + +// CHECK: vreducenepbf16 xmm22, word ptr [rip]{1to8}, 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x18,0x56,0x35,0x00,0x00,0x00,0x00,0x7b] + vreducenepbf16 xmm22, word ptr [rip]{1to8}, 123 + +// CHECK: vreducenepbf16 xmm22, xmmword ptr [2*rbp - 512], 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x08,0x56,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vreducenepbf16 xmm22, xmmword ptr [2*rbp - 512], 123 + +// CHECK: vreducenepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x8f,0x56,0x71,0x7f,0x7b] + vreducenepbf16 
xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123 + +// CHECK: vreducenepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x9f,0x56,0x72,0x80,0x7b] + vreducenepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123 + +// CHECK: vreducenepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vreducenepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123 + +// CHECK: vreducenepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123 +// CHECK: encoding: [0x62,0xc3,0x7f,0x2f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + vreducenepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123 + +// CHECK: vreducenepbf16 ymm22, word ptr [rip]{1to16}, 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x38,0x56,0x35,0x00,0x00,0x00,0x00,0x7b] + vreducenepbf16 ymm22, word ptr [rip]{1to16}, 123 + +// CHECK: vreducenepbf16 ymm22, ymmword ptr [2*rbp - 1024], 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x28,0x56,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vreducenepbf16 ymm22, ymmword ptr [2*rbp - 1024], 123 + +// CHECK: vreducenepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0xaf,0x56,0x71,0x7f,0x7b] + vreducenepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123 + +// CHECK: vreducenepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0xbf,0x56,0x72,0x80,0x7b] + vreducenepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123 + +// CHECK: vreducenepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vreducenepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123 + +// CHECK: vreducenepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123 +// CHECK: encoding: [0x62,0xc3,0x7f,0x4f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + vreducenepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123 + +// CHECK: vreducenepbf16 zmm22, word 
ptr [rip]{1to32}, 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x58,0x56,0x35,0x00,0x00,0x00,0x00,0x7b] + vreducenepbf16 zmm22, word ptr [rip]{1to32}, 123 + +// CHECK: vreducenepbf16 zmm22, zmmword ptr [2*rbp - 2048], 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x56,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vreducenepbf16 zmm22, zmmword ptr [2*rbp - 2048], 123 + +// CHECK: vreducenepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0xcf,0x56,0x71,0x7f,0x7b] + vreducenepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123 + +// CHECK: vreducenepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0xdf,0x56,0x72,0x80,0x7b] + vreducenepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123 + +// CHECK: vrndscalenepbf16 zmm22, zmm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x08,0xf7,0x7b] + vrndscalenepbf16 zmm22, zmm23, 123 + +// CHECK: vrndscalenepbf16 zmm22 {k7}, zmm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x4f,0x08,0xf7,0x7b] + vrndscalenepbf16 zmm22 {k7}, zmm23, 123 + +// CHECK: vrndscalenepbf16 zmm22 {k7} {z}, zmm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0xcf,0x08,0xf7,0x7b] + vrndscalenepbf16 zmm22 {k7} {z}, zmm23, 123 + +// CHECK: vrndscalenepbf16 ymm22, ymm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x08,0xf7,0x7b] + vrndscalenepbf16 ymm22, ymm23, 123 + +// CHECK: vrndscalenepbf16 ymm22 {k7}, ymm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x2f,0x08,0xf7,0x7b] + vrndscalenepbf16 ymm22 {k7}, ymm23, 123 + +// CHECK: vrndscalenepbf16 ymm22 {k7} {z}, ymm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0xaf,0x08,0xf7,0x7b] + vrndscalenepbf16 ymm22 {k7} {z}, ymm23, 123 + +// CHECK: vrndscalenepbf16 xmm22, xmm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x08,0xf7,0x7b] + vrndscalenepbf16 xmm22, xmm23, 123 + +// CHECK: vrndscalenepbf16 xmm22 {k7}, xmm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x0f,0x08,0xf7,0x7b] + vrndscalenepbf16 xmm22 {k7}, xmm23, 123 + +// CHECK: 
vrndscalenepbf16 xmm22 {k7} {z}, xmm23, 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x8f,0x08,0xf7,0x7b] + vrndscalenepbf16 xmm22 {k7} {z}, xmm23, 123 + +// CHECK: vrndscalenepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vrndscalenepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123 + +// CHECK: vrndscalenepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123 +// CHECK: encoding: [0x62,0xc3,0x7f,0x0f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + vrndscalenepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123 + +// CHECK: vrndscalenepbf16 xmm22, word ptr [rip]{1to8}, 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x18,0x08,0x35,0x00,0x00,0x00,0x00,0x7b] + vrndscalenepbf16 xmm22, word ptr [rip]{1to8}, 123 + +// CHECK: vrndscalenepbf16 xmm22, xmmword ptr [2*rbp - 512], 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x08,0x08,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vrndscalenepbf16 xmm22, xmmword ptr [2*rbp - 512], 123 + +// CHECK: vrndscalenepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x8f,0x08,0x71,0x7f,0x7b] + vrndscalenepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123 + +// CHECK: vrndscalenepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x9f,0x08,0x72,0x80,0x7b] + vrndscalenepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123 + +// CHECK: vrndscalenepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vrndscalenepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123 + +// CHECK: vrndscalenepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123 +// CHECK: encoding: [0x62,0xc3,0x7f,0x2f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + vrndscalenepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123 + +// CHECK: vrndscalenepbf16 ymm22, word ptr [rip]{1to16}, 123 +// CHECK: encoding: 
[0x62,0xe3,0x7f,0x38,0x08,0x35,0x00,0x00,0x00,0x00,0x7b] + vrndscalenepbf16 ymm22, word ptr [rip]{1to16}, 123 + +// CHECK: vrndscalenepbf16 ymm22, ymmword ptr [2*rbp - 1024], 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x28,0x08,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vrndscalenepbf16 ymm22, ymmword ptr [2*rbp - 1024], 123 + +// CHECK: vrndscalenepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0xaf,0x08,0x71,0x7f,0x7b] + vrndscalenepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123 + +// CHECK: vrndscalenepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0xbf,0x08,0x72,0x80,0x7b] + vrndscalenepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123 + +// CHECK: vrndscalenepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123 +// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vrndscalenepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123 + +// CHECK: vrndscalenepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123 +// CHECK: encoding: [0x62,0xc3,0x7f,0x4f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + vrndscalenepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123 + +// CHECK: vrndscalenepbf16 zmm22, word ptr [rip]{1to32}, 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x58,0x08,0x35,0x00,0x00,0x00,0x00,0x7b] + vrndscalenepbf16 zmm22, word ptr [rip]{1to32}, 123 + +// CHECK: vrndscalenepbf16 zmm22, zmmword ptr [2*rbp - 2048], 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x08,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vrndscalenepbf16 zmm22, zmmword ptr [2*rbp - 2048], 123 + +// CHECK: vrndscalenepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0xcf,0x08,0x71,0x7f,0x7b] + vrndscalenepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123 + +// CHECK: vrndscalenepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123 +// CHECK: encoding: [0x62,0xe3,0x7f,0xdf,0x08,0x72,0x80,0x7b] + vrndscalenepbf16 zmm22 {k7} {z}, word ptr [rdx - 
256]{1to32}, 123 + +// CHECK: vrsqrtpbf16 xmm22, xmm23 +// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4e,0xf7] + vrsqrtpbf16 xmm22, xmm23 + +// CHECK: vrsqrtpbf16 xmm22 {k7}, xmm23 +// CHECK: encoding: [0x62,0xa6,0x7c,0x0f,0x4e,0xf7] + vrsqrtpbf16 xmm22 {k7}, xmm23 + +// CHECK: vrsqrtpbf16 xmm22 {k7} {z}, xmm23 +// CHECK: encoding: [0x62,0xa6,0x7c,0x8f,0x4e,0xf7] + vrsqrtpbf16 xmm22 {k7} {z}, xmm23 + +// CHECK: vrsqrtpbf16 zmm22, zmm23 +// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4e,0xf7] + vrsqrtpbf16 zmm22, zmm23 + +// CHECK: vrsqrtpbf16 zmm22 {k7}, zmm23 +// CHECK: encoding: [0x62,0xa6,0x7c,0x4f,0x4e,0xf7] + vrsqrtpbf16 zmm22 {k7}, zmm23 + +// CHECK: vrsqrtpbf16 zmm22 {k7} {z}, zmm23 +// CHECK: encoding: [0x62,0xa6,0x7c,0xcf,0x4e,0xf7] + vrsqrtpbf16 zmm22 {k7} {z}, zmm23 + +// CHECK: vrsqrtpbf16 ymm22, ymm23 +// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4e,0xf7] + vrsqrtpbf16 ymm22, ymm23 + +// CHECK: vrsqrtpbf16 ymm22 {k7}, ymm23 +// CHECK: encoding: [0x62,0xa6,0x7c,0x2f,0x4e,0xf7] + vrsqrtpbf16 ymm22 {k7}, ymm23 + +// CHECK: vrsqrtpbf16 ymm22 {k7} {z}, ymm23 +// CHECK: encoding: [0x62,0xa6,0x7c,0xaf,0x4e,0xf7] + vrsqrtpbf16 ymm22 {k7} {z}, ymm23 + +// CHECK: vrsqrtpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vrsqrtpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vrsqrtpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x7c,0x0f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00] + vrsqrtpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vrsqrtpbf16 xmm22, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe6,0x7c,0x18,0x4e,0x35,0x00,0x00,0x00,0x00] + vrsqrtpbf16 xmm22, word ptr [rip]{1to8} + +// CHECK: vrsqrtpbf16 xmm22, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe6,0x7c,0x08,0x4e,0x34,0x6d,0x00,0xfe,0xff,0xff] + vrsqrtpbf16 xmm22, xmmword ptr [2*rbp - 512] + +// CHECK: vrsqrtpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032] +// 
CHECK: encoding: [0x62,0xe6,0x7c,0x8f,0x4e,0x71,0x7f] + vrsqrtpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032] + +// CHECK: vrsqrtpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe6,0x7c,0x9f,0x4e,0x72,0x80] + vrsqrtpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8} + +// CHECK: vrsqrtpbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vrsqrtpbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vrsqrtpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x7c,0x2f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00] + vrsqrtpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vrsqrtpbf16 ymm22, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe6,0x7c,0x38,0x4e,0x35,0x00,0x00,0x00,0x00] + vrsqrtpbf16 ymm22, word ptr [rip]{1to16} + +// CHECK: vrsqrtpbf16 ymm22, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe6,0x7c,0x28,0x4e,0x34,0x6d,0x00,0xfc,0xff,0xff] + vrsqrtpbf16 ymm22, ymmword ptr [2*rbp - 1024] + +// CHECK: vrsqrtpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe6,0x7c,0xaf,0x4e,0x71,0x7f] + vrsqrtpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064] + +// CHECK: vrsqrtpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe6,0x7c,0xbf,0x4e,0x72,0x80] + vrsqrtpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16} + +// CHECK: vrsqrtpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vrsqrtpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vrsqrtpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x7c,0x4f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00] + vrsqrtpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vrsqrtpbf16 zmm22, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe6,0x7c,0x58,0x4e,0x35,0x00,0x00,0x00,0x00] + vrsqrtpbf16 zmm22, word ptr 
[rip]{1to32} + +// CHECK: vrsqrtpbf16 zmm22, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe6,0x7c,0x48,0x4e,0x34,0x6d,0x00,0xf8,0xff,0xff] + vrsqrtpbf16 zmm22, zmmword ptr [2*rbp - 2048] + +// CHECK: vrsqrtpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe6,0x7c,0xcf,0x4e,0x71,0x7f] + vrsqrtpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128] + +// CHECK: vrsqrtpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe6,0x7c,0xdf,0x4e,0x72,0x80] + vrsqrtpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32} + +// CHECK: vscalefpbf16 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x20,0x2c,0xf0] + vscalefpbf16 ymm22, ymm23, ymm24 + +// CHECK: vscalefpbf16 ymm22 {k7}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0x27,0x2c,0xf0] + vscalefpbf16 ymm22 {k7}, ymm23, ymm24 + +// CHECK: vscalefpbf16 ymm22 {k7} {z}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x2c,0xf0] + vscalefpbf16 ymm22 {k7} {z}, ymm23, ymm24 + +// CHECK: vscalefpbf16 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x40,0x2c,0xf0] + vscalefpbf16 zmm22, zmm23, zmm24 + +// CHECK: vscalefpbf16 zmm22 {k7}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x47,0x2c,0xf0] + vscalefpbf16 zmm22 {k7}, zmm23, zmm24 + +// CHECK: vscalefpbf16 zmm22 {k7} {z}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x2c,0xf0] + vscalefpbf16 zmm22 {k7} {z}, zmm23, zmm24 + +// CHECK: vscalefpbf16 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x00,0x2c,0xf0] + vscalefpbf16 xmm22, xmm23, xmm24 + +// CHECK: vscalefpbf16 xmm22 {k7}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x07,0x2c,0xf0] + vscalefpbf16 xmm22 {k7}, xmm23, xmm24 + +// CHECK: vscalefpbf16 xmm22 {k7} {z}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x86,0x44,0x87,0x2c,0xf0] + vscalefpbf16 xmm22 {k7} {z}, xmm23, xmm24 + +// CHECK: vscalefpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: 
[0x62,0xa6,0x44,0x40,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vscalefpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vscalefpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00] + vscalefpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vscalefpbf16 zmm22, zmm23, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x2c,0x35,0x00,0x00,0x00,0x00] + vscalefpbf16 zmm22, zmm23, word ptr [rip]{1to32} + +// CHECK: vscalefpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x2c,0x34,0x6d,0x00,0xf8,0xff,0xff] + vscalefpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vscalefpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x2c,0x71,0x7f] + vscalefpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vscalefpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x2c,0x72,0x80] + vscalefpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} + +// CHECK: vscalefpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vscalefpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vscalefpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00] + vscalefpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vscalefpbf16 ymm22, ymm23, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x2c,0x35,0x00,0x00,0x00,0x00] + vscalefpbf16 ymm22, ymm23, word ptr [rip]{1to16} + +// CHECK: vscalefpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x2c,0x34,0x6d,0x00,0xfc,0xff,0xff] + vscalefpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vscalefpbf16 ymm22 {k7} {z}, 
ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x2c,0x71,0x7f] + vscalefpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vscalefpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x2c,0x72,0x80] + vscalefpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} + +// CHECK: vscalefpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vscalefpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vscalefpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00] + vscalefpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vscalefpbf16 xmm22, xmm23, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x2c,0x35,0x00,0x00,0x00,0x00] + vscalefpbf16 xmm22, xmm23, word ptr [rip]{1to8} + +// CHECK: vscalefpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x2c,0x34,0x6d,0x00,0xfe,0xff,0xff] + vscalefpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vscalefpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x2c,0x71,0x7f] + vscalefpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vscalefpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x2c,0x72,0x80] + vscalefpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} + +// CHECK: vsqrtnepbf16 xmm22, xmm23 +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x51,0xf7] + vsqrtnepbf16 xmm22, xmm23 + +// CHECK: vsqrtnepbf16 xmm22 {k7}, xmm23 +// CHECK: encoding: [0x62,0xa5,0x7d,0x0f,0x51,0xf7] + vsqrtnepbf16 xmm22 {k7}, xmm23 + +// CHECK: vsqrtnepbf16 xmm22 {k7} {z}, xmm23 +// CHECK: encoding: [0x62,0xa5,0x7d,0x8f,0x51,0xf7] + vsqrtnepbf16 xmm22 {k7} {z}, xmm23 + +// CHECK: vsqrtnepbf16 zmm22, zmm23 
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x51,0xf7] + vsqrtnepbf16 zmm22, zmm23 + +// CHECK: vsqrtnepbf16 zmm22 {k7}, zmm23 +// CHECK: encoding: [0x62,0xa5,0x7d,0x4f,0x51,0xf7] + vsqrtnepbf16 zmm22 {k7}, zmm23 + +// CHECK: vsqrtnepbf16 zmm22 {k7} {z}, zmm23 +// CHECK: encoding: [0x62,0xa5,0x7d,0xcf,0x51,0xf7] + vsqrtnepbf16 zmm22 {k7} {z}, zmm23 + +// CHECK: vsqrtnepbf16 ymm22, ymm23 +// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x51,0xf7] + vsqrtnepbf16 ymm22, ymm23 + +// CHECK: vsqrtnepbf16 ymm22 {k7}, ymm23 +// CHECK: encoding: [0x62,0xa5,0x7d,0x2f,0x51,0xf7] + vsqrtnepbf16 ymm22 {k7}, ymm23 + +// CHECK: vsqrtnepbf16 ymm22 {k7} {z}, ymm23 +// CHECK: encoding: [0x62,0xa5,0x7d,0xaf,0x51,0xf7] + vsqrtnepbf16 ymm22 {k7} {z}, ymm23 + +// CHECK: vsqrtnepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsqrtnepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vsqrtnepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x7d,0x0f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00] + vsqrtnepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vsqrtnepbf16 xmm22, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe5,0x7d,0x18,0x51,0x35,0x00,0x00,0x00,0x00] + vsqrtnepbf16 xmm22, word ptr [rip]{1to8} + +// CHECK: vsqrtnepbf16 xmm22, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff] + vsqrtnepbf16 xmm22, xmmword ptr [2*rbp - 512] + +// CHECK: vsqrtnepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe5,0x7d,0x8f,0x51,0x71,0x7f] + vsqrtnepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032] + +// CHECK: vsqrtnepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe5,0x7d,0x9f,0x51,0x72,0x80] + vsqrtnepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8} + +// CHECK: vsqrtnepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: 
[0x62,0xa5,0x7d,0x28,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsqrtnepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vsqrtnepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x7d,0x2f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00] + vsqrtnepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vsqrtnepbf16 ymm22, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe5,0x7d,0x38,0x51,0x35,0x00,0x00,0x00,0x00] + vsqrtnepbf16 ymm22, word ptr [rip]{1to16} + +// CHECK: vsqrtnepbf16 ymm22, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe5,0x7d,0x28,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff] + vsqrtnepbf16 ymm22, ymmword ptr [2*rbp - 1024] + +// CHECK: vsqrtnepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe5,0x7d,0xaf,0x51,0x71,0x7f] + vsqrtnepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064] + +// CHECK: vsqrtnepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe5,0x7d,0xbf,0x51,0x72,0x80] + vsqrtnepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16} + +// CHECK: vsqrtnepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsqrtnepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vsqrtnepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x7d,0x4f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00] + vsqrtnepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vsqrtnepbf16 zmm22, word ptr [rip]{1to32} +// CHECK: encoding: [0x62,0xe5,0x7d,0x58,0x51,0x35,0x00,0x00,0x00,0x00] + vsqrtnepbf16 zmm22, word ptr [rip]{1to32} + +// CHECK: vsqrtnepbf16 zmm22, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe5,0x7d,0x48,0x51,0x34,0x6d,0x00,0xf8,0xff,0xff] + vsqrtnepbf16 zmm22, zmmword ptr [2*rbp - 2048] + +// CHECK: vsqrtnepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe5,0x7d,0xcf,0x51,0x71,0x7f] + vsqrtnepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 
8128] + +// CHECK: vsqrtnepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe5,0x7d,0xdf,0x51,0x72,0x80] + vsqrtnepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32} + +// CHECK: vsubnepbf16 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5c,0xf0] + vsubnepbf16 ymm22, ymm23, ymm24 + +// CHECK: vsubnepbf16 ymm22 {k7}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5c,0xf0] + vsubnepbf16 ymm22 {k7}, ymm23, ymm24 + +// CHECK: vsubnepbf16 ymm22 {k7} {z}, ymm23, ymm24 +// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5c,0xf0] + vsubnepbf16 ymm22 {k7} {z}, ymm23, ymm24 + +// CHECK: vsubnepbf16 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5c,0xf0] + vsubnepbf16 zmm22, zmm23, zmm24 + +// CHECK: vsubnepbf16 zmm22 {k7}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5c,0xf0] + vsubnepbf16 zmm22 {k7}, zmm23, zmm24 + +// CHECK: vsubnepbf16 zmm22 {k7} {z}, zmm23, zmm24 +// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5c,0xf0] + vsubnepbf16 zmm22 {k7} {z}, zmm23, zmm24 + +// CHECK: vsubnepbf16 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5c,0xf0] + vsubnepbf16 xmm22, xmm23, xmm24 + +// CHECK: vsubnepbf16 xmm22 {k7}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5c,0xf0] + vsubnepbf16 xmm22 {k7}, xmm23, xmm24 + +// CHECK: vsubnepbf16 xmm22 {k7} {z}, xmm23, xmm24 +// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5c,0xf0] + vsubnepbf16 xmm22 {k7} {z}, xmm23, xmm24 + +// CHECK: vsubnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsubnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vsubnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00] + vsubnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vsubnepbf16 zmm22, zmm23, word ptr [rip]{1to32} +// CHECK: encoding: 
[0x62,0xe5,0x45,0x50,0x5c,0x35,0x00,0x00,0x00,0x00] + vsubnepbf16 zmm22, zmm23, word ptr [rip]{1to32} + +// CHECK: vsubnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5c,0x34,0x6d,0x00,0xf8,0xff,0xff] + vsubnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vsubnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5c,0x71,0x7f] + vsubnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vsubnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} +// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5c,0x72,0x80] + vsubnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32} + +// CHECK: vsubnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsubnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vsubnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00] + vsubnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vsubnepbf16 ymm22, ymm23, word ptr [rip]{1to16} +// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5c,0x35,0x00,0x00,0x00,0x00] + vsubnepbf16 ymm22, ymm23, word ptr [rip]{1to16} + +// CHECK: vsubnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5c,0x34,0x6d,0x00,0xfc,0xff,0xff] + vsubnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vsubnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5c,0x71,0x7f] + vsubnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vsubnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} +// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5c,0x72,0x80] + vsubnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16} + +// CHECK: vsubnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: 
encoding: [0x62,0xa5,0x45,0x00,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsubnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vsubnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00] + vsubnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vsubnepbf16 xmm22, xmm23, word ptr [rip]{1to8} +// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5c,0x35,0x00,0x00,0x00,0x00] + vsubnepbf16 xmm22, xmm23, word ptr [rip]{1to8} + +// CHECK: vsubnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5c,0x34,0x6d,0x00,0xfe,0xff,0xff] + vsubnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vsubnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5c,0x71,0x7f] + vsubnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vsubnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} +// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5c,0x72,0x80] + vsubnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8} + diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index b88abbb461d087..286fb4904870c2 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -1176,6 +1176,8 @@ static const X86FoldTableEntry Table1[] = { {X86::VCOMISSZrr_Int, X86::VCOMISSZrm_Int, TB_NO_REVERSE}, {X86::VCOMISSrr, X86::VCOMISSrm, 0}, {X86::VCOMISSrr_Int, X86::VCOMISSrm_Int, TB_NO_REVERSE}, + {X86::VCOMSBF16Zrr, X86::VCOMSBF16Zrm, 0}, + {X86::VCOMSBF16Zrr_Int, X86::VCOMSBF16Zrm_Int, TB_NO_REVERSE}, {X86::VCVTDQ2PDYrr, X86::VCVTDQ2PDYrm, 0}, {X86::VCVTDQ2PDZ128rr, X86::VCVTDQ2PDZ128rm, TB_NO_REVERSE}, {X86::VCVTDQ2PDZ256rr, X86::VCVTDQ2PDZ256rm, 0}, @@ -1461,6 +1463,9 @@ static const X86FoldTableEntry Table1[] = { {X86::VEXPANDPSZ128rr, X86::VEXPANDPSZ128rm, TB_NO_REVERSE}, {X86::VEXPANDPSZ256rr, 
X86::VEXPANDPSZ256rm, TB_NO_REVERSE}, {X86::VEXPANDPSZrr, X86::VEXPANDPSZrm, TB_NO_REVERSE}, + {X86::VFPCLASSPBF16Z128rr, X86::VFPCLASSPBF16Z128rm, 0}, + {X86::VFPCLASSPBF16Z256rr, X86::VFPCLASSPBF16Z256rm, 0}, + {X86::VFPCLASSPBF16Zrr, X86::VFPCLASSPBF16Zrm, 0}, {X86::VFPCLASSPDZ128rr, X86::VFPCLASSPDZ128rm, 0}, {X86::VFPCLASSPDZ256rr, X86::VFPCLASSPDZ256rm, 0}, {X86::VFPCLASSPDZrr, X86::VFPCLASSPDZrm, 0}, @@ -1479,6 +1484,9 @@ static const X86FoldTableEntry Table1[] = { {X86::VFRCZPSrr, X86::VFRCZPSrm, 0}, {X86::VFRCZSDrr, X86::VFRCZSDrm, TB_NO_REVERSE}, {X86::VFRCZSSrr, X86::VFRCZSSrm, TB_NO_REVERSE}, + {X86::VGETEXPPBF16Z128r, X86::VGETEXPPBF16Z128m, 0}, + {X86::VGETEXPPBF16Z256r, X86::VGETEXPPBF16Z256m, 0}, + {X86::VGETEXPPBF16Zr, X86::VGETEXPPBF16Zm, 0}, {X86::VGETEXPPDZ128r, X86::VGETEXPPDZ128m, 0}, {X86::VGETEXPPDZ256r, X86::VGETEXPPDZ256m, 0}, {X86::VGETEXPPDZr, X86::VGETEXPPDZm, 0}, @@ -1488,6 +1496,9 @@ static const X86FoldTableEntry Table1[] = { {X86::VGETEXPPSZ128r, X86::VGETEXPPSZ128m, 0}, {X86::VGETEXPPSZ256r, X86::VGETEXPPSZ256m, 0}, {X86::VGETEXPPSZr, X86::VGETEXPPSZm, 0}, + {X86::VGETMANTPBF16Z128rri, X86::VGETMANTPBF16Z128rmi, 0}, + {X86::VGETMANTPBF16Z256rri, X86::VGETMANTPBF16Z256rmi, 0}, + {X86::VGETMANTPBF16Zrri, X86::VGETMANTPBF16Zrmi, 0}, {X86::VGETMANTPDZ128rri, X86::VGETMANTPDZ128rmi, 0}, {X86::VGETMANTPDZ256rri, X86::VGETMANTPDZ256rmi, 0}, {X86::VGETMANTPDZrri, X86::VGETMANTPDZrmi, 0}, @@ -1821,11 +1832,17 @@ static const X86FoldTableEntry Table1[] = { {X86::VRCP14PSZr, X86::VRCP14PSZm, 0}, {X86::VRCP28PDZr, X86::VRCP28PDZm, 0}, {X86::VRCP28PSZr, X86::VRCP28PSZm, 0}, + {X86::VRCPPBF16Z128r, X86::VRCPPBF16Z128m, 0}, + {X86::VRCPPBF16Z256r, X86::VRCPPBF16Z256m, 0}, + {X86::VRCPPBF16Zr, X86::VRCPPBF16Zm, 0}, {X86::VRCPPHZ128r, X86::VRCPPHZ128m, 0}, {X86::VRCPPHZ256r, X86::VRCPPHZ256m, 0}, {X86::VRCPPHZr, X86::VRCPPHZm, 0}, {X86::VRCPPSYr, X86::VRCPPSYm, 0}, {X86::VRCPPSr, X86::VRCPPSm, 0}, + {X86::VREDUCENEPBF16Z128rri, 
X86::VREDUCENEPBF16Z128rmi, 0}, + {X86::VREDUCENEPBF16Z256rri, X86::VREDUCENEPBF16Z256rmi, 0}, + {X86::VREDUCENEPBF16Zrri, X86::VREDUCENEPBF16Zrmi, 0}, {X86::VREDUCEPDZ128rri, X86::VREDUCEPDZ128rmi, 0}, {X86::VREDUCEPDZ256rri, X86::VREDUCEPDZ256rmi, 0}, {X86::VREDUCEPDZrri, X86::VREDUCEPDZrmi, 0}, @@ -1835,6 +1852,9 @@ static const X86FoldTableEntry Table1[] = { {X86::VREDUCEPSZ128rri, X86::VREDUCEPSZ128rmi, 0}, {X86::VREDUCEPSZ256rri, X86::VREDUCEPSZ256rmi, 0}, {X86::VREDUCEPSZrri, X86::VREDUCEPSZrmi, 0}, + {X86::VRNDSCALENEPBF16Z128rri, X86::VRNDSCALENEPBF16Z128rmi, 0}, + {X86::VRNDSCALENEPBF16Z256rri, X86::VRNDSCALENEPBF16Z256rmi, 0}, + {X86::VRNDSCALENEPBF16Zrri, X86::VRNDSCALENEPBF16Zrmi, 0}, {X86::VRNDSCALEPDZ128rri, X86::VRNDSCALEPDZ128rmi, 0}, {X86::VRNDSCALEPDZ256rri, X86::VRNDSCALEPDZ256rmi, 0}, {X86::VRNDSCALEPDZrri, X86::VRNDSCALEPDZrmi, 0}, @@ -1856,11 +1876,17 @@ static const X86FoldTableEntry Table1[] = { {X86::VRSQRT14PSZr, X86::VRSQRT14PSZm, 0}, {X86::VRSQRT28PDZr, X86::VRSQRT28PDZm, 0}, {X86::VRSQRT28PSZr, X86::VRSQRT28PSZm, 0}, + {X86::VRSQRTPBF16Z128r, X86::VRSQRTPBF16Z128m, 0}, + {X86::VRSQRTPBF16Z256r, X86::VRSQRTPBF16Z256m, 0}, + {X86::VRSQRTPBF16Zr, X86::VRSQRTPBF16Zm, 0}, {X86::VRSQRTPHZ128r, X86::VRSQRTPHZ128m, 0}, {X86::VRSQRTPHZ256r, X86::VRSQRTPHZ256m, 0}, {X86::VRSQRTPHZr, X86::VRSQRTPHZm, 0}, {X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0}, {X86::VRSQRTPSr, X86::VRSQRTPSm, 0}, + {X86::VSQRTNEPBF16Z128r, X86::VSQRTNEPBF16Z128m, 0}, + {X86::VSQRTNEPBF16Z256r, X86::VSQRTNEPBF16Z256m, 0}, + {X86::VSQRTNEPBF16Zr, X86::VSQRTNEPBF16Zm, 0}, {X86::VSQRTPDYr, X86::VSQRTPDYm, 0}, {X86::VSQRTPDZ128r, X86::VSQRTPDZ128m, 0}, {X86::VSQRTPDZ256r, X86::VSQRTPDZ256m, 0}, @@ -2335,6 +2361,9 @@ static const X86FoldTableEntry Table2[] = { {X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16}, {X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16}, {X86::UNPCKLPSrr, X86::UNPCKLPSrm, TB_ALIGN_16}, + {X86::VADDNEPBF16Z128rr, X86::VADDNEPBF16Z128rm, 0}, + {X86::VADDNEPBF16Z256rr, 
X86::VADDNEPBF16Z256rm, 0}, + {X86::VADDNEPBF16Zrr, X86::VADDNEPBF16Zrm, 0}, {X86::VADDPDYrr, X86::VADDPDYrm, 0}, {X86::VADDPDZ128rr, X86::VADDPDZ128rm, 0}, {X86::VADDPDZ256rr, X86::VADDPDZ256rm, 0}, @@ -2432,6 +2461,9 @@ static const X86FoldTableEntry Table2[] = { {X86::VBROADCASTSSZ128rrkz, X86::VBROADCASTSSZ128rmkz, TB_NO_REVERSE}, {X86::VBROADCASTSSZ256rrkz, X86::VBROADCASTSSZ256rmkz, TB_NO_REVERSE}, {X86::VBROADCASTSSZrrkz, X86::VBROADCASTSSZrmkz, TB_NO_REVERSE}, + {X86::VCMPPBF16Z128rri, X86::VCMPPBF16Z128rmi, 0}, + {X86::VCMPPBF16Z256rri, X86::VCMPPBF16Z256rmi, 0}, + {X86::VCMPPBF16Zrri, X86::VCMPPBF16Zrmi, 0}, {X86::VCMPPDYrri, X86::VCMPPDYrmi, 0}, {X86::VCMPPDZ128rri, X86::VCMPPDZ128rmi, 0}, {X86::VCMPPDZ256rri, X86::VCMPPDZ256rmi, 0}, @@ -2737,6 +2769,9 @@ static const X86FoldTableEntry Table2[] = { {X86::VDBPSADBWZ128rri, X86::VDBPSADBWZ128rmi, 0}, {X86::VDBPSADBWZ256rri, X86::VDBPSADBWZ256rmi, 0}, {X86::VDBPSADBWZrri, X86::VDBPSADBWZrmi, 0}, + {X86::VDIVNEPBF16Z128rr, X86::VDIVNEPBF16Z128rm, 0}, + {X86::VDIVNEPBF16Z256rr, X86::VDIVNEPBF16Z256rm, 0}, + {X86::VDIVNEPBF16Zrr, X86::VDIVNEPBF16Zrm, 0}, {X86::VDIVPDYrr, X86::VDIVPDYrm, 0}, {X86::VDIVPDZ128rr, X86::VDIVPDZ128rm, 0}, {X86::VDIVPDZ256rr, X86::VDIVPDZ256rm, 0}, @@ -2819,6 +2854,9 @@ static const X86FoldTableEntry Table2[] = { {X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4mr_Int, TB_NO_REVERSE}, {X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, 0}, {X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4mr_Int, TB_NO_REVERSE}, + {X86::VFPCLASSPBF16Z128rrk, X86::VFPCLASSPBF16Z128rmk, 0}, + {X86::VFPCLASSPBF16Z256rrk, X86::VFPCLASSPBF16Z256rmk, 0}, + {X86::VFPCLASSPBF16Zrrk, X86::VFPCLASSPBF16Zrmk, 0}, {X86::VFPCLASSPDZ128rrk, X86::VFPCLASSPDZ128rmk, 0}, {X86::VFPCLASSPDZ256rrk, X86::VFPCLASSPDZ256rmk, 0}, {X86::VFPCLASSPDZrrk, X86::VFPCLASSPDZrmk, 0}, @@ -2831,6 +2869,9 @@ static const X86FoldTableEntry Table2[] = { {X86::VFPCLASSSDZrrk, X86::VFPCLASSSDZrmk, TB_NO_REVERSE}, {X86::VFPCLASSSHZrrk, X86::VFPCLASSSHZrmk, TB_NO_REVERSE}, 
{X86::VFPCLASSSSZrrk, X86::VFPCLASSSSZrmk, TB_NO_REVERSE}, + {X86::VGETEXPPBF16Z128rkz, X86::VGETEXPPBF16Z128mkz, 0}, + {X86::VGETEXPPBF16Z256rkz, X86::VGETEXPPBF16Z256mkz, 0}, + {X86::VGETEXPPBF16Zrkz, X86::VGETEXPPBF16Zmkz, 0}, {X86::VGETEXPPDZ128rkz, X86::VGETEXPPDZ128mkz, 0}, {X86::VGETEXPPDZ256rkz, X86::VGETEXPPDZ256mkz, 0}, {X86::VGETEXPPDZrkz, X86::VGETEXPPDZmkz, 0}, @@ -2843,6 +2884,9 @@ static const X86FoldTableEntry Table2[] = { {X86::VGETEXPSDZr, X86::VGETEXPSDZm, TB_NO_REVERSE}, {X86::VGETEXPSHZr, X86::VGETEXPSHZm, TB_NO_REVERSE}, {X86::VGETEXPSSZr, X86::VGETEXPSSZm, TB_NO_REVERSE}, + {X86::VGETMANTPBF16Z128rrikz, X86::VGETMANTPBF16Z128rmikz, 0}, + {X86::VGETMANTPBF16Z256rrikz, X86::VGETMANTPBF16Z256rmikz, 0}, + {X86::VGETMANTPBF16Zrrikz, X86::VGETMANTPBF16Zrmikz, 0}, {X86::VGETMANTPDZ128rrikz, X86::VGETMANTPDZ128rmikz, 0}, {X86::VGETMANTPDZ256rrikz, X86::VGETMANTPDZ256rmikz, 0}, {X86::VGETMANTPDZrrikz, X86::VGETMANTPDZrmikz, 0}, @@ -2910,6 +2954,9 @@ static const X86FoldTableEntry Table2[] = { {X86::VMAXCSHZrr, X86::VMAXCSHZrm, 0}, {X86::VMAXCSSZrr, X86::VMAXCSSZrm, 0}, {X86::VMAXCSSrr, X86::VMAXCSSrm, 0}, + {X86::VMAXPBF16Z128rr, X86::VMAXPBF16Z128rm, 0}, + {X86::VMAXPBF16Z256rr, X86::VMAXPBF16Z256rm, 0}, + {X86::VMAXPBF16Zrr, X86::VMAXPBF16Zrm, 0}, {X86::VMAXPDYrr, X86::VMAXPDYrm, 0}, {X86::VMAXPDZ128rr, X86::VMAXPDZ128rm, 0}, {X86::VMAXPDZ256rr, X86::VMAXPDZ256rm, 0}, @@ -2966,6 +3013,9 @@ static const X86FoldTableEntry Table2[] = { {X86::VMINMAXSDrri, X86::VMINMAXSDrmi, TB_NO_REVERSE}, {X86::VMINMAXSHrri, X86::VMINMAXSHrmi, TB_NO_REVERSE}, {X86::VMINMAXSSrri, X86::VMINMAXSSrmi, TB_NO_REVERSE}, + {X86::VMINPBF16Z128rr, X86::VMINPBF16Z128rm, 0}, + {X86::VMINPBF16Z256rr, X86::VMINPBF16Z256rm, 0}, + {X86::VMINPBF16Zrr, X86::VMINPBF16Zrm, 0}, {X86::VMINPDYrr, X86::VMINPDYrm, 0}, {X86::VMINPDZ128rr, X86::VMINPDZ128rm, 0}, {X86::VMINPDZ256rr, X86::VMINPDZ256rm, 0}, @@ -3037,6 +3087,9 @@ static const X86FoldTableEntry Table2[] = { {X86::VMPSADBWZ256rri, 
X86::VMPSADBWZ256rmi, 0}, {X86::VMPSADBWZrri, X86::VMPSADBWZrmi, 0}, {X86::VMPSADBWrri, X86::VMPSADBWrmi, 0}, + {X86::VMULNEPBF16Z128rr, X86::VMULNEPBF16Z128rm, 0}, + {X86::VMULNEPBF16Z256rr, X86::VMULNEPBF16Z256rm, 0}, + {X86::VMULNEPBF16Zrr, X86::VMULNEPBF16Zrm, 0}, {X86::VMULPDYrr, X86::VMULPDYrm, 0}, {X86::VMULPDZ128rr, X86::VMULPDZ128rm, 0}, {X86::VMULPDZ256rr, X86::VMULPDZ256rm, 0}, @@ -3887,12 +3940,18 @@ static const X86FoldTableEntry Table2[] = { {X86::VRCP28PSZrkz, X86::VRCP28PSZmkz, 0}, {X86::VRCP28SDZr, X86::VRCP28SDZm, TB_NO_REVERSE}, {X86::VRCP28SSZr, X86::VRCP28SSZm, TB_NO_REVERSE}, + {X86::VRCPPBF16Z128rkz, X86::VRCPPBF16Z128mkz, 0}, + {X86::VRCPPBF16Z256rkz, X86::VRCPPBF16Z256mkz, 0}, + {X86::VRCPPBF16Zrkz, X86::VRCPPBF16Zmkz, 0}, {X86::VRCPPHZ128rkz, X86::VRCPPHZ128mkz, 0}, {X86::VRCPPHZ256rkz, X86::VRCPPHZ256mkz, 0}, {X86::VRCPPHZrkz, X86::VRCPPHZmkz, 0}, {X86::VRCPSHZrr, X86::VRCPSHZrm, TB_NO_REVERSE}, {X86::VRCPSSr, X86::VRCPSSm, 0}, {X86::VRCPSSr_Int, X86::VRCPSSm_Int, TB_NO_REVERSE}, + {X86::VREDUCENEPBF16Z128rrikz, X86::VREDUCENEPBF16Z128rmikz, 0}, + {X86::VREDUCENEPBF16Z256rrikz, X86::VREDUCENEPBF16Z256rmikz, 0}, + {X86::VREDUCENEPBF16Zrrikz, X86::VREDUCENEPBF16Zrmikz, 0}, {X86::VREDUCEPDZ128rrikz, X86::VREDUCEPDZ128rmikz, 0}, {X86::VREDUCEPDZ256rrikz, X86::VREDUCEPDZ256rmikz, 0}, {X86::VREDUCEPDZrrikz, X86::VREDUCEPDZrmikz, 0}, @@ -3905,6 +3964,9 @@ static const X86FoldTableEntry Table2[] = { {X86::VREDUCESDZrri, X86::VREDUCESDZrmi, TB_NO_REVERSE}, {X86::VREDUCESHZrri, X86::VREDUCESHZrmi, TB_NO_REVERSE}, {X86::VREDUCESSZrri, X86::VREDUCESSZrmi, TB_NO_REVERSE}, + {X86::VRNDSCALENEPBF16Z128rrikz, X86::VRNDSCALENEPBF16Z128rmikz, 0}, + {X86::VRNDSCALENEPBF16Z256rrikz, X86::VRNDSCALENEPBF16Z256rmikz, 0}, + {X86::VRNDSCALENEPBF16Zrrikz, X86::VRNDSCALENEPBF16Zrmikz, 0}, {X86::VRNDSCALEPDZ128rrikz, X86::VRNDSCALEPDZ128rmikz, 0}, {X86::VRNDSCALEPDZ256rrikz, X86::VRNDSCALEPDZ256rmikz, 0}, {X86::VRNDSCALEPDZrrikz, X86::VRNDSCALEPDZrmikz, 0}, @@ 
-3936,12 +3998,18 @@ static const X86FoldTableEntry Table2[] = { {X86::VRSQRT28PSZrkz, X86::VRSQRT28PSZmkz, 0}, {X86::VRSQRT28SDZr, X86::VRSQRT28SDZm, TB_NO_REVERSE}, {X86::VRSQRT28SSZr, X86::VRSQRT28SSZm, TB_NO_REVERSE}, + {X86::VRSQRTPBF16Z128rkz, X86::VRSQRTPBF16Z128mkz, 0}, + {X86::VRSQRTPBF16Z256rkz, X86::VRSQRTPBF16Z256mkz, 0}, + {X86::VRSQRTPBF16Zrkz, X86::VRSQRTPBF16Zmkz, 0}, {X86::VRSQRTPHZ128rkz, X86::VRSQRTPHZ128mkz, 0}, {X86::VRSQRTPHZ256rkz, X86::VRSQRTPHZ256mkz, 0}, {X86::VRSQRTPHZrkz, X86::VRSQRTPHZmkz, 0}, {X86::VRSQRTSHZrr, X86::VRSQRTSHZrm, TB_NO_REVERSE}, {X86::VRSQRTSSr, X86::VRSQRTSSm, 0}, {X86::VRSQRTSSr_Int, X86::VRSQRTSSm_Int, TB_NO_REVERSE}, + {X86::VSCALEFPBF16Z128rr, X86::VSCALEFPBF16Z128rm, 0}, + {X86::VSCALEFPBF16Z256rr, X86::VSCALEFPBF16Z256rm, 0}, + {X86::VSCALEFPBF16Zrr, X86::VSCALEFPBF16Zrm, 0}, {X86::VSCALEFPDZ128rr, X86::VSCALEFPDZ128rm, 0}, {X86::VSCALEFPDZ256rr, X86::VSCALEFPDZ256rm, 0}, {X86::VSCALEFPDZrr, X86::VSCALEFPDZrm, 0}, @@ -3976,6 +4044,9 @@ static const X86FoldTableEntry Table2[] = { {X86::VSM4KEY4rr, X86::VSM4KEY4rm, 0}, {X86::VSM4RNDS4Yrr, X86::VSM4RNDS4Yrm, 0}, {X86::VSM4RNDS4rr, X86::VSM4RNDS4rm, 0}, + {X86::VSQRTNEPBF16Z128rkz, X86::VSQRTNEPBF16Z128mkz, 0}, + {X86::VSQRTNEPBF16Z256rkz, X86::VSQRTNEPBF16Z256mkz, 0}, + {X86::VSQRTNEPBF16Zrkz, X86::VSQRTNEPBF16Zmkz, 0}, {X86::VSQRTPDZ128rkz, X86::VSQRTPDZ128mkz, 0}, {X86::VSQRTPDZ256rkz, X86::VSQRTPDZ256mkz, 0}, {X86::VSQRTPDZrkz, X86::VSQRTPDZmkz, 0}, @@ -3995,6 +4066,9 @@ static const X86FoldTableEntry Table2[] = { {X86::VSQRTSSZr_Int, X86::VSQRTSSZm_Int, TB_NO_REVERSE}, {X86::VSQRTSSr, X86::VSQRTSSm, 0}, {X86::VSQRTSSr_Int, X86::VSQRTSSm_Int, TB_NO_REVERSE}, + {X86::VSUBNEPBF16Z128rr, X86::VSUBNEPBF16Z128rm, 0}, + {X86::VSUBNEPBF16Z256rr, X86::VSUBNEPBF16Z256rm, 0}, + {X86::VSUBNEPBF16Zrr, X86::VSUBNEPBF16Zrm, 0}, {X86::VSUBPDYrr, X86::VSUBPDYrm, 0}, {X86::VSUBPDZ128rr, X86::VSUBPDZ128rm, 0}, {X86::VSUBPDZ256rr, X86::VSUBPDZ256rm, 0}, @@ -4069,6 +4143,9 @@ static 
const X86FoldTableEntry Table2[] = { }; static const X86FoldTableEntry Table3[] = { + {X86::VADDNEPBF16Z128rrkz, X86::VADDNEPBF16Z128rmkz, 0}, + {X86::VADDNEPBF16Z256rrkz, X86::VADDNEPBF16Z256rmkz, 0}, + {X86::VADDNEPBF16Zrrkz, X86::VADDNEPBF16Zrmkz, 0}, {X86::VADDPDZ128rrkz, X86::VADDPDZ128rmkz, 0}, {X86::VADDPDZ256rrkz, X86::VADDPDZ256rmkz, 0}, {X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0}, @@ -4115,6 +4192,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VBROADCASTSSZ128rrk, X86::VBROADCASTSSZ128rmk, TB_NO_REVERSE}, {X86::VBROADCASTSSZ256rrk, X86::VBROADCASTSSZ256rmk, TB_NO_REVERSE}, {X86::VBROADCASTSSZrrk, X86::VBROADCASTSSZrmk, TB_NO_REVERSE}, + {X86::VCMPPBF16Z128rrik, X86::VCMPPBF16Z128rmik, 0}, + {X86::VCMPPBF16Z256rrik, X86::VCMPPBF16Z256rmik, 0}, + {X86::VCMPPBF16Zrrik, X86::VCMPPBF16Zrmik, 0}, {X86::VCMPPDZ128rrik, X86::VCMPPDZ128rmik, 0}, {X86::VCMPPDZ256rrik, X86::VCMPPDZ256rmik, 0}, {X86::VCMPPDZrrik, X86::VCMPPDZrmik, 0}, @@ -4367,6 +4447,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VDBPSADBWZ128rrikz, X86::VDBPSADBWZ128rmikz, 0}, {X86::VDBPSADBWZ256rrikz, X86::VDBPSADBWZ256rmikz, 0}, {X86::VDBPSADBWZrrikz, X86::VDBPSADBWZrmikz, 0}, + {X86::VDIVNEPBF16Z128rrkz, X86::VDIVNEPBF16Z128rmkz, 0}, + {X86::VDIVNEPBF16Z256rrkz, X86::VDIVNEPBF16Z256rmkz, 0}, + {X86::VDIVNEPBF16Zrrkz, X86::VDIVNEPBF16Zrmkz, 0}, {X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmkz, 0}, {X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0}, {X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0}, @@ -4409,6 +4492,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VFIXUPIMMPSZrri, X86::VFIXUPIMMPSZrmi, 0}, {X86::VFIXUPIMMSDZrri, X86::VFIXUPIMMSDZrmi, TB_NO_REVERSE}, {X86::VFIXUPIMMSSZrri, X86::VFIXUPIMMSSZrmi, TB_NO_REVERSE}, + {X86::VFMADD132NEPBF16Z128r, X86::VFMADD132NEPBF16Z128m, 0}, + {X86::VFMADD132NEPBF16Z256r, X86::VFMADD132NEPBF16Z256m, 0}, + {X86::VFMADD132NEPBF16Zr, X86::VFMADD132NEPBF16Zm, 0}, {X86::VFMADD132PDYr, X86::VFMADD132PDYm, 0}, {X86::VFMADD132PDZ128r, X86::VFMADD132PDZ128m, 0}, 
{X86::VFMADD132PDZ256r, X86::VFMADD132PDZ256m, 0}, @@ -4432,6 +4518,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VFMADD132SSZr_Int, X86::VFMADD132SSZm_Int, TB_NO_REVERSE}, {X86::VFMADD132SSr, X86::VFMADD132SSm, 0}, {X86::VFMADD132SSr_Int, X86::VFMADD132SSm_Int, TB_NO_REVERSE}, + {X86::VFMADD213NEPBF16Z128r, X86::VFMADD213NEPBF16Z128m, 0}, + {X86::VFMADD213NEPBF16Z256r, X86::VFMADD213NEPBF16Z256m, 0}, + {X86::VFMADD213NEPBF16Zr, X86::VFMADD213NEPBF16Zm, 0}, {X86::VFMADD213PDYr, X86::VFMADD213PDYm, 0}, {X86::VFMADD213PDZ128r, X86::VFMADD213PDZ128m, 0}, {X86::VFMADD213PDZ256r, X86::VFMADD213PDZ256m, 0}, @@ -4455,6 +4544,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VFMADD213SSZr_Int, X86::VFMADD213SSZm_Int, TB_NO_REVERSE}, {X86::VFMADD213SSr, X86::VFMADD213SSm, 0}, {X86::VFMADD213SSr_Int, X86::VFMADD213SSm_Int, TB_NO_REVERSE}, + {X86::VFMADD231NEPBF16Z128r, X86::VFMADD231NEPBF16Z128m, 0}, + {X86::VFMADD231NEPBF16Z256r, X86::VFMADD231NEPBF16Z256m, 0}, + {X86::VFMADD231NEPBF16Zr, X86::VFMADD231NEPBF16Zm, 0}, {X86::VFMADD231PDYr, X86::VFMADD231PDYm, 0}, {X86::VFMADD231PDZ128r, X86::VFMADD231PDZ128m, 0}, {X86::VFMADD231PDZ256r, X86::VFMADD231PDZ256m, 0}, @@ -4533,6 +4625,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, 0}, {X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Yrm, 0}, {X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, 0}, + {X86::VFMSUB132NEPBF16Z128r, X86::VFMSUB132NEPBF16Z128m, 0}, + {X86::VFMSUB132NEPBF16Z256r, X86::VFMSUB132NEPBF16Z256m, 0}, + {X86::VFMSUB132NEPBF16Zr, X86::VFMSUB132NEPBF16Zm, 0}, {X86::VFMSUB132PDYr, X86::VFMSUB132PDYm, 0}, {X86::VFMSUB132PDZ128r, X86::VFMSUB132PDZ128m, 0}, {X86::VFMSUB132PDZ256r, X86::VFMSUB132PDZ256m, 0}, @@ -4556,6 +4651,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VFMSUB132SSZr_Int, X86::VFMSUB132SSZm_Int, TB_NO_REVERSE}, {X86::VFMSUB132SSr, X86::VFMSUB132SSm, 0}, {X86::VFMSUB132SSr_Int, X86::VFMSUB132SSm_Int, TB_NO_REVERSE}, + {X86::VFMSUB213NEPBF16Z128r, 
X86::VFMSUB213NEPBF16Z128m, 0}, + {X86::VFMSUB213NEPBF16Z256r, X86::VFMSUB213NEPBF16Z256m, 0}, + {X86::VFMSUB213NEPBF16Zr, X86::VFMSUB213NEPBF16Zm, 0}, {X86::VFMSUB213PDYr, X86::VFMSUB213PDYm, 0}, {X86::VFMSUB213PDZ128r, X86::VFMSUB213PDZ128m, 0}, {X86::VFMSUB213PDZ256r, X86::VFMSUB213PDZ256m, 0}, @@ -4579,6 +4677,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VFMSUB213SSZr_Int, X86::VFMSUB213SSZm_Int, TB_NO_REVERSE}, {X86::VFMSUB213SSr, X86::VFMSUB213SSm, 0}, {X86::VFMSUB213SSr_Int, X86::VFMSUB213SSm_Int, TB_NO_REVERSE}, + {X86::VFMSUB231NEPBF16Z128r, X86::VFMSUB231NEPBF16Z128m, 0}, + {X86::VFMSUB231NEPBF16Z256r, X86::VFMSUB231NEPBF16Z256m, 0}, + {X86::VFMSUB231NEPBF16Zr, X86::VFMSUB231NEPBF16Zm, 0}, {X86::VFMSUB231PDYr, X86::VFMSUB231PDYm, 0}, {X86::VFMSUB231PDZ128r, X86::VFMSUB231PDZ128m, 0}, {X86::VFMSUB231PDZ256r, X86::VFMSUB231PDZ256m, 0}, @@ -4657,6 +4758,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VFMULCPHZ256rrkz, X86::VFMULCPHZ256rmkz, 0}, {X86::VFMULCPHZrrkz, X86::VFMULCPHZrmkz, 0}, {X86::VFMULCSHZrrkz, X86::VFMULCSHZrmkz, TB_NO_REVERSE}, + {X86::VFNMADD132NEPBF16Z128r, X86::VFNMADD132NEPBF16Z128m, 0}, + {X86::VFNMADD132NEPBF16Z256r, X86::VFNMADD132NEPBF16Z256m, 0}, + {X86::VFNMADD132NEPBF16Zr, X86::VFNMADD132NEPBF16Zm, 0}, {X86::VFNMADD132PDYr, X86::VFNMADD132PDYm, 0}, {X86::VFNMADD132PDZ128r, X86::VFNMADD132PDZ128m, 0}, {X86::VFNMADD132PDZ256r, X86::VFNMADD132PDZ256m, 0}, @@ -4680,6 +4784,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VFNMADD132SSZr_Int, X86::VFNMADD132SSZm_Int, TB_NO_REVERSE}, {X86::VFNMADD132SSr, X86::VFNMADD132SSm, 0}, {X86::VFNMADD132SSr_Int, X86::VFNMADD132SSm_Int, TB_NO_REVERSE}, + {X86::VFNMADD213NEPBF16Z128r, X86::VFNMADD213NEPBF16Z128m, 0}, + {X86::VFNMADD213NEPBF16Z256r, X86::VFNMADD213NEPBF16Z256m, 0}, + {X86::VFNMADD213NEPBF16Zr, X86::VFNMADD213NEPBF16Zm, 0}, {X86::VFNMADD213PDYr, X86::VFNMADD213PDYm, 0}, {X86::VFNMADD213PDZ128r, X86::VFNMADD213PDZ128m, 0}, {X86::VFNMADD213PDZ256r, 
X86::VFNMADD213PDZ256m, 0}, @@ -4703,6 +4810,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VFNMADD213SSZr_Int, X86::VFNMADD213SSZm_Int, TB_NO_REVERSE}, {X86::VFNMADD213SSr, X86::VFNMADD213SSm, 0}, {X86::VFNMADD213SSr_Int, X86::VFNMADD213SSm_Int, TB_NO_REVERSE}, + {X86::VFNMADD231NEPBF16Z128r, X86::VFNMADD231NEPBF16Z128m, 0}, + {X86::VFNMADD231NEPBF16Z256r, X86::VFNMADD231NEPBF16Z256m, 0}, + {X86::VFNMADD231NEPBF16Zr, X86::VFNMADD231NEPBF16Zm, 0}, {X86::VFNMADD231PDYr, X86::VFNMADD231PDYm, 0}, {X86::VFNMADD231PDZ128r, X86::VFNMADD231PDZ128m, 0}, {X86::VFNMADD231PDZ256r, X86::VFNMADD231PDZ256m, 0}, @@ -4734,6 +4844,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VFNMADDSD4rr_Int, X86::VFNMADDSD4rm_Int, TB_NO_REVERSE}, {X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, 0}, {X86::VFNMADDSS4rr_Int, X86::VFNMADDSS4rm_Int, TB_NO_REVERSE}, + {X86::VFNMSUB132NEPBF16Z128r, X86::VFNMSUB132NEPBF16Z128m, 0}, + {X86::VFNMSUB132NEPBF16Z256r, X86::VFNMSUB132NEPBF16Z256m, 0}, + {X86::VFNMSUB132NEPBF16Zr, X86::VFNMSUB132NEPBF16Zm, 0}, {X86::VFNMSUB132PDYr, X86::VFNMSUB132PDYm, 0}, {X86::VFNMSUB132PDZ128r, X86::VFNMSUB132PDZ128m, 0}, {X86::VFNMSUB132PDZ256r, X86::VFNMSUB132PDZ256m, 0}, @@ -4757,6 +4870,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VFNMSUB132SSZr_Int, X86::VFNMSUB132SSZm_Int, TB_NO_REVERSE}, {X86::VFNMSUB132SSr, X86::VFNMSUB132SSm, 0}, {X86::VFNMSUB132SSr_Int, X86::VFNMSUB132SSm_Int, TB_NO_REVERSE}, + {X86::VFNMSUB213NEPBF16Z128r, X86::VFNMSUB213NEPBF16Z128m, 0}, + {X86::VFNMSUB213NEPBF16Z256r, X86::VFNMSUB213NEPBF16Z256m, 0}, + {X86::VFNMSUB213NEPBF16Zr, X86::VFNMSUB213NEPBF16Zm, 0}, {X86::VFNMSUB213PDYr, X86::VFNMSUB213PDYm, 0}, {X86::VFNMSUB213PDZ128r, X86::VFNMSUB213PDZ128m, 0}, {X86::VFNMSUB213PDZ256r, X86::VFNMSUB213PDZ256m, 0}, @@ -4780,6 +4896,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VFNMSUB213SSZr_Int, X86::VFNMSUB213SSZm_Int, TB_NO_REVERSE}, {X86::VFNMSUB213SSr, X86::VFNMSUB213SSm, 0}, {X86::VFNMSUB213SSr_Int, 
X86::VFNMSUB213SSm_Int, TB_NO_REVERSE}, + {X86::VFNMSUB231NEPBF16Z128r, X86::VFNMSUB231NEPBF16Z128m, 0}, + {X86::VFNMSUB231NEPBF16Z256r, X86::VFNMSUB231NEPBF16Z256m, 0}, + {X86::VFNMSUB231NEPBF16Zr, X86::VFNMSUB231NEPBF16Zm, 0}, {X86::VFNMSUB231PDYr, X86::VFNMSUB231PDYm, 0}, {X86::VFNMSUB231PDZ128r, X86::VFNMSUB231PDZ128m, 0}, {X86::VFNMSUB231PDZ256r, X86::VFNMSUB231PDZ256m, 0}, @@ -4811,6 +4930,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4rm_Int, TB_NO_REVERSE}, {X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, 0}, {X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4rm_Int, TB_NO_REVERSE}, + {X86::VGETEXPPBF16Z128rk, X86::VGETEXPPBF16Z128mk, 0}, + {X86::VGETEXPPBF16Z256rk, X86::VGETEXPPBF16Z256mk, 0}, + {X86::VGETEXPPBF16Zrk, X86::VGETEXPPBF16Zmk, 0}, {X86::VGETEXPPDZ128rk, X86::VGETEXPPDZ128mk, 0}, {X86::VGETEXPPDZ256rk, X86::VGETEXPPDZ256mk, 0}, {X86::VGETEXPPDZrk, X86::VGETEXPPDZmk, 0}, @@ -4823,6 +4945,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VGETEXPSDZrkz, X86::VGETEXPSDZmkz, TB_NO_REVERSE}, {X86::VGETEXPSHZrkz, X86::VGETEXPSHZmkz, TB_NO_REVERSE}, {X86::VGETEXPSSZrkz, X86::VGETEXPSSZmkz, TB_NO_REVERSE}, + {X86::VGETMANTPBF16Z128rrik, X86::VGETMANTPBF16Z128rmik, 0}, + {X86::VGETMANTPBF16Z256rrik, X86::VGETMANTPBF16Z256rmik, 0}, + {X86::VGETMANTPBF16Zrrik, X86::VGETMANTPBF16Zrmik, 0}, {X86::VGETMANTPDZ128rrik, X86::VGETMANTPDZ128rmik, 0}, {X86::VGETMANTPDZ256rrik, X86::VGETMANTPDZ256rmik, 0}, {X86::VGETMANTPDZrrik, X86::VGETMANTPDZrmik, 0}, @@ -4865,6 +4990,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VMAXCPSZ128rrkz, X86::VMAXCPSZ128rmkz, 0}, {X86::VMAXCPSZ256rrkz, X86::VMAXCPSZ256rmkz, 0}, {X86::VMAXCPSZrrkz, X86::VMAXCPSZrmkz, 0}, + {X86::VMAXPBF16Z128rrkz, X86::VMAXPBF16Z128rmkz, 0}, + {X86::VMAXPBF16Z256rrkz, X86::VMAXPBF16Z256rmkz, 0}, + {X86::VMAXPBF16Zrrkz, X86::VMAXPBF16Zrmkz, 0}, {X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0}, {X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmkz, 0}, {X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 
0}, @@ -4901,6 +5029,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VMINMAXSDrrikz, X86::VMINMAXSDrmikz, TB_NO_REVERSE}, {X86::VMINMAXSHrrikz, X86::VMINMAXSHrmikz, TB_NO_REVERSE}, {X86::VMINMAXSSrrikz, X86::VMINMAXSSrmikz, TB_NO_REVERSE}, + {X86::VMINPBF16Z128rrkz, X86::VMINPBF16Z128rmkz, 0}, + {X86::VMINPBF16Z256rrkz, X86::VMINPBF16Z256rmkz, 0}, + {X86::VMINPBF16Zrrkz, X86::VMINPBF16Zrmkz, 0}, {X86::VMINPDZ128rrkz, X86::VMINPDZ128rmkz, 0}, {X86::VMINPDZ256rrkz, X86::VMINPDZ256rmkz, 0}, {X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0}, @@ -4955,6 +5086,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VMPSADBWZ128rrikz, X86::VMPSADBWZ128rmikz, 0}, {X86::VMPSADBWZ256rrikz, X86::VMPSADBWZ256rmikz, 0}, {X86::VMPSADBWZrrikz, X86::VMPSADBWZrmikz, 0}, + {X86::VMULNEPBF16Z128rrkz, X86::VMULNEPBF16Z128rmkz, 0}, + {X86::VMULNEPBF16Z256rrkz, X86::VMULNEPBF16Z256rmkz, 0}, + {X86::VMULNEPBF16Zrrkz, X86::VMULNEPBF16Zrmkz, 0}, {X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0}, {X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0}, {X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0}, @@ -5696,10 +5830,16 @@ static const X86FoldTableEntry Table3[] = { {X86::VRCP28PSZrk, X86::VRCP28PSZmk, 0}, {X86::VRCP28SDZrkz, X86::VRCP28SDZmkz, TB_NO_REVERSE}, {X86::VRCP28SSZrkz, X86::VRCP28SSZmkz, TB_NO_REVERSE}, + {X86::VRCPPBF16Z128rk, X86::VRCPPBF16Z128mk, 0}, + {X86::VRCPPBF16Z256rk, X86::VRCPPBF16Z256mk, 0}, + {X86::VRCPPBF16Zrk, X86::VRCPPBF16Zmk, 0}, {X86::VRCPPHZ128rk, X86::VRCPPHZ128mk, 0}, {X86::VRCPPHZ256rk, X86::VRCPPHZ256mk, 0}, {X86::VRCPPHZrk, X86::VRCPPHZmk, 0}, {X86::VRCPSHZrrkz, X86::VRCPSHZrmkz, TB_NO_REVERSE}, + {X86::VREDUCENEPBF16Z128rrik, X86::VREDUCENEPBF16Z128rmik, 0}, + {X86::VREDUCENEPBF16Z256rrik, X86::VREDUCENEPBF16Z256rmik, 0}, + {X86::VREDUCENEPBF16Zrrik, X86::VREDUCENEPBF16Zrmik, 0}, {X86::VREDUCEPDZ128rrik, X86::VREDUCEPDZ128rmik, 0}, {X86::VREDUCEPDZ256rrik, X86::VREDUCEPDZ256rmik, 0}, {X86::VREDUCEPDZrrik, X86::VREDUCEPDZrmik, 0}, @@ -5712,6 +5852,9 @@ static const 
X86FoldTableEntry Table3[] = { {X86::VREDUCESDZrrikz, X86::VREDUCESDZrmikz, TB_NO_REVERSE}, {X86::VREDUCESHZrrikz, X86::VREDUCESHZrmikz, TB_NO_REVERSE}, {X86::VREDUCESSZrrikz, X86::VREDUCESSZrmikz, TB_NO_REVERSE}, + {X86::VRNDSCALENEPBF16Z128rrik, X86::VRNDSCALENEPBF16Z128rmik, 0}, + {X86::VRNDSCALENEPBF16Z256rrik, X86::VRNDSCALENEPBF16Z256rmik, 0}, + {X86::VRNDSCALENEPBF16Zrrik, X86::VRNDSCALENEPBF16Zrmik, 0}, {X86::VRNDSCALEPDZ128rrik, X86::VRNDSCALEPDZ128rmik, 0}, {X86::VRNDSCALEPDZ256rrik, X86::VRNDSCALEPDZ256rmik, 0}, {X86::VRNDSCALEPDZrrik, X86::VRNDSCALEPDZrmik, 0}, @@ -5736,10 +5879,16 @@ static const X86FoldTableEntry Table3[] = { {X86::VRSQRT28PSZrk, X86::VRSQRT28PSZmk, 0}, {X86::VRSQRT28SDZrkz, X86::VRSQRT28SDZmkz, TB_NO_REVERSE}, {X86::VRSQRT28SSZrkz, X86::VRSQRT28SSZmkz, TB_NO_REVERSE}, + {X86::VRSQRTPBF16Z128rk, X86::VRSQRTPBF16Z128mk, 0}, + {X86::VRSQRTPBF16Z256rk, X86::VRSQRTPBF16Z256mk, 0}, + {X86::VRSQRTPBF16Zrk, X86::VRSQRTPBF16Zmk, 0}, {X86::VRSQRTPHZ128rk, X86::VRSQRTPHZ128mk, 0}, {X86::VRSQRTPHZ256rk, X86::VRSQRTPHZ256mk, 0}, {X86::VRSQRTPHZrk, X86::VRSQRTPHZmk, 0}, {X86::VRSQRTSHZrrkz, X86::VRSQRTSHZrmkz, TB_NO_REVERSE}, + {X86::VSCALEFPBF16Z128rrkz, X86::VSCALEFPBF16Z128rmkz, 0}, + {X86::VSCALEFPBF16Z256rrkz, X86::VSCALEFPBF16Z256rmkz, 0}, + {X86::VSCALEFPBF16Zrrkz, X86::VSCALEFPBF16Zrmkz, 0}, {X86::VSCALEFPDZ128rrkz, X86::VSCALEFPDZ128rmkz, 0}, {X86::VSCALEFPDZ256rrkz, X86::VSCALEFPDZ256rmkz, 0}, {X86::VSCALEFPDZrrkz, X86::VSCALEFPDZrmkz, 0}, @@ -5769,6 +5918,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VSM3MSG1rr, X86::VSM3MSG1rm, 0}, {X86::VSM3MSG2rr, X86::VSM3MSG2rm, 0}, {X86::VSM3RNDS2rr, X86::VSM3RNDS2rm, 0}, + {X86::VSQRTNEPBF16Z128rk, X86::VSQRTNEPBF16Z128mk, 0}, + {X86::VSQRTNEPBF16Z256rk, X86::VSQRTNEPBF16Z256mk, 0}, + {X86::VSQRTNEPBF16Zrk, X86::VSQRTNEPBF16Zmk, 0}, {X86::VSQRTPDZ128rk, X86::VSQRTPDZ128mk, 0}, {X86::VSQRTPDZ256rk, X86::VSQRTPDZ256mk, 0}, {X86::VSQRTPDZrk, X86::VSQRTPDZmk, 0}, @@ -5781,6 +5933,9 @@ static 
const X86FoldTableEntry Table3[] = { {X86::VSQRTSDZr_Intkz, X86::VSQRTSDZm_Intkz, TB_NO_REVERSE}, {X86::VSQRTSHZr_Intkz, X86::VSQRTSHZm_Intkz, TB_NO_REVERSE}, {X86::VSQRTSSZr_Intkz, X86::VSQRTSSZm_Intkz, TB_NO_REVERSE}, + {X86::VSUBNEPBF16Z128rrkz, X86::VSUBNEPBF16Z128rmkz, 0}, + {X86::VSUBNEPBF16Z256rrkz, X86::VSUBNEPBF16Z256rmkz, 0}, + {X86::VSUBNEPBF16Zrrkz, X86::VSUBNEPBF16Zrmkz, 0}, {X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0}, {X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0}, {X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0}, @@ -5814,6 +5969,9 @@ static const X86FoldTableEntry Table3[] = { }; static const X86FoldTableEntry Table4[] = { + {X86::VADDNEPBF16Z128rrk, X86::VADDNEPBF16Z128rmk, 0}, + {X86::VADDNEPBF16Z256rrk, X86::VADDNEPBF16Z256rmk, 0}, + {X86::VADDNEPBF16Zrrk, X86::VADDNEPBF16Zrmk, 0}, {X86::VADDPDZ128rrk, X86::VADDPDZ128rmk, 0}, {X86::VADDPDZ256rrk, X86::VADDPDZ256rmk, 0}, {X86::VADDPDZrrk, X86::VADDPDZrmk, 0}, @@ -5883,6 +6041,9 @@ static const X86FoldTableEntry Table4[] = { {X86::VDBPSADBWZ128rrik, X86::VDBPSADBWZ128rmik, 0}, {X86::VDBPSADBWZ256rrik, X86::VDBPSADBWZ256rmik, 0}, {X86::VDBPSADBWZrrik, X86::VDBPSADBWZrmik, 0}, + {X86::VDIVNEPBF16Z128rrk, X86::VDIVNEPBF16Z128rmk, 0}, + {X86::VDIVNEPBF16Z256rrk, X86::VDIVNEPBF16Z256rmk, 0}, + {X86::VDIVNEPBF16Zrrk, X86::VDIVNEPBF16Zrmk, 0}, {X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmk, 0}, {X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0}, {X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0}, @@ -5935,6 +6096,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFIXUPIMMSDZrrikz, X86::VFIXUPIMMSDZrmikz, TB_NO_REVERSE}, {X86::VFIXUPIMMSSZrrik, X86::VFIXUPIMMSSZrmik, TB_NO_REVERSE}, {X86::VFIXUPIMMSSZrrikz, X86::VFIXUPIMMSSZrmikz, TB_NO_REVERSE}, + {X86::VFMADD132NEPBF16Z128rk, X86::VFMADD132NEPBF16Z128mk, 0}, + {X86::VFMADD132NEPBF16Z128rkz, X86::VFMADD132NEPBF16Z128mkz, 0}, + {X86::VFMADD132NEPBF16Z256rk, X86::VFMADD132NEPBF16Z256mk, 0}, + {X86::VFMADD132NEPBF16Z256rkz, X86::VFMADD132NEPBF16Z256mkz, 0}, + {X86::VFMADD132NEPBF16Zrk, 
X86::VFMADD132NEPBF16Zmk, 0}, + {X86::VFMADD132NEPBF16Zrkz, X86::VFMADD132NEPBF16Zmkz, 0}, {X86::VFMADD132PDZ128rk, X86::VFMADD132PDZ128mk, 0}, {X86::VFMADD132PDZ128rkz, X86::VFMADD132PDZ128mkz, 0}, {X86::VFMADD132PDZ256rk, X86::VFMADD132PDZ256mk, 0}, @@ -5959,6 +6126,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFMADD132SHZr_Intkz, X86::VFMADD132SHZm_Intkz, TB_NO_REVERSE}, {X86::VFMADD132SSZr_Intk, X86::VFMADD132SSZm_Intk, TB_NO_REVERSE}, {X86::VFMADD132SSZr_Intkz, X86::VFMADD132SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFMADD213NEPBF16Z128rk, X86::VFMADD213NEPBF16Z128mk, 0}, + {X86::VFMADD213NEPBF16Z128rkz, X86::VFMADD213NEPBF16Z128mkz, 0}, + {X86::VFMADD213NEPBF16Z256rk, X86::VFMADD213NEPBF16Z256mk, 0}, + {X86::VFMADD213NEPBF16Z256rkz, X86::VFMADD213NEPBF16Z256mkz, 0}, + {X86::VFMADD213NEPBF16Zrk, X86::VFMADD213NEPBF16Zmk, 0}, + {X86::VFMADD213NEPBF16Zrkz, X86::VFMADD213NEPBF16Zmkz, 0}, {X86::VFMADD213PDZ128rk, X86::VFMADD213PDZ128mk, 0}, {X86::VFMADD213PDZ128rkz, X86::VFMADD213PDZ128mkz, 0}, {X86::VFMADD213PDZ256rk, X86::VFMADD213PDZ256mk, 0}, @@ -5983,6 +6156,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFMADD213SHZr_Intkz, X86::VFMADD213SHZm_Intkz, TB_NO_REVERSE}, {X86::VFMADD213SSZr_Intk, X86::VFMADD213SSZm_Intk, TB_NO_REVERSE}, {X86::VFMADD213SSZr_Intkz, X86::VFMADD213SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFMADD231NEPBF16Z128rk, X86::VFMADD231NEPBF16Z128mk, 0}, + {X86::VFMADD231NEPBF16Z128rkz, X86::VFMADD231NEPBF16Z128mkz, 0}, + {X86::VFMADD231NEPBF16Z256rk, X86::VFMADD231NEPBF16Z256mk, 0}, + {X86::VFMADD231NEPBF16Z256rkz, X86::VFMADD231NEPBF16Z256mkz, 0}, + {X86::VFMADD231NEPBF16Zrk, X86::VFMADD231NEPBF16Zmk, 0}, + {X86::VFMADD231NEPBF16Zrkz, X86::VFMADD231NEPBF16Zmkz, 0}, {X86::VFMADD231PDZ128rk, X86::VFMADD231PDZ128mk, 0}, {X86::VFMADD231PDZ128rkz, X86::VFMADD231PDZ128mkz, 0}, {X86::VFMADD231PDZ256rk, X86::VFMADD231PDZ256mk, 0}, @@ -6069,6 +6248,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFMADDSUB231PSZ256rkz, 
X86::VFMADDSUB231PSZ256mkz, 0}, {X86::VFMADDSUB231PSZrk, X86::VFMADDSUB231PSZmk, 0}, {X86::VFMADDSUB231PSZrkz, X86::VFMADDSUB231PSZmkz, 0}, + {X86::VFMSUB132NEPBF16Z128rk, X86::VFMSUB132NEPBF16Z128mk, 0}, + {X86::VFMSUB132NEPBF16Z128rkz, X86::VFMSUB132NEPBF16Z128mkz, 0}, + {X86::VFMSUB132NEPBF16Z256rk, X86::VFMSUB132NEPBF16Z256mk, 0}, + {X86::VFMSUB132NEPBF16Z256rkz, X86::VFMSUB132NEPBF16Z256mkz, 0}, + {X86::VFMSUB132NEPBF16Zrk, X86::VFMSUB132NEPBF16Zmk, 0}, + {X86::VFMSUB132NEPBF16Zrkz, X86::VFMSUB132NEPBF16Zmkz, 0}, {X86::VFMSUB132PDZ128rk, X86::VFMSUB132PDZ128mk, 0}, {X86::VFMSUB132PDZ128rkz, X86::VFMSUB132PDZ128mkz, 0}, {X86::VFMSUB132PDZ256rk, X86::VFMSUB132PDZ256mk, 0}, @@ -6093,6 +6278,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFMSUB132SHZr_Intkz, X86::VFMSUB132SHZm_Intkz, TB_NO_REVERSE}, {X86::VFMSUB132SSZr_Intk, X86::VFMSUB132SSZm_Intk, TB_NO_REVERSE}, {X86::VFMSUB132SSZr_Intkz, X86::VFMSUB132SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFMSUB213NEPBF16Z128rk, X86::VFMSUB213NEPBF16Z128mk, 0}, + {X86::VFMSUB213NEPBF16Z128rkz, X86::VFMSUB213NEPBF16Z128mkz, 0}, + {X86::VFMSUB213NEPBF16Z256rk, X86::VFMSUB213NEPBF16Z256mk, 0}, + {X86::VFMSUB213NEPBF16Z256rkz, X86::VFMSUB213NEPBF16Z256mkz, 0}, + {X86::VFMSUB213NEPBF16Zrk, X86::VFMSUB213NEPBF16Zmk, 0}, + {X86::VFMSUB213NEPBF16Zrkz, X86::VFMSUB213NEPBF16Zmkz, 0}, {X86::VFMSUB213PDZ128rk, X86::VFMSUB213PDZ128mk, 0}, {X86::VFMSUB213PDZ128rkz, X86::VFMSUB213PDZ128mkz, 0}, {X86::VFMSUB213PDZ256rk, X86::VFMSUB213PDZ256mk, 0}, @@ -6117,6 +6308,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFMSUB213SHZr_Intkz, X86::VFMSUB213SHZm_Intkz, TB_NO_REVERSE}, {X86::VFMSUB213SSZr_Intk, X86::VFMSUB213SSZm_Intk, TB_NO_REVERSE}, {X86::VFMSUB213SSZr_Intkz, X86::VFMSUB213SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFMSUB231NEPBF16Z128rk, X86::VFMSUB231NEPBF16Z128mk, 0}, + {X86::VFMSUB231NEPBF16Z128rkz, X86::VFMSUB231NEPBF16Z128mkz, 0}, + {X86::VFMSUB231NEPBF16Z256rk, X86::VFMSUB231NEPBF16Z256mk, 0}, + 
{X86::VFMSUB231NEPBF16Z256rkz, X86::VFMSUB231NEPBF16Z256mkz, 0}, + {X86::VFMSUB231NEPBF16Zrk, X86::VFMSUB231NEPBF16Zmk, 0}, + {X86::VFMSUB231NEPBF16Zrkz, X86::VFMSUB231NEPBF16Zmkz, 0}, {X86::VFMSUB231PDZ128rk, X86::VFMSUB231PDZ128mk, 0}, {X86::VFMSUB231PDZ128rkz, X86::VFMSUB231PDZ128mkz, 0}, {X86::VFMSUB231PDZ256rk, X86::VFMSUB231PDZ256mk, 0}, @@ -6199,6 +6396,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFMULCPHZ256rrk, X86::VFMULCPHZ256rmk, 0}, {X86::VFMULCPHZrrk, X86::VFMULCPHZrmk, 0}, {X86::VFMULCSHZrrk, X86::VFMULCSHZrmk, TB_NO_REVERSE}, + {X86::VFNMADD132NEPBF16Z128rk, X86::VFNMADD132NEPBF16Z128mk, 0}, + {X86::VFNMADD132NEPBF16Z128rkz, X86::VFNMADD132NEPBF16Z128mkz, 0}, + {X86::VFNMADD132NEPBF16Z256rk, X86::VFNMADD132NEPBF16Z256mk, 0}, + {X86::VFNMADD132NEPBF16Z256rkz, X86::VFNMADD132NEPBF16Z256mkz, 0}, + {X86::VFNMADD132NEPBF16Zrk, X86::VFNMADD132NEPBF16Zmk, 0}, + {X86::VFNMADD132NEPBF16Zrkz, X86::VFNMADD132NEPBF16Zmkz, 0}, {X86::VFNMADD132PDZ128rk, X86::VFNMADD132PDZ128mk, 0}, {X86::VFNMADD132PDZ128rkz, X86::VFNMADD132PDZ128mkz, 0}, {X86::VFNMADD132PDZ256rk, X86::VFNMADD132PDZ256mk, 0}, @@ -6223,6 +6426,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFNMADD132SHZr_Intkz, X86::VFNMADD132SHZm_Intkz, TB_NO_REVERSE}, {X86::VFNMADD132SSZr_Intk, X86::VFNMADD132SSZm_Intk, TB_NO_REVERSE}, {X86::VFNMADD132SSZr_Intkz, X86::VFNMADD132SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFNMADD213NEPBF16Z128rk, X86::VFNMADD213NEPBF16Z128mk, 0}, + {X86::VFNMADD213NEPBF16Z128rkz, X86::VFNMADD213NEPBF16Z128mkz, 0}, + {X86::VFNMADD213NEPBF16Z256rk, X86::VFNMADD213NEPBF16Z256mk, 0}, + {X86::VFNMADD213NEPBF16Z256rkz, X86::VFNMADD213NEPBF16Z256mkz, 0}, + {X86::VFNMADD213NEPBF16Zrk, X86::VFNMADD213NEPBF16Zmk, 0}, + {X86::VFNMADD213NEPBF16Zrkz, X86::VFNMADD213NEPBF16Zmkz, 0}, {X86::VFNMADD213PDZ128rk, X86::VFNMADD213PDZ128mk, 0}, {X86::VFNMADD213PDZ128rkz, X86::VFNMADD213PDZ128mkz, 0}, {X86::VFNMADD213PDZ256rk, X86::VFNMADD213PDZ256mk, 0}, @@ -6247,6 +6456,12 @@ static const 
X86FoldTableEntry Table4[] = { {X86::VFNMADD213SHZr_Intkz, X86::VFNMADD213SHZm_Intkz, TB_NO_REVERSE}, {X86::VFNMADD213SSZr_Intk, X86::VFNMADD213SSZm_Intk, TB_NO_REVERSE}, {X86::VFNMADD213SSZr_Intkz, X86::VFNMADD213SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFNMADD231NEPBF16Z128rk, X86::VFNMADD231NEPBF16Z128mk, 0}, + {X86::VFNMADD231NEPBF16Z128rkz, X86::VFNMADD231NEPBF16Z128mkz, 0}, + {X86::VFNMADD231NEPBF16Z256rk, X86::VFNMADD231NEPBF16Z256mk, 0}, + {X86::VFNMADD231NEPBF16Z256rkz, X86::VFNMADD231NEPBF16Z256mkz, 0}, + {X86::VFNMADD231NEPBF16Zrk, X86::VFNMADD231NEPBF16Zmk, 0}, + {X86::VFNMADD231NEPBF16Zrkz, X86::VFNMADD231NEPBF16Zmkz, 0}, {X86::VFNMADD231PDZ128rk, X86::VFNMADD231PDZ128mk, 0}, {X86::VFNMADD231PDZ128rkz, X86::VFNMADD231PDZ128mkz, 0}, {X86::VFNMADD231PDZ256rk, X86::VFNMADD231PDZ256mk, 0}, @@ -6271,6 +6486,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFNMADD231SHZr_Intkz, X86::VFNMADD231SHZm_Intkz, TB_NO_REVERSE}, {X86::VFNMADD231SSZr_Intk, X86::VFNMADD231SSZm_Intk, TB_NO_REVERSE}, {X86::VFNMADD231SSZr_Intkz, X86::VFNMADD231SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFNMSUB132NEPBF16Z128rk, X86::VFNMSUB132NEPBF16Z128mk, 0}, + {X86::VFNMSUB132NEPBF16Z128rkz, X86::VFNMSUB132NEPBF16Z128mkz, 0}, + {X86::VFNMSUB132NEPBF16Z256rk, X86::VFNMSUB132NEPBF16Z256mk, 0}, + {X86::VFNMSUB132NEPBF16Z256rkz, X86::VFNMSUB132NEPBF16Z256mkz, 0}, + {X86::VFNMSUB132NEPBF16Zrk, X86::VFNMSUB132NEPBF16Zmk, 0}, + {X86::VFNMSUB132NEPBF16Zrkz, X86::VFNMSUB132NEPBF16Zmkz, 0}, {X86::VFNMSUB132PDZ128rk, X86::VFNMSUB132PDZ128mk, 0}, {X86::VFNMSUB132PDZ128rkz, X86::VFNMSUB132PDZ128mkz, 0}, {X86::VFNMSUB132PDZ256rk, X86::VFNMSUB132PDZ256mk, 0}, @@ -6295,6 +6516,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFNMSUB132SHZr_Intkz, X86::VFNMSUB132SHZm_Intkz, TB_NO_REVERSE}, {X86::VFNMSUB132SSZr_Intk, X86::VFNMSUB132SSZm_Intk, TB_NO_REVERSE}, {X86::VFNMSUB132SSZr_Intkz, X86::VFNMSUB132SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFNMSUB213NEPBF16Z128rk, X86::VFNMSUB213NEPBF16Z128mk, 0}, + 
{X86::VFNMSUB213NEPBF16Z128rkz, X86::VFNMSUB213NEPBF16Z128mkz, 0}, + {X86::VFNMSUB213NEPBF16Z256rk, X86::VFNMSUB213NEPBF16Z256mk, 0}, + {X86::VFNMSUB213NEPBF16Z256rkz, X86::VFNMSUB213NEPBF16Z256mkz, 0}, + {X86::VFNMSUB213NEPBF16Zrk, X86::VFNMSUB213NEPBF16Zmk, 0}, + {X86::VFNMSUB213NEPBF16Zrkz, X86::VFNMSUB213NEPBF16Zmkz, 0}, {X86::VFNMSUB213PDZ128rk, X86::VFNMSUB213PDZ128mk, 0}, {X86::VFNMSUB213PDZ128rkz, X86::VFNMSUB213PDZ128mkz, 0}, {X86::VFNMSUB213PDZ256rk, X86::VFNMSUB213PDZ256mk, 0}, @@ -6319,6 +6546,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFNMSUB213SHZr_Intkz, X86::VFNMSUB213SHZm_Intkz, TB_NO_REVERSE}, {X86::VFNMSUB213SSZr_Intk, X86::VFNMSUB213SSZm_Intk, TB_NO_REVERSE}, {X86::VFNMSUB213SSZr_Intkz, X86::VFNMSUB213SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFNMSUB231NEPBF16Z128rk, X86::VFNMSUB231NEPBF16Z128mk, 0}, + {X86::VFNMSUB231NEPBF16Z128rkz, X86::VFNMSUB231NEPBF16Z128mkz, 0}, + {X86::VFNMSUB231NEPBF16Z256rk, X86::VFNMSUB231NEPBF16Z256mk, 0}, + {X86::VFNMSUB231NEPBF16Z256rkz, X86::VFNMSUB231NEPBF16Z256mkz, 0}, + {X86::VFNMSUB231NEPBF16Zrk, X86::VFNMSUB231NEPBF16Zmk, 0}, + {X86::VFNMSUB231NEPBF16Zrkz, X86::VFNMSUB231NEPBF16Zmkz, 0}, {X86::VFNMSUB231PDZ128rk, X86::VFNMSUB231PDZ128mk, 0}, {X86::VFNMSUB231PDZ128rkz, X86::VFNMSUB231PDZ128mkz, 0}, {X86::VFNMSUB231PDZ256rk, X86::VFNMSUB231PDZ256mk, 0}, @@ -6379,6 +6612,9 @@ static const X86FoldTableEntry Table4[] = { {X86::VMAXCPSZ128rrk, X86::VMAXCPSZ128rmk, 0}, {X86::VMAXCPSZ256rrk, X86::VMAXCPSZ256rmk, 0}, {X86::VMAXCPSZrrk, X86::VMAXCPSZrmk, 0}, + {X86::VMAXPBF16Z128rrk, X86::VMAXPBF16Z128rmk, 0}, + {X86::VMAXPBF16Z256rrk, X86::VMAXPBF16Z256rmk, 0}, + {X86::VMAXPBF16Zrrk, X86::VMAXPBF16Zrmk, 0}, {X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0}, {X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmk, 0}, {X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0}, @@ -6415,6 +6651,9 @@ static const X86FoldTableEntry Table4[] = { {X86::VMINMAXSDrrik, X86::VMINMAXSDrmik, TB_NO_REVERSE}, {X86::VMINMAXSHrrik, X86::VMINMAXSHrmik, TB_NO_REVERSE}, 
{X86::VMINMAXSSrrik, X86::VMINMAXSSrmik, TB_NO_REVERSE}, + {X86::VMINPBF16Z128rrk, X86::VMINPBF16Z128rmk, 0}, + {X86::VMINPBF16Z256rrk, X86::VMINPBF16Z256rmk, 0}, + {X86::VMINPBF16Zrrk, X86::VMINPBF16Zrmk, 0}, {X86::VMINPDZ128rrk, X86::VMINPDZ128rmk, 0}, {X86::VMINPDZ256rrk, X86::VMINPDZ256rmk, 0}, {X86::VMINPDZrrk, X86::VMINPDZrmk, 0}, @@ -6430,6 +6669,9 @@ static const X86FoldTableEntry Table4[] = { {X86::VMPSADBWZ128rrik, X86::VMPSADBWZ128rmik, 0}, {X86::VMPSADBWZ256rrik, X86::VMPSADBWZ256rmik, 0}, {X86::VMPSADBWZrrik, X86::VMPSADBWZrmik, 0}, + {X86::VMULNEPBF16Z128rrk, X86::VMULNEPBF16Z128rmk, 0}, + {X86::VMULNEPBF16Z256rrk, X86::VMULNEPBF16Z256rmk, 0}, + {X86::VMULNEPBF16Zrrk, X86::VMULNEPBF16Zrmk, 0}, {X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0}, {X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0}, {X86::VMULPDZrrk, X86::VMULPDZrmk, 0}, @@ -7005,6 +7247,9 @@ static const X86FoldTableEntry Table4[] = { {X86::VRSQRT28SDZrk, X86::VRSQRT28SDZmk, TB_NO_REVERSE}, {X86::VRSQRT28SSZrk, X86::VRSQRT28SSZmk, TB_NO_REVERSE}, {X86::VRSQRTSHZrrk, X86::VRSQRTSHZrmk, TB_NO_REVERSE}, + {X86::VSCALEFPBF16Z128rrk, X86::VSCALEFPBF16Z128rmk, 0}, + {X86::VSCALEFPBF16Z256rrk, X86::VSCALEFPBF16Z256rmk, 0}, + {X86::VSCALEFPBF16Zrrk, X86::VSCALEFPBF16Zrmk, 0}, {X86::VSCALEFPDZ128rrk, X86::VSCALEFPDZ128rmk, 0}, {X86::VSCALEFPDZ256rrk, X86::VSCALEFPDZ256rmk, 0}, {X86::VSCALEFPDZrrk, X86::VSCALEFPDZrmk, 0}, @@ -7034,6 +7279,9 @@ static const X86FoldTableEntry Table4[] = { {X86::VSQRTSDZr_Intk, X86::VSQRTSDZm_Intk, TB_NO_REVERSE}, {X86::VSQRTSHZr_Intk, X86::VSQRTSHZm_Intk, TB_NO_REVERSE}, {X86::VSQRTSSZr_Intk, X86::VSQRTSSZm_Intk, TB_NO_REVERSE}, + {X86::VSUBNEPBF16Z128rrk, X86::VSUBNEPBF16Z128rmk, 0}, + {X86::VSUBNEPBF16Z256rrk, X86::VSUBNEPBF16Z256rmk, 0}, + {X86::VSUBNEPBF16Zrrk, X86::VSUBNEPBF16Zrmk, 0}, {X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0}, {X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0}, {X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0}, @@ -7264,6 +7512,9 @@ static const X86FoldTableEntry 
BroadcastTable1[] = { {X86::VCVTW2PHZrr, X86::VCVTW2PHZrmb, TB_BCAST_W}, {X86::VEXP2PDZr, X86::VEXP2PDZmb, TB_BCAST_SD}, {X86::VEXP2PSZr, X86::VEXP2PSZmb, TB_BCAST_SS}, + {X86::VFPCLASSPBF16Z128rr, X86::VFPCLASSPBF16Z128rmb, TB_BCAST_SH}, + {X86::VFPCLASSPBF16Z256rr, X86::VFPCLASSPBF16Z256rmb, TB_BCAST_SH}, + {X86::VFPCLASSPBF16Zrr, X86::VFPCLASSPBF16Zrmb, TB_BCAST_SH}, {X86::VFPCLASSPDZ128rr, X86::VFPCLASSPDZ128rmb, TB_BCAST_SD}, {X86::VFPCLASSPDZ256rr, X86::VFPCLASSPDZ256rmb, TB_BCAST_SD}, {X86::VFPCLASSPDZrr, X86::VFPCLASSPDZrmb, TB_BCAST_SD}, @@ -7273,6 +7524,9 @@ static const X86FoldTableEntry BroadcastTable1[] = { {X86::VFPCLASSPSZ128rr, X86::VFPCLASSPSZ128rmb, TB_BCAST_SS}, {X86::VFPCLASSPSZ256rr, X86::VFPCLASSPSZ256rmb, TB_BCAST_SS}, {X86::VFPCLASSPSZrr, X86::VFPCLASSPSZrmb, TB_BCAST_SS}, + {X86::VGETEXPPBF16Z128r, X86::VGETEXPPBF16Z128mb, TB_BCAST_SH}, + {X86::VGETEXPPBF16Z256r, X86::VGETEXPPBF16Z256mb, TB_BCAST_SH}, + {X86::VGETEXPPBF16Zr, X86::VGETEXPPBF16Zmb, TB_BCAST_SH}, {X86::VGETEXPPDZ128r, X86::VGETEXPPDZ128mb, TB_BCAST_SD}, {X86::VGETEXPPDZ256r, X86::VGETEXPPDZ256mb, TB_BCAST_SD}, {X86::VGETEXPPDZr, X86::VGETEXPPDZmb, TB_BCAST_SD}, @@ -7282,6 +7536,9 @@ static const X86FoldTableEntry BroadcastTable1[] = { {X86::VGETEXPPSZ128r, X86::VGETEXPPSZ128mb, TB_BCAST_SS}, {X86::VGETEXPPSZ256r, X86::VGETEXPPSZ256mb, TB_BCAST_SS}, {X86::VGETEXPPSZr, X86::VGETEXPPSZmb, TB_BCAST_SS}, + {X86::VGETMANTPBF16Z128rri, X86::VGETMANTPBF16Z128rmbi, TB_BCAST_SH}, + {X86::VGETMANTPBF16Z256rri, X86::VGETMANTPBF16Z256rmbi, TB_BCAST_SH}, + {X86::VGETMANTPBF16Zrri, X86::VGETMANTPBF16Zrmbi, TB_BCAST_SH}, {X86::VGETMANTPDZ128rri, X86::VGETMANTPDZ128rmbi, TB_BCAST_SD}, {X86::VGETMANTPDZ256rri, X86::VGETMANTPDZ256rmbi, TB_BCAST_SD}, {X86::VGETMANTPDZrri, X86::VGETMANTPDZrmbi, TB_BCAST_SD}, @@ -7366,9 +7623,15 @@ static const X86FoldTableEntry BroadcastTable1[] = { {X86::VRCP14PSZr, X86::VRCP14PSZmb, TB_BCAST_SS}, {X86::VRCP28PDZr, X86::VRCP28PDZmb, TB_BCAST_SD}, 
{X86::VRCP28PSZr, X86::VRCP28PSZmb, TB_BCAST_SS}, + {X86::VRCPPBF16Z128r, X86::VRCPPBF16Z128mb, TB_BCAST_SH}, + {X86::VRCPPBF16Z256r, X86::VRCPPBF16Z256mb, TB_BCAST_SH}, + {X86::VRCPPBF16Zr, X86::VRCPPBF16Zmb, TB_BCAST_SH}, {X86::VRCPPHZ128r, X86::VRCPPHZ128mb, TB_BCAST_SH}, {X86::VRCPPHZ256r, X86::VRCPPHZ256mb, TB_BCAST_SH}, {X86::VRCPPHZr, X86::VRCPPHZmb, TB_BCAST_SH}, + {X86::VREDUCENEPBF16Z128rri, X86::VREDUCENEPBF16Z128rmbi, TB_BCAST_SH}, + {X86::VREDUCENEPBF16Z256rri, X86::VREDUCENEPBF16Z256rmbi, TB_BCAST_SH}, + {X86::VREDUCENEPBF16Zrri, X86::VREDUCENEPBF16Zrmbi, TB_BCAST_SH}, {X86::VREDUCEPDZ128rri, X86::VREDUCEPDZ128rmbi, TB_BCAST_SD}, {X86::VREDUCEPDZ256rri, X86::VREDUCEPDZ256rmbi, TB_BCAST_SD}, {X86::VREDUCEPDZrri, X86::VREDUCEPDZrmbi, TB_BCAST_SD}, @@ -7378,6 +7641,9 @@ static const X86FoldTableEntry BroadcastTable1[] = { {X86::VREDUCEPSZ128rri, X86::VREDUCEPSZ128rmbi, TB_BCAST_SS}, {X86::VREDUCEPSZ256rri, X86::VREDUCEPSZ256rmbi, TB_BCAST_SS}, {X86::VREDUCEPSZrri, X86::VREDUCEPSZrmbi, TB_BCAST_SS}, + {X86::VRNDSCALENEPBF16Z128rri, X86::VRNDSCALENEPBF16Z128rmbi, TB_BCAST_SH}, + {X86::VRNDSCALENEPBF16Z256rri, X86::VRNDSCALENEPBF16Z256rmbi, TB_BCAST_SH}, + {X86::VRNDSCALENEPBF16Zrri, X86::VRNDSCALENEPBF16Zrmbi, TB_BCAST_SH}, {X86::VRNDSCALEPDZ128rri, X86::VRNDSCALEPDZ128rmbi, TB_BCAST_SD}, {X86::VRNDSCALEPDZ256rri, X86::VRNDSCALEPDZ256rmbi, TB_BCAST_SD}, {X86::VRNDSCALEPDZrri, X86::VRNDSCALEPDZrmbi, TB_BCAST_SD}, @@ -7395,9 +7661,15 @@ static const X86FoldTableEntry BroadcastTable1[] = { {X86::VRSQRT14PSZr, X86::VRSQRT14PSZmb, TB_BCAST_SS}, {X86::VRSQRT28PDZr, X86::VRSQRT28PDZmb, TB_BCAST_SD}, {X86::VRSQRT28PSZr, X86::VRSQRT28PSZmb, TB_BCAST_SS}, + {X86::VRSQRTPBF16Z128r, X86::VRSQRTPBF16Z128mb, TB_BCAST_SH}, + {X86::VRSQRTPBF16Z256r, X86::VRSQRTPBF16Z256mb, TB_BCAST_SH}, + {X86::VRSQRTPBF16Zr, X86::VRSQRTPBF16Zmb, TB_BCAST_SH}, {X86::VRSQRTPHZ128r, X86::VRSQRTPHZ128mb, TB_BCAST_SH}, {X86::VRSQRTPHZ256r, X86::VRSQRTPHZ256mb, TB_BCAST_SH}, {X86::VRSQRTPHZr, 
X86::VRSQRTPHZmb, TB_BCAST_SH}, + {X86::VSQRTNEPBF16Z128r, X86::VSQRTNEPBF16Z128mb, TB_BCAST_SH}, + {X86::VSQRTNEPBF16Z256r, X86::VSQRTNEPBF16Z256mb, TB_BCAST_SH}, + {X86::VSQRTNEPBF16Zr, X86::VSQRTNEPBF16Zmb, TB_BCAST_SH}, {X86::VSQRTPDZ128r, X86::VSQRTPDZ128mb, TB_BCAST_SD}, {X86::VSQRTPDZ256r, X86::VSQRTPDZ256mb, TB_BCAST_SD}, {X86::VSQRTPDZr, X86::VSQRTPDZmb, TB_BCAST_SD}, @@ -7410,6 +7682,9 @@ static const X86FoldTableEntry BroadcastTable1[] = { }; static const X86FoldTableEntry BroadcastTable2[] = { + {X86::VADDNEPBF16Z128rr, X86::VADDNEPBF16Z128rmb, TB_BCAST_SH}, + {X86::VADDNEPBF16Z256rr, X86::VADDNEPBF16Z256rmb, TB_BCAST_SH}, + {X86::VADDNEPBF16Zrr, X86::VADDNEPBF16Zrmb, TB_BCAST_SH}, {X86::VADDPDZ128rr, X86::VADDPDZ128rmb, TB_BCAST_SD}, {X86::VADDPDZ256rr, X86::VADDPDZ256rmb, TB_BCAST_SD}, {X86::VADDPDZrr, X86::VADDPDZrmb, TB_BCAST_SD}, @@ -7443,6 +7718,9 @@ static const X86FoldTableEntry BroadcastTable2[] = { {X86::VBLENDMPSZ128rr, X86::VBLENDMPSZ128rmb, TB_BCAST_SS}, {X86::VBLENDMPSZ256rr, X86::VBLENDMPSZ256rmb, TB_BCAST_SS}, {X86::VBLENDMPSZrr, X86::VBLENDMPSZrmb, TB_BCAST_SS}, + {X86::VCMPPBF16Z128rri, X86::VCMPPBF16Z128rmbi, TB_BCAST_SH}, + {X86::VCMPPBF16Z256rri, X86::VCMPPBF16Z256rmbi, TB_BCAST_SH}, + {X86::VCMPPBF16Zrri, X86::VCMPPBF16Zrmbi, TB_BCAST_SH}, {X86::VCMPPDZ128rri, X86::VCMPPDZ128rmbi, TB_BCAST_SD}, {X86::VCMPPDZ256rri, X86::VCMPPDZ256rmbi, TB_BCAST_SD}, {X86::VCMPPDZrri, X86::VCMPPDZrmbi, TB_BCAST_SD}, @@ -7677,6 +7955,9 @@ static const X86FoldTableEntry BroadcastTable2[] = { {X86::VCVTW2PHZ128rrkz, X86::VCVTW2PHZ128rmbkz, TB_BCAST_W}, {X86::VCVTW2PHZ256rrkz, X86::VCVTW2PHZ256rmbkz, TB_BCAST_W}, {X86::VCVTW2PHZrrkz, X86::VCVTW2PHZrmbkz, TB_BCAST_W}, + {X86::VDIVNEPBF16Z128rr, X86::VDIVNEPBF16Z128rmb, TB_BCAST_SH}, + {X86::VDIVNEPBF16Z256rr, X86::VDIVNEPBF16Z256rmb, TB_BCAST_SH}, + {X86::VDIVNEPBF16Zrr, X86::VDIVNEPBF16Zrmb, TB_BCAST_SH}, {X86::VDIVPDZ128rr, X86::VDIVPDZ128rmb, TB_BCAST_SD}, {X86::VDIVPDZ256rr, X86::VDIVPDZ256rmb, 
TB_BCAST_SD}, {X86::VDIVPDZrr, X86::VDIVPDZrmb, TB_BCAST_SD}, @@ -7694,6 +7975,9 @@ static const X86FoldTableEntry BroadcastTable2[] = { {X86::VFMULCPHZ128rr, X86::VFMULCPHZ128rmb, TB_BCAST_SS}, {X86::VFMULCPHZ256rr, X86::VFMULCPHZ256rmb, TB_BCAST_SS}, {X86::VFMULCPHZrr, X86::VFMULCPHZrmb, TB_BCAST_SS}, + {X86::VFPCLASSPBF16Z128rrk, X86::VFPCLASSPBF16Z128rmbk, TB_BCAST_SH}, + {X86::VFPCLASSPBF16Z256rrk, X86::VFPCLASSPBF16Z256rmbk, TB_BCAST_SH}, + {X86::VFPCLASSPBF16Zrrk, X86::VFPCLASSPBF16Zrmbk, TB_BCAST_SH}, {X86::VFPCLASSPDZ128rrk, X86::VFPCLASSPDZ128rmbk, TB_BCAST_SD}, {X86::VFPCLASSPDZ256rrk, X86::VFPCLASSPDZ256rmbk, TB_BCAST_SD}, {X86::VFPCLASSPDZrrk, X86::VFPCLASSPDZrmbk, TB_BCAST_SD}, @@ -7703,6 +7987,9 @@ static const X86FoldTableEntry BroadcastTable2[] = { {X86::VFPCLASSPSZ128rrk, X86::VFPCLASSPSZ128rmbk, TB_BCAST_SS}, {X86::VFPCLASSPSZ256rrk, X86::VFPCLASSPSZ256rmbk, TB_BCAST_SS}, {X86::VFPCLASSPSZrrk, X86::VFPCLASSPSZrmbk, TB_BCAST_SS}, + {X86::VGETEXPPBF16Z128rkz, X86::VGETEXPPBF16Z128mbkz, TB_BCAST_SH}, + {X86::VGETEXPPBF16Z256rkz, X86::VGETEXPPBF16Z256mbkz, TB_BCAST_SH}, + {X86::VGETEXPPBF16Zrkz, X86::VGETEXPPBF16Zmbkz, TB_BCAST_SH}, {X86::VGETEXPPDZ128rkz, X86::VGETEXPPDZ128mbkz, TB_BCAST_SD}, {X86::VGETEXPPDZ256rkz, X86::VGETEXPPDZ256mbkz, TB_BCAST_SD}, {X86::VGETEXPPDZrkz, X86::VGETEXPPDZmbkz, TB_BCAST_SD}, @@ -7712,6 +7999,9 @@ static const X86FoldTableEntry BroadcastTable2[] = { {X86::VGETEXPPSZ128rkz, X86::VGETEXPPSZ128mbkz, TB_BCAST_SS}, {X86::VGETEXPPSZ256rkz, X86::VGETEXPPSZ256mbkz, TB_BCAST_SS}, {X86::VGETEXPPSZrkz, X86::VGETEXPPSZmbkz, TB_BCAST_SS}, + {X86::VGETMANTPBF16Z128rrikz, X86::VGETMANTPBF16Z128rmbikz, TB_BCAST_SH}, + {X86::VGETMANTPBF16Z256rrikz, X86::VGETMANTPBF16Z256rmbikz, TB_BCAST_SH}, + {X86::VGETMANTPBF16Zrrikz, X86::VGETMANTPBF16Zrmbikz, TB_BCAST_SH}, {X86::VGETMANTPDZ128rrikz, X86::VGETMANTPDZ128rmbikz, TB_BCAST_SD}, {X86::VGETMANTPDZ256rrikz, X86::VGETMANTPDZ256rmbikz, TB_BCAST_SD}, {X86::VGETMANTPDZrrikz, 
X86::VGETMANTPDZrmbikz, TB_BCAST_SD}, @@ -7736,6 +8026,9 @@ static const X86FoldTableEntry BroadcastTable2[] = { {X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rmb, TB_BCAST_SS}, {X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rmb, TB_BCAST_SS}, {X86::VMAXCPSZrr, X86::VMAXCPSZrmb, TB_BCAST_SS}, + {X86::VMAXPBF16Z128rr, X86::VMAXPBF16Z128rmb, TB_BCAST_SH}, + {X86::VMAXPBF16Z256rr, X86::VMAXPBF16Z256rmb, TB_BCAST_SH}, + {X86::VMAXPBF16Zrr, X86::VMAXPBF16Zrmb, TB_BCAST_SH}, {X86::VMAXPDZ128rr, X86::VMAXPDZ128rmb, TB_BCAST_SD}, {X86::VMAXPDZ256rr, X86::VMAXPDZ256rmb, TB_BCAST_SD}, {X86::VMAXPDZrr, X86::VMAXPDZrmb, TB_BCAST_SD}, @@ -7766,6 +8059,9 @@ static const X86FoldTableEntry BroadcastTable2[] = { {X86::VMINMAXPSZ128rri, X86::VMINMAXPSZ128rmbi, TB_BCAST_SS}, {X86::VMINMAXPSZ256rri, X86::VMINMAXPSZ256rmbi, TB_BCAST_SS}, {X86::VMINMAXPSZrri, X86::VMINMAXPSZrmbi, TB_BCAST_SS}, + {X86::VMINPBF16Z128rr, X86::VMINPBF16Z128rmb, TB_BCAST_SH}, + {X86::VMINPBF16Z256rr, X86::VMINPBF16Z256rmb, TB_BCAST_SH}, + {X86::VMINPBF16Zrr, X86::VMINPBF16Zrmb, TB_BCAST_SH}, {X86::VMINPDZ128rr, X86::VMINPDZ128rmb, TB_BCAST_SD}, {X86::VMINPDZ256rr, X86::VMINPDZ256rmb, TB_BCAST_SD}, {X86::VMINPDZrr, X86::VMINPDZrmb, TB_BCAST_SD}, @@ -7775,6 +8071,9 @@ static const X86FoldTableEntry BroadcastTable2[] = { {X86::VMINPSZ128rr, X86::VMINPSZ128rmb, TB_BCAST_SS}, {X86::VMINPSZ256rr, X86::VMINPSZ256rmb, TB_BCAST_SS}, {X86::VMINPSZrr, X86::VMINPSZrmb, TB_BCAST_SS}, + {X86::VMULNEPBF16Z128rr, X86::VMULNEPBF16Z128rmb, TB_BCAST_SH}, + {X86::VMULNEPBF16Z256rr, X86::VMULNEPBF16Z256rmb, TB_BCAST_SH}, + {X86::VMULNEPBF16Zrr, X86::VMULNEPBF16Zrmb, TB_BCAST_SH}, {X86::VMULPDZ128rr, X86::VMULPDZ128rmb, TB_BCAST_SD}, {X86::VMULPDZ256rr, X86::VMULPDZ256rmb, TB_BCAST_SD}, {X86::VMULPDZrr, X86::VMULPDZrmb, TB_BCAST_SD}, @@ -8068,9 +8367,15 @@ static const X86FoldTableEntry BroadcastTable2[] = { {X86::VRCP14PSZrkz, X86::VRCP14PSZmbkz, TB_BCAST_SS}, {X86::VRCP28PDZrkz, X86::VRCP28PDZmbkz, TB_BCAST_SD}, {X86::VRCP28PSZrkz, 
X86::VRCP28PSZmbkz, TB_BCAST_SS}, + {X86::VRCPPBF16Z128rkz, X86::VRCPPBF16Z128mbkz, TB_BCAST_SH}, + {X86::VRCPPBF16Z256rkz, X86::VRCPPBF16Z256mbkz, TB_BCAST_SH}, + {X86::VRCPPBF16Zrkz, X86::VRCPPBF16Zmbkz, TB_BCAST_SH}, {X86::VRCPPHZ128rkz, X86::VRCPPHZ128mbkz, TB_BCAST_SH}, {X86::VRCPPHZ256rkz, X86::VRCPPHZ256mbkz, TB_BCAST_SH}, {X86::VRCPPHZrkz, X86::VRCPPHZmbkz, TB_BCAST_SH}, + {X86::VREDUCENEPBF16Z128rrikz, X86::VREDUCENEPBF16Z128rmbikz, TB_BCAST_SH}, + {X86::VREDUCENEPBF16Z256rrikz, X86::VREDUCENEPBF16Z256rmbikz, TB_BCAST_SH}, + {X86::VREDUCENEPBF16Zrrikz, X86::VREDUCENEPBF16Zrmbikz, TB_BCAST_SH}, {X86::VREDUCEPDZ128rrikz, X86::VREDUCEPDZ128rmbikz, TB_BCAST_SD}, {X86::VREDUCEPDZ256rrikz, X86::VREDUCEPDZ256rmbikz, TB_BCAST_SD}, {X86::VREDUCEPDZrrikz, X86::VREDUCEPDZrmbikz, TB_BCAST_SD}, @@ -8080,6 +8385,9 @@ static const X86FoldTableEntry BroadcastTable2[] = { {X86::VREDUCEPSZ128rrikz, X86::VREDUCEPSZ128rmbikz, TB_BCAST_SS}, {X86::VREDUCEPSZ256rrikz, X86::VREDUCEPSZ256rmbikz, TB_BCAST_SS}, {X86::VREDUCEPSZrrikz, X86::VREDUCEPSZrmbikz, TB_BCAST_SS}, + {X86::VRNDSCALENEPBF16Z128rrikz, X86::VRNDSCALENEPBF16Z128rmbikz, TB_BCAST_SH}, + {X86::VRNDSCALENEPBF16Z256rrikz, X86::VRNDSCALENEPBF16Z256rmbikz, TB_BCAST_SH}, + {X86::VRNDSCALENEPBF16Zrrikz, X86::VRNDSCALENEPBF16Zrmbikz, TB_BCAST_SH}, {X86::VRNDSCALEPDZ128rrikz, X86::VRNDSCALEPDZ128rmbikz, TB_BCAST_SD}, {X86::VRNDSCALEPDZ256rrikz, X86::VRNDSCALEPDZ256rmbikz, TB_BCAST_SD}, {X86::VRNDSCALEPDZrrikz, X86::VRNDSCALEPDZrmbikz, TB_BCAST_SD}, @@ -8097,9 +8405,15 @@ static const X86FoldTableEntry BroadcastTable2[] = { {X86::VRSQRT14PSZrkz, X86::VRSQRT14PSZmbkz, TB_BCAST_SS}, {X86::VRSQRT28PDZrkz, X86::VRSQRT28PDZmbkz, TB_BCAST_SD}, {X86::VRSQRT28PSZrkz, X86::VRSQRT28PSZmbkz, TB_BCAST_SS}, + {X86::VRSQRTPBF16Z128rkz, X86::VRSQRTPBF16Z128mbkz, TB_BCAST_SH}, + {X86::VRSQRTPBF16Z256rkz, X86::VRSQRTPBF16Z256mbkz, TB_BCAST_SH}, + {X86::VRSQRTPBF16Zrkz, X86::VRSQRTPBF16Zmbkz, TB_BCAST_SH}, {X86::VRSQRTPHZ128rkz, 
X86::VRSQRTPHZ128mbkz, TB_BCAST_SH}, {X86::VRSQRTPHZ256rkz, X86::VRSQRTPHZ256mbkz, TB_BCAST_SH}, {X86::VRSQRTPHZrkz, X86::VRSQRTPHZmbkz, TB_BCAST_SH}, + {X86::VSCALEFPBF16Z128rr, X86::VSCALEFPBF16Z128rmb, TB_BCAST_SH}, + {X86::VSCALEFPBF16Z256rr, X86::VSCALEFPBF16Z256rmb, TB_BCAST_SH}, + {X86::VSCALEFPBF16Zrr, X86::VSCALEFPBF16Zrmb, TB_BCAST_SH}, {X86::VSCALEFPDZ128rr, X86::VSCALEFPDZ128rmb, TB_BCAST_SD}, {X86::VSCALEFPDZ256rr, X86::VSCALEFPDZ256rmb, TB_BCAST_SD}, {X86::VSCALEFPDZrr, X86::VSCALEFPDZrmb, TB_BCAST_SD}, @@ -8123,6 +8437,9 @@ static const X86FoldTableEntry BroadcastTable2[] = { {X86::VSHUFPSZ128rri, X86::VSHUFPSZ128rmbi, TB_BCAST_SS}, {X86::VSHUFPSZ256rri, X86::VSHUFPSZ256rmbi, TB_BCAST_SS}, {X86::VSHUFPSZrri, X86::VSHUFPSZrmbi, TB_BCAST_SS}, + {X86::VSQRTNEPBF16Z128rkz, X86::VSQRTNEPBF16Z128mbkz, TB_BCAST_SH}, + {X86::VSQRTNEPBF16Z256rkz, X86::VSQRTNEPBF16Z256mbkz, TB_BCAST_SH}, + {X86::VSQRTNEPBF16Zrkz, X86::VSQRTNEPBF16Zmbkz, TB_BCAST_SH}, {X86::VSQRTPDZ128rkz, X86::VSQRTPDZ128mbkz, TB_BCAST_SD}, {X86::VSQRTPDZ256rkz, X86::VSQRTPDZ256mbkz, TB_BCAST_SD}, {X86::VSQRTPDZrkz, X86::VSQRTPDZmbkz, TB_BCAST_SD}, @@ -8132,6 +8449,9 @@ static const X86FoldTableEntry BroadcastTable2[] = { {X86::VSQRTPSZ128rkz, X86::VSQRTPSZ128mbkz, TB_BCAST_SS}, {X86::VSQRTPSZ256rkz, X86::VSQRTPSZ256mbkz, TB_BCAST_SS}, {X86::VSQRTPSZrkz, X86::VSQRTPSZmbkz, TB_BCAST_SS}, + {X86::VSUBNEPBF16Z128rr, X86::VSUBNEPBF16Z128rmb, TB_BCAST_SH}, + {X86::VSUBNEPBF16Z256rr, X86::VSUBNEPBF16Z256rmb, TB_BCAST_SH}, + {X86::VSUBNEPBF16Zrr, X86::VSUBNEPBF16Zrmb, TB_BCAST_SH}, {X86::VSUBPDZ128rr, X86::VSUBPDZ128rmb, TB_BCAST_SD}, {X86::VSUBPDZ256rr, X86::VSUBPDZ256rmb, TB_BCAST_SD}, {X86::VSUBPDZrr, X86::VSUBPDZrmb, TB_BCAST_SD}, @@ -8162,6 +8482,9 @@ static const X86FoldTableEntry BroadcastTable2[] = { }; static const X86FoldTableEntry BroadcastTable3[] = { + {X86::VADDNEPBF16Z128rrkz, X86::VADDNEPBF16Z128rmbkz, TB_BCAST_SH}, + {X86::VADDNEPBF16Z256rrkz, X86::VADDNEPBF16Z256rmbkz, TB_BCAST_SH}, 
+ {X86::VADDNEPBF16Zrrkz, X86::VADDNEPBF16Zrmbkz, TB_BCAST_SH}, {X86::VADDPDZ128rrkz, X86::VADDPDZ128rmbkz, TB_BCAST_SD}, {X86::VADDPDZ256rrkz, X86::VADDPDZ256rmbkz, TB_BCAST_SD}, {X86::VADDPDZrrkz, X86::VADDPDZrmbkz, TB_BCAST_SD}, @@ -8195,6 +8518,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VBLENDMPSZ128rrk, X86::VBLENDMPSZ128rmbk, TB_BCAST_SS}, {X86::VBLENDMPSZ256rrk, X86::VBLENDMPSZ256rmbk, TB_BCAST_SS}, {X86::VBLENDMPSZrrk, X86::VBLENDMPSZrmbk, TB_BCAST_SS}, + {X86::VCMPPBF16Z128rrik, X86::VCMPPBF16Z128rmbik, TB_BCAST_SH}, + {X86::VCMPPBF16Z256rrik, X86::VCMPPBF16Z256rmbik, TB_BCAST_SH}, + {X86::VCMPPBF16Zrrik, X86::VCMPPBF16Zrmbik, TB_BCAST_SH}, {X86::VCMPPDZ128rrik, X86::VCMPPDZ128rmbik, TB_BCAST_SD}, {X86::VCMPPDZ256rrik, X86::VCMPPDZ256rmbik, TB_BCAST_SD}, {X86::VCMPPDZrrik, X86::VCMPPDZrmbik, TB_BCAST_SD}, @@ -8429,6 +8755,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VCVTW2PHZ128rrk, X86::VCVTW2PHZ128rmbk, TB_BCAST_W}, {X86::VCVTW2PHZ256rrk, X86::VCVTW2PHZ256rmbk, TB_BCAST_W}, {X86::VCVTW2PHZrrk, X86::VCVTW2PHZrmbk, TB_BCAST_W}, + {X86::VDIVNEPBF16Z128rrkz, X86::VDIVNEPBF16Z128rmbkz, TB_BCAST_SH}, + {X86::VDIVNEPBF16Z256rrkz, X86::VDIVNEPBF16Z256rmbkz, TB_BCAST_SH}, + {X86::VDIVNEPBF16Zrrkz, X86::VDIVNEPBF16Zrmbkz, TB_BCAST_SH}, {X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmbkz, TB_BCAST_SD}, {X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmbkz, TB_BCAST_SD}, {X86::VDIVPDZrrkz, X86::VDIVPDZrmbkz, TB_BCAST_SD}, @@ -8458,6 +8787,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VFIXUPIMMPSZ128rri, X86::VFIXUPIMMPSZ128rmbi, TB_BCAST_SS}, {X86::VFIXUPIMMPSZ256rri, X86::VFIXUPIMMPSZ256rmbi, TB_BCAST_SS}, {X86::VFIXUPIMMPSZrri, X86::VFIXUPIMMPSZrmbi, TB_BCAST_SS}, + {X86::VFMADD132NEPBF16Z128r, X86::VFMADD132NEPBF16Z128mb, TB_BCAST_SH}, + {X86::VFMADD132NEPBF16Z256r, X86::VFMADD132NEPBF16Z256mb, TB_BCAST_SH}, + {X86::VFMADD132NEPBF16Zr, X86::VFMADD132NEPBF16Zmb, TB_BCAST_SH}, {X86::VFMADD132PDZ128r, X86::VFMADD132PDZ128mb, 
TB_BCAST_SD}, {X86::VFMADD132PDZ256r, X86::VFMADD132PDZ256mb, TB_BCAST_SD}, {X86::VFMADD132PDZr, X86::VFMADD132PDZmb, TB_BCAST_SD}, @@ -8467,6 +8799,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VFMADD132PSZ128r, X86::VFMADD132PSZ128mb, TB_BCAST_SS}, {X86::VFMADD132PSZ256r, X86::VFMADD132PSZ256mb, TB_BCAST_SS}, {X86::VFMADD132PSZr, X86::VFMADD132PSZmb, TB_BCAST_SS}, + {X86::VFMADD213NEPBF16Z128r, X86::VFMADD213NEPBF16Z128mb, TB_BCAST_SH}, + {X86::VFMADD213NEPBF16Z256r, X86::VFMADD213NEPBF16Z256mb, TB_BCAST_SH}, + {X86::VFMADD213NEPBF16Zr, X86::VFMADD213NEPBF16Zmb, TB_BCAST_SH}, {X86::VFMADD213PDZ128r, X86::VFMADD213PDZ128mb, TB_BCAST_SD}, {X86::VFMADD213PDZ256r, X86::VFMADD213PDZ256mb, TB_BCAST_SD}, {X86::VFMADD213PDZr, X86::VFMADD213PDZmb, TB_BCAST_SD}, @@ -8476,6 +8811,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VFMADD213PSZ128r, X86::VFMADD213PSZ128mb, TB_BCAST_SS}, {X86::VFMADD213PSZ256r, X86::VFMADD213PSZ256mb, TB_BCAST_SS}, {X86::VFMADD213PSZr, X86::VFMADD213PSZmb, TB_BCAST_SS}, + {X86::VFMADD231NEPBF16Z128r, X86::VFMADD231NEPBF16Z128mb, TB_BCAST_SH}, + {X86::VFMADD231NEPBF16Z256r, X86::VFMADD231NEPBF16Z256mb, TB_BCAST_SH}, + {X86::VFMADD231NEPBF16Zr, X86::VFMADD231NEPBF16Zmb, TB_BCAST_SH}, {X86::VFMADD231PDZ128r, X86::VFMADD231PDZ128mb, TB_BCAST_SD}, {X86::VFMADD231PDZ256r, X86::VFMADD231PDZ256mb, TB_BCAST_SD}, {X86::VFMADD231PDZr, X86::VFMADD231PDZmb, TB_BCAST_SD}, @@ -8515,6 +8853,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VFMADDSUB231PSZ128r, X86::VFMADDSUB231PSZ128mb, TB_BCAST_SS}, {X86::VFMADDSUB231PSZ256r, X86::VFMADDSUB231PSZ256mb, TB_BCAST_SS}, {X86::VFMADDSUB231PSZr, X86::VFMADDSUB231PSZmb, TB_BCAST_SS}, + {X86::VFMSUB132NEPBF16Z128r, X86::VFMSUB132NEPBF16Z128mb, TB_BCAST_SH}, + {X86::VFMSUB132NEPBF16Z256r, X86::VFMSUB132NEPBF16Z256mb, TB_BCAST_SH}, + {X86::VFMSUB132NEPBF16Zr, X86::VFMSUB132NEPBF16Zmb, TB_BCAST_SH}, {X86::VFMSUB132PDZ128r, X86::VFMSUB132PDZ128mb, TB_BCAST_SD}, 
{X86::VFMSUB132PDZ256r, X86::VFMSUB132PDZ256mb, TB_BCAST_SD}, {X86::VFMSUB132PDZr, X86::VFMSUB132PDZmb, TB_BCAST_SD}, @@ -8524,6 +8865,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VFMSUB132PSZ128r, X86::VFMSUB132PSZ128mb, TB_BCAST_SS}, {X86::VFMSUB132PSZ256r, X86::VFMSUB132PSZ256mb, TB_BCAST_SS}, {X86::VFMSUB132PSZr, X86::VFMSUB132PSZmb, TB_BCAST_SS}, + {X86::VFMSUB213NEPBF16Z128r, X86::VFMSUB213NEPBF16Z128mb, TB_BCAST_SH}, + {X86::VFMSUB213NEPBF16Z256r, X86::VFMSUB213NEPBF16Z256mb, TB_BCAST_SH}, + {X86::VFMSUB213NEPBF16Zr, X86::VFMSUB213NEPBF16Zmb, TB_BCAST_SH}, {X86::VFMSUB213PDZ128r, X86::VFMSUB213PDZ128mb, TB_BCAST_SD}, {X86::VFMSUB213PDZ256r, X86::VFMSUB213PDZ256mb, TB_BCAST_SD}, {X86::VFMSUB213PDZr, X86::VFMSUB213PDZmb, TB_BCAST_SD}, @@ -8533,6 +8877,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VFMSUB213PSZ128r, X86::VFMSUB213PSZ128mb, TB_BCAST_SS}, {X86::VFMSUB213PSZ256r, X86::VFMSUB213PSZ256mb, TB_BCAST_SS}, {X86::VFMSUB213PSZr, X86::VFMSUB213PSZmb, TB_BCAST_SS}, + {X86::VFMSUB231NEPBF16Z128r, X86::VFMSUB231NEPBF16Z128mb, TB_BCAST_SH}, + {X86::VFMSUB231NEPBF16Z256r, X86::VFMSUB231NEPBF16Z256mb, TB_BCAST_SH}, + {X86::VFMSUB231NEPBF16Zr, X86::VFMSUB231NEPBF16Zmb, TB_BCAST_SH}, {X86::VFMSUB231PDZ128r, X86::VFMSUB231PDZ128mb, TB_BCAST_SD}, {X86::VFMSUB231PDZ256r, X86::VFMSUB231PDZ256mb, TB_BCAST_SD}, {X86::VFMSUB231PDZr, X86::VFMSUB231PDZmb, TB_BCAST_SD}, @@ -8572,6 +8919,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VFMULCPHZ128rrkz, X86::VFMULCPHZ128rmbkz, TB_BCAST_SS}, {X86::VFMULCPHZ256rrkz, X86::VFMULCPHZ256rmbkz, TB_BCAST_SS}, {X86::VFMULCPHZrrkz, X86::VFMULCPHZrmbkz, TB_BCAST_SS}, + {X86::VFNMADD132NEPBF16Z128r, X86::VFNMADD132NEPBF16Z128mb, TB_BCAST_SH}, + {X86::VFNMADD132NEPBF16Z256r, X86::VFNMADD132NEPBF16Z256mb, TB_BCAST_SH}, + {X86::VFNMADD132NEPBF16Zr, X86::VFNMADD132NEPBF16Zmb, TB_BCAST_SH}, {X86::VFNMADD132PDZ128r, X86::VFNMADD132PDZ128mb, TB_BCAST_SD}, {X86::VFNMADD132PDZ256r, 
X86::VFNMADD132PDZ256mb, TB_BCAST_SD}, {X86::VFNMADD132PDZr, X86::VFNMADD132PDZmb, TB_BCAST_SD}, @@ -8581,6 +8931,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VFNMADD132PSZ128r, X86::VFNMADD132PSZ128mb, TB_BCAST_SS}, {X86::VFNMADD132PSZ256r, X86::VFNMADD132PSZ256mb, TB_BCAST_SS}, {X86::VFNMADD132PSZr, X86::VFNMADD132PSZmb, TB_BCAST_SS}, + {X86::VFNMADD213NEPBF16Z128r, X86::VFNMADD213NEPBF16Z128mb, TB_BCAST_SH}, + {X86::VFNMADD213NEPBF16Z256r, X86::VFNMADD213NEPBF16Z256mb, TB_BCAST_SH}, + {X86::VFNMADD213NEPBF16Zr, X86::VFNMADD213NEPBF16Zmb, TB_BCAST_SH}, {X86::VFNMADD213PDZ128r, X86::VFNMADD213PDZ128mb, TB_BCAST_SD}, {X86::VFNMADD213PDZ256r, X86::VFNMADD213PDZ256mb, TB_BCAST_SD}, {X86::VFNMADD213PDZr, X86::VFNMADD213PDZmb, TB_BCAST_SD}, @@ -8590,6 +8943,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VFNMADD213PSZ128r, X86::VFNMADD213PSZ128mb, TB_BCAST_SS}, {X86::VFNMADD213PSZ256r, X86::VFNMADD213PSZ256mb, TB_BCAST_SS}, {X86::VFNMADD213PSZr, X86::VFNMADD213PSZmb, TB_BCAST_SS}, + {X86::VFNMADD231NEPBF16Z128r, X86::VFNMADD231NEPBF16Z128mb, TB_BCAST_SH}, + {X86::VFNMADD231NEPBF16Z256r, X86::VFNMADD231NEPBF16Z256mb, TB_BCAST_SH}, + {X86::VFNMADD231NEPBF16Zr, X86::VFNMADD231NEPBF16Zmb, TB_BCAST_SH}, {X86::VFNMADD231PDZ128r, X86::VFNMADD231PDZ128mb, TB_BCAST_SD}, {X86::VFNMADD231PDZ256r, X86::VFNMADD231PDZ256mb, TB_BCAST_SD}, {X86::VFNMADD231PDZr, X86::VFNMADD231PDZmb, TB_BCAST_SD}, @@ -8599,6 +8955,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VFNMADD231PSZ128r, X86::VFNMADD231PSZ128mb, TB_BCAST_SS}, {X86::VFNMADD231PSZ256r, X86::VFNMADD231PSZ256mb, TB_BCAST_SS}, {X86::VFNMADD231PSZr, X86::VFNMADD231PSZmb, TB_BCAST_SS}, + {X86::VFNMSUB132NEPBF16Z128r, X86::VFNMSUB132NEPBF16Z128mb, TB_BCAST_SH}, + {X86::VFNMSUB132NEPBF16Z256r, X86::VFNMSUB132NEPBF16Z256mb, TB_BCAST_SH}, + {X86::VFNMSUB132NEPBF16Zr, X86::VFNMSUB132NEPBF16Zmb, TB_BCAST_SH}, {X86::VFNMSUB132PDZ128r, X86::VFNMSUB132PDZ128mb, TB_BCAST_SD}, 
{X86::VFNMSUB132PDZ256r, X86::VFNMSUB132PDZ256mb, TB_BCAST_SD}, {X86::VFNMSUB132PDZr, X86::VFNMSUB132PDZmb, TB_BCAST_SD}, @@ -8608,6 +8967,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VFNMSUB132PSZ128r, X86::VFNMSUB132PSZ128mb, TB_BCAST_SS}, {X86::VFNMSUB132PSZ256r, X86::VFNMSUB132PSZ256mb, TB_BCAST_SS}, {X86::VFNMSUB132PSZr, X86::VFNMSUB132PSZmb, TB_BCAST_SS}, + {X86::VFNMSUB213NEPBF16Z128r, X86::VFNMSUB213NEPBF16Z128mb, TB_BCAST_SH}, + {X86::VFNMSUB213NEPBF16Z256r, X86::VFNMSUB213NEPBF16Z256mb, TB_BCAST_SH}, + {X86::VFNMSUB213NEPBF16Zr, X86::VFNMSUB213NEPBF16Zmb, TB_BCAST_SH}, {X86::VFNMSUB213PDZ128r, X86::VFNMSUB213PDZ128mb, TB_BCAST_SD}, {X86::VFNMSUB213PDZ256r, X86::VFNMSUB213PDZ256mb, TB_BCAST_SD}, {X86::VFNMSUB213PDZr, X86::VFNMSUB213PDZmb, TB_BCAST_SD}, @@ -8617,6 +8979,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VFNMSUB213PSZ128r, X86::VFNMSUB213PSZ128mb, TB_BCAST_SS}, {X86::VFNMSUB213PSZ256r, X86::VFNMSUB213PSZ256mb, TB_BCAST_SS}, {X86::VFNMSUB213PSZr, X86::VFNMSUB213PSZmb, TB_BCAST_SS}, + {X86::VFNMSUB231NEPBF16Z128r, X86::VFNMSUB231NEPBF16Z128mb, TB_BCAST_SH}, + {X86::VFNMSUB231NEPBF16Z256r, X86::VFNMSUB231NEPBF16Z256mb, TB_BCAST_SH}, + {X86::VFNMSUB231NEPBF16Zr, X86::VFNMSUB231NEPBF16Zmb, TB_BCAST_SH}, {X86::VFNMSUB231PDZ128r, X86::VFNMSUB231PDZ128mb, TB_BCAST_SD}, {X86::VFNMSUB231PDZ256r, X86::VFNMSUB231PDZ256mb, TB_BCAST_SD}, {X86::VFNMSUB231PDZr, X86::VFNMSUB231PDZmb, TB_BCAST_SD}, @@ -8626,6 +8991,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VFNMSUB231PSZ128r, X86::VFNMSUB231PSZ128mb, TB_BCAST_SS}, {X86::VFNMSUB231PSZ256r, X86::VFNMSUB231PSZ256mb, TB_BCAST_SS}, {X86::VFNMSUB231PSZr, X86::VFNMSUB231PSZmb, TB_BCAST_SS}, + {X86::VGETEXPPBF16Z128rk, X86::VGETEXPPBF16Z128mbk, TB_BCAST_SH}, + {X86::VGETEXPPBF16Z256rk, X86::VGETEXPPBF16Z256mbk, TB_BCAST_SH}, + {X86::VGETEXPPBF16Zrk, X86::VGETEXPPBF16Zmbk, TB_BCAST_SH}, {X86::VGETEXPPDZ128rk, X86::VGETEXPPDZ128mbk, TB_BCAST_SD}, 
{X86::VGETEXPPDZ256rk, X86::VGETEXPPDZ256mbk, TB_BCAST_SD}, {X86::VGETEXPPDZrk, X86::VGETEXPPDZmbk, TB_BCAST_SD}, @@ -8635,6 +9003,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VGETEXPPSZ128rk, X86::VGETEXPPSZ128mbk, TB_BCAST_SS}, {X86::VGETEXPPSZ256rk, X86::VGETEXPPSZ256mbk, TB_BCAST_SS}, {X86::VGETEXPPSZrk, X86::VGETEXPPSZmbk, TB_BCAST_SS}, + {X86::VGETMANTPBF16Z128rrik, X86::VGETMANTPBF16Z128rmbik, TB_BCAST_SH}, + {X86::VGETMANTPBF16Z256rrik, X86::VGETMANTPBF16Z256rmbik, TB_BCAST_SH}, + {X86::VGETMANTPBF16Zrrik, X86::VGETMANTPBF16Zrmbik, TB_BCAST_SH}, {X86::VGETMANTPDZ128rrik, X86::VGETMANTPDZ128rmbik, TB_BCAST_SD}, {X86::VGETMANTPDZ256rrik, X86::VGETMANTPDZ256rmbik, TB_BCAST_SD}, {X86::VGETMANTPDZrrik, X86::VGETMANTPDZrmbik, TB_BCAST_SD}, @@ -8659,6 +9030,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VMAXCPSZ128rrkz, X86::VMAXCPSZ128rmbkz, TB_BCAST_SS}, {X86::VMAXCPSZ256rrkz, X86::VMAXCPSZ256rmbkz, TB_BCAST_SS}, {X86::VMAXCPSZrrkz, X86::VMAXCPSZrmbkz, TB_BCAST_SS}, + {X86::VMAXPBF16Z128rrkz, X86::VMAXPBF16Z128rmbkz, TB_BCAST_SH}, + {X86::VMAXPBF16Z256rrkz, X86::VMAXPBF16Z256rmbkz, TB_BCAST_SH}, + {X86::VMAXPBF16Zrrkz, X86::VMAXPBF16Zrmbkz, TB_BCAST_SH}, {X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmbkz, TB_BCAST_SD}, {X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmbkz, TB_BCAST_SD}, {X86::VMAXPDZrrkz, X86::VMAXPDZrmbkz, TB_BCAST_SD}, @@ -8689,6 +9063,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VMINMAXPSZ128rrikz, X86::VMINMAXPSZ128rmbikz, TB_BCAST_SS}, {X86::VMINMAXPSZ256rrikz, X86::VMINMAXPSZ256rmbikz, TB_BCAST_SS}, {X86::VMINMAXPSZrrikz, X86::VMINMAXPSZrmbikz, TB_BCAST_SS}, + {X86::VMINPBF16Z128rrkz, X86::VMINPBF16Z128rmbkz, TB_BCAST_SH}, + {X86::VMINPBF16Z256rrkz, X86::VMINPBF16Z256rmbkz, TB_BCAST_SH}, + {X86::VMINPBF16Zrrkz, X86::VMINPBF16Zrmbkz, TB_BCAST_SH}, {X86::VMINPDZ128rrkz, X86::VMINPDZ128rmbkz, TB_BCAST_SD}, {X86::VMINPDZ256rrkz, X86::VMINPDZ256rmbkz, TB_BCAST_SD}, {X86::VMINPDZrrkz, X86::VMINPDZrmbkz, 
TB_BCAST_SD}, @@ -8698,6 +9075,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VMINPSZ128rrkz, X86::VMINPSZ128rmbkz, TB_BCAST_SS}, {X86::VMINPSZ256rrkz, X86::VMINPSZ256rmbkz, TB_BCAST_SS}, {X86::VMINPSZrrkz, X86::VMINPSZrmbkz, TB_BCAST_SS}, + {X86::VMULNEPBF16Z128rrkz, X86::VMULNEPBF16Z128rmbkz, TB_BCAST_SH}, + {X86::VMULNEPBF16Z256rrkz, X86::VMULNEPBF16Z256rmbkz, TB_BCAST_SH}, + {X86::VMULNEPBF16Zrrkz, X86::VMULNEPBF16Zrmbkz, TB_BCAST_SH}, {X86::VMULPDZ128rrkz, X86::VMULPDZ128rmbkz, TB_BCAST_SD}, {X86::VMULPDZ256rrkz, X86::VMULPDZ256rmbkz, TB_BCAST_SD}, {X86::VMULPDZrrkz, X86::VMULPDZrmbkz, TB_BCAST_SD}, @@ -9081,9 +9461,15 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VRCP14PSZrk, X86::VRCP14PSZmbk, TB_BCAST_SS}, {X86::VRCP28PDZrk, X86::VRCP28PDZmbk, TB_BCAST_SD}, {X86::VRCP28PSZrk, X86::VRCP28PSZmbk, TB_BCAST_SS}, + {X86::VRCPPBF16Z128rk, X86::VRCPPBF16Z128mbk, TB_BCAST_SH}, + {X86::VRCPPBF16Z256rk, X86::VRCPPBF16Z256mbk, TB_BCAST_SH}, + {X86::VRCPPBF16Zrk, X86::VRCPPBF16Zmbk, TB_BCAST_SH}, {X86::VRCPPHZ128rk, X86::VRCPPHZ128mbk, TB_BCAST_SH}, {X86::VRCPPHZ256rk, X86::VRCPPHZ256mbk, TB_BCAST_SH}, {X86::VRCPPHZrk, X86::VRCPPHZmbk, TB_BCAST_SH}, + {X86::VREDUCENEPBF16Z128rrik, X86::VREDUCENEPBF16Z128rmbik, TB_BCAST_SH}, + {X86::VREDUCENEPBF16Z256rrik, X86::VREDUCENEPBF16Z256rmbik, TB_BCAST_SH}, + {X86::VREDUCENEPBF16Zrrik, X86::VREDUCENEPBF16Zrmbik, TB_BCAST_SH}, {X86::VREDUCEPDZ128rrik, X86::VREDUCEPDZ128rmbik, TB_BCAST_SD}, {X86::VREDUCEPDZ256rrik, X86::VREDUCEPDZ256rmbik, TB_BCAST_SD}, {X86::VREDUCEPDZrrik, X86::VREDUCEPDZrmbik, TB_BCAST_SD}, @@ -9093,6 +9479,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VREDUCEPSZ128rrik, X86::VREDUCEPSZ128rmbik, TB_BCAST_SS}, {X86::VREDUCEPSZ256rrik, X86::VREDUCEPSZ256rmbik, TB_BCAST_SS}, {X86::VREDUCEPSZrrik, X86::VREDUCEPSZrmbik, TB_BCAST_SS}, + {X86::VRNDSCALENEPBF16Z128rrik, X86::VRNDSCALENEPBF16Z128rmbik, TB_BCAST_SH}, + {X86::VRNDSCALENEPBF16Z256rrik, 
X86::VRNDSCALENEPBF16Z256rmbik, TB_BCAST_SH}, + {X86::VRNDSCALENEPBF16Zrrik, X86::VRNDSCALENEPBF16Zrmbik, TB_BCAST_SH}, {X86::VRNDSCALEPDZ128rrik, X86::VRNDSCALEPDZ128rmbik, TB_BCAST_SD}, {X86::VRNDSCALEPDZ256rrik, X86::VRNDSCALEPDZ256rmbik, TB_BCAST_SD}, {X86::VRNDSCALEPDZrrik, X86::VRNDSCALEPDZrmbik, TB_BCAST_SD}, @@ -9110,9 +9499,15 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VRSQRT14PSZrk, X86::VRSQRT14PSZmbk, TB_BCAST_SS}, {X86::VRSQRT28PDZrk, X86::VRSQRT28PDZmbk, TB_BCAST_SD}, {X86::VRSQRT28PSZrk, X86::VRSQRT28PSZmbk, TB_BCAST_SS}, + {X86::VRSQRTPBF16Z128rk, X86::VRSQRTPBF16Z128mbk, TB_BCAST_SH}, + {X86::VRSQRTPBF16Z256rk, X86::VRSQRTPBF16Z256mbk, TB_BCAST_SH}, + {X86::VRSQRTPBF16Zrk, X86::VRSQRTPBF16Zmbk, TB_BCAST_SH}, {X86::VRSQRTPHZ128rk, X86::VRSQRTPHZ128mbk, TB_BCAST_SH}, {X86::VRSQRTPHZ256rk, X86::VRSQRTPHZ256mbk, TB_BCAST_SH}, {X86::VRSQRTPHZrk, X86::VRSQRTPHZmbk, TB_BCAST_SH}, + {X86::VSCALEFPBF16Z128rrkz, X86::VSCALEFPBF16Z128rmbkz, TB_BCAST_SH}, + {X86::VSCALEFPBF16Z256rrkz, X86::VSCALEFPBF16Z256rmbkz, TB_BCAST_SH}, + {X86::VSCALEFPBF16Zrrkz, X86::VSCALEFPBF16Zrmbkz, TB_BCAST_SH}, {X86::VSCALEFPDZ128rrkz, X86::VSCALEFPDZ128rmbkz, TB_BCAST_SD}, {X86::VSCALEFPDZ256rrkz, X86::VSCALEFPDZ256rmbkz, TB_BCAST_SD}, {X86::VSCALEFPDZrrkz, X86::VSCALEFPDZrmbkz, TB_BCAST_SD}, @@ -9136,6 +9531,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VSHUFPSZ128rrikz, X86::VSHUFPSZ128rmbikz, TB_BCAST_SS}, {X86::VSHUFPSZ256rrikz, X86::VSHUFPSZ256rmbikz, TB_BCAST_SS}, {X86::VSHUFPSZrrikz, X86::VSHUFPSZrmbikz, TB_BCAST_SS}, + {X86::VSQRTNEPBF16Z128rk, X86::VSQRTNEPBF16Z128mbk, TB_BCAST_SH}, + {X86::VSQRTNEPBF16Z256rk, X86::VSQRTNEPBF16Z256mbk, TB_BCAST_SH}, + {X86::VSQRTNEPBF16Zrk, X86::VSQRTNEPBF16Zmbk, TB_BCAST_SH}, {X86::VSQRTPDZ128rk, X86::VSQRTPDZ128mbk, TB_BCAST_SD}, {X86::VSQRTPDZ256rk, X86::VSQRTPDZ256mbk, TB_BCAST_SD}, {X86::VSQRTPDZrk, X86::VSQRTPDZmbk, TB_BCAST_SD}, @@ -9145,6 +9543,9 @@ static const X86FoldTableEntry 
BroadcastTable3[] = { {X86::VSQRTPSZ128rk, X86::VSQRTPSZ128mbk, TB_BCAST_SS}, {X86::VSQRTPSZ256rk, X86::VSQRTPSZ256mbk, TB_BCAST_SS}, {X86::VSQRTPSZrk, X86::VSQRTPSZmbk, TB_BCAST_SS}, + {X86::VSUBNEPBF16Z128rrkz, X86::VSUBNEPBF16Z128rmbkz, TB_BCAST_SH}, + {X86::VSUBNEPBF16Z256rrkz, X86::VSUBNEPBF16Z256rmbkz, TB_BCAST_SH}, + {X86::VSUBNEPBF16Zrrkz, X86::VSUBNEPBF16Zrmbkz, TB_BCAST_SH}, {X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmbkz, TB_BCAST_SD}, {X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmbkz, TB_BCAST_SD}, {X86::VSUBPDZrrkz, X86::VSUBPDZrmbkz, TB_BCAST_SD}, @@ -9175,6 +9576,9 @@ static const X86FoldTableEntry BroadcastTable3[] = { }; static const X86FoldTableEntry BroadcastTable4[] = { + {X86::VADDNEPBF16Z128rrk, X86::VADDNEPBF16Z128rmbk, TB_BCAST_SH}, + {X86::VADDNEPBF16Z256rrk, X86::VADDNEPBF16Z256rmbk, TB_BCAST_SH}, + {X86::VADDNEPBF16Zrrk, X86::VADDNEPBF16Zrmbk, TB_BCAST_SH}, {X86::VADDPDZ128rrk, X86::VADDPDZ128rmbk, TB_BCAST_SD}, {X86::VADDPDZ256rrk, X86::VADDPDZ256rmbk, TB_BCAST_SD}, {X86::VADDPDZrrk, X86::VADDPDZrmbk, TB_BCAST_SD}, @@ -9232,6 +9636,9 @@ static const X86FoldTableEntry BroadcastTable4[] = { {X86::VCVTNE2PS2BF16Z128rrk, X86::VCVTNE2PS2BF16Z128rmbk, TB_BCAST_SS}, {X86::VCVTNE2PS2BF16Z256rrk, X86::VCVTNE2PS2BF16Z256rmbk, TB_BCAST_SS}, {X86::VCVTNE2PS2BF16Zrrk, X86::VCVTNE2PS2BF16Zrmbk, TB_BCAST_SS}, + {X86::VDIVNEPBF16Z128rrk, X86::VDIVNEPBF16Z128rmbk, TB_BCAST_SH}, + {X86::VDIVNEPBF16Z256rrk, X86::VDIVNEPBF16Z256rmbk, TB_BCAST_SH}, + {X86::VDIVNEPBF16Zrrk, X86::VDIVNEPBF16Zrmbk, TB_BCAST_SH}, {X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmbk, TB_BCAST_SD}, {X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmbk, TB_BCAST_SD}, {X86::VDIVPDZrrk, X86::VDIVPDZrmbk, TB_BCAST_SD}, @@ -9274,6 +9681,12 @@ static const X86FoldTableEntry BroadcastTable4[] = { {X86::VFIXUPIMMPSZ256rrikz, X86::VFIXUPIMMPSZ256rmbikz, TB_BCAST_SS}, {X86::VFIXUPIMMPSZrrik, X86::VFIXUPIMMPSZrmbik, TB_BCAST_SS}, {X86::VFIXUPIMMPSZrrikz, X86::VFIXUPIMMPSZrmbikz, TB_BCAST_SS}, + {X86::VFMADD132NEPBF16Z128rk, 
X86::VFMADD132NEPBF16Z128mbk, TB_BCAST_SH}, + {X86::VFMADD132NEPBF16Z128rkz, X86::VFMADD132NEPBF16Z128mbkz, TB_BCAST_SH}, + {X86::VFMADD132NEPBF16Z256rk, X86::VFMADD132NEPBF16Z256mbk, TB_BCAST_SH}, + {X86::VFMADD132NEPBF16Z256rkz, X86::VFMADD132NEPBF16Z256mbkz, TB_BCAST_SH}, + {X86::VFMADD132NEPBF16Zrk, X86::VFMADD132NEPBF16Zmbk, TB_BCAST_SH}, + {X86::VFMADD132NEPBF16Zrkz, X86::VFMADD132NEPBF16Zmbkz, TB_BCAST_SH}, {X86::VFMADD132PDZ128rk, X86::VFMADD132PDZ128mbk, TB_BCAST_SD}, {X86::VFMADD132PDZ128rkz, X86::VFMADD132PDZ128mbkz, TB_BCAST_SD}, {X86::VFMADD132PDZ256rk, X86::VFMADD132PDZ256mbk, TB_BCAST_SD}, @@ -9292,6 +9705,12 @@ static const X86FoldTableEntry BroadcastTable4[] = { {X86::VFMADD132PSZ256rkz, X86::VFMADD132PSZ256mbkz, TB_BCAST_SS}, {X86::VFMADD132PSZrk, X86::VFMADD132PSZmbk, TB_BCAST_SS}, {X86::VFMADD132PSZrkz, X86::VFMADD132PSZmbkz, TB_BCAST_SS}, + {X86::VFMADD213NEPBF16Z128rk, X86::VFMADD213NEPBF16Z128mbk, TB_BCAST_SH}, + {X86::VFMADD213NEPBF16Z128rkz, X86::VFMADD213NEPBF16Z128mbkz, TB_BCAST_SH}, + {X86::VFMADD213NEPBF16Z256rk, X86::VFMADD213NEPBF16Z256mbk, TB_BCAST_SH}, + {X86::VFMADD213NEPBF16Z256rkz, X86::VFMADD213NEPBF16Z256mbkz, TB_BCAST_SH}, + {X86::VFMADD213NEPBF16Zrk, X86::VFMADD213NEPBF16Zmbk, TB_BCAST_SH}, + {X86::VFMADD213NEPBF16Zrkz, X86::VFMADD213NEPBF16Zmbkz, TB_BCAST_SH}, {X86::VFMADD213PDZ128rk, X86::VFMADD213PDZ128mbk, TB_BCAST_SD}, {X86::VFMADD213PDZ128rkz, X86::VFMADD213PDZ128mbkz, TB_BCAST_SD}, {X86::VFMADD213PDZ256rk, X86::VFMADD213PDZ256mbk, TB_BCAST_SD}, @@ -9310,6 +9729,12 @@ static const X86FoldTableEntry BroadcastTable4[] = { {X86::VFMADD213PSZ256rkz, X86::VFMADD213PSZ256mbkz, TB_BCAST_SS}, {X86::VFMADD213PSZrk, X86::VFMADD213PSZmbk, TB_BCAST_SS}, {X86::VFMADD213PSZrkz, X86::VFMADD213PSZmbkz, TB_BCAST_SS}, + {X86::VFMADD231NEPBF16Z128rk, X86::VFMADD231NEPBF16Z128mbk, TB_BCAST_SH}, + {X86::VFMADD231NEPBF16Z128rkz, X86::VFMADD231NEPBF16Z128mbkz, TB_BCAST_SH}, + {X86::VFMADD231NEPBF16Z256rk, X86::VFMADD231NEPBF16Z256mbk, 
TB_BCAST_SH}, + {X86::VFMADD231NEPBF16Z256rkz, X86::VFMADD231NEPBF16Z256mbkz, TB_BCAST_SH}, + {X86::VFMADD231NEPBF16Zrk, X86::VFMADD231NEPBF16Zmbk, TB_BCAST_SH}, + {X86::VFMADD231NEPBF16Zrkz, X86::VFMADD231NEPBF16Zmbkz, TB_BCAST_SH}, {X86::VFMADD231PDZ128rk, X86::VFMADD231PDZ128mbk, TB_BCAST_SD}, {X86::VFMADD231PDZ128rkz, X86::VFMADD231PDZ128mbkz, TB_BCAST_SD}, {X86::VFMADD231PDZ256rk, X86::VFMADD231PDZ256mbk, TB_BCAST_SD}, @@ -9388,6 +9813,12 @@ static const X86FoldTableEntry BroadcastTable4[] = { {X86::VFMADDSUB231PSZ256rkz, X86::VFMADDSUB231PSZ256mbkz, TB_BCAST_SS}, {X86::VFMADDSUB231PSZrk, X86::VFMADDSUB231PSZmbk, TB_BCAST_SS}, {X86::VFMADDSUB231PSZrkz, X86::VFMADDSUB231PSZmbkz, TB_BCAST_SS}, + {X86::VFMSUB132NEPBF16Z128rk, X86::VFMSUB132NEPBF16Z128mbk, TB_BCAST_SH}, + {X86::VFMSUB132NEPBF16Z128rkz, X86::VFMSUB132NEPBF16Z128mbkz, TB_BCAST_SH}, + {X86::VFMSUB132NEPBF16Z256rk, X86::VFMSUB132NEPBF16Z256mbk, TB_BCAST_SH}, + {X86::VFMSUB132NEPBF16Z256rkz, X86::VFMSUB132NEPBF16Z256mbkz, TB_BCAST_SH}, + {X86::VFMSUB132NEPBF16Zrk, X86::VFMSUB132NEPBF16Zmbk, TB_BCAST_SH}, + {X86::VFMSUB132NEPBF16Zrkz, X86::VFMSUB132NEPBF16Zmbkz, TB_BCAST_SH}, {X86::VFMSUB132PDZ128rk, X86::VFMSUB132PDZ128mbk, TB_BCAST_SD}, {X86::VFMSUB132PDZ128rkz, X86::VFMSUB132PDZ128mbkz, TB_BCAST_SD}, {X86::VFMSUB132PDZ256rk, X86::VFMSUB132PDZ256mbk, TB_BCAST_SD}, @@ -9406,6 +9837,12 @@ static const X86FoldTableEntry BroadcastTable4[] = { {X86::VFMSUB132PSZ256rkz, X86::VFMSUB132PSZ256mbkz, TB_BCAST_SS}, {X86::VFMSUB132PSZrk, X86::VFMSUB132PSZmbk, TB_BCAST_SS}, {X86::VFMSUB132PSZrkz, X86::VFMSUB132PSZmbkz, TB_BCAST_SS}, + {X86::VFMSUB213NEPBF16Z128rk, X86::VFMSUB213NEPBF16Z128mbk, TB_BCAST_SH}, + {X86::VFMSUB213NEPBF16Z128rkz, X86::VFMSUB213NEPBF16Z128mbkz, TB_BCAST_SH}, + {X86::VFMSUB213NEPBF16Z256rk, X86::VFMSUB213NEPBF16Z256mbk, TB_BCAST_SH}, + {X86::VFMSUB213NEPBF16Z256rkz, X86::VFMSUB213NEPBF16Z256mbkz, TB_BCAST_SH}, + {X86::VFMSUB213NEPBF16Zrk, X86::VFMSUB213NEPBF16Zmbk, TB_BCAST_SH}, + 
{X86::VFMSUB213NEPBF16Zrkz, X86::VFMSUB213NEPBF16Zmbkz, TB_BCAST_SH}, {X86::VFMSUB213PDZ128rk, X86::VFMSUB213PDZ128mbk, TB_BCAST_SD}, {X86::VFMSUB213PDZ128rkz, X86::VFMSUB213PDZ128mbkz, TB_BCAST_SD}, {X86::VFMSUB213PDZ256rk, X86::VFMSUB213PDZ256mbk, TB_BCAST_SD}, @@ -9424,6 +9861,12 @@ static const X86FoldTableEntry BroadcastTable4[] = { {X86::VFMSUB213PSZ256rkz, X86::VFMSUB213PSZ256mbkz, TB_BCAST_SS}, {X86::VFMSUB213PSZrk, X86::VFMSUB213PSZmbk, TB_BCAST_SS}, {X86::VFMSUB213PSZrkz, X86::VFMSUB213PSZmbkz, TB_BCAST_SS}, + {X86::VFMSUB231NEPBF16Z128rk, X86::VFMSUB231NEPBF16Z128mbk, TB_BCAST_SH}, + {X86::VFMSUB231NEPBF16Z128rkz, X86::VFMSUB231NEPBF16Z128mbkz, TB_BCAST_SH}, + {X86::VFMSUB231NEPBF16Z256rk, X86::VFMSUB231NEPBF16Z256mbk, TB_BCAST_SH}, + {X86::VFMSUB231NEPBF16Z256rkz, X86::VFMSUB231NEPBF16Z256mbkz, TB_BCAST_SH}, + {X86::VFMSUB231NEPBF16Zrk, X86::VFMSUB231NEPBF16Zmbk, TB_BCAST_SH}, + {X86::VFMSUB231NEPBF16Zrkz, X86::VFMSUB231NEPBF16Zmbkz, TB_BCAST_SH}, {X86::VFMSUB231PDZ128rk, X86::VFMSUB231PDZ128mbk, TB_BCAST_SD}, {X86::VFMSUB231PDZ128rkz, X86::VFMSUB231PDZ128mbkz, TB_BCAST_SD}, {X86::VFMSUB231PDZ256rk, X86::VFMSUB231PDZ256mbk, TB_BCAST_SD}, @@ -9499,6 +9942,12 @@ static const X86FoldTableEntry BroadcastTable4[] = { {X86::VFMULCPHZ128rrk, X86::VFMULCPHZ128rmbk, TB_BCAST_SS}, {X86::VFMULCPHZ256rrk, X86::VFMULCPHZ256rmbk, TB_BCAST_SS}, {X86::VFMULCPHZrrk, X86::VFMULCPHZrmbk, TB_BCAST_SS}, + {X86::VFNMADD132NEPBF16Z128rk, X86::VFNMADD132NEPBF16Z128mbk, TB_BCAST_SH}, + {X86::VFNMADD132NEPBF16Z128rkz, X86::VFNMADD132NEPBF16Z128mbkz, TB_BCAST_SH}, + {X86::VFNMADD132NEPBF16Z256rk, X86::VFNMADD132NEPBF16Z256mbk, TB_BCAST_SH}, + {X86::VFNMADD132NEPBF16Z256rkz, X86::VFNMADD132NEPBF16Z256mbkz, TB_BCAST_SH}, + {X86::VFNMADD132NEPBF16Zrk, X86::VFNMADD132NEPBF16Zmbk, TB_BCAST_SH}, + {X86::VFNMADD132NEPBF16Zrkz, X86::VFNMADD132NEPBF16Zmbkz, TB_BCAST_SH}, {X86::VFNMADD132PDZ128rk, X86::VFNMADD132PDZ128mbk, TB_BCAST_SD}, {X86::VFNMADD132PDZ128rkz, X86::VFNMADD132PDZ128mbkz, 
TB_BCAST_SD}, {X86::VFNMADD132PDZ256rk, X86::VFNMADD132PDZ256mbk, TB_BCAST_SD}, @@ -9517,6 +9966,12 @@ static const X86FoldTableEntry BroadcastTable4[] = { {X86::VFNMADD132PSZ256rkz, X86::VFNMADD132PSZ256mbkz, TB_BCAST_SS}, {X86::VFNMADD132PSZrk, X86::VFNMADD132PSZmbk, TB_BCAST_SS}, {X86::VFNMADD132PSZrkz, X86::VFNMADD132PSZmbkz, TB_BCAST_SS}, + {X86::VFNMADD213NEPBF16Z128rk, X86::VFNMADD213NEPBF16Z128mbk, TB_BCAST_SH}, + {X86::VFNMADD213NEPBF16Z128rkz, X86::VFNMADD213NEPBF16Z128mbkz, TB_BCAST_SH}, + {X86::VFNMADD213NEPBF16Z256rk, X86::VFNMADD213NEPBF16Z256mbk, TB_BCAST_SH}, + {X86::VFNMADD213NEPBF16Z256rkz, X86::VFNMADD213NEPBF16Z256mbkz, TB_BCAST_SH}, + {X86::VFNMADD213NEPBF16Zrk, X86::VFNMADD213NEPBF16Zmbk, TB_BCAST_SH}, + {X86::VFNMADD213NEPBF16Zrkz, X86::VFNMADD213NEPBF16Zmbkz, TB_BCAST_SH}, {X86::VFNMADD213PDZ128rk, X86::VFNMADD213PDZ128mbk, TB_BCAST_SD}, {X86::VFNMADD213PDZ128rkz, X86::VFNMADD213PDZ128mbkz, TB_BCAST_SD}, {X86::VFNMADD213PDZ256rk, X86::VFNMADD213PDZ256mbk, TB_BCAST_SD}, @@ -9535,6 +9990,12 @@ static const X86FoldTableEntry BroadcastTable4[] = { {X86::VFNMADD213PSZ256rkz, X86::VFNMADD213PSZ256mbkz, TB_BCAST_SS}, {X86::VFNMADD213PSZrk, X86::VFNMADD213PSZmbk, TB_BCAST_SS}, {X86::VFNMADD213PSZrkz, X86::VFNMADD213PSZmbkz, TB_BCAST_SS}, + {X86::VFNMADD231NEPBF16Z128rk, X86::VFNMADD231NEPBF16Z128mbk, TB_BCAST_SH}, + {X86::VFNMADD231NEPBF16Z128rkz, X86::VFNMADD231NEPBF16Z128mbkz, TB_BCAST_SH}, + {X86::VFNMADD231NEPBF16Z256rk, X86::VFNMADD231NEPBF16Z256mbk, TB_BCAST_SH}, + {X86::VFNMADD231NEPBF16Z256rkz, X86::VFNMADD231NEPBF16Z256mbkz, TB_BCAST_SH}, + {X86::VFNMADD231NEPBF16Zrk, X86::VFNMADD231NEPBF16Zmbk, TB_BCAST_SH}, + {X86::VFNMADD231NEPBF16Zrkz, X86::VFNMADD231NEPBF16Zmbkz, TB_BCAST_SH}, {X86::VFNMADD231PDZ128rk, X86::VFNMADD231PDZ128mbk, TB_BCAST_SD}, {X86::VFNMADD231PDZ128rkz, X86::VFNMADD231PDZ128mbkz, TB_BCAST_SD}, {X86::VFNMADD231PDZ256rk, X86::VFNMADD231PDZ256mbk, TB_BCAST_SD}, @@ -9553,6 +10014,12 @@ static const X86FoldTableEntry 
BroadcastTable4[] = { {X86::VFNMADD231PSZ256rkz, X86::VFNMADD231PSZ256mbkz, TB_BCAST_SS}, {X86::VFNMADD231PSZrk, X86::VFNMADD231PSZmbk, TB_BCAST_SS}, {X86::VFNMADD231PSZrkz, X86::VFNMADD231PSZmbkz, TB_BCAST_SS}, + {X86::VFNMSUB132NEPBF16Z128rk, X86::VFNMSUB132NEPBF16Z128mbk, TB_BCAST_SH}, + {X86::VFNMSUB132NEPBF16Z128rkz, X86::VFNMSUB132NEPBF16Z128mbkz, TB_BCAST_SH}, + {X86::VFNMSUB132NEPBF16Z256rk, X86::VFNMSUB132NEPBF16Z256mbk, TB_BCAST_SH}, + {X86::VFNMSUB132NEPBF16Z256rkz, X86::VFNMSUB132NEPBF16Z256mbkz, TB_BCAST_SH}, + {X86::VFNMSUB132NEPBF16Zrk, X86::VFNMSUB132NEPBF16Zmbk, TB_BCAST_SH}, + {X86::VFNMSUB132NEPBF16Zrkz, X86::VFNMSUB132NEPBF16Zmbkz, TB_BCAST_SH}, {X86::VFNMSUB132PDZ128rk, X86::VFNMSUB132PDZ128mbk, TB_BCAST_SD}, {X86::VFNMSUB132PDZ128rkz, X86::VFNMSUB132PDZ128mbkz, TB_BCAST_SD}, {X86::VFNMSUB132PDZ256rk, X86::VFNMSUB132PDZ256mbk, TB_BCAST_SD}, @@ -9571,6 +10038,12 @@ static const X86FoldTableEntry BroadcastTable4[] = { {X86::VFNMSUB132PSZ256rkz, X86::VFNMSUB132PSZ256mbkz, TB_BCAST_SS}, {X86::VFNMSUB132PSZrk, X86::VFNMSUB132PSZmbk, TB_BCAST_SS}, {X86::VFNMSUB132PSZrkz, X86::VFNMSUB132PSZmbkz, TB_BCAST_SS}, + {X86::VFNMSUB213NEPBF16Z128rk, X86::VFNMSUB213NEPBF16Z128mbk, TB_BCAST_SH}, + {X86::VFNMSUB213NEPBF16Z128rkz, X86::VFNMSUB213NEPBF16Z128mbkz, TB_BCAST_SH}, + {X86::VFNMSUB213NEPBF16Z256rk, X86::VFNMSUB213NEPBF16Z256mbk, TB_BCAST_SH}, + {X86::VFNMSUB213NEPBF16Z256rkz, X86::VFNMSUB213NEPBF16Z256mbkz, TB_BCAST_SH}, + {X86::VFNMSUB213NEPBF16Zrk, X86::VFNMSUB213NEPBF16Zmbk, TB_BCAST_SH}, + {X86::VFNMSUB213NEPBF16Zrkz, X86::VFNMSUB213NEPBF16Zmbkz, TB_BCAST_SH}, {X86::VFNMSUB213PDZ128rk, X86::VFNMSUB213PDZ128mbk, TB_BCAST_SD}, {X86::VFNMSUB213PDZ128rkz, X86::VFNMSUB213PDZ128mbkz, TB_BCAST_SD}, {X86::VFNMSUB213PDZ256rk, X86::VFNMSUB213PDZ256mbk, TB_BCAST_SD}, @@ -9589,6 +10062,12 @@ static const X86FoldTableEntry BroadcastTable4[] = { {X86::VFNMSUB213PSZ256rkz, X86::VFNMSUB213PSZ256mbkz, TB_BCAST_SS}, {X86::VFNMSUB213PSZrk, X86::VFNMSUB213PSZmbk, 
TB_BCAST_SS}, {X86::VFNMSUB213PSZrkz, X86::VFNMSUB213PSZmbkz, TB_BCAST_SS}, + {X86::VFNMSUB231NEPBF16Z128rk, X86::VFNMSUB231NEPBF16Z128mbk, TB_BCAST_SH}, + {X86::VFNMSUB231NEPBF16Z128rkz, X86::VFNMSUB231NEPBF16Z128mbkz, TB_BCAST_SH}, + {X86::VFNMSUB231NEPBF16Z256rk, X86::VFNMSUB231NEPBF16Z256mbk, TB_BCAST_SH}, + {X86::VFNMSUB231NEPBF16Z256rkz, X86::VFNMSUB231NEPBF16Z256mbkz, TB_BCAST_SH}, + {X86::VFNMSUB231NEPBF16Zrk, X86::VFNMSUB231NEPBF16Zmbk, TB_BCAST_SH}, + {X86::VFNMSUB231NEPBF16Zrkz, X86::VFNMSUB231NEPBF16Zmbkz, TB_BCAST_SH}, {X86::VFNMSUB231PDZ128rk, X86::VFNMSUB231PDZ128mbk, TB_BCAST_SD}, {X86::VFNMSUB231PDZ128rkz, X86::VFNMSUB231PDZ128mbkz, TB_BCAST_SD}, {X86::VFNMSUB231PDZ256rk, X86::VFNMSUB231PDZ256mbk, TB_BCAST_SD}, @@ -9622,6 +10101,9 @@ static const X86FoldTableEntry BroadcastTable4[] = { {X86::VMAXCPSZ128rrk, X86::VMAXCPSZ128rmbk, TB_BCAST_SS}, {X86::VMAXCPSZ256rrk, X86::VMAXCPSZ256rmbk, TB_BCAST_SS}, {X86::VMAXCPSZrrk, X86::VMAXCPSZrmbk, TB_BCAST_SS}, + {X86::VMAXPBF16Z128rrk, X86::VMAXPBF16Z128rmbk, TB_BCAST_SH}, + {X86::VMAXPBF16Z256rrk, X86::VMAXPBF16Z256rmbk, TB_BCAST_SH}, + {X86::VMAXPBF16Zrrk, X86::VMAXPBF16Zrmbk, TB_BCAST_SH}, {X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmbk, TB_BCAST_SD}, {X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmbk, TB_BCAST_SD}, {X86::VMAXPDZrrk, X86::VMAXPDZrmbk, TB_BCAST_SD}, @@ -9652,6 +10134,9 @@ static const X86FoldTableEntry BroadcastTable4[] = { {X86::VMINMAXPSZ128rrik, X86::VMINMAXPSZ128rmbik, TB_BCAST_SS}, {X86::VMINMAXPSZ256rrik, X86::VMINMAXPSZ256rmbik, TB_BCAST_SS}, {X86::VMINMAXPSZrrik, X86::VMINMAXPSZrmbik, TB_BCAST_SS}, + {X86::VMINPBF16Z128rrk, X86::VMINPBF16Z128rmbk, TB_BCAST_SH}, + {X86::VMINPBF16Z256rrk, X86::VMINPBF16Z256rmbk, TB_BCAST_SH}, + {X86::VMINPBF16Zrrk, X86::VMINPBF16Zrmbk, TB_BCAST_SH}, {X86::VMINPDZ128rrk, X86::VMINPDZ128rmbk, TB_BCAST_SD}, {X86::VMINPDZ256rrk, X86::VMINPDZ256rmbk, TB_BCAST_SD}, {X86::VMINPDZrrk, X86::VMINPDZrmbk, TB_BCAST_SD}, @@ -9661,6 +10146,9 @@ static const X86FoldTableEntry 
BroadcastTable4[] = { {X86::VMINPSZ128rrk, X86::VMINPSZ128rmbk, TB_BCAST_SS}, {X86::VMINPSZ256rrk, X86::VMINPSZ256rmbk, TB_BCAST_SS}, {X86::VMINPSZrrk, X86::VMINPSZrmbk, TB_BCAST_SS}, + {X86::VMULNEPBF16Z128rrk, X86::VMULNEPBF16Z128rmbk, TB_BCAST_SH}, + {X86::VMULNEPBF16Z256rrk, X86::VMULNEPBF16Z256rmbk, TB_BCAST_SH}, + {X86::VMULNEPBF16Zrrk, X86::VMULNEPBF16Zrmbk, TB_BCAST_SH}, {X86::VMULPDZ128rrk, X86::VMULPDZ128rmbk, TB_BCAST_SD}, {X86::VMULPDZ256rrk, X86::VMULPDZ256rmbk, TB_BCAST_SD}, {X86::VMULPDZrrk, X86::VMULPDZrmbk, TB_BCAST_SD}, @@ -10023,6 +10511,9 @@ static const X86FoldTableEntry BroadcastTable4[] = { {X86::VRANGEPSZ128rrik, X86::VRANGEPSZ128rmbik, TB_BCAST_SS}, {X86::VRANGEPSZ256rrik, X86::VRANGEPSZ256rmbik, TB_BCAST_SS}, {X86::VRANGEPSZrrik, X86::VRANGEPSZrmbik, TB_BCAST_SS}, + {X86::VSCALEFPBF16Z128rrk, X86::VSCALEFPBF16Z128rmbk, TB_BCAST_SH}, + {X86::VSCALEFPBF16Z256rrk, X86::VSCALEFPBF16Z256rmbk, TB_BCAST_SH}, + {X86::VSCALEFPBF16Zrrk, X86::VSCALEFPBF16Zrmbk, TB_BCAST_SH}, {X86::VSCALEFPDZ128rrk, X86::VSCALEFPDZ128rmbk, TB_BCAST_SD}, {X86::VSCALEFPDZ256rrk, X86::VSCALEFPDZ256rmbk, TB_BCAST_SD}, {X86::VSCALEFPDZrrk, X86::VSCALEFPDZrmbk, TB_BCAST_SD}, @@ -10046,6 +10537,9 @@ static const X86FoldTableEntry BroadcastTable4[] = { {X86::VSHUFPSZ128rrik, X86::VSHUFPSZ128rmbik, TB_BCAST_SS}, {X86::VSHUFPSZ256rrik, X86::VSHUFPSZ256rmbik, TB_BCAST_SS}, {X86::VSHUFPSZrrik, X86::VSHUFPSZrmbik, TB_BCAST_SS}, + {X86::VSUBNEPBF16Z128rrk, X86::VSUBNEPBF16Z128rmbk, TB_BCAST_SH}, + {X86::VSUBNEPBF16Z256rrk, X86::VSUBNEPBF16Z256rmbk, TB_BCAST_SH}, + {X86::VSUBNEPBF16Zrrk, X86::VSUBNEPBF16Zrmbk, TB_BCAST_SH}, {X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmbk, TB_BCAST_SD}, {X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmbk, TB_BCAST_SD}, {X86::VSUBPDZrrk, X86::VSUBPDZrmbk, TB_BCAST_SD},