From 8c9f45e2decbb68dbf83794f98291b53f59390f8 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 16 Apr 2024 17:08:02 -0700 Subject: [PATCH] [ARM64EC] Fix arm_neon.h on ARM64EC. (#88572) Since 97fe519d, in ARM64EC mode, we don't define `__aarch64__`. Fix various preprocessor guards to account for this. --- clang/include/clang/Basic/arm_fp16.td | 2 +- clang/include/clang/Basic/arm_neon.td | 58 +++++++++++++-------------- clang/utils/TableGen/NeonEmitter.cpp | 8 ++-- 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/clang/include/clang/Basic/arm_fp16.td b/clang/include/clang/Basic/arm_fp16.td index cb2a09303e8e12..d36b4617bef5d2 100644 --- a/clang/include/clang/Basic/arm_fp16.td +++ b/clang/include/clang/Basic/arm_fp16.td @@ -14,7 +14,7 @@ include "arm_neon_incl.td" // ARMv8.2-A FP16 intrinsics. -let ArchGuard = "defined(__aarch64__)", TargetGuard = "fullfp16" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "fullfp16" in { // Negate def VNEGSH : SInst<"vneg", "11", "Sh">; diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index 7edac5afafaa99..6d655c39360d3b 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -605,11 +605,11 @@ def VQDMULL_LANE : SOpInst<"vqdmull_lane", "(>Q)..I", "si", OP_QDMULL_LN>; def VQDMULH_N : SOpInst<"vqdmulh_n", "..1", "siQsQi", OP_QDMULH_N>; def VQRDMULH_N : SOpInst<"vqrdmulh_n", "..1", "siQsQi", OP_QRDMULH_N>; -let ArchGuard = "!defined(__aarch64__)" in { +let ArchGuard = "!defined(__aarch64__) && !defined(__arm64ec__)" in { def VQDMULH_LANE : SOpInst<"vqdmulh_lane", "..qI", "siQsQi", OP_QDMULH_LN>; def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "..qI", "siQsQi", OP_QRDMULH_LN>; } -let ArchGuard = "defined(__aarch64__)" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in { def A64_VQDMULH_LANE : SInst<"vqdmulh_lane", "..(!q)I", "siQsQi">; def A64_VQRDMULH_LANE : SInst<"vqrdmulh_lane", "..(!q)I", "siQsQi">; } @@ -686,7 +686,7 @@ multiclass REINTERPRET_CROSS_TYPES { // E.3.31 Vector reinterpret cast operations def VREINTERPRET : REINTERPRET_CROSS_SELF<"csilUcUsUiUlhfPcPsQcQsQiQlQUcQUsQUiQUlQhQfQPcQPs"> { - let ArchGuard = "!defined(__aarch64__)"; + let ArchGuard = "!defined(__aarch64__) && !defined(__arm64ec__)"; let BigEndianSafe = 1; } @@ -714,7 +714,7 @@ def VADDP : WInst<"vadd", "...", "PcPsPlQPcQPsQPl">; //////////////////////////////////////////////////////////////////////////////// // AArch64 Intrinsics -let ArchGuard = "defined(__aarch64__)" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in { //////////////////////////////////////////////////////////////////////////////// // Load/Store @@ -1091,14 +1091,14 @@ let isLaneQ = 1 in { def VQDMULH_LANEQ : SInst<"vqdmulh_laneq", "..QI", "siQsQi">; def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi">; } -let ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.1a" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a" in { def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN> { let isLaneQ = 1; } def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "...QI", "siQsQi", OP_QRDMLSH_LN> { let isLaneQ = 1; } -} // ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.1a" +} // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a" // Note: d type implemented by SCALAR_VMULX_LANE def VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "fQfQd", OP_MULX_LN>; @@ -1143,7 +1143,7 @@ def SHA256H2 : SInst<"vsha256h2", "....", "QUi">; def SHA256SU1 : SInst<"vsha256su1", "....", "QUi">; } -let ArchGuard = "defined(__aarch64__)", TargetGuard = "sha3" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sha3" in { def BCAX : SInst<"vbcax", "....", "QUcQUsQUiQUlQcQsQiQl">; def EOR3 : SInst<"veor3", "....", "QUcQUsQUiQUlQcQsQiQl">; def RAX1 : SInst<"vrax1", "...", "QUl">; @@ -1153,14 +1153,14 @@ def XAR : SInst<"vxar", "...I", "QUl">; } } -let ArchGuard = "defined(__aarch64__)", TargetGuard = "sha3" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sha3" in { def SHA512SU0 : SInst<"vsha512su0", "...", "QUl">; def SHA512su1 : SInst<"vsha512su1", "....", "QUl">; def SHA512H : SInst<"vsha512h", "....", "QUl">; def SHA512H2 : SInst<"vsha512h2", "....", "QUl">; } -let ArchGuard = "defined(__aarch64__)", TargetGuard = "sm4" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sm4" in { def SM3SS1 : SInst<"vsm3ss1", "....", "QUi">; def SM3TT1A : SInst<"vsm3tt1a", "....I", "QUi">; def SM3TT1B : SInst<"vsm3tt1b", "....I", "QUi">; @@ -1170,7 +1170,7 @@ def SM3PARTW1 : SInst<"vsm3partw1", "....", "QUi">; def SM3PARTW2 : SInst<"vsm3partw2", "....", "QUi">; } -let ArchGuard = "defined(__aarch64__)", TargetGuard = "sm4" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sm4" in { def SM4E : SInst<"vsm4e", "...", "QUi">; def SM4EKEY : SInst<"vsm4ekey", "...", "QUi">; } @@ -1193,7 +1193,7 @@ def FCVTAS_S32 : SInst<"vcvta_s32", "S.", "fQf">; def FCVTAU_S32 : SInst<"vcvta_u32", "U.", "fQf">; } -let ArchGuard = "defined(__aarch64__)" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in { def FCVTNS_S64 : SInst<"vcvtn_s64", "S.", "dQd">; def FCVTNU_S64 : SInst<"vcvtn_u64", "U.", "dQd">; def FCVTPS_S64 : SInst<"vcvtp_s64", "S.", "dQd">; @@ -1217,7 +1217,7 @@ def FRINTZ_S32 : SInst<"vrnd", "..", "fQf">; def FRINTI_S32 : SInst<"vrndi", "..", "fQf">; } -let ArchGuard = "defined(__aarch64__) && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in { +let ArchGuard = "(defined(__aarch64__) || defined(__arm64ec__)) && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in { def FRINTN_S64 : SInst<"vrndn", "..", "dQd">; def FRINTA_S64 : SInst<"vrnda", "..", "dQd">; def FRINTP_S64 : SInst<"vrndp", "..", "dQd">; @@ -1227,7 +1227,7 @@ def FRINTZ_S64 : SInst<"vrnd", "..", "dQd">; def FRINTI_S64 : SInst<"vrndi", "..", "dQd">; } -let ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.5a" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.5a" in { def FRINT32X_S32 : SInst<"vrnd32x", "..", "fQf">; def FRINT32Z_S32 : SInst<"vrnd32z", "..", "fQf">; def FRINT64X_S32 : SInst<"vrnd64x", "..", "fQf">; @@ -1247,7 +1247,7 @@ def FMAXNM_S32 : SInst<"vmaxnm", "...", "fQf">; def FMINNM_S32 : SInst<"vminnm", "...", "fQf">; } -let ArchGuard = "defined(__aarch64__) && defined(__ARM_FEATURE_NUMERIC_MAXMIN)" in { +let ArchGuard = "(defined(__aarch64__) || defined(__arm64ec__)) && defined(__ARM_FEATURE_NUMERIC_MAXMIN)" in { def FMAXNM_S64 : SInst<"vmaxnm", "...", "dQd">; def FMINNM_S64 : SInst<"vminnm", "...", "dQd">; } @@ -1289,7 +1289,7 @@ def VQTBX4_A64 : WInst<"vqtbx4", "..(4Q)U", "UccPcQUcQcQPc">; // itself during generation so, unlike all other intrinsics, this one should // include *all* types, not just additional ones. def VVREINTERPRET : REINTERPRET_CROSS_SELF<"csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk"> { - let ArchGuard = "defined(__aarch64__)"; + let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)"; let BigEndianSafe = 1; } @@ -1401,7 +1401,7 @@ def SCALAR_SQDMULH : SInst<"vqdmulh", "111", "SsSi">; // Scalar Integer Saturating Rounding Doubling Multiply Half High def SCALAR_SQRDMULH : SInst<"vqrdmulh", "111", "SsSi">; -let ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.1a" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a" in { //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half def SCALAR_SQRDMLAH : SInst<"vqrdmlah", "1111", "SsSi">; @@ -1409,7 +1409,7 @@ def SCALAR_SQRDMLAH : SInst<"vqrdmlah", "1111", "SsSi">; //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Rounding Doubling Multiply Subtract Returning High Half def SCALAR_SQRDMLSH : SInst<"vqrdmlsh", "1111", "SsSi">; -} // ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.1a" +} // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a" //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Multiply Extended @@ -1651,7 +1651,7 @@ def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcS let isLaneQ = 1; } -} // ArchGuard = "defined(__aarch64__)" +} // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" // ARMv8.2-A FP16 vector intrinsics for A32/A64. let TargetGuard = "fullfp16" in { @@ -1775,7 +1775,7 @@ def VEXTH : WInst<"vext", "...I", "hQh">; def VREV64H : WOpInst<"vrev64", "..", "hQh", OP_REV64>; // ARMv8.2-A FP16 vector intrinsics for A64 only. -let ArchGuard = "defined(__aarch64__)", TargetGuard = "fullfp16" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "fullfp16" in { // Vector rounding def FRINTIH : SInst<"vrndi", "..", "hQh">; @@ -1856,7 +1856,7 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "fullfp16" in { def FMINNMVH : SInst<"vminnmv", "1.", "hQh">; } -let ArchGuard = "defined(__aarch64__)" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in { // Permutation def VTRN1H : SOpInst<"vtrn1", "...", "hQh", OP_TRN1>; def VZIP1H : SOpInst<"vzip1", "...", "hQh", OP_ZIP1>; @@ -1876,7 +1876,7 @@ let TargetGuard = "dotprod" in { def DOT : SInst<"vdot", "..(<<)(<<)", "iQiUiQUi">; def DOT_LANE : SOpInst<"vdot_lane", "..(<<)(<; } -let ArchGuard = "defined(__aarch64__)", TargetGuard = "dotprod" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "dotprod" in { // Variants indexing into a 128-bit vector are A64 only. def UDOT_LANEQ : SOpInst<"vdot_laneq", "..(<<)(< { let isLaneQ = 1; @@ -1884,7 +1884,7 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "dotprod" in { } // v8.2-A FP16 fused multiply-add long instructions. -let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp16fml" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "fp16fml" in { def VFMLAL_LOW : SInst<"vfmlal_low", ">>..", "hQh">; def VFMLSL_LOW : SInst<"vfmlsl_low", ">>..", "hQh">; def VFMLAL_HIGH : SInst<"vfmlal_high", ">>..", "hQh">; @@ -1918,7 +1918,7 @@ let TargetGuard = "i8mm" in { def VUSDOT_LANE : SOpInst<"vusdot_lane", "..(<; def VSUDOT_LANE : SOpInst<"vsudot_lane", "..(<<)(<; - let ArchGuard = "defined(__aarch64__)" in { + let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in { let isLaneQ = 1 in { def VUSDOT_LANEQ : SOpInst<"vusdot_laneq", "..(<; def VSUDOT_LANEQ : SOpInst<"vsudot_laneq", "..(<<)(<; @@ -1986,7 +1986,7 @@ let TargetGuard = "v8.3a" in { defm VCMLA_F32 : VCMLA_ROTS<"f", "uint64x1_t", "uint64x2_t">; } -let ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.3a" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.3a" in { def VCADDQ_ROT90_FP64 : SInst<"vcaddq_rot90", "QQQ", "d">; def VCADDQ_ROT270_FP64 : SInst<"vcaddq_rot270", "QQQ", "d">; @@ -2058,14 +2058,14 @@ let TargetGuard = "bf16" in { def SCALAR_CVT_F32_BF16 : SOpInst<"vcvtah_f32", "(1F>)(1!)", "b", OP_CVT_F32_BF16>; } -let ArchGuard = "!defined(__aarch64__)", TargetGuard = "bf16" in { +let ArchGuard = "!defined(__aarch64__) && !defined(__arm64ec__)", TargetGuard = "bf16" in { def VCVT_BF16_F32_A32_INTERNAL : WInst<"__a32_vcvt_bf16", "BQ", "f">; def VCVT_BF16_F32_A32 : SOpInst<"vcvt_bf16", "BQ", "f", OP_VCVT_BF16_F32_A32>; def VCVT_LOW_BF16_F32_A32 : SOpInst<"vcvt_low_bf16", "BQ", "Qf", OP_VCVT_BF16_F32_LO_A32>; def VCVT_HIGH_BF16_F32_A32 : SOpInst<"vcvt_high_bf16", "BBQ", "Qf", OP_VCVT_BF16_F32_HI_A32>; } -let ArchGuard = "defined(__aarch64__)", TargetGuard = "bf16" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "bf16" in { def VCVT_LOW_BF16_F32_A64_INTERNAL : WInst<"__a64_vcvtq_low_bf16", "BQ", "Hf">; def VCVT_LOW_BF16_F32_A64 : SOpInst<"vcvt_low_bf16", "BQ", "Qf", OP_VCVT_BF16_F32_LO_A64>; def VCVT_HIGH_BF16_F32_A64 : SInst<"vcvt_high_bf16", "BBQ", "Qf">; @@ -2077,14 +2077,14 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "bf16" in { def COPYQ_LANEQ_BF16 : IOpInst<"vcopy_laneq", "..I.I", "Qb", OP_COPY_LN>; } -let ArchGuard = "!defined(__aarch64__)", TargetGuard = "bf16" in { +let ArchGuard = "!defined(__aarch64__) && !defined(__arm64ec__)", TargetGuard = "bf16" in { let BigEndianSafe = 1 in { defm VREINTERPRET_BF : REINTERPRET_CROSS_TYPES< "csilUcUsUiUlhfPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQPcQPsQPl", "bQb">; } } -let ArchGuard = "defined(__aarch64__)", TargetGuard = "bf16" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "bf16" in { let BigEndianSafe = 1 in { defm VVREINTERPRET_BF : REINTERPRET_CROSS_TYPES< "csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", "bQb">; @@ -2092,7 +2092,7 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "bf16" in { } // v8.9a/v9.4a LRCPC3 intrinsics -let ArchGuard = "defined(__aarch64__)", TargetGuard = "rcpc3" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "rcpc3" in { def VLDAP1_LANE : WInst<"vldap1_lane", ".(c*!).I", "QUlQlUlldQdPlQPl">; def VSTL1_LANE : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl">; } diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index 04e1acc2705004..56f1fdf9ef574f 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -2266,7 +2266,7 @@ static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) { InIfdef = false; } if (!InIfdef && IsA64) { - OS << "#ifdef __aarch64__\n"; + OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n"; InIfdef = true; } @@ -2299,7 +2299,7 @@ static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) { InIfdef = false; } if (!InIfdef && IsA64) { - OS << "#ifdef __aarch64__\n"; + OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n"; InIfdef = true; } @@ -2381,7 +2381,7 @@ void NeonEmitter::run(raw_ostream &OS) { OS << "#include \n"; // For now, signedness of polynomial types depends on target - OS << "#ifdef __aarch64__\n"; + OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n"; OS << "typedef uint8_t poly8_t;\n"; OS << "typedef uint16_t poly16_t;\n"; OS << "typedef uint64_t poly64_t;\n"; @@ -2582,7 +2582,7 @@ void NeonEmitter::runVectorTypes(raw_ostream &OS) { OS << "typedef float float32_t;\n"; OS << "typedef __fp16 float16_t;\n"; - OS << "#ifdef __aarch64__\n"; + OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n"; OS << "typedef double float64_t;\n"; OS << "#endif\n\n";