diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def index 82a1ba3c82ad35..31ec84143f65c1 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -68,6 +68,9 @@ TARGET_BUILTIN(__builtin_arm_ldg, "v*v*", "t", "mte") TARGET_BUILTIN(__builtin_arm_stg, "vv*", "t", "mte") TARGET_BUILTIN(__builtin_arm_subp, "Uiv*v*", "t", "mte") +// SME state function +BUILTIN(__builtin_arm_get_sme_state, "vULi*ULi*", "n") + // Memory Operations TARGET_BUILTIN(__builtin_arm_mops_memset_tag, "v*v*iz", "", "mte,mops") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 5081062da2862e..f71dbf1729a1d6 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -10430,6 +10430,26 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); } + if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) { + // Create call to __arm_sme_state and store the results to the two pointers. + CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction( + llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {}, + false), + "__arm_sme_state")); + auto Attrs = + AttributeList() + .addFnAttribute(getLLVMContext(), "aarch64_pstate_sm_compatible") + .addFnAttribute(getLLVMContext(), "aarch64_pstate_za_preserved"); + CI->setAttributes(Attrs); + CI->setCallingConv( + llvm::CallingConv:: + AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2); + Builder.CreateStore(Builder.CreateExtractValue(CI, 0), + EmitPointerWithAlignment(E->getArg(0))); + return Builder.CreateStore(Builder.CreateExtractValue(CI, 1), + EmitPointerWithAlignment(E->getArg(1))); + } + if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) { assert((getContext().getTypeSize(E->getType()) == 32) && "rbit of unusual size!"); diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_state_funs.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_state_funs.c new file mode 100644 index 00000000000000..282819c8ca3501 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_state_funs.c @@ -0,0 +1,72 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +// CHECK-LABEL: @test_in_streaming_mode( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3:[0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0 +// CHECK-NEXT: [[AND_I:%.*]] = and i64 [[TMP1]], 1 +// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i64 [[AND_I]], 0 +// CHECK-NEXT: ret i1 [[TOBOOL_I]] +// +// CPP-CHECK-LABEL: @_Z22test_in_streaming_modev( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3:[0-9]+]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[AND_I:%.*]] = and i64 [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i64 [[AND_I]], 0 +// CPP-CHECK-NEXT: ret i1 [[TOBOOL_I]] +// +bool test_in_streaming_mode(void) __arm_streaming_compatible { + return __arm_in_streaming_mode(); +} + +// CHECK-LABEL: @test_za_disable( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR4:[0-9]+]] +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_za_disablev( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR4:[0-9]+]] +// CPP-CHECK-NEXT: ret void +// +void test_za_disable(void) __arm_streaming_compatible { + __arm_za_disable(); +} + +// CHECK-LABEL: @test_has_sme( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3]] +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0 +// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp slt i64 [[TMP1]], 0 +// CHECK-NEXT: ret i1 [[TOBOOL_I]] +// +// CPP-CHECK-LABEL: @_Z12test_has_smev( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp slt i64 [[TMP1]], 0 +// CPP-CHECK-NEXT: ret i1 [[TOBOOL_I]] +// +bool test_has_sme(void) __arm_streaming_compatible { + return __arm_has_sme(); +} + +// CHECK-LABEL: @test_svundef_za( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svundef_zav( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: ret void +// +void test_svundef_za(void) __arm_streaming_compatible __arm_shared_za { + svundef_za(); +} + diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_qcvtn.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_qcvtn.c index be7ec719edea55..fb53ea456c816a 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_qcvtn.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_qcvtn.c @@ -8,13 +8,11 @@ // RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -#include +#include #ifdef SVE_OVERLOADED_FORMS // A simple used,unused... macro, long enough to represent any SVE builtin. @@ -23,6 +21,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME2 +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_qcvtn_s16_s32_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) @@ -37,7 +41,7 @@ // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sqcvtn.x2.nxv4i32( [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret [[TMP2]] // -svint16_t test_qcvtn_s16_s32_x2(svint32x2_t zn) __arm_streaming_compatible { +svint16_t test_qcvtn_s16_s32_x2(svint32x2_t zn) ATTR { return SVE_ACLE_FUNC(svqcvtn_s16,_s32_x2,,)(zn); } @@ -55,7 +59,7 @@ svint16_t test_qcvtn_s16_s32_x2(svint32x2_t zn) __arm_streaming_compatible { // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.uqcvtn.x2.nxv4i32( [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret [[TMP2]] // -svuint16_t test_qcvtn_u16_u32_x2(svuint32x2_t zn) __arm_streaming_compatible { +svuint16_t test_qcvtn_u16_u32_x2(svuint32x2_t zn) ATTR { return SVE_ACLE_FUNC(svqcvtn_u16,_u32_x2,,)(zn); } @@ -73,6 +77,6 @@ svuint16_t test_qcvtn_u16_u32_x2(svuint32x2_t zn) __arm_streaming_compatible { // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sqcvtun.x2.nxv4i32( [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret [[TMP2]] // -svuint16_t test_qcvtn_u16_s32_x2(svint32x2_t zn) __arm_streaming_compatible { +svuint16_t test_qcvtn_u16_s32_x2(svint32x2_t zn) ATTR { return SVE_ACLE_FUNC(svqcvtn_u16,_s32_x2,,)(zn); } diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index 311c6b09dc7901..6c302da106a2cf 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -1603,6 +1603,25 @@ void SVEEmitter::createSMEHeader(raw_ostream &OS) { OS << "extern \"C\" {\n"; OS << "#endif\n\n"; + OS << "void __arm_za_disable(void) __arm_streaming_compatible;\n\n"; + + OS << "__ai bool __arm_has_sme(void) __arm_streaming_compatible {\n"; + OS << " uint64_t x0, x1;\n"; + OS << " __builtin_arm_get_sme_state(&x0, &x1);\n"; + OS << " return x0 & (1ULL << 63);\n"; + OS << "}\n\n"; + + OS << "__ai bool __arm_in_streaming_mode(void) __arm_streaming_compatible " + "{\n"; + OS << " uint64_t x0, x1;\n"; + OS << " __builtin_arm_get_sme_state(&x0, &x1);\n"; + OS << " return x0 & 1;\n"; + OS << "}\n\n"; + + OS << "__ai __attribute__((target(\"sme\"))) void svundef_za(void) " + "__arm_streaming_compatible __arm_shared_za " + "{ }\n\n"; + createCoreHeaderIntrinsics(OS, *this, ACLEKind::SME); OS << "#ifdef __cplusplus\n";