Skip to content

Commit

Permalink
[Clang][AArch64] Add missing SME functions to header file. (llvm#75791)
Browse files Browse the repository at this point in the history
This includes:
* __arm_in_streaming_mode()
* __arm_has_sme()
* __arm_za_disable()
* __svundef_za()
  • Loading branch information
sdesmalen-arm committed Jan 2, 2024
1 parent d714be9 commit 5055eee
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 9 deletions.
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/BuiltinsAArch64.def
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ TARGET_BUILTIN(__builtin_arm_ldg, "v*v*", "t", "mte")
TARGET_BUILTIN(__builtin_arm_stg, "vv*", "t", "mte")
TARGET_BUILTIN(__builtin_arm_subp, "Uiv*v*", "t", "mte")

// SME state function
BUILTIN(__builtin_arm_get_sme_state, "vULi*ULi*", "n")

// Memory Operations
TARGET_BUILTIN(__builtin_arm_mops_memset_tag, "v*v*iz", "", "mte,mops")

Expand Down
20 changes: 20 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10430,6 +10430,26 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
}

if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
// Create call to __arm_sme_state and store the results to the two pointers.
CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
false),
"__arm_sme_state"));
auto Attrs =
AttributeList()
.addFnAttribute(getLLVMContext(), "aarch64_pstate_sm_compatible")
.addFnAttribute(getLLVMContext(), "aarch64_pstate_za_preserved");
CI->setAttributes(Attrs);
CI->setCallingConv(
llvm::CallingConv::
AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
EmitPointerWithAlignment(E->getArg(0)));
return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
EmitPointerWithAlignment(E->getArg(1)));
}

if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
assert((getContext().getTypeSize(E->getType()) == 32) &&
"rbit of unusual size!");
Expand Down
72 changes: 72 additions & 0 deletions clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_state_funs.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s

#include <arm_sme_draft_spec_subject_to_change.h>

// CHECK-LABEL: @test_in_streaming_mode(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3:[0-9]+]]
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
// CHECK-NEXT: [[AND_I:%.*]] = and i64 [[TMP1]], 1
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i64 [[AND_I]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
// CPP-CHECK-LABEL: @_Z22test_in_streaming_modev(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3:[0-9]+]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
// CPP-CHECK-NEXT: [[AND_I:%.*]] = and i64 [[TMP1]], 1
// CPP-CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i64 [[AND_I]], 0
// CPP-CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
bool test_in_streaming_mode(void) __arm_streaming_compatible {
return __arm_in_streaming_mode();
}

// CHECK-LABEL: @test_za_disable(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR4:[0-9]+]]
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z15test_za_disablev(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR4:[0-9]+]]
// CPP-CHECK-NEXT: ret void
//
void test_za_disable(void) __arm_streaming_compatible {
__arm_za_disable();
}

// CHECK-LABEL: @test_has_sme(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3]]
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp slt i64 [[TMP1]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
// CPP-CHECK-LABEL: @_Z12test_has_smev(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp slt i64 [[TMP1]], 0
// CPP-CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
bool test_has_sme(void) __arm_streaming_compatible {
return __arm_has_sme();
}

// CHECK-LABEL: @test_svundef_za(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z15test_svundef_zav(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: ret void
//
void test_svundef_za(void) __arm_streaming_compatible __arm_shared_za {
svundef_za();
}

22 changes: 13 additions & 9 deletions clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_qcvtn.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,11 @@
// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s

// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s

#include <arm_sme_draft_spec_subject_to_change.h>
#include <arm_sve.h>

#ifdef SVE_OVERLOADED_FORMS
// A simple used,unused... macro, long enough to represent any SVE builtin.
Expand All @@ -23,6 +21,12 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif

#ifndef TEST_SME2
#define ATTR
#else
#define ATTR __arm_streaming
#endif

// CHECK-LABEL: @test_qcvtn_s16_s32_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
Expand All @@ -37,7 +41,7 @@
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
//
svint16_t test_qcvtn_s16_s32_x2(svint32x2_t zn) __arm_streaming_compatible {
svint16_t test_qcvtn_s16_s32_x2(svint32x2_t zn) ATTR {
return SVE_ACLE_FUNC(svqcvtn_s16,_s32_x2,,)(zn);
}

Expand All @@ -55,7 +59,7 @@ svint16_t test_qcvtn_s16_s32_x2(svint32x2_t zn) __arm_streaming_compatible {
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqcvtn.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
//
svuint16_t test_qcvtn_u16_u32_x2(svuint32x2_t zn) __arm_streaming_compatible {
svuint16_t test_qcvtn_u16_u32_x2(svuint32x2_t zn) ATTR {
return SVE_ACLE_FUNC(svqcvtn_u16,_u32_x2,,)(zn);
}

Expand All @@ -73,6 +77,6 @@ svuint16_t test_qcvtn_u16_u32_x2(svuint32x2_t zn) __arm_streaming_compatible {
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtun.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
//
svuint16_t test_qcvtn_u16_s32_x2(svint32x2_t zn) __arm_streaming_compatible {
svuint16_t test_qcvtn_u16_s32_x2(svint32x2_t zn) ATTR {
return SVE_ACLE_FUNC(svqcvtn_u16,_s32_x2,,)(zn);
}
19 changes: 19 additions & 0 deletions clang/utils/TableGen/SveEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1603,6 +1603,25 @@ void SVEEmitter::createSMEHeader(raw_ostream &OS) {
OS << "extern \"C\" {\n";
OS << "#endif\n\n";

OS << "void __arm_za_disable(void) __arm_streaming_compatible;\n\n";

OS << "__ai bool __arm_has_sme(void) __arm_streaming_compatible {\n";
OS << " uint64_t x0, x1;\n";
OS << " __builtin_arm_get_sme_state(&x0, &x1);\n";
OS << " return x0 & (1ULL << 63);\n";
OS << "}\n\n";

OS << "__ai bool __arm_in_streaming_mode(void) __arm_streaming_compatible "
"{\n";
OS << " uint64_t x0, x1;\n";
OS << " __builtin_arm_get_sme_state(&x0, &x1);\n";
OS << " return x0 & 1;\n";
OS << "}\n\n";

OS << "__ai __attribute__((target(\"sme\"))) void svundef_za(void) "
"__arm_streaming_compatible __arm_shared_za "
"{ }\n\n";

createCoreHeaderIntrinsics(OS, *this, ACLEKind::SME);

OS << "#ifdef __cplusplus\n";
Expand Down

0 comments on commit 5055eee

Please sign in to comment.