From d777504355dac9a2a8a7bbba6001d05b936868b2 Mon Sep 17 00:00:00 2001 From: Kareem Ergawy Date: Mon, 18 Dec 2023 11:14:46 +0100 Subject: [PATCH 01/17] [MLIR][OpenMP][Offload] Lower target update op to DeviceRT (#75159) Adds support for lowring `UpdateDataOp` to the DeviceRT. This reuses the existing utils used by other device directive. --- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 24 ++++++++++-- mlir/test/Target/LLVMIR/omptarget-llvm.mlir | 38 +++++++++++++++++++ 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 4f6200d29a70a6..088e7ae4231bef 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -1915,6 +1915,23 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, mapOperands = exitDataOp.getMapOperands(); return success(); }) + .Case([&](omp::UpdateDataOp updateDataOp) { + if (updateDataOp.getNowait()) + return failure(); + + if (auto ifExprVar = updateDataOp.getIfExpr()) + ifCond = moduleTranslation.lookupValue(ifExprVar); + + if (auto devId = updateDataOp.getDevice()) + if (auto constOp = + dyn_cast(devId.getDefiningOp())) + if (auto intAttr = dyn_cast(constOp.getValue())) + deviceID = intAttr.getInt(); + + RTLFn = llvm::omp::OMPRTL___tgt_target_data_update_mapper; + mapOperands = updateDataOp.getMotionOperands(); + return success(); + }) .Default([&](Operation *op) { return op->emitError("unsupported OpenMP operation: ") << op->getName(); @@ -2748,9 +2765,10 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( .Case([&](omp::ThreadprivateOp) { return convertOmpThreadprivate(*op, builder, moduleTranslation); }) - .Case([&](auto op) { - return convertOmpTargetData(op, builder, moduleTranslation); - }) + .Case( + [&](auto op) { + return convertOmpTargetData(op, builder, moduleTranslation); + }) .Case([&](omp::TargetOp) { return convertOmpTarget(*op, builder, moduleTranslation); }) diff --git a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir index 9221b410d766ed..b089d47f795df3 100644 --- a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir @@ -441,3 +441,41 @@ llvm.func @_QPopenmp_target_use_dev_both() { // CHECK: ret void // ----- + +llvm.func @_QPopenmp_target_data_update() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr + %2 = omp.map_info var_ptr(%1 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""} + omp.target_data map_entries(%2 : !llvm.ptr) { + %3 = llvm.mlir.constant(99 : i32) : i32 + llvm.store %3, %1 : i32, !llvm.ptr + omp.terminator + } + + omp.target_update_data motion_entries(%2 : !llvm.ptr) + + llvm.return +} + +// CHECK-LABEL: define void @_QPopenmp_target_data_update + +// CHECK-DAG: %[[OFFLOAD_BASEPTRS:.*]] = alloca [1 x ptr], align 8 +// CHECK-DAG: %[[OFFLOAD_PTRS:.*]] = alloca [1 x ptr], align 8 +// CHECK-DAG: %[[INT_ALLOCA:.*]] = alloca i32, i64 1, align 4 +// CHECK-DAG: %[[OFFLOAD_MAPPERS:.*]] = alloca [1 x ptr], align 8 + +// CHECK: call void @__tgt_target_data_begin_mapper +// CHECK: store i32 99, ptr %[[INT_ALLOCA]], align 4 +// CHECK: call void @__tgt_target_data_end_mapper + +// CHECK: %[[BASEPTRS_VAL:.*]] = getelementptr inbounds [1 x ptr], ptr %[[OFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK: store ptr %[[INT_ALLOCA]], ptr %[[BASEPTRS_VAL]], align 8 +// CHECK: %[[PTRS_VAL:.*]] = getelementptr inbounds [1 x ptr], ptr %[[OFFLOAD_PTRS]], i32 0, i32 0 +// CHECK: store ptr %[[INT_ALLOCA]], ptr %[[PTRS_VAL]], align 8 +// CHECK: %[[MAPPERS_VAL:.*]] = getelementptr inbounds [1 x ptr], ptr %[[OFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK: store ptr null, ptr %[[MAPPERS_VAL]], align 8 +// CHECK: %[[BASEPTRS_VAL_2:.*]] = getelementptr inbounds [1 x ptr], ptr %[[OFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK: %[[PTRS_VAL_2:.*]] = getelementptr inbounds [1 x ptr], ptr %[[OFFLOAD_PTRS]], i32 0, i32 0 +// CHECK: call void @__tgt_target_data_update_mapper(ptr @2, i64 -1, i32 1, ptr %[[BASEPTRS_VAL_2]], ptr %[[PTRS_VAL_2]], ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr null) + +// CHECK: ret void From 9e3d915d8ebf86e24c9ff58766be8e7c6aa7b0c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Mon, 18 Dec 2023 12:31:05 +0200 Subject: [PATCH 02/17] [llvm-windres] Change the interpretation of --preprocessor to match Binutils 2.36 (#75391) Binutils 2.36 had a somewhat controversial change in how the --preprocessor option was handled in GNU windres; previously, the option was interpreted as a part of the command string, potentially containing multiple arguments (which even was hinted at in the documentation). In Binutils 2.36, this was changed to interpret the --preprocessor argument as one argument (possibly containing spaces) pointing at the preprocessor executable. The existing behaviour where implicit arguments like -E -xc -DRC_INVOKED are dropped if --preprocessor is specified, was kept. This was a breaking change for some users of GNU windres, see https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=21c33bcbe36377abf01614fb1b9be439a3b6de20, https://sourceware.org/bugzilla/show_bug.cgi?id=27594, and https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=5edb8e3f5ad8d74a83fc0df7f6e4514eed0aa77f. As multiple years have passed since, the behaviour change seems to be here to stay, and any users of the previous form of the option have been forced to avoid this construct. Thus update llvm-windres to match the new way Binutils of handling this option. One construct for specifying the path to the preprocessor, which works both before and after binutils 2.36 (and this change in llvm-windres) is to specify options like this: --preprocessor path/to/executable --preprocessor-arg -E --preprocessor-arg -xc -DRC_INVOKED --- llvm/test/tools/llvm-rc/windres-preproc.test | 9 +++- llvm/tools/llvm-rc/llvm-rc.cpp | 44 +++----------------- 2 files changed, 12 insertions(+), 41 deletions(-) diff --git a/llvm/test/tools/llvm-rc/windres-preproc.test b/llvm/test/tools/llvm-rc/windres-preproc.test index 74e888614aa2b9..13f82299a074bb 100644 --- a/llvm/test/tools/llvm-rc/windres-preproc.test +++ b/llvm/test/tools/llvm-rc/windres-preproc.test @@ -6,8 +6,8 @@ ; RUN: llvm-windres -### --include-dir %p/incdir1 --include %p/incdir2 "-DFOO1=\\\"foo bar\\\"" -UFOO2 -D FOO3 --preprocessor-arg "-DFOO4=\\\"baz baz\\\"" -DFOO5=\"bar\" %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK1 ; RUN: llvm-windres -### --include-dir %p/incdir1 --include %p/incdir2 "-DFOO1=\"foo bar\"" -UFOO2 -D FOO3 --preprocessor-arg "-DFOO4=\"baz baz\"" "-DFOO5=bar" %p/Inputs/empty.rc %t.res --use-temp-file | FileCheck %s --check-prefix=CHECK1 ; CHECK1: {{^}} "clang" "--driver-mode=gcc" "-target" "{{.*}}-{{.*}}{{mingw32|windows-gnu}}" "-E" "-xc" "-DRC_INVOKED" "-I" "{{.*}}incdir1" "-I" "{{.*}}incdir2" "-D" "FOO1=\"foo bar\"" "-U" "FOO2" "-D" "FOO3" "-DFOO4=\"baz baz\"" "-D" "FOO5=bar" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc"{{$}} -; RUN: llvm-windres -### --preprocessor "i686-w64-mingw32-gcc -E -DFOO=\\\"foo\\ bar\\\"" %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK2 -; CHECK2: {{^}} "{{.*}}i686-w64-mingw32-gcc" "-E" "-DFOO=\"foo bar\"" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc"{{$}} +; RUN: llvm-windres -### --preprocessor "i686-w64-mingw32-gcc" --preprocessor-arg -E "-DFOO=\\\"foo bar\\\"" %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK2 +; CHECK2: {{^}} "{{.*}}i686-w64-mingw32-gcc" "-E" "-D" "FOO=\"foo bar\"" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc"{{$}} ;; Test resolving the --preprocessor executable from PATH @@ -22,3 +22,8 @@ ; RUN: not llvm-windres --preprocessor intentionally-missing-executable %p/Inputs/empty.rc %t.res 2>&1 | FileCheck %s --check-prefix=CHECK4 ; CHECK4: llvm-rc: Preprocessing failed: Executable "intentionally-missing-executable" doesn't exist! + +;; Test --preprocessor with an argument with spaces. + +; RUN: llvm-windres -### --preprocessor "path with spaces/gcc" %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK5 +; CHECK5: {{^}} "path with spaces/gcc" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc"{{$}} diff --git a/llvm/tools/llvm-rc/llvm-rc.cpp b/llvm/tools/llvm-rc/llvm-rc.cpp index 27fb0309e0ee54..78ab96492acc75 100644 --- a/llvm/tools/llvm-rc/llvm-rc.cpp +++ b/llvm/tools/llvm-rc/llvm-rc.cpp @@ -209,7 +209,7 @@ struct RcOptions { bool Preprocess = true; bool PrintCmdAndExit = false; std::string Triple; - std::vector PreprocessCmd; + std::optional Preprocessor; std::vector PreprocessArgs; std::string InputFile; @@ -229,7 +229,7 @@ struct RcOptions { void preprocess(StringRef Src, StringRef Dst, const RcOptions &Opts, const char *Argv0) { std::string Clang; - if (Opts.PrintCmdAndExit || !Opts.PreprocessCmd.empty()) { + if (Opts.PrintCmdAndExit || Opts.Preprocessor) { Clang = "clang"; } else { ErrorOr ClangOrErr = findClang(Argv0, Opts.Triple); @@ -249,10 +249,9 @@ void preprocess(StringRef Src, StringRef Dst, const RcOptions &Opts, Clang, "--driver-mode=gcc", "-target", Opts.Triple, "-E", "-xc", "-DRC_INVOKED"}; std::string PreprocessorExecutable; - if (!Opts.PreprocessCmd.empty()) { + if (Opts.Preprocessor) { Args.clear(); - for (const auto &S : Opts.PreprocessCmd) - Args.push_back(S); + Args.push_back(*Opts.Preprocessor); if (!sys::fs::can_execute(Args[0])) { if (auto P = sys::findProgramByName(Args[0])) { PreprocessorExecutable = *P; @@ -342,36 +341,6 @@ std::string unescape(StringRef S) { return Out; } -std::vector unescapeSplit(StringRef S) { - std::vector OutArgs; - std::string Out; - bool InQuote = false; - for (int I = 0, E = S.size(); I < E; I++) { - if (S[I] == '\\') { - if (I + 1 < E) - Out.push_back(S[++I]); - else - fatalError("Unterminated escape"); - continue; - } - if (S[I] == '"') { - InQuote = !InQuote; - continue; - } - if (S[I] == ' ' && !InQuote) { - OutArgs.push_back(Out); - Out.clear(); - continue; - } - Out.push_back(S[I]); - } - if (InQuote) - fatalError("Unterminated quote"); - if (!Out.empty()) - OutArgs.push_back(Out); - return OutArgs; -} - RcOptions parseWindresOptions(ArrayRef ArgsArr, ArrayRef InputArgsArray, std::string Prefix) { @@ -506,11 +475,8 @@ RcOptions parseWindresOptions(ArrayRef ArgsArr, break; } } - // TODO: If --use-temp-file is set, we shouldn't be unescaping - // the --preprocessor argument either, only splitting it. if (InputArgs.hasArg(WINDRES_preprocessor)) - Opts.PreprocessCmd = - unescapeSplit(InputArgs.getLastArgValue(WINDRES_preprocessor)); + Opts.Preprocessor = InputArgs.getLastArgValue(WINDRES_preprocessor); Opts.Params.CodePage = CpWin1252; // Different default if (InputArgs.hasArg(WINDRES_codepage)) { From 428660cfb986dd0a59cd2a16972c5f7109080522 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Mon, 18 Dec 2023 10:52:42 +0000 Subject: [PATCH 03/17] [GitHub] Don't indent comment that revision has passed the formatting check Due to the way the f string was written, the text ended up with 4 spaces at the start. 4 space indent in Markdown means plain text, which is not what we intend here. --- llvm/utils/git/code-format-helper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py index 697a27ab82a750..849ae996f7339f 100755 --- a/llvm/utils/git/code-format-helper.py +++ b/llvm/utils/git/code-format-helper.py @@ -131,8 +131,8 @@ def run(self, changed_files: List[str], args: FormatArgs) -> bool: if diff is None: if should_update_gh: comment_text = f""" - :white_check_mark: With the latest revision this PR passed the {self.friendly_name}. - """ +:white_check_mark: With the latest revision this PR passed the {self.friendly_name}. +""" self.update_pr(comment_text, args, create_new=False) return True elif len(diff) > 0: From 6deb5d4e440f34e6838eaf617198caea7ce721c5 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Mon, 18 Dec 2023 11:59:35 +0100 Subject: [PATCH 04/17] [NFC][OpenMP][MLIR] Verify if empty workshare loop is lowered correctly (#75518) Check if workshare loop without loop body is lowered correctly i.e.: 1) null pointer is passed to OpenMP device RTL function as a parameter which denotes loop function body aggregated parameters 2) Outlined loop function body has only one parameter - loop counter --- mlir/test/Target/LLVMIR/omptarget-wsloop.mlir | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir index ba641be4dada17..220eb85b3483ec 100644 --- a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir @@ -15,6 +15,16 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo } llvm.return } + + llvm.func @target_empty_wsloop(){ + %loop_ub = llvm.mlir.constant(9 : i32) : i32 + %loop_lb = llvm.mlir.constant(0 : i32) : i32 + %loop_step = llvm.mlir.constant(1 : i32) : i32 + omp.wsloop for (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) { + omp.yield + } + llvm.return + } } // CHECK: define void @[[FUNC0:.*]](ptr %[[ARG0:.*]]) @@ -31,3 +41,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo // CHECK: %[[GEP3:.*]] = getelementptr [10 x i32], ptr %[[LOADGEP]], i32 0, i32 %[[TMP2:.*]] // CHECK: store i32 %[[VAL0:.*]], ptr %[[GEP3]], align 4 +// CHECK: define void @[[FUNC_EMPTY_WSLOOP:.*]]() +// CHECK: call void @__kmpc_for_static_loop_4u(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), ptr @[[LOOP_EMPTY_BODY_FN:.*]], ptr null, i32 10, i32 %[[NUM_THREADS:.*]], i32 0) + +// CHECK: define internal void @[[LOOP_EMPTY_BODY_FN]](i32 %[[LOOP_CNT:.*]]) From 1faa1cd02d569c1e58303ab4bd9d082aecda2a52 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 18 Dec 2023 11:03:39 +0000 Subject: [PATCH 05/17] [Clang][SVE2.1] Add intrinsics for `WHILEcc` resulting in predicate pair (#75107) Add intrinsics of the form: svboolx2_t svwhile_b{8,16,32,64}_[{s,u}64]_x2([u]int64_t, [u]int64_t); and their overloaded variants as specified in https://github.com/ARM-software/acle/pull/257 --- clang/include/clang/Basic/arm_sve.td | 12 + .../acle_sve2p1_while_x2.c | 879 ++++++++++++++++++ 2 files changed, 891 insertions(+) create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 98434c5c53e28c..a429a3c5fe378a 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1341,6 +1341,18 @@ def SVWHILEHS_U32 : SInst<"svwhilege_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNon def SVWHILEHS_U64 : SInst<"svwhilege_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhile, IsStreamingCompatible]>; } +let TargetGuard = "sve2p1|sme2" in { + def SVWHILEGE_S64_X2 : SInst<"svwhilege_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege_x2">; + def SVWHILEGT_S64_X2 : SInst<"svwhilegt_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt_x2">; + def SVWHILEHI_U64_X2 : SInst<"svwhilegt_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilehi_x2">; + def SVWHILEHS_U64_X2 : SInst<"svwhilege_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilehs_x2">; + def SVWHILELE_S64_X2 : SInst<"svwhilele_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilele_x2">; + def SVWHILELT_S64_X2 : SInst<"svwhilelt_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt_x2">; + def SVWHILELO_U64_X2 : SInst<"svwhilelt_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilelo_x2">; + def SVWHILELS_U64_X2 : SInst<"svwhilele_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilels_x2">; + +} + //////////////////////////////////////////////////////////////////////////////// // SVE2 - Uniform DSP operations diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c new file mode 100644 index 00000000000000..acead9be3f01d2 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c @@ -0,0 +1,879 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -o /dev/null %s +#include + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: define dso_local @test_svwhilege_b8_s64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: define dso_local @_Z21test_svwhilege_b8_s64ll( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svboolx2_t test_svwhilege_b8_s64(int64_t op1, int64_t op2) { + return SVE_ACLE_FUNC(svwhilege_b8,_s64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilege_b8_u64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: define dso_local @_Z21test_svwhilege_b8_u64mm( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svboolx2_t test_svwhilege_b8_u64(uint64_t op1, uint64_t op2) { + return SVE_ACLE_FUNC(svwhilege_b8,_u64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilege_b16_s64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilege_b16_s64ll( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilege_b16_s64(int64_t op1, int64_t op2) { + return SVE_ACLE_FUNC(svwhilege_b16,_s64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilege_b16_u64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilege_b16_u64mm( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilege_b16_u64(uint64_t op1, uint64_t op2) { + return SVE_ACLE_FUNC(svwhilege_b16,_u64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilege_b32_s64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilege_b32_s64ll( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilege_b32_s64(int64_t op1, int64_t op2) { + return SVE_ACLE_FUNC(svwhilege_b32,_s64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilege_b32_u64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilege_b32_u64mm( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilege_b32_u64(uint64_t op1, uint64_t op2) { + return SVE_ACLE_FUNC(svwhilege_b32,_u64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilege_b64_s64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilege_b64_s64ll( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilege_b64_s64(int64_t op1, int64_t op2) { + return SVE_ACLE_FUNC(svwhilege_b64,_s64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilege_b64_u64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilege_b64_u64mm( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilege_b64_u64(uint64_t op1, uint64_t op2) { + return SVE_ACLE_FUNC(svwhilege_b64,_u64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilegt_b8_s64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: define dso_local @_Z21test_svwhilegt_b8_s64ll( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svboolx2_t test_svwhilegt_b8_s64(int64_t op1, int64_t op2) { + return SVE_ACLE_FUNC(svwhilegt_b8,_s64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilegt_b8_u64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: define dso_local @_Z21test_svwhilegt_b8_u64mm( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svboolx2_t test_svwhilegt_b8_u64(uint64_t op1, uint64_t op2) { + return SVE_ACLE_FUNC(svwhilegt_b8,_u64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilegt_b16_s64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilegt_b16_s64ll( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilegt_b16_s64(int64_t op1, int64_t op2) { + return SVE_ACLE_FUNC(svwhilegt_b16,_s64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilegt_b16_u64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilegt_b16_u64mm( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilegt_b16_u64(uint64_t op1, uint64_t op2) { + return SVE_ACLE_FUNC(svwhilegt_b16,_u64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilegt_b32_s64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilegt_b32_s64ll( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilegt_b32_s64(int64_t op1, int64_t op2) { + return SVE_ACLE_FUNC(svwhilegt_b32,_s64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilegt_b32_u64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilegt_b32_u64mm( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilegt_b32_u64(uint64_t op1, uint64_t op2) { + return SVE_ACLE_FUNC(svwhilegt_b32,_u64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilegt_b64_s64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilegt_b64_s64ll( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilegt_b64_s64(int64_t op1, int64_t op2) { + return SVE_ACLE_FUNC(svwhilegt_b64,_s64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilegt_b64_u64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilegt_b64_u64mm( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilegt_b64_u64(uint64_t op1, uint64_t op2) { + return SVE_ACLE_FUNC(svwhilegt_b64,_u64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilele_b8_s64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: define dso_local @_Z21test_svwhilele_b8_s64ll( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svboolx2_t test_svwhilele_b8_s64(int64_t op1, int64_t op2) { + return SVE_ACLE_FUNC(svwhilele_b8,_s64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilele_b8_u64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: define dso_local @_Z21test_svwhilele_b8_u64mm( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svboolx2_t test_svwhilele_b8_u64(uint64_t op1, uint64_t op2) { + return SVE_ACLE_FUNC(svwhilele_b8,_u64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilele_b16_s64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilele_b16_s64ll( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilele_b16_s64(int64_t op1, int64_t op2) { + return SVE_ACLE_FUNC(svwhilele_b16,_s64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilele_b16_u64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilele_b16_u64mm( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilele_b16_u64(uint64_t op1, uint64_t op2) { + return SVE_ACLE_FUNC(svwhilele_b16,_u64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilele_b32_s64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilele_b32_s64ll( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilele_b32_s64(int64_t op1, int64_t op2) { + return SVE_ACLE_FUNC(svwhilele_b32,_s64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilele_b32_u64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilele_b32_u64mm( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilele_b32_u64(uint64_t op1, uint64_t op2) { + return SVE_ACLE_FUNC(svwhilele_b32,_u64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilele_b64_s64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilele_b64_s64ll( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilele_b64_s64(int64_t op1, int64_t op2) { + return SVE_ACLE_FUNC(svwhilele_b64,_s64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilele_b64_u64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilele_b64_u64mm( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilele_b64_u64(uint64_t op1, uint64_t op2) { + return SVE_ACLE_FUNC(svwhilele_b64,_u64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilelt_b8_s64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: define dso_local @_Z21test_svwhilelt_b8_s64ll( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svboolx2_t test_svwhilelt_b8_s64(int64_t op1, int64_t op2) { + return SVE_ACLE_FUNC(svwhilelt_b8,_s64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilelt_b8_u64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: define dso_local @_Z21test_svwhilelt_b8_u64mm( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svboolx2_t test_svwhilelt_b8_u64(uint64_t op1, uint64_t op2) { + return SVE_ACLE_FUNC(svwhilelt_b8,_u64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilelt_b16_s64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilelt_b16_s64ll( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilelt_b16_s64(int64_t op1, int64_t op2) { + return SVE_ACLE_FUNC(svwhilelt_b16,_s64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilelt_b16_u64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilelt_b16_u64mm( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilelt_b16_u64(uint64_t op1, uint64_t op2) { + return SVE_ACLE_FUNC(svwhilelt_b16,_u64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilelt_b32_s64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilelt_b32_s64ll( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilelt_b32_s64(int64_t op1, int64_t op2) { + return SVE_ACLE_FUNC(svwhilelt_b32,_s64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilelt_b32_u64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilelt_b32_u64mm( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilelt_b32_u64(uint64_t op1, uint64_t op2) { + return SVE_ACLE_FUNC(svwhilelt_b32,_u64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilelt_b64_s64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilelt_b64_s64ll( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilelt_b64_s64(int64_t op1, int64_t op2) { + return SVE_ACLE_FUNC(svwhilelt_b64,_s64,_x2)(op1, op2); +} + +// CHECK-LABEL: define dso_local @test_svwhilelt_b64_u64( +// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilelt_b64_u64mm( +// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svboolx2_t test_svwhilelt_b64_u64(uint64_t op1, uint64_t op2) { + return SVE_ACLE_FUNC(svwhilelt_b64,_u64,_x2)(op1, op2); +} From a34db9bdefe6d60b46f485aa1105ef6c95542e16 Mon Sep 17 00:00:00 2001 From: Jakub Chlanda Date: Mon, 18 Dec 2023 12:07:22 +0100 Subject: [PATCH 06/17] [AMDGPU][NFC] Simplify needcopysign logic (#75176) This was caught by coverity, reported as: `dead_error_condition`. Since the conditional revolves around `CF`, it is guaranteed to be null in the else clause, hence making the second part of the statement redundant. --- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp index 0c21382e5c225e..f03e6b8915b134 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -1050,8 +1050,7 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, CF->isNegative(); } else { needlog = true; - needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR && - (!CF || CF->isNegative()); + needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR; } } else { ConstantDataVector *CDV = dyn_cast(opr0); From a4deb14e353cd8c543f78979ce986f98023d36dd Mon Sep 17 00:00:00 2001 From: Kiran Chandramohan Date: Mon, 18 Dec 2023 11:19:32 +0000 Subject: [PATCH 07/17] [Flang][OpenMP] Add check-dag to private-clause-fixes test --- .../OpenMP/parallel-private-clause-fixes.f90 | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 index d7131296757917..8533106b7ac487 100644 --- a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 +++ b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 @@ -4,20 +4,20 @@ ! CHECK-LABEL: multiple_private_fix ! CHECK-SAME: %[[GAMA:.*]]: !fir.ref {fir.bindc_name = "gama"} -! CHECK: %[[GAMA_DECL:.*]]:2 = hlfir.declare %[[GAMA]] {uniq_name = "_QFmultiple_private_fixEgama"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_private_fixEi"} -! CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_private_fixEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFmultiple_private_fixEj"} -! CHECK: %[[J_DECL:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFmultiple_private_fixEj"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_private_fixEx"} -! CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_private_fixEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK-DAG: %[[GAMA_DECL:.*]]:2 = hlfir.declare %[[GAMA]] {uniq_name = "_QFmultiple_private_fixEgama"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK-DAG: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_private_fixEi"} +! CHECK-DAG: %[[I_DECL:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_private_fixEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK-DAG: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFmultiple_private_fixEj"} +! CHECK-DAG: %[[J_DECL:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFmultiple_private_fixEj"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK-DAG: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_private_fixEx"} +! CHECK-DAG: %[[X_DECL:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_private_fixEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: omp.parallel { -! CHECK: %[[PRIV_I:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -! CHECK: %[[PRIV_I_DECL:.*]]:2 = hlfir.declare %[[PRIV_I]] {uniq_name = "_QFmultiple_private_fixEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! CHECK: %[[PRIV_J:.*]] = fir.alloca i32 {bindc_name = "j", pinned, uniq_name = "_QFmultiple_private_fixEj"} -! CHECK: %[[PRIV_J_DECL:.*]]:2 = hlfir.declare %[[PRIV_J]] {uniq_name = "_QFmultiple_private_fixEj"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! CHECK: %[[PRIV_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned -! CHECK: %[[PRIV_X_DECL:.*]]:2 = hlfir.declare %[[PRIV_X]] {uniq_name = "_QFmultiple_private_fixEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK-DAG: %[[PRIV_I:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK-DAG: %[[PRIV_I_DECL:.*]]:2 = hlfir.declare %[[PRIV_I]] {uniq_name = "_QFmultiple_private_fixEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK-DAG: %[[PRIV_J:.*]] = fir.alloca i32 {bindc_name = "j", pinned, uniq_name = "_QFmultiple_private_fixEj"} +! CHECK-DAG: %[[PRIV_J_DECL:.*]]:2 = hlfir.declare %[[PRIV_J]] {uniq_name = "_QFmultiple_private_fixEj"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK-DAG: %[[PRIV_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned +! CHECK-DAG: %[[PRIV_X_DECL:.*]]:2 = hlfir.declare %[[PRIV_X]] {uniq_name = "_QFmultiple_private_fixEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[ONE:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_3:.*]] = fir.load %[[GAMA_DECL]]#0 : !fir.ref ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 From 7c1d8c74e8576c12da526f589c802faa91bc410a Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 18 Dec 2023 12:20:54 +0100 Subject: [PATCH 08/17] [ValueTracking] Add test for non-zero sub via known non equal (NFC) --- .../Analysis/ValueTracking/known-non-zero.ll | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/llvm/test/Analysis/ValueTracking/known-non-zero.ll b/llvm/test/Analysis/ValueTracking/known-non-zero.ll index dbec47ea0ae261..d8dbd1a1b0bda4 100644 --- a/llvm/test/Analysis/ValueTracking/known-non-zero.ll +++ b/llvm/test/Analysis/ValueTracking/known-non-zero.ll @@ -1218,3 +1218,19 @@ define <2 x i1> @cmp_excludes_zero_with_nonsplat_vec_fail(<2 x i8> %a, <2 x i8> ret <2 x i1> %r } +define i1 @sub_via_non_eq(i8 %x, i8 %y) { +; CHECK-LABEL: @sub_via_non_eq( +; CHECK-NEXT: [[NE:%.*]] = icmp ne i8 [[X:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[NE]]) +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i8 [[X]], 3 +; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[SHL]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[SUB]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %ne = icmp ne i8 %x, 0 + call void @llvm.assume(i1 %ne) + %shl = shl nuw i8 %x, 3 + %sub = sub i8 %x, %shl + %cmp = icmp eq i8 %sub, 0 + ret i1 %cmp +} From 337504683efa0b898bcf69b4e5be67d73dbaf180 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 18 Dec 2023 09:58:23 +0100 Subject: [PATCH 09/17] [ValueTracking] Use isKnownNonEqual() in isNonZeroSub() (x - y) != 0 is true iff x != y, so use the isKnownNonEqual() helper, which knows some additional tricks. --- llvm/lib/Analysis/ValueTracking.cpp | 11 ++--------- llvm/test/Analysis/ValueTracking/known-non-zero.ll | 5 +---- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 9ae05a4b5ccc72..45fdd4eda47d76 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -2368,19 +2368,12 @@ static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth, static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth, const SimplifyQuery &Q, unsigned BitWidth, Value *X, Value *Y) { + // TODO: Move this case into isKnownNonEqual(). if (auto *C = dyn_cast(X)) if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Depth, Q)) return true; - KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q); - if (XKnown.isUnknown()) - return false; - KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q); - // If X != Y then X - Y is non zero. - std::optional ne = KnownBits::ne(XKnown, YKnown); - // If we are unable to compute if X != Y, we won't be able to do anything - // computing the knownbits of the sub expression so just return here. - return ne && *ne; + return ::isKnownNonEqual(X, Y, Depth, Q); } static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts, diff --git a/llvm/test/Analysis/ValueTracking/known-non-zero.ll b/llvm/test/Analysis/ValueTracking/known-non-zero.ll index d8dbd1a1b0bda4..c8e17f8dcc69f8 100644 --- a/llvm/test/Analysis/ValueTracking/known-non-zero.ll +++ b/llvm/test/Analysis/ValueTracking/known-non-zero.ll @@ -1222,10 +1222,7 @@ define i1 @sub_via_non_eq(i8 %x, i8 %y) { ; CHECK-LABEL: @sub_via_non_eq( ; CHECK-NEXT: [[NE:%.*]] = icmp ne i8 [[X:%.*]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[NE]]) -; CHECK-NEXT: [[SHL:%.*]] = shl nuw i8 [[X]], 3 -; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[SHL]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[SUB]], 0 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ne = icmp ne i8 %x, 0 call void @llvm.assume(i1 %ne) From 465ecf872ec6c09fe610bf161c54031f6a372e95 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 18 Dec 2023 12:36:19 +0100 Subject: [PATCH 10/17] [InstCombine] Rename UndefElts -> PoisonElts (NFC) In line with updated shufflevector semantics, this represents the poison elements rather than undef elements now. This commit is a pure rename, without any logic changes. --- .../InstCombine/InstCombineCalls.cpp | 20 +-- .../InstCombine/InstCombineInternal.h | 2 +- .../InstCombine/InstCombineSelect.cpp | 4 +- .../InstCombineSimplifyDemanded.cpp | 122 +++++++++--------- .../InstCombine/InstCombineVectorOps.cpp | 17 +-- .../InstCombine/InstructionCombining.cpp | 10 +- 6 files changed, 88 insertions(+), 87 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 1539fa9a3269e1..c496f9c7419b5f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -357,9 +357,9 @@ Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) { // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask); - APInt UndefElts(DemandedElts.getBitWidth(), 0); - if (Value *V = - SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts)) + APInt PoisonElts(DemandedElts.getBitWidth(), 0); + if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, + PoisonElts)) return replaceOperand(II, 0, V); return nullptr; @@ -439,12 +439,12 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) { // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask); - APInt UndefElts(DemandedElts.getBitWidth(), 0); - if (Value *V = - SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts)) + APInt PoisonElts(DemandedElts.getBitWidth(), 0); + if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, + PoisonElts)) return replaceOperand(II, 0, V); - if (Value *V = - SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts, UndefElts)) + if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts, + PoisonElts)) return replaceOperand(II, 1, V); return nullptr; @@ -1526,9 +1526,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // support. if (auto *IIFVTy = dyn_cast(II->getType())) { auto VWidth = IIFVTy->getNumElements(); - APInt UndefElts(VWidth, 0); + APInt PoisonElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnes(VWidth)); - if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) { + if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, PoisonElts)) { if (V != II) return replaceInstUsesWith(*II, V); return II; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 1d50fa9b6bf74b..f86db698ef8f12 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -550,7 +550,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final bool SimplifyDemandedInstructionBits(Instruction &Inst, KnownBits &Known); Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, - APInt &UndefElts, unsigned Depth = 0, + APInt &PoisonElts, unsigned Depth = 0, bool AllowMultipleUsers = false) override; /// Canonicalize the position of binops relative to shufflevector. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 2dda46986f0fd0..20bf00344b144b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2440,9 +2440,9 @@ Instruction *InstCombinerImpl::foldVectorSelect(SelectInst &Sel) { return nullptr; unsigned NumElts = VecTy->getNumElements(); - APInt UndefElts(NumElts, 0); + APInt PoisonElts(NumElts, 0); APInt AllOnesEltMask(APInt::getAllOnes(NumElts)); - if (Value *V = SimplifyDemandedVectorElts(&Sel, AllOnesEltMask, UndefElts)) { + if (Value *V = SimplifyDemandedVectorElts(&Sel, AllOnesEltMask, PoisonElts)) { if (V != &Sel) return replaceInstUsesWith(Sel, V); return &Sel; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 2490f5b9b97eb8..a8ed6fe1432d94 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1319,8 +1319,8 @@ Value *InstCombinerImpl::simplifyShrShlDemandedBits( } /// The specified value produces a vector with any number of elements. -/// This method analyzes which elements of the operand are undef or poison and -/// returns that information in UndefElts. +/// This method analyzes which elements of the operand are poison and +/// returns that information in PoisonElts. /// /// DemandedElts contains the set of elements that are actually used by the /// caller, and by default (AllowMultipleUsers equals false) the value is @@ -1333,7 +1333,7 @@ Value *InstCombinerImpl::simplifyShrShlDemandedBits( /// returned. This returns null if no change was made. Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, - APInt &UndefElts, + APInt &PoisonElts, unsigned Depth, bool AllowMultipleUsers) { // Cannot analyze scalable type. The number of vector elements is not a @@ -1347,16 +1347,16 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, if (match(V, m_Undef())) { // If the entire vector is undef or poison, just return this info. - UndefElts = EltMask; + PoisonElts = EltMask; return nullptr; } if (DemandedElts.isZero()) { // If nothing is demanded, provide poison. - UndefElts = EltMask; + PoisonElts = EltMask; return PoisonValue::get(V->getType()); } - UndefElts = 0; + PoisonElts = 0; if (auto *C = dyn_cast(V)) { // Check if this is identity. If so, return 0 since we are not simplifying @@ -1370,7 +1370,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, for (unsigned i = 0; i != VWidth; ++i) { if (!DemandedElts[i]) { // If not demanded, set to poison. Elts.push_back(Poison); - UndefElts.setBit(i); + PoisonElts.setBit(i); continue; } @@ -1379,7 +1379,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, Elts.push_back(Elt); if (isa(Elt)) // Already poison. - UndefElts.setBit(i); + PoisonElts.setBit(i); } // If we changed the constant, return it. @@ -1400,7 +1400,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, // They'll be handled when it's their turn to be visited by // the main instcombine process. if (Depth != 0) - // TODO: Just compute the UndefElts information recursively. + // TODO: Just compute the PoisonElts information recursively. return nullptr; // Conservatively assume that all elements are needed. @@ -1422,8 +1422,8 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, } }; - APInt UndefElts2(VWidth, 0); - APInt UndefElts3(VWidth, 0); + APInt PoisonElts2(VWidth, 0); + APInt PoisonElts3(VWidth, 0); switch (I->getOpcode()) { default: break; @@ -1449,17 +1449,17 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, if (i == 0 ? match(I->getOperand(i), m_Undef()) : match(I->getOperand(i), m_Poison())) { // If the entire vector is undefined, just return this info. - UndefElts = EltMask; + PoisonElts = EltMask; return nullptr; } if (I->getOperand(i)->getType()->isVectorTy()) { - APInt UndefEltsOp(VWidth, 0); - simplifyAndSetOp(I, i, DemandedElts, UndefEltsOp); + APInt PoisonEltsOp(VWidth, 0); + simplifyAndSetOp(I, i, DemandedElts, PoisonEltsOp); // gep(x, undef) is not undef, so skip considering idx ops here // Note that we could propagate poison, but we can't distinguish between // undef & poison bits ATM if (i == 0) - UndefElts |= UndefEltsOp; + PoisonElts |= PoisonEltsOp; } } @@ -1472,7 +1472,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, if (!Idx) { // Note that we can't propagate undef elt info, because we don't know // which elt is getting updated. - simplifyAndSetOp(I, 0, DemandedElts, UndefElts2); + simplifyAndSetOp(I, 0, DemandedElts, PoisonElts2); break; } @@ -1487,7 +1487,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, // was extracted from the same index in another vector with the same type, // replace this insert with that other vector. // Note: This is attempted before the call to simplifyAndSetOp because that - // may change UndefElts to a value that does not match with Vec. + // may change PoisonElts to a value that does not match with Vec. Value *Vec; if (PreInsertDemandedElts == 0 && match(I->getOperand(1), @@ -1496,7 +1496,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, return Vec; } - simplifyAndSetOp(I, 0, PreInsertDemandedElts, UndefElts); + simplifyAndSetOp(I, 0, PreInsertDemandedElts, PoisonElts); // If this is inserting an element that isn't demanded, remove this // insertelement. @@ -1506,7 +1506,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, } // The inserted element is defined. - UndefElts.clearBit(IdxNo); + PoisonElts.clearBit(IdxNo); break; } case Instruction::ShuffleVector: { @@ -1525,12 +1525,12 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, MadeChange = true; } APInt LeftDemanded(OpWidth, 1); - APInt LHSUndefElts(OpWidth, 0); - simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts); - if (LHSUndefElts[0]) - UndefElts = EltMask; + APInt LHSPoisonElts(OpWidth, 0); + simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts); + if (LHSPoisonElts[0]) + PoisonElts = EltMask; else - UndefElts.clearAllBits(); + PoisonElts.clearAllBits(); break; } @@ -1549,11 +1549,11 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, } } - APInt LHSUndefElts(OpWidth, 0); - simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts); + APInt LHSPoisonElts(OpWidth, 0); + simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts); - APInt RHSUndefElts(OpWidth, 0); - simplifyAndSetOp(I, 1, RightDemanded, RHSUndefElts); + APInt RHSPoisonElts(OpWidth, 0); + simplifyAndSetOp(I, 1, RightDemanded, RHSPoisonElts); // If this shuffle does not change the vector length and the elements // demanded by this shuffle are an identity mask, then this shuffle is @@ -1579,7 +1579,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, return Shuffle->getOperand(0); } - bool NewUndefElts = false; + bool NewPoisonElts = false; unsigned LHSIdx = -1u, LHSValIdx = -1u; unsigned RHSIdx = -1u, RHSValIdx = -1u; bool LHSUniform = true; @@ -1587,23 +1587,23 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, for (unsigned i = 0; i < VWidth; i++) { unsigned MaskVal = Shuffle->getMaskValue(i); if (MaskVal == -1u) { - UndefElts.setBit(i); + PoisonElts.setBit(i); } else if (!DemandedElts[i]) { - NewUndefElts = true; - UndefElts.setBit(i); + NewPoisonElts = true; + PoisonElts.setBit(i); } else if (MaskVal < OpWidth) { - if (LHSUndefElts[MaskVal]) { - NewUndefElts = true; - UndefElts.setBit(i); + if (LHSPoisonElts[MaskVal]) { + NewPoisonElts = true; + PoisonElts.setBit(i); } else { LHSIdx = LHSIdx == -1u ? i : OpWidth; LHSValIdx = LHSValIdx == -1u ? MaskVal : OpWidth; LHSUniform = LHSUniform && (MaskVal == i); } } else { - if (RHSUndefElts[MaskVal - OpWidth]) { - NewUndefElts = true; - UndefElts.setBit(i); + if (RHSPoisonElts[MaskVal - OpWidth]) { + NewPoisonElts = true; + PoisonElts.setBit(i); } else { RHSIdx = RHSIdx == -1u ? i : OpWidth; RHSValIdx = RHSValIdx == -1u ? MaskVal - OpWidth : OpWidth; @@ -1646,11 +1646,11 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, return New; } } - if (NewUndefElts) { + if (NewPoisonElts) { // Add additional discovered undefs. SmallVector Elts; for (unsigned i = 0; i < VWidth; ++i) { - if (UndefElts[i]) + if (PoisonElts[i]) Elts.push_back(PoisonMaskElem); else Elts.push_back(Shuffle->getMaskValue(i)); @@ -1665,12 +1665,12 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, // on the current demanded elements. SelectInst *Sel = cast(I); if (Sel->getCondition()->getType()->isVectorTy()) { - // TODO: We are not doing anything with UndefElts based on this call. + // TODO: We are not doing anything with PoisonElts based on this call. // It is overwritten below based on the other select operands. If an // element of the select condition is known undef, then we are free to // choose the output value from either arm of the select. If we know that // one of those values is undef, then the output can be undef. - simplifyAndSetOp(I, 0, DemandedElts, UndefElts); + simplifyAndSetOp(I, 0, DemandedElts, PoisonElts); } // Next, see if we can transform the arms of the select. @@ -1692,12 +1692,12 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, } } - simplifyAndSetOp(I, 1, DemandedLHS, UndefElts2); - simplifyAndSetOp(I, 2, DemandedRHS, UndefElts3); + simplifyAndSetOp(I, 1, DemandedLHS, PoisonElts2); + simplifyAndSetOp(I, 2, DemandedRHS, PoisonElts3); // Output elements are undefined if the element from each arm is undefined. // TODO: This can be improved. See comment in select condition handling. - UndefElts = UndefElts2 & UndefElts3; + PoisonElts = PoisonElts2 & PoisonElts3; break; } case Instruction::BitCast: { @@ -1706,7 +1706,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, if (!VTy) break; unsigned InVWidth = cast(VTy)->getNumElements(); APInt InputDemandedElts(InVWidth, 0); - UndefElts2 = APInt(InVWidth, 0); + PoisonElts2 = APInt(InVWidth, 0); unsigned Ratio; if (VWidth == InVWidth) { @@ -1735,25 +1735,25 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, break; } - simplifyAndSetOp(I, 0, InputDemandedElts, UndefElts2); + simplifyAndSetOp(I, 0, InputDemandedElts, PoisonElts2); if (VWidth == InVWidth) { - UndefElts = UndefElts2; + PoisonElts = PoisonElts2; } else if ((VWidth % InVWidth) == 0) { // If the number of elements in the output is a multiple of the number of // elements in the input then an output element is undef if the // corresponding input element is undef. for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) - if (UndefElts2[OutIdx / Ratio]) - UndefElts.setBit(OutIdx); + if (PoisonElts2[OutIdx / Ratio]) + PoisonElts.setBit(OutIdx); } else if ((InVWidth % VWidth) == 0) { // If the number of elements in the input is a multiple of the number of // elements in the output then an output element is undef if all of the // corresponding input elements are undef. for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) { - APInt SubUndef = UndefElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio); + APInt SubUndef = PoisonElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio); if (SubUndef.popcount() == Ratio) - UndefElts.setBit(OutIdx); + PoisonElts.setBit(OutIdx); } } else { llvm_unreachable("Unimp"); @@ -1762,7 +1762,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, } case Instruction::FPTrunc: case Instruction::FPExt: - simplifyAndSetOp(I, 0, DemandedElts, UndefElts); + simplifyAndSetOp(I, 0, DemandedElts, PoisonElts); break; case Instruction::Call: { @@ -1785,18 +1785,18 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, DemandedPassThrough.clearBit(i); } if (II->getIntrinsicID() == Intrinsic::masked_gather) - simplifyAndSetOp(II, 0, DemandedPtrs, UndefElts2); - simplifyAndSetOp(II, 3, DemandedPassThrough, UndefElts3); + simplifyAndSetOp(II, 0, DemandedPtrs, PoisonElts2); + simplifyAndSetOp(II, 3, DemandedPassThrough, PoisonElts3); // Output elements are undefined if the element from both sources are. // TODO: can strengthen via mask as well. - UndefElts = UndefElts2 & UndefElts3; + PoisonElts = PoisonElts2 & PoisonElts3; break; } default: { // Handle target specific intrinsics std::optional V = targetSimplifyDemandedVectorEltsIntrinsic( - *II, DemandedElts, UndefElts, UndefElts2, UndefElts3, + *II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3, simplifyAndSetOp); if (V) return *V; @@ -1859,17 +1859,17 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, return ShufBO; } - simplifyAndSetOp(I, 0, DemandedElts, UndefElts); - simplifyAndSetOp(I, 1, DemandedElts, UndefElts2); + simplifyAndSetOp(I, 0, DemandedElts, PoisonElts); + simplifyAndSetOp(I, 1, DemandedElts, PoisonElts2); // Output elements are undefined if both are undefined. Consider things // like undef & 0. The result is known zero, not undef. - UndefElts &= UndefElts2; + PoisonElts &= PoisonElts2; } // If we've proven all of the lanes undef, return an undef value. // TODO: Intersect w/demanded lanes - if (UndefElts.isAllOnes()) + if (PoisonElts.isAllOnes()) return UndefValue::get(I->getType()); return MadeChange ? I : nullptr; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index c8b58c51d4e6ec..c381d25011f688 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -581,20 +581,20 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { // If the input vector has a single use, simplify it based on this use // property. if (SrcVec->hasOneUse()) { - APInt UndefElts(NumElts, 0); + APInt PoisonElts(NumElts, 0); APInt DemandedElts(NumElts, 0); DemandedElts.setBit(IndexC->getZExtValue()); if (Value *V = - SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts)) + SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts)) return replaceOperand(EI, 0, V); } else { // If the input vector has multiple uses, simplify it based on a union // of all elements used. APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec); if (!DemandedElts.isAllOnes()) { - APInt UndefElts(NumElts, 0); + APInt PoisonElts(NumElts, 0); if (Value *V = SimplifyDemandedVectorElts( - SrcVec, DemandedElts, UndefElts, 0 /* Depth */, + SrcVec, DemandedElts, PoisonElts, 0 /* Depth */, true /* AllowMultipleUsers */)) { if (V != SrcVec) { Worklist.addValue(SrcVec); @@ -1713,9 +1713,10 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) { if (auto VecTy = dyn_cast(VecOp->getType())) { unsigned VWidth = VecTy->getNumElements(); - APInt UndefElts(VWidth, 0); + APInt PoisonElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnes(VWidth)); - if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) { + if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, + PoisonElts)) { if (V != &IE) return replaceInstUsesWith(IE, V); return &IE; @@ -2855,9 +2856,9 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (Instruction *I = foldCastShuffle(SVI, Builder)) return I; - APInt UndefElts(VWidth, 0); + APInt PoisonElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnes(VWidth)); - if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { + if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) { if (V != &SVI) return replaceInstUsesWith(SVI, V); return &SVI; diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index a7ddadc25de43c..94f60719b78ca3 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -173,14 +173,14 @@ std::optional InstCombiner::targetSimplifyDemandedUseBitsIntrinsic( } std::optional InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic( - IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, - APInt &UndefElts3, + IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts, + APInt &PoisonElts2, APInt &PoisonElts3, std::function SimplifyAndSetOp) { // Handle target specific intrinsics if (II.getCalledFunction()->isTargetIntrinsic()) { return TTI.simplifyDemandedVectorEltsIntrinsic( - *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3, + *this, II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3, SimplifyAndSetOp); } return std::nullopt; @@ -2241,10 +2241,10 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { // compile-time. if (auto *GEPFVTy = dyn_cast(GEPType)) { auto VWidth = GEPFVTy->getNumElements(); - APInt UndefElts(VWidth, 0); + APInt PoisonElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnes(VWidth)); if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask, - UndefElts)) { + PoisonElts)) { if (V != &GEP) return replaceInstUsesWith(GEP, V); return &GEP; From 42239d2e96c5683a2973c39ea7ea20b616218b66 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Mon, 18 Dec 2023 12:37:29 +0100 Subject: [PATCH 11/17] [clang] Fix CTAD not respect default template arguments that were added after the definition. (#75569) Fixes https://github.com/llvm/llvm-project/issues/69987 --- clang/docs/ReleaseNotes.rst | 3 +++ clang/lib/Sema/SemaTemplate.cpp | 28 ++++++++++++++++------------ clang/test/SemaTemplate/ctad.cpp | 10 ++++++++++ 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 26ba4f8f72508a..2e32f8b36d23de 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -688,6 +688,9 @@ Bug Fixes in This Version (`#62157 `_) and (`#64885 `_) and (`#65568 `_) +- Fix an issue where clang doesn't respect detault template arguments that + are added in a later redeclaration for CTAD. + Fixes (#69987 `_) Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index f10abeaba0d451..5fcc39ec700522 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -1824,6 +1824,15 @@ static void SetNestedNameSpecifier(Sema &S, TagDecl *T, T->setQualifierInfo(SS.getWithLocInContext(S.Context)); } +// Returns the template parameter list with all default template argument +// information. +static TemplateParameterList *GetTemplateParameterList(TemplateDecl *TD) { + // Make sure we get the template parameter list from the most + // recent declaration, since that is the only one that is guaranteed to + // have all the default template argument information. + return cast(TD->getMostRecentDecl())->getTemplateParameters(); +} + DeclResult Sema::CheckClassTemplate( Scope *S, unsigned TagSpec, TagUseKind TUK, SourceLocation KWLoc, CXXScopeSpec &SS, IdentifierInfo *Name, SourceLocation NameLoc, @@ -2061,13 +2070,13 @@ DeclResult Sema::CheckClassTemplate( if (!(TUK == TUK_Friend && CurContext->isDependentContext()) && CheckTemplateParameterList( TemplateParams, - PrevClassTemplate - ? PrevClassTemplate->getMostRecentDecl()->getTemplateParameters() - : nullptr, + PrevClassTemplate ? GetTemplateParameterList(PrevClassTemplate) + : nullptr, (SS.isSet() && SemanticContext && SemanticContext->isRecord() && SemanticContext->isDependentContext()) ? TPC_ClassTemplateMember - : TUK == TUK_Friend ? TPC_FriendClassTemplate : TPC_ClassTemplate, + : TUK == TUK_Friend ? TPC_FriendClassTemplate + : TPC_ClassTemplate, SkipBody)) Invalid = true; @@ -2298,7 +2307,7 @@ struct ConvertConstructorToDeductionGuideTransform { // -- The template parameters are the template parameters of the class // template followed by the template parameters (including default // template arguments) of the constructor, if any. - TemplateParameterList *TemplateParams = Template->getTemplateParameters(); + TemplateParameterList *TemplateParams = GetTemplateParameterList(Template); if (FTD) { TemplateParameterList *InnerParams = FTD->getTemplateParameters(); SmallVector AllParams; @@ -2424,7 +2433,7 @@ struct ConvertConstructorToDeductionGuideTransform { Params.push_back(NewParam); } - return buildDeductionGuide(Template->getTemplateParameters(), nullptr, + return buildDeductionGuide(GetTemplateParameterList(Template), nullptr, ExplicitSpecifier(), TSI, Loc, Loc, Loc); } @@ -5956,12 +5965,7 @@ bool Sema::CheckTemplateArgumentList( // template. TemplateArgumentListInfo NewArgs = TemplateArgs; - // Make sure we get the template parameter list from the most - // recent declaration, since that is the only one that is guaranteed to - // have all the default template argument information. - TemplateParameterList *Params = - cast(Template->getMostRecentDecl()) - ->getTemplateParameters(); + TemplateParameterList *Params = GetTemplateParameterList(Template); SourceLocation RAngleLoc = NewArgs.getRAngleLoc(); diff --git a/clang/test/SemaTemplate/ctad.cpp b/clang/test/SemaTemplate/ctad.cpp index 4d836839d8c346..388ed7d4cced18 100644 --- a/clang/test/SemaTemplate/ctad.cpp +++ b/clang/test/SemaTemplate/ctad.cpp @@ -44,3 +44,13 @@ namespace Access { }; D z = {Z(), {}}; } + +namespace GH69987 { +template struct X {}; +template struct X; +X x; + +template struct Y { Y(T); }; +template struct Y ; +Y y(1); +}; From 9c18f031968aabc7698e9ab0ea94da3b665d4d5a Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Mon, 18 Dec 2023 12:43:10 +0100 Subject: [PATCH 12/17] [libc++] Adds headers to FTM. (#75699) These feature-test macros had no headers listed in their associated papers. This adds the expected headers. Fixes https://github.com/llvm/llvm-project/issues/75577 --- libcxx/include/version | 4 +- .../.version.compile.pass.cpp | 105 ------------------ .../format.version.compile.pass.cpp | 34 ++++++ .../numeric.version.compile.pass.cpp | 46 +++++++- .../generate_feature_test_macro_components.py | 10 +- 5 files changed, 83 insertions(+), 116 deletions(-) delete mode 100644 libcxx/test/std/language.support/support.limits/support.limits.general/.version.compile.pass.cpp diff --git a/libcxx/include/version b/libcxx/include/version index a91c344c436090..7aa002e257b0a1 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -93,7 +93,7 @@ __cpp_lib_expected 202211L __cpp_lib_filesystem 201703L __cpp_lib_format 202106L __cpp_lib_format_ranges 202207L -__cpp_lib_format_uchar 202311L <> +__cpp_lib_format_uchar 202311L __cpp_lib_formatters 202302L __cpp_lib_forward_like 202207L __cpp_lib_freestanding_algorithm 202311L @@ -188,7 +188,7 @@ __cpp_lib_remove_cvref 201711L __cpp_lib_result_of_sfinae 201210L __cpp_lib_robust_nonmodifying_seq_ops 201304L __cpp_lib_sample 201603L -__cpp_lib_saturation_arithmetic 202311L <> +__cpp_lib_saturation_arithmetic 202311L __cpp_lib_scoped_lock 201703L __cpp_lib_semaphore 201907L __cpp_lib_shared_mutex 201505L diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/.version.compile.pass.cpp deleted file mode 100644 index 2486985cefaca0..00000000000000 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/.version.compile.pass.cpp +++ /dev/null @@ -1,105 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// WARNING: This test was generated by generate_feature_test_macro_components.py -// and should not be edited manually. -// -// clang-format off - -// <> - -// Test the feature test macros defined by <> - -/* Constant Value - __cpp_lib_format_uchar 202311L [C++20] - __cpp_lib_saturation_arithmetic 202311L [C++26] -*/ - -#include <> -#include "test_macros.h" - -#if TEST_STD_VER < 14 - -# ifdef __cpp_lib_format_uchar -# error "__cpp_lib_format_uchar should not be defined before c++20" -# endif - -# ifdef __cpp_lib_saturation_arithmetic -# error "__cpp_lib_saturation_arithmetic should not be defined before c++26" -# endif - -#elif TEST_STD_VER == 14 - -# ifdef __cpp_lib_format_uchar -# error "__cpp_lib_format_uchar should not be defined before c++20" -# endif - -# ifdef __cpp_lib_saturation_arithmetic -# error "__cpp_lib_saturation_arithmetic should not be defined before c++26" -# endif - -#elif TEST_STD_VER == 17 - -# ifdef __cpp_lib_format_uchar -# error "__cpp_lib_format_uchar should not be defined before c++20" -# endif - -# ifdef __cpp_lib_saturation_arithmetic -# error "__cpp_lib_saturation_arithmetic should not be defined before c++26" -# endif - -#elif TEST_STD_VER == 20 - -# ifndef __cpp_lib_format_uchar -# error "__cpp_lib_format_uchar should be defined in c++20" -# endif -# if __cpp_lib_format_uchar != 202311L -# error "__cpp_lib_format_uchar should have the value 202311L in c++20" -# endif - -# ifdef __cpp_lib_saturation_arithmetic -# error "__cpp_lib_saturation_arithmetic should not be defined before c++26" -# endif - -#elif TEST_STD_VER == 23 - -# ifndef __cpp_lib_format_uchar -# error "__cpp_lib_format_uchar should be defined in c++23" -# endif -# if __cpp_lib_format_uchar != 202311L -# error "__cpp_lib_format_uchar should have the value 202311L in c++23" -# endif - -# ifdef __cpp_lib_saturation_arithmetic -# error "__cpp_lib_saturation_arithmetic should not be defined before c++26" -# endif - -#elif TEST_STD_VER > 23 - -# ifndef __cpp_lib_format_uchar -# error "__cpp_lib_format_uchar should be defined in c++26" -# endif -# if __cpp_lib_format_uchar != 202311L -# error "__cpp_lib_format_uchar should have the value 202311L in c++26" -# endif - -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_saturation_arithmetic -# error "__cpp_lib_saturation_arithmetic should be defined in c++26" -# endif -# if __cpp_lib_saturation_arithmetic != 202311L -# error "__cpp_lib_saturation_arithmetic should have the value 202311L in c++26" -# endif -# else // _LIBCPP_VERSION -# ifdef __cpp_lib_saturation_arithmetic -# error "__cpp_lib_saturation_arithmetic should not be defined because it is unimplemented in libc++!" -# endif -# endif - -#endif // TEST_STD_VER > 23 - diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/format.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/format.version.compile.pass.cpp index 4e2c50483c2298..aa7b2f4286827b 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/format.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/format.version.compile.pass.cpp @@ -18,6 +18,7 @@ /* Constant Value __cpp_lib_format 202106L [C++20] __cpp_lib_format_ranges 202207L [C++23] + __cpp_lib_format_uchar 202311L [C++20] */ #include @@ -33,6 +34,10 @@ # error "__cpp_lib_format_ranges should not be defined before c++23" # endif +# ifdef __cpp_lib_format_uchar +# error "__cpp_lib_format_uchar should not be defined before c++20" +# endif + #elif TEST_STD_VER == 14 # ifdef __cpp_lib_format @@ -43,6 +48,10 @@ # error "__cpp_lib_format_ranges should not be defined before c++23" # endif +# ifdef __cpp_lib_format_uchar +# error "__cpp_lib_format_uchar should not be defined before c++20" +# endif + #elif TEST_STD_VER == 17 # ifdef __cpp_lib_format @@ -53,6 +62,10 @@ # error "__cpp_lib_format_ranges should not be defined before c++23" # endif +# ifdef __cpp_lib_format_uchar +# error "__cpp_lib_format_uchar should not be defined before c++20" +# endif + #elif TEST_STD_VER == 20 # if !defined(_LIBCPP_VERSION) @@ -72,6 +85,13 @@ # error "__cpp_lib_format_ranges should not be defined before c++23" # endif +# ifndef __cpp_lib_format_uchar +# error "__cpp_lib_format_uchar should be defined in c++20" +# endif +# if __cpp_lib_format_uchar != 202311L +# error "__cpp_lib_format_uchar should have the value 202311L in c++20" +# endif + #elif TEST_STD_VER == 23 # if !defined(_LIBCPP_VERSION) @@ -94,6 +114,13 @@ # error "__cpp_lib_format_ranges should have the value 202207L in c++23" # endif +# ifndef __cpp_lib_format_uchar +# error "__cpp_lib_format_uchar should be defined in c++23" +# endif +# if __cpp_lib_format_uchar != 202311L +# error "__cpp_lib_format_uchar should have the value 202311L in c++23" +# endif + #elif TEST_STD_VER > 23 # if !defined(_LIBCPP_VERSION) @@ -116,5 +143,12 @@ # error "__cpp_lib_format_ranges should have the value 202207L in c++26" # endif +# ifndef __cpp_lib_format_uchar +# error "__cpp_lib_format_uchar should be defined in c++26" +# endif +# if __cpp_lib_format_uchar != 202311L +# error "__cpp_lib_format_uchar should have the value 202311L in c++26" +# endif + #endif // TEST_STD_VER > 23 diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/numeric.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/numeric.version.compile.pass.cpp index 60004b06c5ff52..b510eefc69a5d3 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/numeric.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/numeric.version.compile.pass.cpp @@ -15,12 +15,13 @@ // Test the feature test macros defined by -/* Constant Value - __cpp_lib_constexpr_numeric 201911L [C++20] - __cpp_lib_gcd_lcm 201606L [C++17] - __cpp_lib_interpolate 201902L [C++20] - __cpp_lib_parallel_algorithm 201603L [C++17] - __cpp_lib_ranges_iota 202202L [C++23] +/* Constant Value + __cpp_lib_constexpr_numeric 201911L [C++20] + __cpp_lib_gcd_lcm 201606L [C++17] + __cpp_lib_interpolate 201902L [C++20] + __cpp_lib_parallel_algorithm 201603L [C++17] + __cpp_lib_ranges_iota 202202L [C++23] + __cpp_lib_saturation_arithmetic 202311L [C++26] */ #include @@ -48,6 +49,10 @@ # error "__cpp_lib_ranges_iota should not be defined before c++23" # endif +# ifdef __cpp_lib_saturation_arithmetic +# error "__cpp_lib_saturation_arithmetic should not be defined before c++26" +# endif + #elif TEST_STD_VER == 14 # ifdef __cpp_lib_constexpr_numeric @@ -70,6 +75,10 @@ # error "__cpp_lib_ranges_iota should not be defined before c++23" # endif +# ifdef __cpp_lib_saturation_arithmetic +# error "__cpp_lib_saturation_arithmetic should not be defined before c++26" +# endif + #elif TEST_STD_VER == 17 # ifdef __cpp_lib_constexpr_numeric @@ -104,6 +113,10 @@ # error "__cpp_lib_ranges_iota should not be defined before c++23" # endif +# ifdef __cpp_lib_saturation_arithmetic +# error "__cpp_lib_saturation_arithmetic should not be defined before c++26" +# endif + #elif TEST_STD_VER == 20 # ifndef __cpp_lib_constexpr_numeric @@ -144,6 +157,10 @@ # error "__cpp_lib_ranges_iota should not be defined before c++23" # endif +# ifdef __cpp_lib_saturation_arithmetic +# error "__cpp_lib_saturation_arithmetic should not be defined before c++26" +# endif + #elif TEST_STD_VER == 23 # ifndef __cpp_lib_constexpr_numeric @@ -193,6 +210,10 @@ # endif # endif +# ifdef __cpp_lib_saturation_arithmetic +# error "__cpp_lib_saturation_arithmetic should not be defined before c++26" +# endif + #elif TEST_STD_VER > 23 # ifndef __cpp_lib_constexpr_numeric @@ -242,5 +263,18 @@ # endif # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_saturation_arithmetic +# error "__cpp_lib_saturation_arithmetic should be defined in c++26" +# endif +# if __cpp_lib_saturation_arithmetic != 202311L +# error "__cpp_lib_saturation_arithmetic should have the value 202311L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_saturation_arithmetic +# error "__cpp_lib_saturation_arithmetic should not be defined because it is unimplemented in libc++!" +# endif +# endif + #endif // TEST_STD_VER > 23 diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 6a30324397883a..70e18b6dde006e 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -486,7 +486,9 @@ def add_version_header(tc): "values": { "c++20": 202311 # DR P2909R4 Fix formatting of code units as integers }, - "headers": [""], # Note not in format + "headers": [ + "format" # TODO verify this entry since the paper was underspecified. + ], }, { "name": "__cpp_lib_formatters", @@ -660,7 +662,7 @@ def add_version_header(tc): }, { "name": "__cpp_lib_ios_noreplace", - "values": { "c++23": 202207 }, + "values": {"c++23": 202207}, "headers": ["ios"], }, { @@ -1010,7 +1012,9 @@ def add_version_header(tc): { "name": "__cpp_lib_saturation_arithmetic", "values": {"c++26": 202311}, # P0543R3 Saturation arithmetic - "headers": [""], # Note not in + "headers": [ + "numeric" # TODO verify this entry since the paper was underspecified. + ], "unimplemented": True, }, { From 3b08b3340d5adbd001cd1b328f6fd6ecf5d92f4a Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Mon, 18 Dec 2023 11:45:21 +0000 Subject: [PATCH 13/17] [RISCV][test] Fix lifetime bug with Module in test As pointed out by @s-barannikov in post-commit review of #72356, the module is destroyed at the end of the constructor. --- llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp b/llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp index b4c96a9c2a62ce..5ef86bb3f7b46e 100644 --- a/llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp +++ b/llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp @@ -31,6 +31,7 @@ class RISCVInstrInfoTest : public testing::TestWithParam { std::unique_ptr ST; std::unique_ptr MMI; std::unique_ptr MF; + std::unique_ptr M; static void SetUpTestSuite() { LLVMInitializeRISCVTargetInfo(); @@ -49,10 +50,10 @@ class RISCVInstrInfoTest : public testing::TestWithParam { CodeGenOptLevel::Default))); Ctx = std::make_unique(); - Module M("Module", *Ctx); - M.setDataLayout(TM->createDataLayout()); + M = std::make_unique("Module", *Ctx); + M->setDataLayout(TM->createDataLayout()); auto *FType = FunctionType::get(Type::getVoidTy(*Ctx), false); - auto *F = Function::Create(FType, GlobalValue::ExternalLinkage, "Test", &M); + auto *F = Function::Create(FType, GlobalValue::ExternalLinkage, "Test", *M); MMI = std::make_unique(TM.get()); ST = std::make_unique( From a00c4220be6a9ce17e2a0b88191bdd08e65bf1ad Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 18 Dec 2023 12:44:25 +0100 Subject: [PATCH 14/17] [SystemZ] Fix complex address matching when i128 is legal Complex address matching currently handles truncations, under the assumption that those are no-ops. This is no longer true when i128 is legal. Change the code to only handle actual no-op truncations. Fixes https://github.com/llvm/llvm-project/issues/75708 Fixes https://github.com/llvm/llvm-project/issues/75714 --- .../Target/SystemZ/SystemZISelDAGToDAG.cpp | 3 +- llvm/test/CodeGen/SystemZ/addr-04.ll | 46 +++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/SystemZ/addr-04.ll diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index e5e1e91916f32d..c7d8591c5bdf6f 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -465,7 +465,8 @@ bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM, bool IsBase) const { SDValue N = IsBase ? AM.Base : AM.Index; unsigned Opcode = N.getOpcode(); - if (Opcode == ISD::TRUNCATE) { + // Look through no-op truncations. + if (Opcode == ISD::TRUNCATE && N.getOperand(0).getValueSizeInBits() <= 64) { N = N.getOperand(0); Opcode = N.getOpcode(); } diff --git a/llvm/test/CodeGen/SystemZ/addr-04.ll b/llvm/test/CodeGen/SystemZ/addr-04.ll new file mode 100644 index 00000000000000..245623fea9b9ba --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/addr-04.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; Test complex addresses with base or index truncated from 128 bit. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +; Shift amount with base truncated from 128 bit to 32 bit. +define void @f1(i128 %x, i32 %y, ptr %px, ptr %py) { +; CHECK-LABEL: f1: +; CHECK: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI0_0 +; CHECK-NEXT: vl %v0, 0(%r2), 3 +; CHECK-NEXT: vl %v1, 0(%r1), 3 +; CHECK-NEXT: vaq %v0, %v0, %v1 +; CHECK-NEXT: vlgvf %r1, %v0, 3 +; CHECK-NEXT: srl %r3, 0(%r1) +; CHECK-NEXT: vst %v0, 0(%r4), 3 +; CHECK-NEXT: st %r3, 0(%r5) +; CHECK-NEXT: br %r14 + %x1 = add i128 %x, 1 + store i128 %x1, ptr %px, align 8 + %amt = trunc i128 %x1 to i32 + %y1 = lshr i32 %y, %amt + store i32 %y1, ptr %py, align 4 + ret void +} + +; Memory address with index truncated from 128 bit to 64 bit. +define i8 @f2(ptr %base, ptr %p) { +; CHECK-LABEL: f2: +; CHECK: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI1_0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r1), 3 +; CHECK-NEXT: vaq %v0, %v0, %v1 +; CHECK-NEXT: vlgvg %r1, %v0, 1 +; CHECK-NEXT: vst %v0, 0(%r3), 3 +; CHECK-NEXT: lb %r2, 0(%r1,%r2) +; CHECK-NEXT: br %r14 + %idx = load i128, ptr %p, align 8 + %inc = add nsw i128 %idx, 1 + store i128 %inc, ptr %p, align 8 + %idxprom = trunc i128 %inc to i64 + %arrayidx = getelementptr inbounds i8, ptr %base, i64 %idxprom + %res = load i8, ptr %arrayidx, align 1 + ret i8 %res +} From df3ddd78f640ebec74151028d919904c6cf9ecdd Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 18 Dec 2023 11:51:13 +0000 Subject: [PATCH 15/17] CGBuiltin - fix gcc Wunused-variable warning. NFC. --- clang/lib/CodeGen/CGBuiltin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c96f86a823a461..c42b885289c4c7 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -942,7 +942,7 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type, : Builder.CreateZExtOrTrunc(FAMSize, ResType); Value *Res = FAMSize; - if (const auto *DRE = dyn_cast(Base)) { + if (isa(Base)) { // The whole struct is specificed in the __bdos. const RecordDecl *OuterRD = CountedByFD->getDeclContext()->getOuterLexicalRecordContext(); From 2f8178806770b549c054b02ae3556b454cdeb722 Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Mon, 18 Dec 2023 18:57:36 +0700 Subject: [PATCH 16/17] [ARM][FPEnv] Lowering of fpmode intrinsics (#74054) LLVM intrinsics `get_fpmode`, `set_fpmode` and `reset_fpmode` operate control modes, the bits of FP environment that affect FP operations. On ARM these bits are in FPSCR together with the status bits. The implementation of these intrinsics produces code close to that of functions `fegetmode` and `fesetmode` from GLIBC. Pull request: https://github.com/llvm/llvm-project/pull/74054 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 58 +++++++++++++++ llvm/lib/Target/ARM/ARMISelLowering.h | 10 +++ llvm/lib/Target/ARM/ARMInstrVFP.td | 1 + llvm/test/CodeGen/ARM/fpenv.ll | 98 +++++++++++++++++++++++++ 4 files changed, 167 insertions(+) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index db63facca870f8..d00b7853816e19 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1415,6 +1415,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::GET_FPENV, MVT::i32, Legal); setOperationAction(ISD::SET_FPENV, MVT::i32, Legal); setOperationAction(ISD::RESET_FPENV, MVT::Other, Legal); + setOperationAction(ISD::GET_FPMODE, MVT::i32, Legal); + setOperationAction(ISD::SET_FPMODE, MVT::i32, Custom); + setOperationAction(ISD::RESET_FPMODE, MVT::Other, Custom); } // We want to custom lower some of our intrinsics. @@ -6447,6 +6450,57 @@ SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op, return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2); } +SDValue ARMTargetLowering::LowerSET_FPMODE(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Chain = Op->getOperand(0); + SDValue Mode = Op->getOperand(1); + + // Generate nodes to build: + // FPSCR = (FPSCR & FPStatusBits) | (Mode & ~FPStatusBits) + SDValue Ops[] = {Chain, + DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)}; + SDValue FPSCR = + DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops); + Chain = FPSCR.getValue(1); + FPSCR = FPSCR.getValue(0); + + SDValue FPSCRMasked = + DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR, + DAG.getConstant(ARM::FPStatusBits, DL, MVT::i32)); + SDValue InputMasked = + DAG.getNode(ISD::AND, DL, MVT::i32, Mode, + DAG.getConstant(~ARM::FPStatusBits, DL, MVT::i32)); + FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCRMasked, InputMasked); + + SDValue Ops2[] = { + Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR}; + return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2); +} + +SDValue ARMTargetLowering::LowerRESET_FPMODE(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Chain = Op->getOperand(0); + + // To get the default FP mode all control bits are cleared: + // FPSCR = FPSCR & (FPStatusBits | FPReservedBits) + SDValue Ops[] = {Chain, + DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)}; + SDValue FPSCR = + DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops); + Chain = FPSCR.getValue(1); + FPSCR = FPSCR.getValue(0); + + SDValue FPSCRMasked = DAG.getNode( + ISD::AND, DL, MVT::i32, FPSCR, + DAG.getConstant(ARM::FPStatusBits | ARM::FPReservedBits, DL, MVT::i32)); + SDValue Ops2[] = {Chain, + DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), + FPSCRMasked}; + return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2); +} + static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { SDLoc dl(N); @@ -10557,6 +10611,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ZERO_EXTEND: return LowerVectorExtend(Op.getNode(), DAG, Subtarget); case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG); case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG); + case ISD::SET_FPMODE: + return LowerSET_FPMODE(Op, DAG); + case ISD::RESET_FPMODE: + return LowerRESET_FPMODE(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); case ISD::SDIV: if (Subtarget->isTargetWindows() && !Op.getValueType().isVector()) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 6c2b92de7a1df2..f398b01f418629 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -375,6 +375,14 @@ class VectorType; // Bit position of rounding mode bits in FPSCR. const unsigned RoundingBitsPos = 22; + + // Bits of floating-point status. These are NZCV flags, QC bit and cumulative + // FP exception bits. + const unsigned FPStatusBits = 0xf800009f; + + // Some bits in the FPSCR are not yet defined. They must be preserved when + // modifying the contents. + const unsigned FPReservedBits = 0x00006060; } // namespace ARM /// Define some predicates that are used for node matching. @@ -835,6 +843,8 @@ class VectorType; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td index 800527bcf756c6..55d3efbd9b9a2b 100644 --- a/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -2675,6 +2675,7 @@ def : Pat<(get_fpenv), (VMRS)>; def : Pat<(set_fpenv GPRnopc:$Rt), (VMSR GPRnopc:$Rt)>; def : Pat<(reset_fpenv), (VMSR (MOVi 0))>, Requires<[IsARM]>; def : Pat<(reset_fpenv), (VMSR (tMOVi8 0))>, Requires<[IsThumb]>; +def : Pat<(get_fpmode), (VMRS)>; //===----------------------------------------------------------------------===// // Assembler aliases. diff --git a/llvm/test/CodeGen/ARM/fpenv.ll b/llvm/test/CodeGen/ARM/fpenv.ll index 40db627ebb3c23..f5d87170d9153e 100644 --- a/llvm/test/CodeGen/ARM/fpenv.ll +++ b/llvm/test/CodeGen/ARM/fpenv.ll @@ -142,9 +142,107 @@ entry: ret void } +define i32 @get_fpmode_01() #0 { +; CHECK-LABEL: get_fpmode_01: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, sp, #8 +; CHECK-NEXT: add r0, sp, #4 +; CHECK-NEXT: bl fegetmode +; CHECK-NEXT: ldr r0, [sp, #4] +; CHECK-NEXT: add sp, sp, #8 +; CHECK-NEXT: pop {r11, lr} +; CHECK-NEXT: mov pc, lr +entry: + %fpenv = call i32 @llvm.get.fpmode.i32() + ret i32 %fpenv +} + +define i32 @get_fpmode_02() nounwind { +; CHECK-LABEL: get_fpmode_02: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmrs r0, fpscr +; CHECK-NEXT: mov pc, lr +entry: + %fpenv = call i32 @llvm.get.fpmode.i32() + ret i32 %fpenv +} + +define void @set_fpmode_01(i32 %fpmode) #0 { +; CHECK-LABEL: set_fpmode_01: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, sp, #8 +; CHECK-NEXT: str r0, [sp, #4] +; CHECK-NEXT: add r0, sp, #4 +; CHECK-NEXT: bl fesetmode +; CHECK-NEXT: add sp, sp, #8 +; CHECK-NEXT: pop {r11, lr} +; CHECK-NEXT: mov pc, lr +entry: + call void @llvm.set.fpmode.i32(i32 %fpmode) + ret void +} + +define void @set_fpmode_02(i32 %fpmode) nounwind { +; CHECK-LABEL: set_fpmode_02: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmrs r1, fpscr +; CHECK-NEXT: mvn r2, #159 +; CHECK-NEXT: sub r2, r2, #-134217728 +; CHECK-NEXT: and r0, r0, r2 +; CHECK-NEXT: mov r2, #159 +; CHECK-NEXT: orr r2, r2, #-134217728 +; CHECK-NEXT: and r1, r1, r2 +; CHECK-NEXT: orr r0, r1, r0 +; CHECK-NEXT: vmsr fpscr, r0 +; CHECK-NEXT: mov pc, lr +entry: + call void @llvm.set.fpmode.i32(i32 %fpmode) + ret void +} + +define void @reset_fpmode_01() #0 { +; CHECK-LABEL: reset_fpmode_01: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: mvn r0, #0 +; CHECK-NEXT: bl fesetmode +; CHECK-NEXT: pop {r11, lr} +; CHECK-NEXT: mov pc, lr +entry: + call void @llvm.reset.fpmode() + ret void +} + +define void @reset_fpmode_02() nounwind { +; CHECK-LABEL: reset_fpmode_02: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmrs r0, fpscr +; CHECK-NEXT: ldr r1, .LCPI16_0 +; CHECK-NEXT: and r0, r0, r1 +; CHECK-NEXT: vmsr fpscr, r0 +; CHECK-NEXT: mov pc, lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI16_0: +; CHECK-NEXT: .long 4160774399 @ 0xf80060ff +entry: + call void @llvm.reset.fpmode() + ret void +} + attributes #0 = { nounwind "use-soft-float"="true" } declare void @llvm.set.rounding(i32) declare i32 @llvm.get.fpenv.i32() declare void @llvm.set.fpenv.i32(i32 %fpenv) declare void @llvm.reset.fpenv() +declare i32 @llvm.get.fpmode.i32() +declare void @llvm.set.fpmode.i32(i32 %fpmode) +declare void @llvm.reset.fpmode() From dea16ebd2613a4a218c53045270fc4fcc9b427ad Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Mon, 18 Dec 2023 11:58:42 +0000 Subject: [PATCH 17/17] [LLVM][IR] Replace ConstantInt's specialisation of getType() with getIntegerType(). (#75217) The specialisation will not be valid when ConstantInt gains native support for vector types. This is largely a mechanical change but with extra attention paid to constant folding, InstCombineVectorOps.cpp, LoopFlatten.cpp and Verifier.cpp to remove the need to call `getIntegerType()`. Co-authored-by: Nikita Popov --- clang/lib/CodeGen/CGBuiltin.cpp | 7 ++++--- llvm/include/llvm/IR/Constants.h | 7 +++---- llvm/lib/Analysis/InstructionSimplify.cpp | 2 +- llvm/lib/IR/ConstantFold.cpp | 2 +- llvm/lib/IR/Verifier.cpp | 11 ++++++----- .../Hexagon/HexagonLoopIdiomRecognition.cpp | 6 +++--- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 15 ++++++++------- .../InstCombine/InstCombineVectorOps.cpp | 4 ++-- llvm/lib/Transforms/Scalar/ConstantHoisting.cpp | 5 ++--- llvm/lib/Transforms/Scalar/LoopFlatten.cpp | 5 ++--- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 8 ++++---- mlir/lib/Target/LLVMIR/ModuleImport.cpp | 2 +- 12 files changed, 37 insertions(+), 37 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c42b885289c4c7..4eb1686f095062 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3214,7 +3214,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *AlignmentValue = EmitScalarExpr(E->getArg(1)); ConstantInt *AlignmentCI = cast(AlignmentValue); if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment)) - AlignmentCI = ConstantInt::get(AlignmentCI->getType(), + AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(), llvm::Value::MaximumAlignment); emitAlignmentAssumption(PtrValue, Ptr, @@ -17034,7 +17034,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Value *Op1 = EmitScalarExpr(E->getArg(1)); ConstantInt *AlignmentCI = cast(Op0); if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment)) - AlignmentCI = ConstantInt::get(AlignmentCI->getType(), + AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(), llvm::Value::MaximumAlignment); emitAlignmentAssumption(Op1, E->getArg(1), @@ -17272,7 +17272,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2)); if (getTarget().isLittleEndian()) - Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue()); + Index = + ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue()); return Builder.CreateExtractElement(Unpacked, Index); } diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h index 0b9f89830b79c6..b5dcc7fbc1d929 100644 --- a/llvm/include/llvm/IR/Constants.h +++ b/llvm/include/llvm/IR/Constants.h @@ -171,10 +171,9 @@ class ConstantInt final : public ConstantData { /// Determine if this constant's value is same as an unsigned char. bool equalsInt(uint64_t V) const { return Val == V; } - /// getType - Specialize the getType() method to always return an IntegerType, - /// which reduces the amount of casting needed in parts of the compiler. - /// - inline IntegerType *getType() const { + /// Variant of the getType() method to always return an IntegerType, which + /// reduces the amount of casting needed in parts of the compiler. + inline IntegerType *getIntegerType() const { return cast(Value::getType()); } diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 2a45acf63aa2ca..5beac5547d65e0 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -6079,7 +6079,7 @@ static Value *simplifyRelativeLoad(Constant *Ptr, Constant *Offset, Type *Int32Ty = Type::getInt32Ty(Ptr->getContext()); auto *OffsetConstInt = dyn_cast(Offset); - if (!OffsetConstInt || OffsetConstInt->getType()->getBitWidth() > 64) + if (!OffsetConstInt || OffsetConstInt->getBitWidth() > 64) return nullptr; APInt OffsetInt = OffsetConstInt->getValue().sextOrTrunc( diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index d499d74f7ba010..7fdc35e7fca097 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -868,7 +868,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, } if (GVAlign > 1) { - unsigned DstWidth = CI2->getType()->getBitWidth(); + unsigned DstWidth = CI2->getBitWidth(); unsigned SrcWidth = std::min(DstWidth, Log2(GVAlign)); APInt BitsNotSet(APInt::getLowBitsSet(DstWidth, SrcWidth)); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 8aba28026306a5..aeaca21a99cc5e 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2296,10 +2296,9 @@ void Verifier::verifyFunctionMetadata( Check(isa(MD->getOperand(0)), "expected a constant operand for !kcfi_type", MD); Constant *C = cast(MD->getOperand(0))->getValue(); - Check(isa(C), + Check(isa(C) && isa(C->getType()), "expected a constant integer operand for !kcfi_type", MD); - IntegerType *Type = cast(C)->getType(); - Check(Type->getBitWidth() == 32, + Check(cast(C)->getBitWidth() == 32, "expected a 32-bit integer constant operand for !kcfi_type", MD); } } @@ -5690,8 +5689,10 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { "vector of ints"); auto *Op3 = cast(Call.getArgOperand(2)); - Check(Op3->getType()->getBitWidth() <= 32, - "third argument of [us][mul|div]_fix[_sat] must fit within 32 bits"); + Check(Op3->getType()->isIntegerTy(), + "third operand of [us][mul|div]_fix[_sat] must be an int type"); + Check(Op3->getBitWidth() <= 32, + "third operand of [us][mul|div]_fix[_sat] must fit within 32 bits"); if (ID == Intrinsic::smul_fix || ID == Intrinsic::smul_fix_sat || ID == Intrinsic::sdiv_fix || ID == Intrinsic::sdiv_fix_sat) { diff --git a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index 51ef72b873a516..7777ae23e8aecd 100644 --- a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -1062,7 +1062,7 @@ void PolynomialMultiplyRecognize::promoteTo(Instruction *In, // Promote immediates. for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) { if (ConstantInt *CI = dyn_cast(In->getOperand(i))) - if (CI->getType()->getBitWidth() < DestBW) + if (CI->getBitWidth() < DestBW) In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue())); } } @@ -1577,7 +1577,7 @@ Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At, static bool hasZeroSignBit(const Value *V) { if (const auto *CI = dyn_cast(V)) - return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0; + return CI->getValue().isNonNegative(); const Instruction *I = dyn_cast(V); if (!I) return false; @@ -1688,7 +1688,7 @@ void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) { if (I->getOpcode() != Instruction::Or) return nullptr; ConstantInt *Msb = dyn_cast(I->getOperand(1)); - if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit()) + if (!Msb || !Msb->getValue().isSignMask()) return nullptr; if (!hasZeroSignBit(I->getOperand(0))) return nullptr; diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index b2665161c090df..2c880316e0a1cd 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -3763,7 +3763,7 @@ struct AAKernelInfoFunction : AAKernelInfo { ConstantInt *ExecModeC = KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC); ConstantInt *AssumedExecModeC = ConstantInt::get( - ExecModeC->getType(), + ExecModeC->getIntegerType(), ExecModeC->getSExtValue() | OMP_TGT_EXEC_MODE_GENERIC_SPMD); if (ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD) SPMDCompatibilityTracker.indicateOptimisticFixpoint(); @@ -3792,7 +3792,7 @@ struct AAKernelInfoFunction : AAKernelInfo { ConstantInt *MayUseNestedParallelismC = KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(KernelEnvC); ConstantInt *AssumedMayUseNestedParallelismC = ConstantInt::get( - MayUseNestedParallelismC->getType(), NestedParallelism); + MayUseNestedParallelismC->getIntegerType(), NestedParallelism); setMayUseNestedParallelismOfKernelEnvironment( AssumedMayUseNestedParallelismC); @@ -3801,7 +3801,7 @@ struct AAKernelInfoFunction : AAKernelInfo { KernelInfo::getUseGenericStateMachineFromKernelEnvironment( KernelEnvC); ConstantInt *AssumedUseGenericStateMachineC = - ConstantInt::get(UseGenericStateMachineC->getType(), false); + ConstantInt::get(UseGenericStateMachineC->getIntegerType(), false); setUseGenericStateMachineOfKernelEnvironment( AssumedUseGenericStateMachineC); } @@ -4280,8 +4280,9 @@ struct AAKernelInfoFunction : AAKernelInfo { // kernel is executed in. assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC && "Initially non-SPMD kernel has SPMD exec mode!"); - setExecModeOfKernelEnvironment(ConstantInt::get( - ExecModeC->getType(), ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD)); + setExecModeOfKernelEnvironment( + ConstantInt::get(ExecModeC->getIntegerType(), + ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD)); ++NumOpenMPTargetRegionKernelsSPMD; @@ -4332,7 +4333,7 @@ struct AAKernelInfoFunction : AAKernelInfo { // If not SPMD mode, indicate we use a custom state machine now. setUseGenericStateMachineOfKernelEnvironment( - ConstantInt::get(UseStateMachineC->getType(), false)); + ConstantInt::get(UseStateMachineC->getIntegerType(), false)); // If we don't actually need a state machine we are done here. This can // happen if there simply are no parallel regions. In the resulting kernel @@ -4658,7 +4659,7 @@ struct AAKernelInfoFunction : AAKernelInfo { KernelInfo::getMayUseNestedParallelismFromKernelEnvironment( AA.KernelEnvC); ConstantInt *NewMayUseNestedParallelismC = ConstantInt::get( - MayUseNestedParallelismC->getType(), AA.NestedParallelism); + MayUseNestedParallelismC->getIntegerType(), AA.NestedParallelism); AA.setMayUseNestedParallelismOfKernelEnvironment( NewMayUseNestedParallelismC); } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index c381d25011f688..bd5f608045cf11 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -388,7 +388,7 @@ static APInt findDemandedEltsByAllUsers(Value *V) { /// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't /// matter, we just want a consistent type to simplify CSE. static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) { - const unsigned IndexBW = IndexC->getType()->getBitWidth(); + const unsigned IndexBW = IndexC->getBitWidth(); if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64) return nullptr; return ConstantInt::get(IndexC->getContext(), @@ -2640,7 +2640,7 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf, assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?"); // Index is updated to the potentially translated insertion lane. - IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex); + IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex); return true; }; diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp index 1fb9d7fff32f66..9e40d94dd73c76 100644 --- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -674,8 +674,7 @@ void ConstantHoistingPass::findBaseConstants(GlobalVariable *BaseGV) { llvm::stable_sort(ConstCandVec, [](const ConstantCandidate &LHS, const ConstantCandidate &RHS) { if (LHS.ConstInt->getType() != RHS.ConstInt->getType()) - return LHS.ConstInt->getType()->getBitWidth() < - RHS.ConstInt->getType()->getBitWidth(); + return LHS.ConstInt->getBitWidth() < RHS.ConstInt->getBitWidth(); return LHS.ConstInt->getValue().ult(RHS.ConstInt->getValue()); }); @@ -890,7 +889,7 @@ bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) { Type *Ty = ConstInfo.BaseExpr->getType(); Base = new BitCastInst(ConstInfo.BaseExpr, Ty, "const", IP); } else { - IntegerType *Ty = ConstInfo.BaseInt->getType(); + IntegerType *Ty = ConstInfo.BaseInt->getIntegerType(); Base = new BitCastInst(ConstInfo.BaseInt, Ty, "const", IP); } diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp index b1add3c42976fd..eef94636578d83 100644 --- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp @@ -343,9 +343,8 @@ static bool verifyTripCount(Value *RHS, Loop *L, // If the RHS of the compare is equal to the backedge taken count we need // to add one to get the trip count. if (SCEVRHS == BackedgeTCExt || SCEVRHS == BackedgeTakenCount) { - ConstantInt *One = ConstantInt::get(ConstantRHS->getType(), 1); - Value *NewRHS = ConstantInt::get( - ConstantRHS->getContext(), ConstantRHS->getValue() + One->getValue()); + Value *NewRHS = ConstantInt::get(ConstantRHS->getContext(), + ConstantRHS->getValue() + 1); return setLoopComponents(NewRHS, TripCount, Increment, IterationInstructions); } diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 89494a7f64971f..55e375670cc61e 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -6293,7 +6293,7 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { } case BitMapKind: { // Type of the bitmap (e.g. i59). - IntegerType *MapTy = BitMap->getType(); + IntegerType *MapTy = BitMap->getIntegerType(); // Cast Index to the same type as the bitmap. // Note: The Index is <= the number of elements in the table, so @@ -6668,7 +6668,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, Value *TableIndex; ConstantInt *TableIndexOffset; if (UseSwitchConditionAsTableIndex) { - TableIndexOffset = ConstantInt::get(MaxCaseVal->getType(), 0); + TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0); TableIndex = SI->getCondition(); } else { TableIndexOffset = MinCaseVal; @@ -6752,7 +6752,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // Get the TableIndex'th bit of the bitmask. // If this bit is 0 (meaning hole) jump to the default destination, // else continue with table lookup. - IntegerType *MapTy = TableMask->getType(); + IntegerType *MapTy = TableMask->getIntegerType(); Value *MaskIndex = Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex"); Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted"); @@ -6975,7 +6975,7 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, // Replace each case with its trailing zeros number. for (auto &Case : SI->cases()) { auto *OrigValue = Case.getCaseValue(); - Case.setValue(ConstantInt::get(OrigValue->getType(), + Case.setValue(ConstantInt::get(OrigValue->getIntegerType(), OrigValue->getValue().countr_zero())); } diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp index ec2692f58695d0..905405e9398820 100644 --- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp @@ -720,7 +720,7 @@ static TypedAttr getScalarConstantAsAttr(OpBuilder &builder, // Convert scalar intergers. if (auto *constInt = dyn_cast(constScalar)) { return builder.getIntegerAttr( - IntegerType::get(context, constInt->getType()->getBitWidth()), + IntegerType::get(context, constInt->getBitWidth()), constInt->getValue()); }