From d777504355dac9a2a8a7bbba6001d05b936868b2 Mon Sep 17 00:00:00 2001
From: Kareem Ergawy <kareem.ergawy@amd.com>
Date: Mon, 18 Dec 2023 11:14:46 +0100
Subject: [PATCH 01/17] [MLIR][OpenMP][Offload] Lower target update op to
 DeviceRT (#75159)

Adds support for lowring `UpdateDataOp` to the DeviceRT. This reuses the
existing utils used by other device directive.
---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 24 ++++++++++--
 mlir/test/Target/LLVMIR/omptarget-llvm.mlir   | 38 +++++++++++++++++++
 2 files changed, 59 insertions(+), 3 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 4f6200d29a70a6..088e7ae4231bef 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1915,6 +1915,23 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
             mapOperands = exitDataOp.getMapOperands();
             return success();
           })
+          .Case([&](omp::UpdateDataOp updateDataOp) {
+            if (updateDataOp.getNowait())
+              return failure();
+
+            if (auto ifExprVar = updateDataOp.getIfExpr())
+              ifCond = moduleTranslation.lookupValue(ifExprVar);
+
+            if (auto devId = updateDataOp.getDevice())
+              if (auto constOp =
+                      dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
+                if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
+                  deviceID = intAttr.getInt();
+
+            RTLFn = llvm::omp::OMPRTL___tgt_target_data_update_mapper;
+            mapOperands = updateDataOp.getMotionOperands();
+            return success();
+          })
           .Default([&](Operation *op) {
             return op->emitError("unsupported OpenMP operation: ")
                    << op->getName();
@@ -2748,9 +2765,10 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
       .Case([&](omp::ThreadprivateOp) {
         return convertOmpThreadprivate(*op, builder, moduleTranslation);
       })
-      .Case<omp::DataOp, omp::EnterDataOp, omp::ExitDataOp>([&](auto op) {
-        return convertOmpTargetData(op, builder, moduleTranslation);
-      })
+      .Case<omp::DataOp, omp::EnterDataOp, omp::ExitDataOp, omp::UpdateDataOp>(
+          [&](auto op) {
+            return convertOmpTargetData(op, builder, moduleTranslation);
+          })
       .Case([&](omp::TargetOp) {
         return convertOmpTarget(*op, builder, moduleTranslation);
       })
diff --git a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
index 9221b410d766ed..b089d47f795df3 100644
--- a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
@@ -441,3 +441,41 @@ llvm.func @_QPopenmp_target_use_dev_both() {
 // CHECK:         ret void
 
 // -----
+
+llvm.func @_QPopenmp_target_data_update() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array<i32: 0, 0>, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr
+  %2 = omp.map_info var_ptr(%1 : !llvm.ptr, i32)   map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""}
+  omp.target_data map_entries(%2 : !llvm.ptr) {
+    %3 = llvm.mlir.constant(99 : i32) : i32
+    llvm.store %3, %1 : i32, !llvm.ptr
+    omp.terminator
+  }
+
+  omp.target_update_data motion_entries(%2 : !llvm.ptr)
+
+  llvm.return
+}
+
+// CHECK-LABEL: define void @_QPopenmp_target_data_update
+
+// CHECK-DAG:     %[[OFFLOAD_BASEPTRS:.*]] = alloca [1 x ptr], align 8
+// CHECK-DAG:     %[[OFFLOAD_PTRS:.*]] = alloca [1 x ptr], align 8
+// CHECK-DAG:     %[[INT_ALLOCA:.*]] = alloca i32, i64 1, align 4
+// CHECK-DAG:     %[[OFFLOAD_MAPPERS:.*]] = alloca [1 x ptr], align 8
+
+// CHECK:         call void @__tgt_target_data_begin_mapper
+// CHECK:         store i32 99, ptr %[[INT_ALLOCA]], align 4
+// CHECK:         call void @__tgt_target_data_end_mapper
+
+// CHECK:         %[[BASEPTRS_VAL:.*]] = getelementptr inbounds [1 x ptr], ptr %[[OFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK:         store ptr %[[INT_ALLOCA]], ptr %[[BASEPTRS_VAL]], align 8
+// CHECK:         %[[PTRS_VAL:.*]] = getelementptr inbounds [1 x ptr], ptr %[[OFFLOAD_PTRS]], i32 0, i32 0
+// CHECK:         store ptr %[[INT_ALLOCA]], ptr %[[PTRS_VAL]], align 8
+// CHECK:         %[[MAPPERS_VAL:.*]] = getelementptr inbounds [1 x ptr], ptr %[[OFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK:         store ptr null, ptr %[[MAPPERS_VAL]], align 8
+// CHECK:         %[[BASEPTRS_VAL_2:.*]] = getelementptr inbounds [1 x ptr], ptr %[[OFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK:         %[[PTRS_VAL_2:.*]] = getelementptr inbounds [1 x ptr], ptr %[[OFFLOAD_PTRS]], i32 0, i32 0
+// CHECK:         call void @__tgt_target_data_update_mapper(ptr @2, i64 -1, i32 1, ptr %[[BASEPTRS_VAL_2]], ptr %[[PTRS_VAL_2]], ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr null)
+
+// CHECK:         ret void

From 9e3d915d8ebf86e24c9ff58766be8e7c6aa7b0c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Mon, 18 Dec 2023 12:31:05 +0200
Subject: [PATCH 02/17] [llvm-windres] Change the interpretation of
 --preprocessor to match Binutils 2.36 (#75391)

Binutils 2.36 had a somewhat controversial change in how the
--preprocessor option was handled in GNU windres; previously, the option
was interpreted as a part of the command string, potentially containing
multiple arguments (which even was hinted at in the documentation).

In Binutils 2.36, this was changed to interpret the --preprocessor
argument as one argument (possibly containing spaces) pointing at the
preprocessor executable.

The existing behaviour where implicit arguments like -E -xc -DRC_INVOKED
are dropped if --preprocessor is specified, was kept.

This was a breaking change for some users of GNU windres, see
https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=21c33bcbe36377abf01614fb1b9be439a3b6de20,
https://sourceware.org/bugzilla/show_bug.cgi?id=27594, and
https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=5edb8e3f5ad8d74a83fc0df7f6e4514eed0aa77f.

As multiple years have passed since, the behaviour change seems to be
here to stay, and any users of the previous form of the option have been
forced to avoid this construct. Thus update llvm-windres to match the
new way Binutils of handling this option.

One construct for specifying the path to the preprocessor, which works
both before and after binutils 2.36 (and this change in llvm-windres) is
to specify options like this:

--preprocessor path/to/executable --preprocessor-arg -E
--preprocessor-arg -xc -DRC_INVOKED
---
 llvm/test/tools/llvm-rc/windres-preproc.test |  9 +++-
 llvm/tools/llvm-rc/llvm-rc.cpp               | 44 +++-----------------
 2 files changed, 12 insertions(+), 41 deletions(-)

diff --git a/llvm/test/tools/llvm-rc/windres-preproc.test b/llvm/test/tools/llvm-rc/windres-preproc.test
index 74e888614aa2b9..13f82299a074bb 100644
--- a/llvm/test/tools/llvm-rc/windres-preproc.test
+++ b/llvm/test/tools/llvm-rc/windres-preproc.test
@@ -6,8 +6,8 @@
 ; RUN: llvm-windres -### --include-dir %p/incdir1 --include %p/incdir2 "-DFOO1=\\\"foo bar\\\"" -UFOO2 -D FOO3 --preprocessor-arg "-DFOO4=\\\"baz baz\\\"" -DFOO5=\"bar\" %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK1
 ; RUN: llvm-windres -### --include-dir %p/incdir1 --include %p/incdir2 "-DFOO1=\"foo bar\"" -UFOO2 -D FOO3 --preprocessor-arg "-DFOO4=\"baz baz\"" "-DFOO5=bar" %p/Inputs/empty.rc %t.res --use-temp-file | FileCheck %s --check-prefix=CHECK1
 ; CHECK1: {{^}} "clang" "--driver-mode=gcc" "-target" "{{.*}}-{{.*}}{{mingw32|windows-gnu}}" "-E" "-xc" "-DRC_INVOKED" "-I" "{{.*}}incdir1" "-I" "{{.*}}incdir2" "-D" "FOO1=\"foo bar\"" "-U" "FOO2" "-D" "FOO3" "-DFOO4=\"baz baz\"" "-D" "FOO5=bar" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc"{{$}}
-; RUN: llvm-windres -### --preprocessor "i686-w64-mingw32-gcc -E -DFOO=\\\"foo\\ bar\\\"" %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK2
-; CHECK2: {{^}} "{{.*}}i686-w64-mingw32-gcc" "-E" "-DFOO=\"foo bar\"" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc"{{$}}
+; RUN: llvm-windres -### --preprocessor "i686-w64-mingw32-gcc" --preprocessor-arg -E "-DFOO=\\\"foo bar\\\"" %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK2
+; CHECK2: {{^}} "{{.*}}i686-w64-mingw32-gcc" "-E" "-D" "FOO=\"foo bar\"" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc"{{$}}
 
 ;; Test resolving the --preprocessor executable from PATH
 
@@ -22,3 +22,8 @@
 
 ; RUN: not llvm-windres --preprocessor intentionally-missing-executable %p/Inputs/empty.rc %t.res 2>&1 | FileCheck %s --check-prefix=CHECK4
 ; CHECK4: llvm-rc: Preprocessing failed: Executable "intentionally-missing-executable" doesn't exist!
+
+;; Test --preprocessor with an argument with spaces.
+
+; RUN: llvm-windres -### --preprocessor "path with spaces/gcc" %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK5
+; CHECK5: {{^}} "path with spaces/gcc" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc"{{$}}
diff --git a/llvm/tools/llvm-rc/llvm-rc.cpp b/llvm/tools/llvm-rc/llvm-rc.cpp
index 27fb0309e0ee54..78ab96492acc75 100644
--- a/llvm/tools/llvm-rc/llvm-rc.cpp
+++ b/llvm/tools/llvm-rc/llvm-rc.cpp
@@ -209,7 +209,7 @@ struct RcOptions {
   bool Preprocess = true;
   bool PrintCmdAndExit = false;
   std::string Triple;
-  std::vector<std::string> PreprocessCmd;
+  std::optional<std::string> Preprocessor;
   std::vector<std::string> PreprocessArgs;
 
   std::string InputFile;
@@ -229,7 +229,7 @@ struct RcOptions {
 void preprocess(StringRef Src, StringRef Dst, const RcOptions &Opts,
                 const char *Argv0) {
   std::string Clang;
-  if (Opts.PrintCmdAndExit || !Opts.PreprocessCmd.empty()) {
+  if (Opts.PrintCmdAndExit || Opts.Preprocessor) {
     Clang = "clang";
   } else {
     ErrorOr<std::string> ClangOrErr = findClang(Argv0, Opts.Triple);
@@ -249,10 +249,9 @@ void preprocess(StringRef Src, StringRef Dst, const RcOptions &Opts,
       Clang, "--driver-mode=gcc", "-target", Opts.Triple, "-E",
       "-xc", "-DRC_INVOKED"};
   std::string PreprocessorExecutable;
-  if (!Opts.PreprocessCmd.empty()) {
+  if (Opts.Preprocessor) {
     Args.clear();
-    for (const auto &S : Opts.PreprocessCmd)
-      Args.push_back(S);
+    Args.push_back(*Opts.Preprocessor);
     if (!sys::fs::can_execute(Args[0])) {
       if (auto P = sys::findProgramByName(Args[0])) {
         PreprocessorExecutable = *P;
@@ -342,36 +341,6 @@ std::string unescape(StringRef S) {
   return Out;
 }
 
-std::vector<std::string> unescapeSplit(StringRef S) {
-  std::vector<std::string> OutArgs;
-  std::string Out;
-  bool InQuote = false;
-  for (int I = 0, E = S.size(); I < E; I++) {
-    if (S[I] == '\\') {
-      if (I + 1 < E)
-        Out.push_back(S[++I]);
-      else
-        fatalError("Unterminated escape");
-      continue;
-    }
-    if (S[I] == '"') {
-      InQuote = !InQuote;
-      continue;
-    }
-    if (S[I] == ' ' && !InQuote) {
-      OutArgs.push_back(Out);
-      Out.clear();
-      continue;
-    }
-    Out.push_back(S[I]);
-  }
-  if (InQuote)
-    fatalError("Unterminated quote");
-  if (!Out.empty())
-    OutArgs.push_back(Out);
-  return OutArgs;
-}
-
 RcOptions parseWindresOptions(ArrayRef<const char *> ArgsArr,
                               ArrayRef<const char *> InputArgsArray,
                               std::string Prefix) {
@@ -506,11 +475,8 @@ RcOptions parseWindresOptions(ArrayRef<const char *> ArgsArr,
       break;
     }
   }
-  // TODO: If --use-temp-file is set, we shouldn't be unescaping
-  // the --preprocessor argument either, only splitting it.
   if (InputArgs.hasArg(WINDRES_preprocessor))
-    Opts.PreprocessCmd =
-        unescapeSplit(InputArgs.getLastArgValue(WINDRES_preprocessor));
+    Opts.Preprocessor = InputArgs.getLastArgValue(WINDRES_preprocessor);
 
   Opts.Params.CodePage = CpWin1252; // Different default
   if (InputArgs.hasArg(WINDRES_codepage)) {

From 428660cfb986dd0a59cd2a16972c5f7109080522 Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett@linaro.org>
Date: Mon, 18 Dec 2023 10:52:42 +0000
Subject: [PATCH 03/17] [GitHub] Don't indent comment that revision has passed
 the formatting check

Due to the way the f string was written, the text ended up with 4 spaces
at the start. 4 space indent in Markdown means plain text, which is not
what we intend here.
---
 llvm/utils/git/code-format-helper.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index 697a27ab82a750..849ae996f7339f 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -131,8 +131,8 @@ def run(self, changed_files: List[str], args: FormatArgs) -> bool:
         if diff is None:
             if should_update_gh:
                 comment_text = f"""
-    :white_check_mark: With the latest revision this PR passed the {self.friendly_name}.
-    """
+:white_check_mark: With the latest revision this PR passed the {self.friendly_name}.
+"""
                 self.update_pr(comment_text, args, create_new=False)
             return True
         elif len(diff) > 0:

From 6deb5d4e440f34e6838eaf617198caea7ce721c5 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski@amd.com>
Date: Mon, 18 Dec 2023 11:59:35 +0100
Subject: [PATCH 04/17] [NFC][OpenMP][MLIR] Verify if empty workshare loop is
 lowered correctly (#75518)

Check if workshare loop without loop body is lowered correctly i.e.:
  1) null pointer is passed to OpenMP device RTL function as a
     parameter which denotes loop function body aggregated parameters
  2) Outlined loop function body has only one parameter - loop counter
---
 mlir/test/Target/LLVMIR/omptarget-wsloop.mlir | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
index ba641be4dada17..220eb85b3483ec 100644
--- a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
@@ -15,6 +15,16 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
       }
     llvm.return
   }
+
+  llvm.func @target_empty_wsloop(){
+      %loop_ub = llvm.mlir.constant(9 : i32) : i32
+      %loop_lb = llvm.mlir.constant(0 : i32) : i32
+      %loop_step = llvm.mlir.constant(1 : i32) : i32
+      omp.wsloop for  (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
+        omp.yield
+      }
+    llvm.return
+  }
 }
 
 // CHECK: define void @[[FUNC0:.*]](ptr %[[ARG0:.*]])
@@ -31,3 +41,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
 // CHECK:   %[[GEP3:.*]] = getelementptr [10 x i32], ptr %[[LOADGEP]], i32 0, i32 %[[TMP2:.*]]
 // CHECK:   store i32 %[[VAL0:.*]], ptr %[[GEP3]], align 4
 
+// CHECK: define void @[[FUNC_EMPTY_WSLOOP:.*]]()
+// CHECK:   call void @__kmpc_for_static_loop_4u(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), ptr @[[LOOP_EMPTY_BODY_FN:.*]], ptr null, i32 10, i32 %[[NUM_THREADS:.*]], i32 0)
+
+// CHECK: define internal void @[[LOOP_EMPTY_BODY_FN]](i32 %[[LOOP_CNT:.*]])

From 1faa1cd02d569c1e58303ab4bd9d082aecda2a52 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov@arm.com>
Date: Mon, 18 Dec 2023 11:03:39 +0000
Subject: [PATCH 05/17] [Clang][SVE2.1] Add intrinsics for `WHILEcc` resulting
 in predicate pair (#75107)

Add intrinsics of the form:

    svboolx2_t svwhile<cond>_b{8,16,32,64}_[{s,u}64]_x2([u]int64_t, [u]int64_t);

and their overloaded variants as specified in
https://github.com/ARM-software/acle/pull/257
---
 clang/include/clang/Basic/arm_sve.td          |  12 +
 .../acle_sve2p1_while_x2.c                    | 879 ++++++++++++++++++
 2 files changed, 891 insertions(+)
 create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 98434c5c53e28c..a429a3c5fe378a 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1341,6 +1341,18 @@ def SVWHILEHS_U32 : SInst<"svwhilege_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNon
 def SVWHILEHS_U64 : SInst<"svwhilege_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhile, IsStreamingCompatible]>;
 }
 
+let TargetGuard = "sve2p1|sme2"  in {
+  def SVWHILEGE_S64_X2 : SInst<"svwhilege_{d}[_{1}]_x2", "2ll", "PcPsPiPl",     MergeNone, "aarch64_sve_whilege_x2">;
+  def SVWHILEGT_S64_X2 : SInst<"svwhilegt_{d}[_{1}]_x2", "2ll", "PcPsPiPl",     MergeNone, "aarch64_sve_whilegt_x2">;
+  def SVWHILEHI_U64_X2 : SInst<"svwhilegt_{d}[_{1}]_x2", "2nn", "PcPsPiPl",     MergeNone, "aarch64_sve_whilehi_x2">;
+  def SVWHILEHS_U64_X2 : SInst<"svwhilege_{d}[_{1}]_x2", "2nn", "PcPsPiPl",     MergeNone, "aarch64_sve_whilehs_x2">;
+  def SVWHILELE_S64_X2 : SInst<"svwhilele_{d}[_{1}]_x2", "2ll", "PcPsPiPl",     MergeNone, "aarch64_sve_whilele_x2">;
+  def SVWHILELT_S64_X2 : SInst<"svwhilelt_{d}[_{1}]_x2", "2ll", "PcPsPiPl",     MergeNone, "aarch64_sve_whilelt_x2">;
+  def SVWHILELO_U64_X2 : SInst<"svwhilelt_{d}[_{1}]_x2", "2nn", "PcPsPiPl",     MergeNone, "aarch64_sve_whilelo_x2">;
+  def SVWHILELS_U64_X2 : SInst<"svwhilele_{d}[_{1}]_x2", "2nn", "PcPsPiPl",     MergeNone, "aarch64_sve_whilels_x2">;
+
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Uniform DSP operations
 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c
new file mode 100644
index 00000000000000..acead9be3f01d2
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c
@@ -0,0 +1,879 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -o /dev/null %s
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilege_b8_s64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z21test_svwhilege_b8_s64ll(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
+//
+svboolx2_t test_svwhilege_b8_s64(int64_t op1, int64_t op2) {
+  return SVE_ACLE_FUNC(svwhilege_b8,_s64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilege_b8_u64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z21test_svwhilege_b8_u64mm(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
+//
+svboolx2_t test_svwhilege_b8_u64(uint64_t op1, uint64_t op2) {
+  return SVE_ACLE_FUNC(svwhilege_b8,_u64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilege_b16_s64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilege_b16_s64ll(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilege_b16_s64(int64_t op1, int64_t op2) {
+  return SVE_ACLE_FUNC(svwhilege_b16,_s64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilege_b16_u64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilege_b16_u64mm(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilege_b16_u64(uint64_t op1, uint64_t op2) {
+  return SVE_ACLE_FUNC(svwhilege_b16,_u64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilege_b32_s64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilege_b32_s64ll(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilege_b32_s64(int64_t op1, int64_t op2) {
+  return SVE_ACLE_FUNC(svwhilege_b32,_s64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilege_b32_u64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilege_b32_u64mm(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilege_b32_u64(uint64_t op1, uint64_t op2) {
+  return SVE_ACLE_FUNC(svwhilege_b32,_u64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilege_b64_s64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilege_b64_s64ll(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilege_b64_s64(int64_t op1, int64_t op2) {
+  return SVE_ACLE_FUNC(svwhilege_b64,_s64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilege_b64_u64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilege_b64_u64mm(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilege_b64_u64(uint64_t op1, uint64_t op2) {
+  return SVE_ACLE_FUNC(svwhilege_b64,_u64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilegt_b8_s64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z21test_svwhilegt_b8_s64ll(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
+//
+svboolx2_t test_svwhilegt_b8_s64(int64_t op1, int64_t op2) {
+  return SVE_ACLE_FUNC(svwhilegt_b8,_s64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilegt_b8_u64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z21test_svwhilegt_b8_u64mm(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
+//
+svboolx2_t test_svwhilegt_b8_u64(uint64_t op1, uint64_t op2) {
+  return SVE_ACLE_FUNC(svwhilegt_b8,_u64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilegt_b16_s64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilegt_b16_s64ll(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilegt_b16_s64(int64_t op1, int64_t op2) {
+  return SVE_ACLE_FUNC(svwhilegt_b16,_s64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilegt_b16_u64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilegt_b16_u64mm(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilegt_b16_u64(uint64_t op1, uint64_t op2) {
+  return SVE_ACLE_FUNC(svwhilegt_b16,_u64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilegt_b32_s64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilegt_b32_s64ll(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilegt_b32_s64(int64_t op1, int64_t op2) {
+  return SVE_ACLE_FUNC(svwhilegt_b32,_s64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilegt_b32_u64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilegt_b32_u64mm(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilegt_b32_u64(uint64_t op1, uint64_t op2) {
+  return SVE_ACLE_FUNC(svwhilegt_b32,_u64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilegt_b64_s64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilegt_b64_s64ll(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilegt_b64_s64(int64_t op1, int64_t op2) {
+  return SVE_ACLE_FUNC(svwhilegt_b64,_s64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilegt_b64_u64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilegt_b64_u64mm(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilegt_b64_u64(uint64_t op1, uint64_t op2) {
+  return SVE_ACLE_FUNC(svwhilegt_b64,_u64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilele_b8_s64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z21test_svwhilele_b8_s64ll(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
+//
+svboolx2_t test_svwhilele_b8_s64(int64_t op1, int64_t op2) {
+  return SVE_ACLE_FUNC(svwhilele_b8,_s64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilele_b8_u64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z21test_svwhilele_b8_u64mm(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
+//
+svboolx2_t test_svwhilele_b8_u64(uint64_t op1, uint64_t op2) {
+  return SVE_ACLE_FUNC(svwhilele_b8,_u64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilele_b16_s64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilele_b16_s64ll(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilele_b16_s64(int64_t op1, int64_t op2) {
+  return SVE_ACLE_FUNC(svwhilele_b16,_s64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilele_b16_u64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilele_b16_u64mm(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilele_b16_u64(uint64_t op1, uint64_t op2) {
+  return SVE_ACLE_FUNC(svwhilele_b16,_u64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilele_b32_s64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilele_b32_s64ll(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilele_b32_s64(int64_t op1, int64_t op2) {
+  return SVE_ACLE_FUNC(svwhilele_b32,_s64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilele_b32_u64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilele_b32_u64mm(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilele_b32_u64(uint64_t op1, uint64_t op2) {
+  return SVE_ACLE_FUNC(svwhilele_b32,_u64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilele_b64_s64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilele_b64_s64ll(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilele_b64_s64(int64_t op1, int64_t op2) {
+  return SVE_ACLE_FUNC(svwhilele_b64,_s64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilele_b64_u64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilele_b64_u64mm(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilele_b64_u64(uint64_t op1, uint64_t op2) {
+  return SVE_ACLE_FUNC(svwhilele_b64,_u64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilelt_b8_s64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z21test_svwhilelt_b8_s64ll(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
+//
+svboolx2_t test_svwhilelt_b8_s64(int64_t op1, int64_t op2) {
+  return SVE_ACLE_FUNC(svwhilelt_b8,_s64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilelt_b8_u64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z21test_svwhilelt_b8_u64mm(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
+//
+svboolx2_t test_svwhilelt_b8_u64(uint64_t op1, uint64_t op2) {
+  return SVE_ACLE_FUNC(svwhilelt_b8,_u64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilelt_b16_s64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilelt_b16_s64ll(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilelt_b16_s64(int64_t op1, int64_t op2) {
+  return SVE_ACLE_FUNC(svwhilelt_b16,_s64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilelt_b16_u64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilelt_b16_u64mm(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilelt_b16_u64(uint64_t op1, uint64_t op2) {
+  return SVE_ACLE_FUNC(svwhilelt_b16,_u64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilelt_b32_s64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilelt_b32_s64ll(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilelt_b32_s64(int64_t op1, int64_t op2) {
+  return SVE_ACLE_FUNC(svwhilelt_b32,_s64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilelt_b32_u64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilelt_b32_u64mm(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilelt_b32_u64(uint64_t op1, uint64_t op2) {
+  return SVE_ACLE_FUNC(svwhilelt_b32,_u64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilelt_b64_s64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilelt_b64_s64ll(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilelt_b64_s64(int64_t op1, int64_t op2) {
+  return SVE_ACLE_FUNC(svwhilelt_b64,_s64,_x2)(op1, op2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilelt_b64_u64(
+// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z22test_svwhilelt_b64_u64mm(
+// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svwhilelt_b64_u64(uint64_t op1, uint64_t op2) {
+  return SVE_ACLE_FUNC(svwhilelt_b64,_u64,_x2)(op1, op2);
+}

From a34db9bdefe6d60b46f485aa1105ef6c95542e16 Mon Sep 17 00:00:00 2001
From: Jakub Chlanda <jakub@codeplay.com>
Date: Mon, 18 Dec 2023 12:07:22 +0100
Subject: [PATCH 06/17] [AMDGPU][NFC] Simplify needcopysign logic (#75176)

This was caught by coverity, reported as: `dead_error_condition`.
Since the conditional revolves around `CF`, it is guaranteed to be null
in the else clause, hence making the second part of the statement
redundant.
---
 llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 0c21382e5c225e..f03e6b8915b134 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1050,8 +1050,7 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
                      CF->isNegative();
     } else {
       needlog = true;
-      needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
-                               (!CF || CF->isNegative());
+      needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
     }
   } else {
     ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);

From a4deb14e353cd8c543f78979ce986f98023d36dd Mon Sep 17 00:00:00 2001
From: Kiran Chandramohan <kiran.chandramohan@arm.com>
Date: Mon, 18 Dec 2023 11:19:32 +0000
Subject: [PATCH 07/17] [Flang][OpenMP] Add check-dag to private-clause-fixes
 test

---
 .../OpenMP/parallel-private-clause-fixes.f90  | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90
index d7131296757917..8533106b7ac487 100644
--- a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90
+++ b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90
@@ -4,20 +4,20 @@
 
 ! CHECK-LABEL: multiple_private_fix
 ! CHECK-SAME:  %[[GAMA:.*]]: !fir.ref<i32> {fir.bindc_name = "gama"}
-! CHECK:         %[[GAMA_DECL:.*]]:2 = hlfir.declare %[[GAMA]] {uniq_name = "_QFmultiple_private_fixEgama"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:         %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_private_fixEi"}
-! CHECK:         %[[I_DECL:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_private_fixEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:         %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFmultiple_private_fixEj"}
-! CHECK:         %[[J_DECL:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFmultiple_private_fixEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:         %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_private_fixEx"}
-! CHECK:         %[[X_DECL:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_private_fixEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK-DAG:         %[[GAMA_DECL:.*]]:2 = hlfir.declare %[[GAMA]] {uniq_name = "_QFmultiple_private_fixEgama"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK-DAG:         %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_private_fixEi"}
+! CHECK-DAG:         %[[I_DECL:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_private_fixEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK-DAG:         %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFmultiple_private_fixEj"}
+! CHECK-DAG:         %[[J_DECL:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFmultiple_private_fixEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK-DAG:         %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_private_fixEx"}
+! CHECK-DAG:         %[[X_DECL:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_private_fixEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:         omp.parallel {
-! CHECK:           %[[PRIV_I:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
-! CHECK:           %[[PRIV_I_DECL:.*]]:2 = hlfir.declare %[[PRIV_I]] {uniq_name = "_QFmultiple_private_fixEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:           %[[PRIV_J:.*]] = fir.alloca i32 {bindc_name = "j", pinned, uniq_name = "_QFmultiple_private_fixEj"}
-! CHECK:           %[[PRIV_J_DECL:.*]]:2 = hlfir.declare %[[PRIV_J]] {uniq_name = "_QFmultiple_private_fixEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:           %[[PRIV_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned
-! CHECK:           %[[PRIV_X_DECL:.*]]:2 = hlfir.declare %[[PRIV_X]] {uniq_name = "_QFmultiple_private_fixEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK-DAG:           %[[PRIV_I:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK-DAG:           %[[PRIV_I_DECL:.*]]:2 = hlfir.declare %[[PRIV_I]] {uniq_name = "_QFmultiple_private_fixEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK-DAG:           %[[PRIV_J:.*]] = fir.alloca i32 {bindc_name = "j", pinned, uniq_name = "_QFmultiple_private_fixEj"}
+! CHECK-DAG:           %[[PRIV_J_DECL:.*]]:2 = hlfir.declare %[[PRIV_J]] {uniq_name = "_QFmultiple_private_fixEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK-DAG:           %[[PRIV_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned
+! CHECK-DAG:           %[[PRIV_X_DECL:.*]]:2 = hlfir.declare %[[PRIV_X]] {uniq_name = "_QFmultiple_private_fixEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:           %[[ONE:.*]] = arith.constant 1 : i32
 ! CHECK:           %[[VAL_3:.*]] = fir.load %[[GAMA_DECL]]#0 : !fir.ref<i32>
 ! CHECK:           %[[VAL_5:.*]] = arith.constant 1 : i32

From 7c1d8c74e8576c12da526f589c802faa91bc410a Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Mon, 18 Dec 2023 12:20:54 +0100
Subject: [PATCH 08/17] [ValueTracking] Add test for non-zero sub via known non
 equal (NFC)

---
 .../Analysis/ValueTracking/known-non-zero.ll     | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/llvm/test/Analysis/ValueTracking/known-non-zero.ll b/llvm/test/Analysis/ValueTracking/known-non-zero.ll
index dbec47ea0ae261..d8dbd1a1b0bda4 100644
--- a/llvm/test/Analysis/ValueTracking/known-non-zero.ll
+++ b/llvm/test/Analysis/ValueTracking/known-non-zero.ll
@@ -1218,3 +1218,19 @@ define <2 x i1> @cmp_excludes_zero_with_nonsplat_vec_fail(<2 x i8> %a, <2 x i8>
   ret <2 x i1> %r
 }
 
+define i1 @sub_via_non_eq(i8 %x, i8 %y) {
+; CHECK-LABEL: @sub_via_non_eq(
+; CHECK-NEXT:    [[NE:%.*]] = icmp ne i8 [[X:%.*]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[NE]])
+; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i8 [[X]], 3
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[X]], [[SHL]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[SUB]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %ne = icmp ne i8 %x, 0
+  call void @llvm.assume(i1 %ne)
+  %shl = shl nuw i8 %x, 3
+  %sub = sub i8 %x, %shl
+  %cmp = icmp eq i8 %sub, 0
+  ret i1 %cmp
+}

From 337504683efa0b898bcf69b4e5be67d73dbaf180 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Mon, 18 Dec 2023 09:58:23 +0100
Subject: [PATCH 09/17] [ValueTracking] Use isKnownNonEqual() in isNonZeroSub()

(x - y) != 0 is true iff x != y, so use the isKnownNonEqual()
helper, which knows some additional tricks.
---
 llvm/lib/Analysis/ValueTracking.cpp                | 11 ++---------
 llvm/test/Analysis/ValueTracking/known-non-zero.ll |  5 +----
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 9ae05a4b5ccc72..45fdd4eda47d76 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -2368,19 +2368,12 @@ static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth,
 static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth,
                          const SimplifyQuery &Q, unsigned BitWidth, Value *X,
                          Value *Y) {
+  // TODO: Move this case into isKnownNonEqual().
   if (auto *C = dyn_cast<Constant>(X))
     if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Depth, Q))
       return true;
 
-  KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
-  if (XKnown.isUnknown())
-    return false;
-  KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
-  // If X != Y then X - Y is non zero.
-  std::optional<bool> ne = KnownBits::ne(XKnown, YKnown);
-  // If we are unable to compute if X != Y, we won't be able to do anything
-  // computing the knownbits of the sub expression so just return here.
-  return ne && *ne;
+  return ::isKnownNonEqual(X, Y, Depth, Q);
 }
 
 static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts,
diff --git a/llvm/test/Analysis/ValueTracking/known-non-zero.ll b/llvm/test/Analysis/ValueTracking/known-non-zero.ll
index d8dbd1a1b0bda4..c8e17f8dcc69f8 100644
--- a/llvm/test/Analysis/ValueTracking/known-non-zero.ll
+++ b/llvm/test/Analysis/ValueTracking/known-non-zero.ll
@@ -1222,10 +1222,7 @@ define i1 @sub_via_non_eq(i8 %x, i8 %y) {
 ; CHECK-LABEL: @sub_via_non_eq(
 ; CHECK-NEXT:    [[NE:%.*]] = icmp ne i8 [[X:%.*]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[NE]])
-; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i8 [[X]], 3
-; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[X]], [[SHL]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[SUB]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 false
 ;
   %ne = icmp ne i8 %x, 0
   call void @llvm.assume(i1 %ne)

From 465ecf872ec6c09fe610bf161c54031f6a372e95 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Mon, 18 Dec 2023 12:36:19 +0100
Subject: [PATCH 10/17] [InstCombine] Rename UndefElts -> PoisonElts (NFC)

In line with updated shufflevector semantics, this represents the
poison elements rather than undef elements now. This commit is a
pure rename, without any logic changes.
---
 .../InstCombine/InstCombineCalls.cpp          |  20 +--
 .../InstCombine/InstCombineInternal.h         |   2 +-
 .../InstCombine/InstCombineSelect.cpp         |   4 +-
 .../InstCombineSimplifyDemanded.cpp           | 122 +++++++++---------
 .../InstCombine/InstCombineVectorOps.cpp      |  17 +--
 .../InstCombine/InstructionCombining.cpp      |  10 +-
 6 files changed, 88 insertions(+), 87 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 1539fa9a3269e1..c496f9c7419b5f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -357,9 +357,9 @@ Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
 
   // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
   APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
-  APInt UndefElts(DemandedElts.getBitWidth(), 0);
-  if (Value *V =
-          SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
+  APInt PoisonElts(DemandedElts.getBitWidth(), 0);
+  if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
+                                            PoisonElts))
     return replaceOperand(II, 0, V);
 
   return nullptr;
@@ -439,12 +439,12 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
 
   // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
   APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
-  APInt UndefElts(DemandedElts.getBitWidth(), 0);
-  if (Value *V =
-          SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
+  APInt PoisonElts(DemandedElts.getBitWidth(), 0);
+  if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
+                                            PoisonElts))
     return replaceOperand(II, 0, V);
-  if (Value *V =
-          SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts, UndefElts))
+  if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts,
+                                            PoisonElts))
     return replaceOperand(II, 1, V);
 
   return nullptr;
@@ -1526,9 +1526,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
   // support.
   if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
     auto VWidth = IIFVTy->getNumElements();
-    APInt UndefElts(VWidth, 0);
+    APInt PoisonElts(VWidth, 0);
     APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
-    if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
+    if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, PoisonElts)) {
       if (V != II)
         return replaceInstUsesWith(*II, V);
       return II;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 1d50fa9b6bf74b..f86db698ef8f12 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -550,7 +550,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   bool SimplifyDemandedInstructionBits(Instruction &Inst, KnownBits &Known);
 
   Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
-                                    APInt &UndefElts, unsigned Depth = 0,
+                                    APInt &PoisonElts, unsigned Depth = 0,
                                     bool AllowMultipleUsers = false) override;
 
   /// Canonicalize the position of binops relative to shufflevector.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 2dda46986f0fd0..20bf00344b144b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2440,9 +2440,9 @@ Instruction *InstCombinerImpl::foldVectorSelect(SelectInst &Sel) {
     return nullptr;
 
   unsigned NumElts = VecTy->getNumElements();
-  APInt UndefElts(NumElts, 0);
+  APInt PoisonElts(NumElts, 0);
   APInt AllOnesEltMask(APInt::getAllOnes(NumElts));
-  if (Value *V = SimplifyDemandedVectorElts(&Sel, AllOnesEltMask, UndefElts)) {
+  if (Value *V = SimplifyDemandedVectorElts(&Sel, AllOnesEltMask, PoisonElts)) {
     if (V != &Sel)
       return replaceInstUsesWith(Sel, V);
     return &Sel;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 2490f5b9b97eb8..a8ed6fe1432d94 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1319,8 +1319,8 @@ Value *InstCombinerImpl::simplifyShrShlDemandedBits(
 }
 
 /// The specified value produces a vector with any number of elements.
-/// This method analyzes which elements of the operand are undef or poison and
-/// returns that information in UndefElts.
+/// This method analyzes which elements of the operand are poison and
+/// returns that information in PoisonElts.
 ///
 /// DemandedElts contains the set of elements that are actually used by the
 /// caller, and by default (AllowMultipleUsers equals false) the value is
@@ -1333,7 +1333,7 @@ Value *InstCombinerImpl::simplifyShrShlDemandedBits(
 /// returned.  This returns null if no change was made.
 Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
                                                     APInt DemandedElts,
-                                                    APInt &UndefElts,
+                                                    APInt &PoisonElts,
                                                     unsigned Depth,
                                                     bool AllowMultipleUsers) {
   // Cannot analyze scalable type. The number of vector elements is not a
@@ -1347,16 +1347,16 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
 
   if (match(V, m_Undef())) {
     // If the entire vector is undef or poison, just return this info.
-    UndefElts = EltMask;
+    PoisonElts = EltMask;
     return nullptr;
   }
 
   if (DemandedElts.isZero()) { // If nothing is demanded, provide poison.
-    UndefElts = EltMask;
+    PoisonElts = EltMask;
     return PoisonValue::get(V->getType());
   }
 
-  UndefElts = 0;
+  PoisonElts = 0;
 
   if (auto *C = dyn_cast<Constant>(V)) {
     // Check if this is identity. If so, return 0 since we are not simplifying
@@ -1370,7 +1370,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
     for (unsigned i = 0; i != VWidth; ++i) {
       if (!DemandedElts[i]) {   // If not demanded, set to poison.
         Elts.push_back(Poison);
-        UndefElts.setBit(i);
+        PoisonElts.setBit(i);
         continue;
       }
 
@@ -1379,7 +1379,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
 
       Elts.push_back(Elt);
       if (isa<PoisonValue>(Elt)) // Already poison.
-        UndefElts.setBit(i);
+        PoisonElts.setBit(i);
     }
 
     // If we changed the constant, return it.
@@ -1400,7 +1400,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
       // They'll be handled when it's their turn to be visited by
       // the main instcombine process.
       if (Depth != 0)
-        // TODO: Just compute the UndefElts information recursively.
+        // TODO: Just compute the PoisonElts information recursively.
         return nullptr;
 
       // Conservatively assume that all elements are needed.
@@ -1422,8 +1422,8 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
     }
   };
 
-  APInt UndefElts2(VWidth, 0);
-  APInt UndefElts3(VWidth, 0);
+  APInt PoisonElts2(VWidth, 0);
+  APInt PoisonElts3(VWidth, 0);
   switch (I->getOpcode()) {
   default: break;
 
@@ -1449,17 +1449,17 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
       if (i == 0 ? match(I->getOperand(i), m_Undef())
                  : match(I->getOperand(i), m_Poison())) {
         // If the entire vector is undefined, just return this info.
-        UndefElts = EltMask;
+        PoisonElts = EltMask;
         return nullptr;
       }
       if (I->getOperand(i)->getType()->isVectorTy()) {
-        APInt UndefEltsOp(VWidth, 0);
-        simplifyAndSetOp(I, i, DemandedElts, UndefEltsOp);
+        APInt PoisonEltsOp(VWidth, 0);
+        simplifyAndSetOp(I, i, DemandedElts, PoisonEltsOp);
         // gep(x, undef) is not undef, so skip considering idx ops here
         // Note that we could propagate poison, but we can't distinguish between
         // undef & poison bits ATM
         if (i == 0)
-          UndefElts |= UndefEltsOp;
+          PoisonElts |= PoisonEltsOp;
       }
     }
 
@@ -1472,7 +1472,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
     if (!Idx) {
       // Note that we can't propagate undef elt info, because we don't know
       // which elt is getting updated.
-      simplifyAndSetOp(I, 0, DemandedElts, UndefElts2);
+      simplifyAndSetOp(I, 0, DemandedElts, PoisonElts2);
       break;
     }
 
@@ -1487,7 +1487,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
     // was extracted from the same index in another vector with the same type,
     // replace this insert with that other vector.
     // Note: This is attempted before the call to simplifyAndSetOp because that
-    //       may change UndefElts to a value that does not match with Vec.
+    //       may change PoisonElts to a value that does not match with Vec.
     Value *Vec;
     if (PreInsertDemandedElts == 0 &&
         match(I->getOperand(1),
@@ -1496,7 +1496,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
       return Vec;
     }
 
-    simplifyAndSetOp(I, 0, PreInsertDemandedElts, UndefElts);
+    simplifyAndSetOp(I, 0, PreInsertDemandedElts, PoisonElts);
 
     // If this is inserting an element that isn't demanded, remove this
     // insertelement.
@@ -1506,7 +1506,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
     }
 
     // The inserted element is defined.
-    UndefElts.clearBit(IdxNo);
+    PoisonElts.clearBit(IdxNo);
     break;
   }
   case Instruction::ShuffleVector: {
@@ -1525,12 +1525,12 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
         MadeChange = true;
       }
       APInt LeftDemanded(OpWidth, 1);
-      APInt LHSUndefElts(OpWidth, 0);
-      simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts);
-      if (LHSUndefElts[0])
-        UndefElts = EltMask;
+      APInt LHSPoisonElts(OpWidth, 0);
+      simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts);
+      if (LHSPoisonElts[0])
+        PoisonElts = EltMask;
       else
-        UndefElts.clearAllBits();
+        PoisonElts.clearAllBits();
       break;
     }
 
@@ -1549,11 +1549,11 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
       }
     }
 
-    APInt LHSUndefElts(OpWidth, 0);
-    simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts);
+    APInt LHSPoisonElts(OpWidth, 0);
+    simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts);
 
-    APInt RHSUndefElts(OpWidth, 0);
-    simplifyAndSetOp(I, 1, RightDemanded, RHSUndefElts);
+    APInt RHSPoisonElts(OpWidth, 0);
+    simplifyAndSetOp(I, 1, RightDemanded, RHSPoisonElts);
 
     // If this shuffle does not change the vector length and the elements
     // demanded by this shuffle are an identity mask, then this shuffle is
@@ -1579,7 +1579,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
         return Shuffle->getOperand(0);
     }
 
-    bool NewUndefElts = false;
+    bool NewPoisonElts = false;
     unsigned LHSIdx = -1u, LHSValIdx = -1u;
     unsigned RHSIdx = -1u, RHSValIdx = -1u;
     bool LHSUniform = true;
@@ -1587,23 +1587,23 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
     for (unsigned i = 0; i < VWidth; i++) {
       unsigned MaskVal = Shuffle->getMaskValue(i);
       if (MaskVal == -1u) {
-        UndefElts.setBit(i);
+        PoisonElts.setBit(i);
       } else if (!DemandedElts[i]) {
-        NewUndefElts = true;
-        UndefElts.setBit(i);
+        NewPoisonElts = true;
+        PoisonElts.setBit(i);
       } else if (MaskVal < OpWidth) {
-        if (LHSUndefElts[MaskVal]) {
-          NewUndefElts = true;
-          UndefElts.setBit(i);
+        if (LHSPoisonElts[MaskVal]) {
+          NewPoisonElts = true;
+          PoisonElts.setBit(i);
         } else {
           LHSIdx = LHSIdx == -1u ? i : OpWidth;
           LHSValIdx = LHSValIdx == -1u ? MaskVal : OpWidth;
           LHSUniform = LHSUniform && (MaskVal == i);
         }
       } else {
-        if (RHSUndefElts[MaskVal - OpWidth]) {
-          NewUndefElts = true;
-          UndefElts.setBit(i);
+        if (RHSPoisonElts[MaskVal - OpWidth]) {
+          NewPoisonElts = true;
+          PoisonElts.setBit(i);
         } else {
           RHSIdx = RHSIdx == -1u ? i : OpWidth;
           RHSValIdx = RHSValIdx == -1u ? MaskVal - OpWidth : OpWidth;
@@ -1646,11 +1646,11 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
         return New;
       }
     }
-    if (NewUndefElts) {
+    if (NewPoisonElts) {
       // Add additional discovered undefs.
       SmallVector<int, 16> Elts;
       for (unsigned i = 0; i < VWidth; ++i) {
-        if (UndefElts[i])
+        if (PoisonElts[i])
           Elts.push_back(PoisonMaskElem);
         else
           Elts.push_back(Shuffle->getMaskValue(i));
@@ -1665,12 +1665,12 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
     // on the current demanded elements.
     SelectInst *Sel = cast<SelectInst>(I);
     if (Sel->getCondition()->getType()->isVectorTy()) {
-      // TODO: We are not doing anything with UndefElts based on this call.
+      // TODO: We are not doing anything with PoisonElts based on this call.
       // It is overwritten below based on the other select operands. If an
       // element of the select condition is known undef, then we are free to
       // choose the output value from either arm of the select. If we know that
       // one of those values is undef, then the output can be undef.
-      simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
+      simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
     }
 
     // Next, see if we can transform the arms of the select.
@@ -1692,12 +1692,12 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
       }
     }
 
-    simplifyAndSetOp(I, 1, DemandedLHS, UndefElts2);
-    simplifyAndSetOp(I, 2, DemandedRHS, UndefElts3);
+    simplifyAndSetOp(I, 1, DemandedLHS, PoisonElts2);
+    simplifyAndSetOp(I, 2, DemandedRHS, PoisonElts3);
 
     // Output elements are undefined if the element from each arm is undefined.
     // TODO: This can be improved. See comment in select condition handling.
-    UndefElts = UndefElts2 & UndefElts3;
+    PoisonElts = PoisonElts2 & PoisonElts3;
     break;
   }
   case Instruction::BitCast: {
@@ -1706,7 +1706,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
     if (!VTy) break;
     unsigned InVWidth = cast<FixedVectorType>(VTy)->getNumElements();
     APInt InputDemandedElts(InVWidth, 0);
-    UndefElts2 = APInt(InVWidth, 0);
+    PoisonElts2 = APInt(InVWidth, 0);
     unsigned Ratio;
 
     if (VWidth == InVWidth) {
@@ -1735,25 +1735,25 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
       break;
     }
 
-    simplifyAndSetOp(I, 0, InputDemandedElts, UndefElts2);
+    simplifyAndSetOp(I, 0, InputDemandedElts, PoisonElts2);
 
     if (VWidth == InVWidth) {
-      UndefElts = UndefElts2;
+      PoisonElts = PoisonElts2;
     } else if ((VWidth % InVWidth) == 0) {
       // If the number of elements in the output is a multiple of the number of
       // elements in the input then an output element is undef if the
       // corresponding input element is undef.
       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
-        if (UndefElts2[OutIdx / Ratio])
-          UndefElts.setBit(OutIdx);
+        if (PoisonElts2[OutIdx / Ratio])
+          PoisonElts.setBit(OutIdx);
     } else if ((InVWidth % VWidth) == 0) {
       // If the number of elements in the input is a multiple of the number of
       // elements in the output then an output element is undef if all of the
       // corresponding input elements are undef.
       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
-        APInt SubUndef = UndefElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
+        APInt SubUndef = PoisonElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
         if (SubUndef.popcount() == Ratio)
-          UndefElts.setBit(OutIdx);
+          PoisonElts.setBit(OutIdx);
       }
     } else {
       llvm_unreachable("Unimp");
@@ -1762,7 +1762,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
   }
   case Instruction::FPTrunc:
   case Instruction::FPExt:
-    simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
+    simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
     break;
 
   case Instruction::Call: {
@@ -1785,18 +1785,18 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
             DemandedPassThrough.clearBit(i);
         }
       if (II->getIntrinsicID() == Intrinsic::masked_gather)
-        simplifyAndSetOp(II, 0, DemandedPtrs, UndefElts2);
-      simplifyAndSetOp(II, 3, DemandedPassThrough, UndefElts3);
+        simplifyAndSetOp(II, 0, DemandedPtrs, PoisonElts2);
+      simplifyAndSetOp(II, 3, DemandedPassThrough, PoisonElts3);
 
       // Output elements are undefined if the element from both sources are.
       // TODO: can strengthen via mask as well.
-      UndefElts = UndefElts2 & UndefElts3;
+      PoisonElts = PoisonElts2 & PoisonElts3;
       break;
     }
     default: {
       // Handle target specific intrinsics
       std::optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic(
-          *II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
+          *II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3,
           simplifyAndSetOp);
       if (V)
         return *V;
@@ -1859,17 +1859,17 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
         return ShufBO;
     }
 
-    simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
-    simplifyAndSetOp(I, 1, DemandedElts, UndefElts2);
+    simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
+    simplifyAndSetOp(I, 1, DemandedElts, PoisonElts2);
 
     // Output elements are undefined if both are undefined. Consider things
     // like undef & 0. The result is known zero, not undef.
-    UndefElts &= UndefElts2;
+    PoisonElts &= PoisonElts2;
   }
 
   // If we've proven all of the lanes undef, return an undef value.
   // TODO: Intersect w/demanded lanes
-  if (UndefElts.isAllOnes())
+  if (PoisonElts.isAllOnes())
     return UndefValue::get(I->getType());
 
   return MadeChange ? I : nullptr;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index c8b58c51d4e6ec..c381d25011f688 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -581,20 +581,20 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
       // If the input vector has a single use, simplify it based on this use
       // property.
       if (SrcVec->hasOneUse()) {
-        APInt UndefElts(NumElts, 0);
+        APInt PoisonElts(NumElts, 0);
         APInt DemandedElts(NumElts, 0);
         DemandedElts.setBit(IndexC->getZExtValue());
         if (Value *V =
-                SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts))
+                SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts))
           return replaceOperand(EI, 0, V);
       } else {
         // If the input vector has multiple uses, simplify it based on a union
         // of all elements used.
         APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
         if (!DemandedElts.isAllOnes()) {
-          APInt UndefElts(NumElts, 0);
+          APInt PoisonElts(NumElts, 0);
           if (Value *V = SimplifyDemandedVectorElts(
-                  SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
+                  SrcVec, DemandedElts, PoisonElts, 0 /* Depth */,
                   true /* AllowMultipleUsers */)) {
             if (V != SrcVec) {
               Worklist.addValue(SrcVec);
@@ -1713,9 +1713,10 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
 
   if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
     unsigned VWidth = VecTy->getNumElements();
-    APInt UndefElts(VWidth, 0);
+    APInt PoisonElts(VWidth, 0);
     APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
-    if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
+    if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask,
+                                              PoisonElts)) {
       if (V != &IE)
         return replaceInstUsesWith(IE, V);
       return &IE;
@@ -2855,9 +2856,9 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   if (Instruction *I = foldCastShuffle(SVI, Builder))
     return I;
 
-  APInt UndefElts(VWidth, 0);
+  APInt PoisonElts(VWidth, 0);
   APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
-  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) {
     if (V != &SVI)
       return replaceInstUsesWith(SVI, V);
     return &SVI;
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index a7ddadc25de43c..94f60719b78ca3 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -173,14 +173,14 @@ std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
 }
 
 std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
-    IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2,
-    APInt &UndefElts3,
+    IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts,
+    APInt &PoisonElts2, APInt &PoisonElts3,
     std::function<void(Instruction *, unsigned, APInt, APInt &)>
         SimplifyAndSetOp) {
   // Handle target specific intrinsics
   if (II.getCalledFunction()->isTargetIntrinsic()) {
     return TTI.simplifyDemandedVectorEltsIntrinsic(
-        *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
+        *this, II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3,
         SimplifyAndSetOp);
   }
   return std::nullopt;
@@ -2241,10 +2241,10 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   // compile-time.
   if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
     auto VWidth = GEPFVTy->getNumElements();
-    APInt UndefElts(VWidth, 0);
+    APInt PoisonElts(VWidth, 0);
     APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
     if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
-                                              UndefElts)) {
+                                              PoisonElts)) {
       if (V != &GEP)
         return replaceInstUsesWith(GEP, V);
       return &GEP;

From 42239d2e96c5683a2973c39ea7ea20b616218b66 Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein.wu@gmail.com>
Date: Mon, 18 Dec 2023 12:37:29 +0100
Subject: [PATCH 11/17] [clang] Fix CTAD not respect default template arguments
 that were added after the definition. (#75569)

Fixes https://github.com/llvm/llvm-project/issues/69987
---
 clang/docs/ReleaseNotes.rst      |  3 +++
 clang/lib/Sema/SemaTemplate.cpp  | 28 ++++++++++++++++------------
 clang/test/SemaTemplate/ctad.cpp | 10 ++++++++++
 3 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 26ba4f8f72508a..2e32f8b36d23de 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -688,6 +688,9 @@ Bug Fixes in This Version
   (`#62157 <https://github.com/llvm/llvm-project/issues/62157>`_) and
   (`#64885 <https://github.com/llvm/llvm-project/issues/64885>`_) and
   (`#65568 <https://github.com/llvm/llvm-project/issues/65568>`_)
+- Fix an issue where clang doesn't respect detault template arguments that
+  are added in a later redeclaration for CTAD.
+  Fixes (#69987 <https://github.com/llvm/llvm-project/issues/69987>`_)
 
 Bug Fixes to Compiler Builtins
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index f10abeaba0d451..5fcc39ec700522 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -1824,6 +1824,15 @@ static void SetNestedNameSpecifier(Sema &S, TagDecl *T,
     T->setQualifierInfo(SS.getWithLocInContext(S.Context));
 }
 
+// Returns the template parameter list with all default template argument
+// information.
+static TemplateParameterList *GetTemplateParameterList(TemplateDecl *TD) {
+  // Make sure we get the template parameter list from the most
+  // recent declaration, since that is the only one that is guaranteed to
+  // have all the default template argument information.
+  return cast<TemplateDecl>(TD->getMostRecentDecl())->getTemplateParameters();
+}
+
 DeclResult Sema::CheckClassTemplate(
     Scope *S, unsigned TagSpec, TagUseKind TUK, SourceLocation KWLoc,
     CXXScopeSpec &SS, IdentifierInfo *Name, SourceLocation NameLoc,
@@ -2061,13 +2070,13 @@ DeclResult Sema::CheckClassTemplate(
   if (!(TUK == TUK_Friend && CurContext->isDependentContext()) &&
       CheckTemplateParameterList(
           TemplateParams,
-          PrevClassTemplate
-              ? PrevClassTemplate->getMostRecentDecl()->getTemplateParameters()
-              : nullptr,
+          PrevClassTemplate ? GetTemplateParameterList(PrevClassTemplate)
+                            : nullptr,
           (SS.isSet() && SemanticContext && SemanticContext->isRecord() &&
            SemanticContext->isDependentContext())
               ? TPC_ClassTemplateMember
-              : TUK == TUK_Friend ? TPC_FriendClassTemplate : TPC_ClassTemplate,
+          : TUK == TUK_Friend ? TPC_FriendClassTemplate
+                              : TPC_ClassTemplate,
           SkipBody))
     Invalid = true;
 
@@ -2298,7 +2307,7 @@ struct ConvertConstructorToDeductionGuideTransform {
     //    -- The template parameters are the template parameters of the class
     //       template followed by the template parameters (including default
     //       template arguments) of the constructor, if any.
-    TemplateParameterList *TemplateParams = Template->getTemplateParameters();
+    TemplateParameterList *TemplateParams = GetTemplateParameterList(Template);
     if (FTD) {
       TemplateParameterList *InnerParams = FTD->getTemplateParameters();
       SmallVector<NamedDecl *, 16> AllParams;
@@ -2424,7 +2433,7 @@ struct ConvertConstructorToDeductionGuideTransform {
       Params.push_back(NewParam);
     }
 
-    return buildDeductionGuide(Template->getTemplateParameters(), nullptr,
+    return buildDeductionGuide(GetTemplateParameterList(Template), nullptr,
                                ExplicitSpecifier(), TSI, Loc, Loc, Loc);
   }
 
@@ -5956,12 +5965,7 @@ bool Sema::CheckTemplateArgumentList(
   // template.
   TemplateArgumentListInfo NewArgs = TemplateArgs;
 
-  // Make sure we get the template parameter list from the most
-  // recent declaration, since that is the only one that is guaranteed to
-  // have all the default template argument information.
-  TemplateParameterList *Params =
-      cast<TemplateDecl>(Template->getMostRecentDecl())
-          ->getTemplateParameters();
+  TemplateParameterList *Params = GetTemplateParameterList(Template);
 
   SourceLocation RAngleLoc = NewArgs.getRAngleLoc();
 
diff --git a/clang/test/SemaTemplate/ctad.cpp b/clang/test/SemaTemplate/ctad.cpp
index 4d836839d8c346..388ed7d4cced18 100644
--- a/clang/test/SemaTemplate/ctad.cpp
+++ b/clang/test/SemaTemplate/ctad.cpp
@@ -44,3 +44,13 @@ namespace Access {
   };
   D z = {Z(), {}};
 }
+
+namespace GH69987 {
+template<class> struct X {};
+template<class = void> struct X;
+X x;
+
+template<class T, class B> struct Y { Y(T); };
+template<class T, class B=void> struct Y ;
+Y y(1);
+};

From 9c18f031968aabc7698e9ab0ea94da3b665d4d5a Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Mon, 18 Dec 2023 12:43:10 +0100
Subject: [PATCH 12/17] [libc++] Adds headers to FTM. (#75699)

These feature-test macros had no headers listed in their associated
papers. This adds the expected headers.

Fixes https://github.com/llvm/llvm-project/issues/75577
---
 libcxx/include/version                        |   4 +-
 .../.version.compile.pass.cpp                 | 105 ------------------
 .../format.version.compile.pass.cpp           |  34 ++++++
 .../numeric.version.compile.pass.cpp          |  46 +++++++-
 .../generate_feature_test_macro_components.py |  10 +-
 5 files changed, 83 insertions(+), 116 deletions(-)
 delete mode 100644 libcxx/test/std/language.support/support.limits/support.limits.general/.version.compile.pass.cpp

diff --git a/libcxx/include/version b/libcxx/include/version
index a91c344c436090..7aa002e257b0a1 100644
--- a/libcxx/include/version
+++ b/libcxx/include/version
@@ -93,7 +93,7 @@ __cpp_lib_expected                                      202211L <expected>
 __cpp_lib_filesystem                                    201703L <filesystem>
 __cpp_lib_format                                        202106L <format>
 __cpp_lib_format_ranges                                 202207L <format>
-__cpp_lib_format_uchar                                  202311L <>
+__cpp_lib_format_uchar                                  202311L <format>
 __cpp_lib_formatters                                    202302L <stacktrace> <thread>
 __cpp_lib_forward_like                                  202207L <utility>
 __cpp_lib_freestanding_algorithm                        202311L <algorithm>
@@ -188,7 +188,7 @@ __cpp_lib_remove_cvref                                  201711L <type_traits>
 __cpp_lib_result_of_sfinae                              201210L <functional> <type_traits>
 __cpp_lib_robust_nonmodifying_seq_ops                   201304L <algorithm>
 __cpp_lib_sample                                        201603L <algorithm>
-__cpp_lib_saturation_arithmetic                         202311L <>
+__cpp_lib_saturation_arithmetic                         202311L <numeric>
 __cpp_lib_scoped_lock                                   201703L <mutex>
 __cpp_lib_semaphore                                     201907L <semaphore>
 __cpp_lib_shared_mutex                                  201505L <shared_mutex>
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/.version.compile.pass.cpp
deleted file mode 100644
index 2486985cefaca0..00000000000000
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/.version.compile.pass.cpp
+++ /dev/null
@@ -1,105 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// WARNING: This test was generated by generate_feature_test_macro_components.py
-// and should not be edited manually.
-//
-// clang-format off
-
-// <>
-
-// Test the feature test macros defined by <>
-
-/*  Constant                           Value
-    __cpp_lib_format_uchar             202311L [C++20]
-    __cpp_lib_saturation_arithmetic    202311L [C++26]
-*/
-
-#include <>
-#include "test_macros.h"
-
-#if TEST_STD_VER < 14
-
-# ifdef __cpp_lib_format_uchar
-#   error "__cpp_lib_format_uchar should not be defined before c++20"
-# endif
-
-# ifdef __cpp_lib_saturation_arithmetic
-#   error "__cpp_lib_saturation_arithmetic should not be defined before c++26"
-# endif
-
-#elif TEST_STD_VER == 14
-
-# ifdef __cpp_lib_format_uchar
-#   error "__cpp_lib_format_uchar should not be defined before c++20"
-# endif
-
-# ifdef __cpp_lib_saturation_arithmetic
-#   error "__cpp_lib_saturation_arithmetic should not be defined before c++26"
-# endif
-
-#elif TEST_STD_VER == 17
-
-# ifdef __cpp_lib_format_uchar
-#   error "__cpp_lib_format_uchar should not be defined before c++20"
-# endif
-
-# ifdef __cpp_lib_saturation_arithmetic
-#   error "__cpp_lib_saturation_arithmetic should not be defined before c++26"
-# endif
-
-#elif TEST_STD_VER == 20
-
-# ifndef __cpp_lib_format_uchar
-#   error "__cpp_lib_format_uchar should be defined in c++20"
-# endif
-# if __cpp_lib_format_uchar != 202311L
-#   error "__cpp_lib_format_uchar should have the value 202311L in c++20"
-# endif
-
-# ifdef __cpp_lib_saturation_arithmetic
-#   error "__cpp_lib_saturation_arithmetic should not be defined before c++26"
-# endif
-
-#elif TEST_STD_VER == 23
-
-# ifndef __cpp_lib_format_uchar
-#   error "__cpp_lib_format_uchar should be defined in c++23"
-# endif
-# if __cpp_lib_format_uchar != 202311L
-#   error "__cpp_lib_format_uchar should have the value 202311L in c++23"
-# endif
-
-# ifdef __cpp_lib_saturation_arithmetic
-#   error "__cpp_lib_saturation_arithmetic should not be defined before c++26"
-# endif
-
-#elif TEST_STD_VER > 23
-
-# ifndef __cpp_lib_format_uchar
-#   error "__cpp_lib_format_uchar should be defined in c++26"
-# endif
-# if __cpp_lib_format_uchar != 202311L
-#   error "__cpp_lib_format_uchar should have the value 202311L in c++26"
-# endif
-
-# if !defined(_LIBCPP_VERSION)
-#   ifndef __cpp_lib_saturation_arithmetic
-#     error "__cpp_lib_saturation_arithmetic should be defined in c++26"
-#   endif
-#   if __cpp_lib_saturation_arithmetic != 202311L
-#     error "__cpp_lib_saturation_arithmetic should have the value 202311L in c++26"
-#   endif
-# else // _LIBCPP_VERSION
-#   ifdef __cpp_lib_saturation_arithmetic
-#     error "__cpp_lib_saturation_arithmetic should not be defined because it is unimplemented in libc++!"
-#   endif
-# endif
-
-#endif // TEST_STD_VER > 23
-
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/format.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/format.version.compile.pass.cpp
index 4e2c50483c2298..aa7b2f4286827b 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/format.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/format.version.compile.pass.cpp
@@ -18,6 +18,7 @@
 /*  Constant                   Value
     __cpp_lib_format           202106L [C++20]
     __cpp_lib_format_ranges    202207L [C++23]
+    __cpp_lib_format_uchar     202311L [C++20]
 */
 
 #include <format>
@@ -33,6 +34,10 @@
 #   error "__cpp_lib_format_ranges should not be defined before c++23"
 # endif
 
+# ifdef __cpp_lib_format_uchar
+#   error "__cpp_lib_format_uchar should not be defined before c++20"
+# endif
+
 #elif TEST_STD_VER == 14
 
 # ifdef __cpp_lib_format
@@ -43,6 +48,10 @@
 #   error "__cpp_lib_format_ranges should not be defined before c++23"
 # endif
 
+# ifdef __cpp_lib_format_uchar
+#   error "__cpp_lib_format_uchar should not be defined before c++20"
+# endif
+
 #elif TEST_STD_VER == 17
 
 # ifdef __cpp_lib_format
@@ -53,6 +62,10 @@
 #   error "__cpp_lib_format_ranges should not be defined before c++23"
 # endif
 
+# ifdef __cpp_lib_format_uchar
+#   error "__cpp_lib_format_uchar should not be defined before c++20"
+# endif
+
 #elif TEST_STD_VER == 20
 
 # if !defined(_LIBCPP_VERSION)
@@ -72,6 +85,13 @@
 #   error "__cpp_lib_format_ranges should not be defined before c++23"
 # endif
 
+# ifndef __cpp_lib_format_uchar
+#   error "__cpp_lib_format_uchar should be defined in c++20"
+# endif
+# if __cpp_lib_format_uchar != 202311L
+#   error "__cpp_lib_format_uchar should have the value 202311L in c++20"
+# endif
+
 #elif TEST_STD_VER == 23
 
 # if !defined(_LIBCPP_VERSION)
@@ -94,6 +114,13 @@
 #   error "__cpp_lib_format_ranges should have the value 202207L in c++23"
 # endif
 
+# ifndef __cpp_lib_format_uchar
+#   error "__cpp_lib_format_uchar should be defined in c++23"
+# endif
+# if __cpp_lib_format_uchar != 202311L
+#   error "__cpp_lib_format_uchar should have the value 202311L in c++23"
+# endif
+
 #elif TEST_STD_VER > 23
 
 # if !defined(_LIBCPP_VERSION)
@@ -116,5 +143,12 @@
 #   error "__cpp_lib_format_ranges should have the value 202207L in c++26"
 # endif
 
+# ifndef __cpp_lib_format_uchar
+#   error "__cpp_lib_format_uchar should be defined in c++26"
+# endif
+# if __cpp_lib_format_uchar != 202311L
+#   error "__cpp_lib_format_uchar should have the value 202311L in c++26"
+# endif
+
 #endif // TEST_STD_VER > 23
 
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/numeric.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/numeric.version.compile.pass.cpp
index 60004b06c5ff52..b510eefc69a5d3 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/numeric.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/numeric.version.compile.pass.cpp
@@ -15,12 +15,13 @@
 
 // Test the feature test macros defined by <numeric>
 
-/*  Constant                        Value
-    __cpp_lib_constexpr_numeric     201911L [C++20]
-    __cpp_lib_gcd_lcm               201606L [C++17]
-    __cpp_lib_interpolate           201902L [C++20]
-    __cpp_lib_parallel_algorithm    201603L [C++17]
-    __cpp_lib_ranges_iota           202202L [C++23]
+/*  Constant                           Value
+    __cpp_lib_constexpr_numeric        201911L [C++20]
+    __cpp_lib_gcd_lcm                  201606L [C++17]
+    __cpp_lib_interpolate              201902L [C++20]
+    __cpp_lib_parallel_algorithm       201603L [C++17]
+    __cpp_lib_ranges_iota              202202L [C++23]
+    __cpp_lib_saturation_arithmetic    202311L [C++26]
 */
 
 #include <numeric>
@@ -48,6 +49,10 @@
 #   error "__cpp_lib_ranges_iota should not be defined before c++23"
 # endif
 
+# ifdef __cpp_lib_saturation_arithmetic
+#   error "__cpp_lib_saturation_arithmetic should not be defined before c++26"
+# endif
+
 #elif TEST_STD_VER == 14
 
 # ifdef __cpp_lib_constexpr_numeric
@@ -70,6 +75,10 @@
 #   error "__cpp_lib_ranges_iota should not be defined before c++23"
 # endif
 
+# ifdef __cpp_lib_saturation_arithmetic
+#   error "__cpp_lib_saturation_arithmetic should not be defined before c++26"
+# endif
+
 #elif TEST_STD_VER == 17
 
 # ifdef __cpp_lib_constexpr_numeric
@@ -104,6 +113,10 @@
 #   error "__cpp_lib_ranges_iota should not be defined before c++23"
 # endif
 
+# ifdef __cpp_lib_saturation_arithmetic
+#   error "__cpp_lib_saturation_arithmetic should not be defined before c++26"
+# endif
+
 #elif TEST_STD_VER == 20
 
 # ifndef __cpp_lib_constexpr_numeric
@@ -144,6 +157,10 @@
 #   error "__cpp_lib_ranges_iota should not be defined before c++23"
 # endif
 
+# ifdef __cpp_lib_saturation_arithmetic
+#   error "__cpp_lib_saturation_arithmetic should not be defined before c++26"
+# endif
+
 #elif TEST_STD_VER == 23
 
 # ifndef __cpp_lib_constexpr_numeric
@@ -193,6 +210,10 @@
 #   endif
 # endif
 
+# ifdef __cpp_lib_saturation_arithmetic
+#   error "__cpp_lib_saturation_arithmetic should not be defined before c++26"
+# endif
+
 #elif TEST_STD_VER > 23
 
 # ifndef __cpp_lib_constexpr_numeric
@@ -242,5 +263,18 @@
 #   endif
 # endif
 
+# if !defined(_LIBCPP_VERSION)
+#   ifndef __cpp_lib_saturation_arithmetic
+#     error "__cpp_lib_saturation_arithmetic should be defined in c++26"
+#   endif
+#   if __cpp_lib_saturation_arithmetic != 202311L
+#     error "__cpp_lib_saturation_arithmetic should have the value 202311L in c++26"
+#   endif
+# else // _LIBCPP_VERSION
+#   ifdef __cpp_lib_saturation_arithmetic
+#     error "__cpp_lib_saturation_arithmetic should not be defined because it is unimplemented in libc++!"
+#   endif
+# endif
+
 #endif // TEST_STD_VER > 23
 
diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py
index 6a30324397883a..70e18b6dde006e 100755
--- a/libcxx/utils/generate_feature_test_macro_components.py
+++ b/libcxx/utils/generate_feature_test_macro_components.py
@@ -486,7 +486,9 @@ def add_version_header(tc):
             "values": {
                 "c++20": 202311  # DR P2909R4 Fix formatting of code units as integers
             },
-            "headers": [""],  # Note not in format
+            "headers": [
+                "format"  # TODO verify this entry since the paper was underspecified.
+            ],
         },
         {
             "name": "__cpp_lib_formatters",
@@ -660,7 +662,7 @@ def add_version_header(tc):
         },
         {
             "name": "__cpp_lib_ios_noreplace",
-            "values": { "c++23": 202207 },
+            "values": {"c++23": 202207},
             "headers": ["ios"],
         },
         {
@@ -1010,7 +1012,9 @@ def add_version_header(tc):
         {
             "name": "__cpp_lib_saturation_arithmetic",
             "values": {"c++26": 202311},  # P0543R3 Saturation arithmetic
-            "headers": [""],  # Note not in <numerics>
+            "headers": [
+                "numeric"  # TODO verify this entry since the paper was underspecified.
+            ],
             "unimplemented": True,
         },
         {

From 3b08b3340d5adbd001cd1b328f6fd6ecf5d92f4a Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@igalia.com>
Date: Mon, 18 Dec 2023 11:45:21 +0000
Subject: [PATCH 13/17] [RISCV][test] Fix lifetime bug with Module in test

As pointed out by @s-barannikov in post-commit review of #72356, the
module is destroyed at the end of the constructor.
---
 llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp b/llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp
index b4c96a9c2a62ce..5ef86bb3f7b46e 100644
--- a/llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp
+++ b/llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp
@@ -31,6 +31,7 @@ class RISCVInstrInfoTest : public testing::TestWithParam<const char *> {
   std::unique_ptr<RISCVSubtarget> ST;
   std::unique_ptr<MachineModuleInfo> MMI;
   std::unique_ptr<MachineFunction> MF;
+  std::unique_ptr<Module> M;
 
   static void SetUpTestSuite() {
     LLVMInitializeRISCVTargetInfo();
@@ -49,10 +50,10 @@ class RISCVInstrInfoTest : public testing::TestWithParam<const char *> {
         CodeGenOptLevel::Default)));
 
     Ctx = std::make_unique<LLVMContext>();
-    Module M("Module", *Ctx);
-    M.setDataLayout(TM->createDataLayout());
+    M = std::make_unique<Module>("Module", *Ctx);
+    M->setDataLayout(TM->createDataLayout());
     auto *FType = FunctionType::get(Type::getVoidTy(*Ctx), false);
-    auto *F = Function::Create(FType, GlobalValue::ExternalLinkage, "Test", &M);
+    auto *F = Function::Create(FType, GlobalValue::ExternalLinkage, "Test", *M);
     MMI = std::make_unique<MachineModuleInfo>(TM.get());
 
     ST = std::make_unique<RISCVSubtarget>(

From a00c4220be6a9ce17e2a0b88191bdd08e65bf1ad Mon Sep 17 00:00:00 2001
From: Ulrich Weigand <ulrich.weigand@de.ibm.com>
Date: Mon, 18 Dec 2023 12:44:25 +0100
Subject: [PATCH 14/17] [SystemZ] Fix complex address matching when i128 is
 legal

Complex address matching currently handles truncations, under
the assumption that those are no-ops.  This is no longer true
when i128 is legal.  Change the code to only handle actual
no-op truncations.

Fixes https://github.com/llvm/llvm-project/issues/75708
Fixes https://github.com/llvm/llvm-project/issues/75714
---
 .../Target/SystemZ/SystemZISelDAGToDAG.cpp    |  3 +-
 llvm/test/CodeGen/SystemZ/addr-04.ll          | 46 +++++++++++++++++++
 2 files changed, 48 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/SystemZ/addr-04.ll

diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index e5e1e91916f32d..c7d8591c5bdf6f 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -465,7 +465,8 @@ bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM,
                                         bool IsBase) const {
   SDValue N = IsBase ? AM.Base : AM.Index;
   unsigned Opcode = N.getOpcode();
-  if (Opcode == ISD::TRUNCATE) {
+  // Look through no-op truncations.
+  if (Opcode == ISD::TRUNCATE && N.getOperand(0).getValueSizeInBits() <= 64) {
     N = N.getOperand(0);
     Opcode = N.getOpcode();
   }
diff --git a/llvm/test/CodeGen/SystemZ/addr-04.ll b/llvm/test/CodeGen/SystemZ/addr-04.ll
new file mode 100644
index 00000000000000..245623fea9b9ba
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/addr-04.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; Test complex addresses with base or index truncated from 128 bit.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Shift amount with base truncated from 128 bit to 32 bit.
+define void @f1(i128 %x, i32 %y, ptr %px, ptr %py) {
+; CHECK-LABEL: f1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    larl %r1, .LCPI0_0
+; CHECK-NEXT:    vl %v0, 0(%r2), 3
+; CHECK-NEXT:    vl %v1, 0(%r1), 3
+; CHECK-NEXT:    vaq %v0, %v0, %v1
+; CHECK-NEXT:    vlgvf %r1, %v0, 3
+; CHECK-NEXT:    srl %r3, 0(%r1)
+; CHECK-NEXT:    vst %v0, 0(%r4), 3
+; CHECK-NEXT:    st %r3, 0(%r5)
+; CHECK-NEXT:    br %r14
+  %x1 = add i128 %x, 1
+  store i128 %x1, ptr %px, align 8
+  %amt = trunc i128 %x1 to i32
+  %y1 = lshr i32 %y, %amt
+  store i32 %y1, ptr %py, align 4
+  ret void
+}
+
+; Memory address with index truncated from 128 bit to 64 bit.
+define i8 @f2(ptr %base, ptr %p) {
+; CHECK-LABEL: f2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    larl %r1, .LCPI1_0
+; CHECK-NEXT:    vl %v0, 0(%r3), 3
+; CHECK-NEXT:    vl %v1, 0(%r1), 3
+; CHECK-NEXT:    vaq %v0, %v0, %v1
+; CHECK-NEXT:    vlgvg %r1, %v0, 1
+; CHECK-NEXT:    vst %v0, 0(%r3), 3
+; CHECK-NEXT:    lb %r2, 0(%r1,%r2)
+; CHECK-NEXT:    br %r14
+  %idx = load i128, ptr %p, align 8
+  %inc = add nsw i128 %idx, 1
+  store i128 %inc, ptr %p, align 8
+  %idxprom = trunc i128 %inc to i64
+  %arrayidx = getelementptr inbounds i8, ptr %base, i64 %idxprom
+  %res = load i8, ptr %arrayidx, align 1
+  ret i8 %res
+}

From df3ddd78f640ebec74151028d919904c6cf9ecdd Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 18 Dec 2023 11:51:13 +0000
Subject: [PATCH 15/17] CGBuiltin - fix gcc Wunused-variable warning. NFC.

---
 clang/lib/CodeGen/CGBuiltin.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c96f86a823a461..c42b885289c4c7 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -942,7 +942,7 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
                      : Builder.CreateZExtOrTrunc(FAMSize, ResType);
   Value *Res = FAMSize;
 
-  if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
+  if (isa<DeclRefExpr>(Base)) {
     // The whole struct is specificed in the __bdos.
     const RecordDecl *OuterRD =
         CountedByFD->getDeclContext()->getOuterLexicalRecordContext();

From 2f8178806770b549c054b02ae3556b454cdeb722 Mon Sep 17 00:00:00 2001
From: Serge Pavlov <sepavloff@gmail.com>
Date: Mon, 18 Dec 2023 18:57:36 +0700
Subject: [PATCH 16/17] [ARM][FPEnv] Lowering of fpmode intrinsics (#74054)

LLVM intrinsics `get_fpmode`, `set_fpmode` and `reset_fpmode` operate
control modes, the bits of FP environment that affect FP operations. On
ARM these bits are in FPSCR together with the status bits. The
implementation of these intrinsics produces code close to that of
functions `fegetmode` and `fesetmode` from GLIBC.

Pull request: https://github.com/llvm/llvm-project/pull/74054
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp | 58 +++++++++++++++
 llvm/lib/Target/ARM/ARMISelLowering.h   | 10 +++
 llvm/lib/Target/ARM/ARMInstrVFP.td      |  1 +
 llvm/test/CodeGen/ARM/fpenv.ll          | 98 +++++++++++++++++++++++++
 4 files changed, 167 insertions(+)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index db63facca870f8..d00b7853816e19 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1415,6 +1415,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::GET_FPENV, MVT::i32, Legal);
     setOperationAction(ISD::SET_FPENV, MVT::i32, Legal);
     setOperationAction(ISD::RESET_FPENV, MVT::Other, Legal);
+    setOperationAction(ISD::GET_FPMODE, MVT::i32, Legal);
+    setOperationAction(ISD::SET_FPMODE, MVT::i32, Custom);
+    setOperationAction(ISD::RESET_FPMODE, MVT::Other, Custom);
   }
 
   // We want to custom lower some of our intrinsics.
@@ -6447,6 +6450,57 @@ SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op,
   return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
 }
 
+SDValue ARMTargetLowering::LowerSET_FPMODE(SDValue Op,
+                                           SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Chain = Op->getOperand(0);
+  SDValue Mode = Op->getOperand(1);
+
+  // Generate nodes to build:
+  // FPSCR = (FPSCR & FPStatusBits) | (Mode & ~FPStatusBits)
+  SDValue Ops[] = {Chain,
+                   DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
+  SDValue FPSCR =
+      DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
+  Chain = FPSCR.getValue(1);
+  FPSCR = FPSCR.getValue(0);
+
+  SDValue FPSCRMasked =
+      DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
+                  DAG.getConstant(ARM::FPStatusBits, DL, MVT::i32));
+  SDValue InputMasked =
+      DAG.getNode(ISD::AND, DL, MVT::i32, Mode,
+                  DAG.getConstant(~ARM::FPStatusBits, DL, MVT::i32));
+  FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCRMasked, InputMasked);
+
+  SDValue Ops2[] = {
+      Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
+  return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
+}
+
+SDValue ARMTargetLowering::LowerRESET_FPMODE(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Chain = Op->getOperand(0);
+
+  // To get the default FP mode all control bits are cleared:
+  // FPSCR = FPSCR & (FPStatusBits | FPReservedBits)
+  SDValue Ops[] = {Chain,
+                   DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
+  SDValue FPSCR =
+      DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
+  Chain = FPSCR.getValue(1);
+  FPSCR = FPSCR.getValue(0);
+
+  SDValue FPSCRMasked = DAG.getNode(
+      ISD::AND, DL, MVT::i32, FPSCR,
+      DAG.getConstant(ARM::FPStatusBits | ARM::FPReservedBits, DL, MVT::i32));
+  SDValue Ops2[] = {Chain,
+                    DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32),
+                    FPSCRMasked};
+  return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
+}
+
 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
                          const ARMSubtarget *ST) {
   SDLoc dl(N);
@@ -10557,6 +10611,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::ZERO_EXTEND:   return LowerVectorExtend(Op.getNode(), DAG, Subtarget);
   case ISD::GET_ROUNDING:  return LowerGET_ROUNDING(Op, DAG);
   case ISD::SET_ROUNDING:  return LowerSET_ROUNDING(Op, DAG);
+  case ISD::SET_FPMODE:
+    return LowerSET_FPMODE(Op, DAG);
+  case ISD::RESET_FPMODE:
+    return LowerRESET_FPMODE(Op, DAG);
   case ISD::MUL:           return LowerMUL(Op, DAG);
   case ISD::SDIV:
     if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 6c2b92de7a1df2..f398b01f418629 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -375,6 +375,14 @@ class VectorType;
 
   // Bit position of rounding mode bits in FPSCR.
   const unsigned RoundingBitsPos = 22;
+
+  // Bits of floating-point status. These are NZCV flags, QC bit and cumulative
+  // FP exception bits.
+  const unsigned FPStatusBits = 0xf800009f;
+
+  // Some bits in the FPSCR are not yet defined.  They must be preserved when
+  // modifying the contents.
+  const unsigned FPReservedBits = 0x00006060;
   } // namespace ARM
 
   /// Define some predicates that are used for node matching.
@@ -835,6 +843,8 @@ class VectorType;
     SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG,
                             const ARMSubtarget *ST) const;
     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 800527bcf756c6..55d3efbd9b9a2b 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -2675,6 +2675,7 @@ def : Pat<(get_fpenv), (VMRS)>;
 def : Pat<(set_fpenv GPRnopc:$Rt), (VMSR GPRnopc:$Rt)>;
 def : Pat<(reset_fpenv), (VMSR (MOVi 0))>, Requires<[IsARM]>;
 def : Pat<(reset_fpenv), (VMSR (tMOVi8 0))>, Requires<[IsThumb]>;
+def : Pat<(get_fpmode), (VMRS)>;
 
 //===----------------------------------------------------------------------===//
 // Assembler aliases.
diff --git a/llvm/test/CodeGen/ARM/fpenv.ll b/llvm/test/CodeGen/ARM/fpenv.ll
index 40db627ebb3c23..f5d87170d9153e 100644
--- a/llvm/test/CodeGen/ARM/fpenv.ll
+++ b/llvm/test/CodeGen/ARM/fpenv.ll
@@ -142,9 +142,107 @@ entry:
   ret void
 }
 
+define i32 @get_fpmode_01() #0 {
+; CHECK-LABEL: get_fpmode_01:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r11, lr}
+; CHECK-NEXT:    push {r11, lr}
+; CHECK-NEXT:    .pad #8
+; CHECK-NEXT:    sub sp, sp, #8
+; CHECK-NEXT:    add r0, sp, #4
+; CHECK-NEXT:    bl fegetmode
+; CHECK-NEXT:    ldr r0, [sp, #4]
+; CHECK-NEXT:    add sp, sp, #8
+; CHECK-NEXT:    pop {r11, lr}
+; CHECK-NEXT:    mov pc, lr
+entry:
+  %fpenv = call i32 @llvm.get.fpmode.i32()
+  ret i32 %fpenv
+}
+
+define i32 @get_fpmode_02() nounwind {
+; CHECK-LABEL: get_fpmode_02:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmrs r0, fpscr
+; CHECK-NEXT:    mov pc, lr
+entry:
+  %fpenv = call i32 @llvm.get.fpmode.i32()
+  ret i32 %fpenv
+}
+
+define void @set_fpmode_01(i32 %fpmode) #0 {
+; CHECK-LABEL: set_fpmode_01:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r11, lr}
+; CHECK-NEXT:    push {r11, lr}
+; CHECK-NEXT:    .pad #8
+; CHECK-NEXT:    sub sp, sp, #8
+; CHECK-NEXT:    str r0, [sp, #4]
+; CHECK-NEXT:    add r0, sp, #4
+; CHECK-NEXT:    bl fesetmode
+; CHECK-NEXT:    add sp, sp, #8
+; CHECK-NEXT:    pop {r11, lr}
+; CHECK-NEXT:    mov pc, lr
+entry:
+  call void @llvm.set.fpmode.i32(i32 %fpmode)
+  ret void
+}
+
+define void @set_fpmode_02(i32 %fpmode) nounwind {
+; CHECK-LABEL: set_fpmode_02:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmrs r1, fpscr
+; CHECK-NEXT:    mvn r2, #159
+; CHECK-NEXT:    sub r2, r2, #-134217728
+; CHECK-NEXT:    and r0, r0, r2
+; CHECK-NEXT:    mov r2, #159
+; CHECK-NEXT:    orr r2, r2, #-134217728
+; CHECK-NEXT:    and r1, r1, r2
+; CHECK-NEXT:    orr r0, r1, r0
+; CHECK-NEXT:    vmsr fpscr, r0
+; CHECK-NEXT:    mov pc, lr
+entry:
+  call void @llvm.set.fpmode.i32(i32 %fpmode)
+  ret void
+}
+
+define void @reset_fpmode_01() #0 {
+; CHECK-LABEL: reset_fpmode_01:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r11, lr}
+; CHECK-NEXT:    push {r11, lr}
+; CHECK-NEXT:    mvn r0, #0
+; CHECK-NEXT:    bl fesetmode
+; CHECK-NEXT:    pop {r11, lr}
+; CHECK-NEXT:    mov pc, lr
+entry:
+  call void @llvm.reset.fpmode()
+  ret void
+}
+
+define void @reset_fpmode_02() nounwind {
+; CHECK-LABEL: reset_fpmode_02:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmrs r0, fpscr
+; CHECK-NEXT:    ldr r1, .LCPI16_0
+; CHECK-NEXT:    and r0, r0, r1
+; CHECK-NEXT:    vmsr fpscr, r0
+; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI16_0:
+; CHECK-NEXT:    .long 4160774399 @ 0xf80060ff
+entry:
+  call void @llvm.reset.fpmode()
+  ret void
+}
+
 attributes #0 = { nounwind "use-soft-float"="true" }
 
 declare void @llvm.set.rounding(i32)
 declare i32 @llvm.get.fpenv.i32()
 declare void @llvm.set.fpenv.i32(i32 %fpenv)
 declare void @llvm.reset.fpenv()
+declare i32 @llvm.get.fpmode.i32()
+declare void @llvm.set.fpmode.i32(i32 %fpmode)
+declare void @llvm.reset.fpmode()

From dea16ebd2613a4a218c53045270fc4fcc9b427ad Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker@arm.com>
Date: Mon, 18 Dec 2023 11:58:42 +0000
Subject: [PATCH 17/17] [LLVM][IR] Replace ConstantInt's specialisation of
 getType() with getIntegerType(). (#75217)

The specialisation will not be valid when ConstantInt gains native
support for vector types.

This is largely a mechanical change but with extra attention paid to constant
folding, InstCombineVectorOps.cpp, LoopFlatten.cpp and Verifier.cpp to
remove the need to call `getIntegerType()`.

Co-authored-by: Nikita Popov <github@npopov.com>
---
 clang/lib/CodeGen/CGBuiltin.cpp                   |  7 ++++---
 llvm/include/llvm/IR/Constants.h                  |  7 +++----
 llvm/lib/Analysis/InstructionSimplify.cpp         |  2 +-
 llvm/lib/IR/ConstantFold.cpp                      |  2 +-
 llvm/lib/IR/Verifier.cpp                          | 11 ++++++-----
 .../Hexagon/HexagonLoopIdiomRecognition.cpp       |  6 +++---
 llvm/lib/Transforms/IPO/OpenMPOpt.cpp             | 15 ++++++++-------
 .../InstCombine/InstCombineVectorOps.cpp          |  4 ++--
 llvm/lib/Transforms/Scalar/ConstantHoisting.cpp   |  5 ++---
 llvm/lib/Transforms/Scalar/LoopFlatten.cpp        |  5 ++---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp         |  8 ++++----
 mlir/lib/Target/LLVMIR/ModuleImport.cpp           |  2 +-
 12 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c42b885289c4c7..4eb1686f095062 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3214,7 +3214,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
     if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
-      AlignmentCI = ConstantInt::get(AlignmentCI->getType(),
+      AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
                                      llvm::Value::MaximumAlignment);
 
     emitAlignmentAssumption(PtrValue, Ptr,
@@ -17034,7 +17034,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
     Value *Op1 = EmitScalarExpr(E->getArg(1));
     ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
     if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
-      AlignmentCI = ConstantInt::get(AlignmentCI->getType(),
+      AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
                                      llvm::Value::MaximumAlignment);
 
     emitAlignmentAssumption(Op1, E->getArg(1),
@@ -17272,7 +17272,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
         Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
 
     if (getTarget().isLittleEndian())
-      Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue());
+      Index =
+          ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());
 
     return Builder.CreateExtractElement(Unpacked, Index);
   }
diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h
index 0b9f89830b79c6..b5dcc7fbc1d929 100644
--- a/llvm/include/llvm/IR/Constants.h
+++ b/llvm/include/llvm/IR/Constants.h
@@ -171,10 +171,9 @@ class ConstantInt final : public ConstantData {
   /// Determine if this constant's value is same as an unsigned char.
   bool equalsInt(uint64_t V) const { return Val == V; }
 
-  /// getType - Specialize the getType() method to always return an IntegerType,
-  /// which reduces the amount of casting needed in parts of the compiler.
-  ///
-  inline IntegerType *getType() const {
+  /// Variant of the getType() method to always return an IntegerType, which
+  /// reduces the amount of casting needed in parts of the compiler.
+  inline IntegerType *getIntegerType() const {
     return cast<IntegerType>(Value::getType());
   }
 
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 2a45acf63aa2ca..5beac5547d65e0 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -6079,7 +6079,7 @@ static Value *simplifyRelativeLoad(Constant *Ptr, Constant *Offset,
   Type *Int32Ty = Type::getInt32Ty(Ptr->getContext());
 
   auto *OffsetConstInt = dyn_cast<ConstantInt>(Offset);
-  if (!OffsetConstInt || OffsetConstInt->getType()->getBitWidth() > 64)
+  if (!OffsetConstInt || OffsetConstInt->getBitWidth() > 64)
     return nullptr;
 
   APInt OffsetInt = OffsetConstInt->getValue().sextOrTrunc(
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index d499d74f7ba010..7fdc35e7fca097 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -868,7 +868,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
           }
 
           if (GVAlign > 1) {
-            unsigned DstWidth = CI2->getType()->getBitWidth();
+            unsigned DstWidth = CI2->getBitWidth();
             unsigned SrcWidth = std::min(DstWidth, Log2(GVAlign));
             APInt BitsNotSet(APInt::getLowBitsSet(DstWidth, SrcWidth));
 
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 8aba28026306a5..aeaca21a99cc5e 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -2296,10 +2296,9 @@ void Verifier::verifyFunctionMetadata(
       Check(isa<ConstantAsMetadata>(MD->getOperand(0)),
             "expected a constant operand for !kcfi_type", MD);
       Constant *C = cast<ConstantAsMetadata>(MD->getOperand(0))->getValue();
-      Check(isa<ConstantInt>(C),
+      Check(isa<ConstantInt>(C) && isa<IntegerType>(C->getType()),
             "expected a constant integer operand for !kcfi_type", MD);
-      IntegerType *Type = cast<ConstantInt>(C)->getType();
-      Check(Type->getBitWidth() == 32,
+      Check(cast<ConstantInt>(C)->getBitWidth() == 32,
             "expected a 32-bit integer constant operand for !kcfi_type", MD);
     }
   }
@@ -5690,8 +5689,10 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
           "vector of ints");
 
     auto *Op3 = cast<ConstantInt>(Call.getArgOperand(2));
-    Check(Op3->getType()->getBitWidth() <= 32,
-          "third argument of [us][mul|div]_fix[_sat] must fit within 32 bits");
+    Check(Op3->getType()->isIntegerTy(),
+          "third operand of [us][mul|div]_fix[_sat] must be an int type");
+    Check(Op3->getBitWidth() <= 32,
+          "third operand of [us][mul|div]_fix[_sat] must fit within 32 bits");
 
     if (ID == Intrinsic::smul_fix || ID == Intrinsic::smul_fix_sat ||
         ID == Intrinsic::sdiv_fix || ID == Intrinsic::sdiv_fix_sat) {
diff --git a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index 51ef72b873a516..7777ae23e8aecd 100644
--- a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -1062,7 +1062,7 @@ void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
   // Promote immediates.
   for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {
     if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i)))
-      if (CI->getType()->getBitWidth() < DestBW)
+      if (CI->getBitWidth() < DestBW)
         In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
   }
 }
@@ -1577,7 +1577,7 @@ Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
 
 static bool hasZeroSignBit(const Value *V) {
   if (const auto *CI = dyn_cast<const ConstantInt>(V))
-    return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0;
+    return CI->getValue().isNonNegative();
   const Instruction *I = dyn_cast<const Instruction>(V);
   if (!I)
     return false;
@@ -1688,7 +1688,7 @@ void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) {
       if (I->getOpcode() != Instruction::Or)
         return nullptr;
       ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1));
-      if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit())
+      if (!Msb || !Msb->getValue().isSignMask())
         return nullptr;
       if (!hasZeroSignBit(I->getOperand(0)))
         return nullptr;
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index b2665161c090df..2c880316e0a1cd 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -3763,7 +3763,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
     ConstantInt *ExecModeC =
         KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC);
     ConstantInt *AssumedExecModeC = ConstantInt::get(
-        ExecModeC->getType(),
+        ExecModeC->getIntegerType(),
         ExecModeC->getSExtValue() | OMP_TGT_EXEC_MODE_GENERIC_SPMD);
     if (ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)
       SPMDCompatibilityTracker.indicateOptimisticFixpoint();
@@ -3792,7 +3792,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
     ConstantInt *MayUseNestedParallelismC =
         KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(KernelEnvC);
     ConstantInt *AssumedMayUseNestedParallelismC = ConstantInt::get(
-        MayUseNestedParallelismC->getType(), NestedParallelism);
+        MayUseNestedParallelismC->getIntegerType(), NestedParallelism);
     setMayUseNestedParallelismOfKernelEnvironment(
         AssumedMayUseNestedParallelismC);
 
@@ -3801,7 +3801,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
           KernelInfo::getUseGenericStateMachineFromKernelEnvironment(
               KernelEnvC);
       ConstantInt *AssumedUseGenericStateMachineC =
-          ConstantInt::get(UseGenericStateMachineC->getType(), false);
+          ConstantInt::get(UseGenericStateMachineC->getIntegerType(), false);
       setUseGenericStateMachineOfKernelEnvironment(
           AssumedUseGenericStateMachineC);
     }
@@ -4280,8 +4280,9 @@ struct AAKernelInfoFunction : AAKernelInfo {
     // kernel is executed in.
     assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC &&
            "Initially non-SPMD kernel has SPMD exec mode!");
-    setExecModeOfKernelEnvironment(ConstantInt::get(
-        ExecModeC->getType(), ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));
+    setExecModeOfKernelEnvironment(
+        ConstantInt::get(ExecModeC->getIntegerType(),
+                         ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));
 
     ++NumOpenMPTargetRegionKernelsSPMD;
 
@@ -4332,7 +4333,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
 
     // If not SPMD mode, indicate we use a custom state machine now.
     setUseGenericStateMachineOfKernelEnvironment(
-        ConstantInt::get(UseStateMachineC->getType(), false));
+        ConstantInt::get(UseStateMachineC->getIntegerType(), false));
 
     // If we don't actually need a state machine we are done here. This can
     // happen if there simply are no parallel regions. In the resulting kernel
@@ -4658,7 +4659,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
             KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(
                 AA.KernelEnvC);
         ConstantInt *NewMayUseNestedParallelismC = ConstantInt::get(
-            MayUseNestedParallelismC->getType(), AA.NestedParallelism);
+            MayUseNestedParallelismC->getIntegerType(), AA.NestedParallelism);
         AA.setMayUseNestedParallelismOfKernelEnvironment(
             NewMayUseNestedParallelismC);
       }
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index c381d25011f688..bd5f608045cf11 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -388,7 +388,7 @@ static APInt findDemandedEltsByAllUsers(Value *V) {
 /// arbitrarily pick 64 bit as our canonical type.  The actual bitwidth doesn't
 /// matter, we just want a consistent type to simplify CSE.
 static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
-  const unsigned IndexBW = IndexC->getType()->getBitWidth();
+  const unsigned IndexBW = IndexC->getBitWidth();
   if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
     return nullptr;
   return ConstantInt::get(IndexC->getContext(),
@@ -2640,7 +2640,7 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
     assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
 
     // Index is updated to the potentially translated insertion lane.
-    IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
+    IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);
     return true;
   };
 
diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 1fb9d7fff32f66..9e40d94dd73c76 100644
--- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -674,8 +674,7 @@ void ConstantHoistingPass::findBaseConstants(GlobalVariable *BaseGV) {
   llvm::stable_sort(ConstCandVec, [](const ConstantCandidate &LHS,
                                      const ConstantCandidate &RHS) {
     if (LHS.ConstInt->getType() != RHS.ConstInt->getType())
-      return LHS.ConstInt->getType()->getBitWidth() <
-             RHS.ConstInt->getType()->getBitWidth();
+      return LHS.ConstInt->getBitWidth() < RHS.ConstInt->getBitWidth();
     return LHS.ConstInt->getValue().ult(RHS.ConstInt->getValue());
   });
 
@@ -890,7 +889,7 @@ bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) {
         Type *Ty = ConstInfo.BaseExpr->getType();
         Base = new BitCastInst(ConstInfo.BaseExpr, Ty, "const", IP);
       } else {
-        IntegerType *Ty = ConstInfo.BaseInt->getType();
+        IntegerType *Ty = ConstInfo.BaseInt->getIntegerType();
         Base = new BitCastInst(ConstInfo.BaseInt, Ty, "const", IP);
       }
 
diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index b1add3c42976fd..eef94636578d83 100644
--- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -343,9 +343,8 @@ static bool verifyTripCount(Value *RHS, Loop *L,
     // If the RHS of the compare is equal to the backedge taken count we need
     // to add one to get the trip count.
     if (SCEVRHS == BackedgeTCExt || SCEVRHS == BackedgeTakenCount) {
-      ConstantInt *One = ConstantInt::get(ConstantRHS->getType(), 1);
-      Value *NewRHS = ConstantInt::get(
-          ConstantRHS->getContext(), ConstantRHS->getValue() + One->getValue());
+      Value *NewRHS = ConstantInt::get(ConstantRHS->getContext(),
+                                       ConstantRHS->getValue() + 1);
       return setLoopComponents(NewRHS, TripCount, Increment,
                                IterationInstructions);
     }
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 89494a7f64971f..55e375670cc61e 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -6293,7 +6293,7 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
   }
   case BitMapKind: {
     // Type of the bitmap (e.g. i59).
-    IntegerType *MapTy = BitMap->getType();
+    IntegerType *MapTy = BitMap->getIntegerType();
 
     // Cast Index to the same type as the bitmap.
     // Note: The Index is <= the number of elements in the table, so
@@ -6668,7 +6668,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
   Value *TableIndex;
   ConstantInt *TableIndexOffset;
   if (UseSwitchConditionAsTableIndex) {
-    TableIndexOffset = ConstantInt::get(MaxCaseVal->getType(), 0);
+    TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
     TableIndex = SI->getCondition();
   } else {
     TableIndexOffset = MinCaseVal;
@@ -6752,7 +6752,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
     // Get the TableIndex'th bit of the bitmask.
     // If this bit is 0 (meaning hole) jump to the default destination,
     // else continue with table lookup.
-    IntegerType *MapTy = TableMask->getType();
+    IntegerType *MapTy = TableMask->getIntegerType();
     Value *MaskIndex =
         Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
     Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
@@ -6975,7 +6975,7 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
   // Replace each case with its trailing zeros number.
   for (auto &Case : SI->cases()) {
     auto *OrigValue = Case.getCaseValue();
-    Case.setValue(ConstantInt::get(OrigValue->getType(),
+    Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
                                    OrigValue->getValue().countr_zero()));
   }
 
diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
index ec2692f58695d0..905405e9398820 100644
--- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
@@ -720,7 +720,7 @@ static TypedAttr getScalarConstantAsAttr(OpBuilder &builder,
   // Convert scalar intergers.
   if (auto *constInt = dyn_cast<llvm::ConstantInt>(constScalar)) {
     return builder.getIntegerAttr(
-        IntegerType::get(context, constInt->getType()->getBitWidth()),
+        IntegerType::get(context, constInt->getBitWidth()),
         constInt->getValue());
   }