From 489acb2401b51d940fcdbe965d4a5b2d39168b96 Mon Sep 17 00:00:00 2001 From: Alex MacLean Date: Tue, 24 Sep 2024 19:31:40 -0700 Subject: [PATCH 01/65] [NVPTX][NFC] Refactor utilities to use std::optional (#109883) --- llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 16 ++- llvm/lib/Target/NVPTX/NVPTXUtilities.cpp | 139 +++++++++------------- llvm/lib/Target/NVPTX/NVPTXUtilities.h | 24 ++-- 3 files changed, 70 insertions(+), 109 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 38c51666139a89..9bcc911b6c3451 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -563,21 +563,19 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, O << ".maxntid " << Maxntidx.value_or(1) << ", " << Maxntidy.value_or(1) << ", " << Maxntidz.value_or(1) << "\n"; - unsigned Mincta = 0; - if (getMinCTASm(F, Mincta)) - O << ".minnctapersm " << Mincta << "\n"; + if (const auto Mincta = getMinCTASm(F)) + O << ".minnctapersm " << *Mincta << "\n"; - unsigned Maxnreg = 0; - if (getMaxNReg(F, Maxnreg)) - O << ".maxnreg " << Maxnreg << "\n"; + if (const auto Maxnreg = getMaxNReg(F)) + O << ".maxnreg " << *Maxnreg << "\n"; // .maxclusterrank directive requires SM_90 or higher, make sure that we // filter it out for lower SM versions, as it causes a hard ptxas crash. const NVPTXTargetMachine &NTM = static_cast(TM); const auto *STI = static_cast(NTM.getSubtargetImpl()); - unsigned Maxclusterrank = 0; - if (getMaxClusterRank(F, Maxclusterrank) && STI->getSmVersion() >= 90) - O << ".maxclusterrank " << Maxclusterrank << "\n"; + if (STI->getSmVersion() >= 90) + if (const auto Maxclusterrank = getMaxClusterRank(F)) + O << ".maxclusterrank " << *Maxclusterrank << "\n"; } std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const { diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp index 80361744fd5b6f..be1c87d07f4ded 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -13,6 +13,7 @@ #include "NVPTXUtilities.h" #include "NVPTX.h" #include "NVPTXTargetMachine.h" +#include "llvm/ADT/StringRef.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" @@ -130,8 +131,8 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) { } } -bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop, - unsigned &retval) { +static std::optional findOneNVVMAnnotation(const GlobalValue *gv, + const std::string &prop) { auto &AC = getAnnotationCache(); std::lock_guard Guard(AC.Lock); const Module *m = gv->getParent(); @@ -140,21 +141,13 @@ bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop, else if (AC.Cache[m].find(gv) == AC.Cache[m].end()) cacheAnnotationFromMD(m, gv); if (AC.Cache[m][gv].find(prop) == AC.Cache[m][gv].end()) - return false; - retval = AC.Cache[m][gv][prop][0]; - return true; -} - -static std::optional -findOneNVVMAnnotation(const GlobalValue &GV, const std::string &PropName) { - unsigned RetVal; - if (findOneNVVMAnnotation(&GV, PropName, RetVal)) - return RetVal; - return std::nullopt; + return std::nullopt; + return AC.Cache[m][gv][prop][0]; } -bool findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop, - std::vector &retval) { +static bool findAllNVVMAnnotation(const GlobalValue *gv, + const std::string &prop, + std::vector &retval) { auto &AC = getAnnotationCache(); std::lock_guard Guard(AC.Lock); const Module *m = gv->getParent(); @@ -168,25 +161,13 @@ bool findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop, return true; } -bool isTexture(const Value &val) { - if (const GlobalValue *gv = dyn_cast(&val)) { - unsigned Annot; - if (findOneNVVMAnnotation(gv, "texture", Annot)) { - assert((Annot == 1) && "Unexpected annotation on a texture symbol"); +static bool globalHasNVVMAnnotation(const Value &V, const std::string &Prop) { + if (const auto *GV = dyn_cast(&V)) + if (const auto Annot = findOneNVVMAnnotation(GV, Prop)) { + assert((*Annot == 1) && "Unexpected annotation on a symbol"); return true; } - } - return false; -} -bool isSurface(const Value &val) { - if (const GlobalValue *gv = dyn_cast(&val)) { - unsigned Annot; - if (findOneNVVMAnnotation(gv, "surface", Annot)) { - assert((Annot == 1) && "Unexpected annotation on a surface symbol"); - return true; - } - } return false; } @@ -220,71 +201,60 @@ bool isParamGridConstant(const Value &V) { return false; } -bool isSampler(const Value &val) { +bool isTexture(const Value &V) { return globalHasNVVMAnnotation(V, "texture"); } + +bool isSurface(const Value &V) { return globalHasNVVMAnnotation(V, "surface"); } + +bool isSampler(const Value &V) { const char *AnnotationName = "sampler"; - if (const GlobalValue *gv = dyn_cast(&val)) { - unsigned Annot; - if (findOneNVVMAnnotation(gv, AnnotationName, Annot)) { - assert((Annot == 1) && "Unexpected annotation on a sampler symbol"); - return true; - } - } - return argHasNVVMAnnotation(val, AnnotationName); + return globalHasNVVMAnnotation(V, AnnotationName) || + argHasNVVMAnnotation(V, AnnotationName); } -bool isImageReadOnly(const Value &val) { - return argHasNVVMAnnotation(val, "rdoimage"); +bool isImageReadOnly(const Value &V) { + return argHasNVVMAnnotation(V, "rdoimage"); } -bool isImageWriteOnly(const Value &val) { - return argHasNVVMAnnotation(val, "wroimage"); +bool isImageWriteOnly(const Value &V) { + return argHasNVVMAnnotation(V, "wroimage"); } -bool isImageReadWrite(const Value &val) { - return argHasNVVMAnnotation(val, "rdwrimage"); +bool isImageReadWrite(const Value &V) { + return argHasNVVMAnnotation(V, "rdwrimage"); } -bool isImage(const Value &val) { - return isImageReadOnly(val) || isImageWriteOnly(val) || isImageReadWrite(val); +bool isImage(const Value &V) { + return isImageReadOnly(V) || isImageWriteOnly(V) || isImageReadWrite(V); } -bool isManaged(const Value &val) { - if(const GlobalValue *gv = dyn_cast(&val)) { - unsigned Annot; - if (findOneNVVMAnnotation(gv, "managed", Annot)) { - assert((Annot == 1) && "Unexpected annotation on a managed symbol"); - return true; - } - } - return false; -} +bool isManaged(const Value &V) { return globalHasNVVMAnnotation(V, "managed"); } -std::string getTextureName(const Value &val) { - assert(val.hasName() && "Found texture variable with no name"); - return std::string(val.getName()); +StringRef getTextureName(const Value &V) { + assert(V.hasName() && "Found texture variable with no name"); + return V.getName(); } -std::string getSurfaceName(const Value &val) { - assert(val.hasName() && "Found surface variable with no name"); - return std::string(val.getName()); +StringRef getSurfaceName(const Value &V) { + assert(V.hasName() && "Found surface variable with no name"); + return V.getName(); } -std::string getSamplerName(const Value &val) { - assert(val.hasName() && "Found sampler variable with no name"); - return std::string(val.getName()); +StringRef getSamplerName(const Value &V) { + assert(V.hasName() && "Found sampler variable with no name"); + return V.getName(); } std::optional getMaxNTIDx(const Function &F) { - return findOneNVVMAnnotation(F, "maxntidx"); + return findOneNVVMAnnotation(&F, "maxntidx"); } std::optional getMaxNTIDy(const Function &F) { - return findOneNVVMAnnotation(F, "maxntidy"); + return findOneNVVMAnnotation(&F, "maxntidy"); } std::optional getMaxNTIDz(const Function &F) { - return findOneNVVMAnnotation(F, "maxntidz"); + return findOneNVVMAnnotation(&F, "maxntidz"); } std::optional getMaxNTID(const Function &F) { @@ -302,20 +272,20 @@ std::optional getMaxNTID(const Function &F) { return std::nullopt; } -bool getMaxClusterRank(const Function &F, unsigned &x) { - return findOneNVVMAnnotation(&F, "maxclusterrank", x); +std::optional getMaxClusterRank(const Function &F) { + return findOneNVVMAnnotation(&F, "maxclusterrank"); } std::optional getReqNTIDx(const Function &F) { - return findOneNVVMAnnotation(F, "reqntidx"); + return findOneNVVMAnnotation(&F, "reqntidx"); } std::optional getReqNTIDy(const Function &F) { - return findOneNVVMAnnotation(F, "reqntidy"); + return findOneNVVMAnnotation(&F, "reqntidy"); } std::optional getReqNTIDz(const Function &F) { - return findOneNVVMAnnotation(F, "reqntidz"); + return findOneNVVMAnnotation(&F, "reqntidz"); } std::optional getReqNTID(const Function &F) { @@ -328,21 +298,20 @@ std::optional getReqNTID(const Function &F) { return std::nullopt; } -bool getMinCTASm(const Function &F, unsigned &x) { - return findOneNVVMAnnotation(&F, "minctasm", x); +std::optional getMinCTASm(const Function &F) { + return findOneNVVMAnnotation(&F, "minctasm"); } -bool getMaxNReg(const Function &F, unsigned &x) { - return findOneNVVMAnnotation(&F, "maxnreg", x); +std::optional getMaxNReg(const Function &F) { + return findOneNVVMAnnotation(&F, "maxnreg"); } bool isKernelFunction(const Function &F) { - unsigned x = 0; - if (!findOneNVVMAnnotation(&F, "kernel", x)) { - // There is no NVVM metadata, check the calling convention - return F.getCallingConv() == CallingConv::PTX_Kernel; - } - return (x == 1); + if (const auto X = findOneNVVMAnnotation(&F, "kernel")) + return (*X == 1); + + // There is no NVVM metadata, check the calling convention + return F.getCallingConv() == CallingConv::PTX_Kernel; } MaybeAlign getAlign(const Function &F, unsigned Index) { diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/llvm/lib/Target/NVPTX/NVPTXUtilities.h index 938b9b04b7a449..cf15dff85cbde0 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.h +++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.h @@ -32,11 +32,6 @@ class TargetMachine; void clearAnnotationCache(const Module *); -bool findOneNVVMAnnotation(const GlobalValue *, const std::string &, - unsigned &); -bool findAllNVVMAnnotation(const GlobalValue *, const std::string &, - std::vector &); - bool isTexture(const Value &); bool isSurface(const Value &); bool isSampler(const Value &); @@ -46,23 +41,23 @@ bool isImageWriteOnly(const Value &); bool isImageReadWrite(const Value &); bool isManaged(const Value &); -std::string getTextureName(const Value &); -std::string getSurfaceName(const Value &); -std::string getSamplerName(const Value &); +StringRef getTextureName(const Value &); +StringRef getSurfaceName(const Value &); +StringRef getSamplerName(const Value &); std::optional getMaxNTIDx(const Function &); std::optional getMaxNTIDy(const Function &); std::optional getMaxNTIDz(const Function &); -std::optional getMaxNTID(const Function &F); +std::optional getMaxNTID(const Function &); std::optional getReqNTIDx(const Function &); std::optional getReqNTIDy(const Function &); std::optional getReqNTIDz(const Function &); std::optional getReqNTID(const Function &); -bool getMaxClusterRank(const Function &, unsigned &); -bool getMinCTASm(const Function &, unsigned &); -bool getMaxNReg(const Function &, unsigned &); +std::optional getMaxClusterRank(const Function &); +std::optional getMinCTASm(const Function &); +std::optional getMaxNReg(const Function &); bool isKernelFunction(const Function &); bool isParamGridConstant(const Value &); @@ -75,10 +70,9 @@ Function *getMaybeBitcastedCallee(const CallBase *CB); inline unsigned promoteScalarArgumentSize(unsigned size) { if (size <= 32) return 32; - else if (size <= 64) + if (size <= 64) return 64; - else - return size; + return size; } bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM); From 4ca4460bae12eefe90bf69704a33bdd5b1c9f142 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Sun, 22 Sep 2024 23:29:45 -0700 Subject: [PATCH 02/65] [hwasan] Add "-hwasan-with-frame-record" (#109620) It should not be implied form mapping settings. No longer disable frame records for fixed offset. --- .../Transforms/Instrumentation/HWAddressSanitizer.cpp | 9 +++++++-- .../Instrumentation/HWAddressSanitizer/RISCV/alloca.ll | 4 ++-- llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll | 8 ++++---- llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll | 4 ++-- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 49d463a07553f2..7d3db5c2c8d5c3 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -187,6 +187,11 @@ static cl::opt ClMappingOffsetDynamic( clEnumValN(OffsetKind::kIfunc, "ifunc", "Use ifunc global"), clEnumValN(OffsetKind::kTls, "tls", "Use TLS"))); +static cl::opt + ClFrameRecords("hwasan-with-frame-record", + cl::desc("Use ring buffer for stack allocations"), + cl::Hidden); + static cl::opt ClHotPercentileCutoff("hwasan-percentile-cutoff-hot", cl::desc("Hot percentile cuttoff.")); @@ -1934,12 +1939,12 @@ void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple, SetFixed(0); } else if (ClMappingOffset.getNumOccurrences() > 0) { SetFixed(ClMappingOffset); - WithFrameRecord = false; } else if (ClEnableKhwasan || InstrumentWithCalls) { SetFixed(0); WithFrameRecord = false; } else if (ClMappingOffsetDynamic.getNumOccurrences() > 0) { Kind = ClMappingOffsetDynamic; - WithFrameRecord = isInTls(); } + + WithFrameRecord = optOr(ClFrameRecords, WithFrameRecord); } diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll b/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll index 24a89af97cffeb..edbcdbeb8516cd 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll @@ -2,8 +2,8 @@ ; Test alloca instrumentation. Command line includes check-globals so that ; changes to debug-info are detectable. ; -; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -S | FileCheck %s --check-prefixes=DYNAMIC-SHADOW -; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=ZERO-BASED-SHADOW +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -hwasan-with-frame-record=0 -S | FileCheck %s --check-prefixes=DYNAMIC-SHADOW +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -hwasan-with-frame-record=0 -S | FileCheck %s --check-prefixes=ZERO-BASED-SHADOW target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "riscv64-unknown-linux" diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll b/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll index 4d0cce72470b96..451ab9ee184a3a 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll @@ -2,11 +2,11 @@ ; Test alloca instrumentation. Command line includes check-globals so that ; changes to debug-info are detectable. ; -; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -S | FileCheck %s --check-prefixes=DYNAMIC-SHADOW -; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=ZERO-BASED-SHADOW +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -hwasan-with-frame-record=0 -S | FileCheck %s --check-prefixes=DYNAMIC-SHADOW +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -hwasan-with-frame-record=0 -S | FileCheck %s --check-prefixes=ZERO-BASED-SHADOW -; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -S --try-experimental-debuginfo-iterators | FileCheck %s --check-prefixes=DYNAMIC-SHADOW -; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -S --try-experimental-debuginfo-iterators | FileCheck %s --check-prefixes=ZERO-BASED-SHADOW +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -hwasan-with-frame-record=0 -S --try-experimental-debuginfo-iterators | FileCheck %s --check-prefixes=DYNAMIC-SHADOW +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -hwasan-with-frame-record=0 -S --try-experimental-debuginfo-iterators | FileCheck %s --check-prefixes=ZERO-BASED-SHADOW target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-android10000" diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll b/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll index 005a11b00c7a56..73fc077c956242 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll @@ -7,9 +7,9 @@ ; RUN: FileCheck %s --check-prefixes=NOIFUNC-TLS-HISTORY ; RUN: opt -passes=hwasan -S -hwasan-mapping-offset-dynamic=tls -hwasan-record-stack-history=none < %s | \ ; RUN: FileCheck %s --check-prefixes=NOIFUNC-TLS-NOHISTORY -; RUN: opt -passes=hwasan -S -hwasan-mapping-offset-dynamic=global < %s | \ +; RUN: opt -passes=hwasan -S -hwasan-mapping-offset-dynamic=global -hwasan-with-frame-record=0 < %s | \ ; RUN: FileCheck %s --check-prefixes=NOIFUNC-NOTLS -; RUN: opt -passes=hwasan -S -hwasan-mapping-offset-dynamic=ifunc < %s | \ +; RUN: opt -passes=hwasan -S -hwasan-mapping-offset-dynamic=ifunc -hwasan-with-frame-record=0 < %s | \ ; RUN: FileCheck %s --check-prefixes=IFUNC-NOTLS ; RUN: opt -passes=hwasan -S -mtriple=aarch64-fuchsia < %s | \ ; RUN: FileCheck %s --check-prefixes=FUCHSIA From fa627d98e87504b6f6d621a7dab5d140340ed760 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Tue, 24 Sep 2024 20:00:11 -0700 Subject: [PATCH 03/65] [flang][cuda] Add entry point for alloc/free and simple copy (#109867) These will be used to translate simple cuf.alloc/cuf.free and cuf.data_transfer on scalar and constant size arrays. --- flang/include/flang/Runtime/CUDA/common.h | 7 +++ flang/include/flang/Runtime/CUDA/memory.h | 12 +++++ flang/runtime/CUDA/memory.cpp | 52 +++++++++++++++++++++ flang/unittests/Runtime/CUDA/CMakeLists.txt | 1 + flang/unittests/Runtime/CUDA/Memory.cpp | 31 ++++++++++++ 5 files changed, 103 insertions(+) create mode 100644 flang/unittests/Runtime/CUDA/Memory.cpp diff --git a/flang/include/flang/Runtime/CUDA/common.h b/flang/include/flang/Runtime/CUDA/common.h index cb8681da161f0d..b73bc390ea8c9e 100644 --- a/flang/include/flang/Runtime/CUDA/common.h +++ b/flang/include/flang/Runtime/CUDA/common.h @@ -12,6 +12,13 @@ #include "flang/Runtime/descriptor.h" #include "flang/Runtime/entry-names.h" +/// Type of memory for allocation/deallocation +static constexpr unsigned kMemTypeDevice = 0; +static constexpr unsigned kMemTypeManaged = 1; +static constexpr unsigned kMemTypeUnified = 2; +static constexpr unsigned kMemTypePinned = 3; + +/// Data transfer kinds. static constexpr unsigned kHostToDevice = 0; static constexpr unsigned kDeviceToHost = 1; static constexpr unsigned kDeviceToDevice = 2; diff --git a/flang/include/flang/Runtime/CUDA/memory.h b/flang/include/flang/Runtime/CUDA/memory.h index 33947248dc4831..3c3ae73d4ad7a1 100644 --- a/flang/include/flang/Runtime/CUDA/memory.h +++ b/flang/include/flang/Runtime/CUDA/memory.h @@ -17,12 +17,24 @@ namespace Fortran::runtime::cuda { extern "C" { +/// Allocate memory on the device. +void *RTDECL(CUFMemAlloc)(std::size_t bytes, unsigned type, + const char *sourceFile = nullptr, int sourceLine = 0); + +/// Free memory allocated on the device. +void RTDECL(CUFMemFree)(void *devicePtr, unsigned type, + const char *sourceFile = nullptr, int sourceLine = 0); + /// Set value to the data hold by a descriptor. The \p value pointer must be /// addressable to the same amount of bytes specified by the element size of /// the descriptor \p desc. void RTDECL(CUFMemsetDescriptor)(const Descriptor &desc, void *value, const char *sourceFile = nullptr, int sourceLine = 0); +/// Data transfer from a pointer to a pointer. +void RTDECL(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes, + unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0); + /// Data transfer from a pointer to a descriptor. void RTDECL(CUFDataTransferDescPtr)(const Descriptor &dst, void *src, std::size_t bytes, unsigned mode, const char *sourceFile = nullptr, diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp index a287fa14a48789..fc48b4343eea9d 100644 --- a/flang/runtime/CUDA/memory.cpp +++ b/flang/runtime/CUDA/memory.cpp @@ -8,12 +8,47 @@ #include "flang/Runtime/CUDA/memory.h" #include "../terminator.h" +#include "flang/Runtime/CUDA/common.h" #include "cuda_runtime.h" namespace Fortran::runtime::cuda { extern "C" { +void *RTDEF(CUFMemAlloc)( + std::size_t bytes, unsigned type, const char *sourceFile, int sourceLine) { + void *ptr = nullptr; + if (bytes != 0) { + if (type == kMemTypeDevice) { + CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes)); + } else if (type == kMemTypeManaged || type == kMemTypeUnified) { + CUDA_REPORT_IF_ERROR( + cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal)); + } else if (type == kMemTypePinned) { + CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&ptr, bytes)); + } else { + Terminator terminator{sourceFile, sourceLine}; + terminator.Crash("unsupported memory type"); + } + } + return ptr; +} + +void RTDEF(CUFMemFree)( + void *ptr, unsigned type, const char *sourceFile, int sourceLine) { + if (!ptr) + return; + if (type == kMemTypeDevice || type == kMemTypeManaged || + type == kMemTypeUnified) { + CUDA_REPORT_IF_ERROR(cudaFree(ptr)); + } else if (type == kMemTypePinned) { + CUDA_REPORT_IF_ERROR(cudaFreeHost(ptr)); + } else { + Terminator terminator{sourceFile, sourceLine}; + terminator.Crash("unsupported memory type"); + } +} + void RTDEF(CUFMemsetDescriptor)(const Descriptor &desc, void *value, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; @@ -21,6 +56,23 @@ void RTDEF(CUFMemsetDescriptor)(const Descriptor &desc, void *value, "value to a descriptor"); } +void RTDEF(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes, + unsigned mode, const char *sourceFile, int sourceLine) { + cudaMemcpyKind kind; + if (mode == kHostToDevice) { + kind = cudaMemcpyHostToDevice; + } else if (mode == kDeviceToHost) { + kind = cudaMemcpyDeviceToHost; + } else if (mode == kDeviceToDevice) { + kind = cudaMemcpyDeviceToDevice; + } else { + Terminator terminator{sourceFile, sourceLine}; + terminator.Crash("host to host copy not supported"); + } + // TODO: Use cudaMemcpyAsync when we have support for stream. + CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, bytes, kind)); +} + void RTDEF(CUFDataTransferDescPtr)(const Descriptor &desc, void *addr, std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; diff --git a/flang/unittests/Runtime/CUDA/CMakeLists.txt b/flang/unittests/Runtime/CUDA/CMakeLists.txt index 30fb8c220233c0..a7fe604d687bdd 100644 --- a/flang/unittests/Runtime/CUDA/CMakeLists.txt +++ b/flang/unittests/Runtime/CUDA/CMakeLists.txt @@ -3,6 +3,7 @@ if (FLANG_CUF_RUNTIME) add_flang_unittest(FlangCufRuntimeTests Allocatable.cpp AllocatorCUF.cpp + Memory.cpp ) if (BUILD_SHARED_LIBS) diff --git a/flang/unittests/Runtime/CUDA/Memory.cpp b/flang/unittests/Runtime/CUDA/Memory.cpp new file mode 100644 index 00000000000000..157d3cdb531def --- /dev/null +++ b/flang/unittests/Runtime/CUDA/Memory.cpp @@ -0,0 +1,31 @@ +//===-- flang/unittests/Runtime/Memory.cpp -----------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Runtime/CUDA/memory.h" +#include "gtest/gtest.h" +#include "../../../runtime/terminator.h" +#include "flang/Common/Fortran.h" +#include "flang/Runtime/CUDA/common.h" + +#include "cuda_runtime.h" + +using namespace Fortran::runtime::cuda; + +TEST(MemoryCUFTest, SimpleAllocTramsferFree) { + int *dev = (int *)RTNAME(CUFMemAlloc)( + sizeof(int), kMemTypeDevice, __FILE__, __LINE__); + EXPECT_TRUE(dev != 0); + int host = 42; + RTNAME(CUFDataTransferPtrPtr) + ((void *)dev, (void *)&host, sizeof(int), kHostToDevice, __FILE__, __LINE__); + host = 0; + RTNAME(CUFDataTransferPtrPtr) + ((void *)&host, (void *)dev, sizeof(int), kDeviceToHost, __FILE__, __LINE__); + EXPECT_EQ(42, host); + RTNAME(CUFMemFree)((void *)dev, kMemTypeDevice, __FILE__, __LINE__); +} From 18b9d49ce3370c012fdd04ec87d854d53293f6a6 Mon Sep 17 00:00:00 2001 From: Andrew Rogers Date: Tue, 24 Sep 2024 20:00:34 -0700 Subject: [PATCH 04/65] lldb: get lldb API tests working with newer Android NDKs ## Purpose Running the LLDB API tests against a remote Android target with NDK version r22 or later fails to compile the test inferiors. NDK r21 from 2021 is the most recent NDK that still works with the LLDB API tests. This PR updates the Android make rules to support newer Android NDK versions (r19 and later). ## Overview * Updates and simplifies `Android.rules` to match the newer Android NDK unified toolchain layout introduced in NDK r19 * Sets `OBJCOPY` and `ARCHIVER` env vars, required by a few test cases, to their `llvm-` versions in the unified toolchain * Drops support for pre-2019 Android NDK versions to keep the rules simple * Provides an error message if the tests are run using an incompatible NDK layout ## Problem Details Android introduced a unified tools layout in NDK r19 (2019) and removed the old layout in r22 (2021). Releases r19, r20, and r21 support both the old and new layout side-by-side. More details are in #106270. ## Validation Ran a sub-set of the LLDB API tests against remote Android targets for the four primary architectures i386, x86_64, arm, and aarch64. No validation was done against riscv targets. For each case, ran the copy of `lldb-server` from the Android NDK on the device with the latest LLDB test cases in llvm-project Ran tests with both r19 (the oldest supported) and r26 (more recent, unified layout only) NDK versions. Example test command for aarch64: ``` ./build/bin/lldb-dotest --out-of-tree-debugserver --arch aarch64 --platform-name remote-android --platform-url connect://localhost:5432 --platform-working-dir /data/local/tmp --compiler=$ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/bin/clang lldb/test/API/android/ ``` **NOTE: there are a lot of test failures when running the full suite (especially against 32-bit ARM target). These failures occur independent of this change.** Verified the expected error message appears when attempting to run using NDK r18 ``` Build Command Output: make: Entering directory '/home/andrew/src/llvm/llvm-project/build/lldb-test-build.noindex/android/platform/TestDefaultCacheLineSize.test_cache_line_size' /home/andrew/src/llvm/llvm-project/lldb/packages/Python/lldbsuite/test/make/Android.rules:16: *** "No unified toolchain sysroot found in /home/andrew/Android/Sdk/ndk/18.1.5063045/toolchains/llvm/prebuilt/linux-x86_64/bin/../../../../... NDK must be r19 or later.". Stop. make: Leaving directory '/home/andrew/src/llvm/llvm-project/build/lldb-test-build.noindex/android/platform/TestDefaultCacheLineSize.test_cache_line_size' ``` ## Impact **This change explicitly removes support for the pre-2019 NDK structure.** Only NDK r19 (from 2019) and later can be used when running the LLDB API tests. If the maintainers object, we can easily support both the old and new NDK toolchain layouts side-by-side at the cost of readability/maintainability. Since this change only impacts tests, I don't see much value in supporting NDKs that are over 5 years old. ## Guidance to Reviewers * I am not an expert on `clang` arguments so if anything looks off let me know. * While I personally thing supporting 5+ year old NDKs for testing seems unnecessary, please chime-in if you are concerned with dropping that support. I can easily revise to support both old and new layouts side-by-side. * If there are any specific tests you'd like me to run I will do my best to accommodate. It doesn't look like there's much (any?) Android LLDB CI coverage. --- .../Python/lldbsuite/test/make/Android.rules | 86 +++++++------------ 1 file changed, 32 insertions(+), 54 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/make/Android.rules b/lldb/packages/Python/lldbsuite/test/make/Android.rules index cd7d8ae74d6bf3..44aedf7248419e 100644 --- a/lldb/packages/Python/lldbsuite/test/make/Android.rules +++ b/lldb/packages/Python/lldbsuite/test/make/Android.rules @@ -1,81 +1,59 @@ NDK_ROOT := $(shell dirname $(CC))/../../../../.. -ifeq "$(findstring 64, $(ARCH))" "64" - # lowest 64-bit API level - API_LEVEL := 21 -else ifeq "$(ARCH)" "i386" - # clone(2) declaration is present only since this api level - API_LEVEL := 17 +ifeq "$(HOST_OS)" "Linux" + HOST_TAG := linux-x86_64 +else ifeq "$(HOST_OS)" "Darwin" + HOST_TAG := darwin-x86_64 else - # lowest supported 32-bit API level - API_LEVEL := 16 + HOST_TAG := windows-x86_64 +endif + +TOOLCHAIN_ROOT := $(NDK_ROOT)/toolchains/llvm/prebuilt/$(HOST_TAG) +TOOLCHAIN_SYSROOT := $(TOOLCHAIN_ROOT)/sysroot + +OBJCOPY ?= $(TOOLCHAIN_ROOT)/bin/llvm-objcopy +ARCHIVER ?= $(TOOLCHAIN_ROOT)/bin/llvm-ar + +ifeq "$(wildcard $(TOOLCHAIN_SYSROOT)/.)" "" +# Compiling test inferiors for Android requires an NDK with the unified +# toolchain introduced in version r19. +$(error "No unified toolchain sysroot found in $(NDK_ROOT). NDK must be r19 or later.") endif ifeq "$(ARCH)" "arm" - SYSROOT_ARCH := arm - STL_ARCH := armeabi-v7a TRIPLE := armv7-none-linux-androideabi ARCH_CFLAGS += -march=armv7-a -mfloat-abi=softfp -mfpu=vfpv3-d16 -marm else ifeq "$(ARCH)" "aarch64" - SYSROOT_ARCH := arm64 - STL_ARCH := arm64-v8a TRIPLE := aarch64-none-linux-android else ifeq "$(ARCH)" "i386" - SYSROOT_ARCH := x86 - STL_ARCH := x86 TRIPLE := i686-none-linux-android else - SYSROOT_ARCH := $(ARCH) - STL_ARCH := $(ARCH) TRIPLE := $(ARCH)-none-linux-android endif -ifeq "$(findstring 86,$(ARCH))" "86" - TOOLCHAIN_DIR := $(STL_ARCH)-4.9 -else ifeq "$(ARCH)" "arm" - TOOLCHAIN_DIR := arm-linux-androideabi-4.9 -else - TOOLCHAIN_DIR := $(subst -none,,$(TRIPLE))-4.9 -endif +# lowest 64-bit API level +API_LEVEL := 21 ifeq "$(ARCH)" "arm" - TOOL_PREFIX := arm-linux-androideabi + ARCH_DIR := arm-linux-androideabi else - TOOL_PREFIX := $(subst -none,,$(TRIPLE)) + ARCH_DIR := $(subst -none,,$(TRIPLE)) endif -ifeq "$(HOST_OS)" "Linux" - HOST_TAG := linux-x86_64 -else ifeq "$(HOST_OS)" "Darwin" - HOST_TAG := darwin-x86_64 -else - HOST_TAG := windows-x86_64 -endif - -GCC_TOOLCHAIN = $(NDK_ROOT)/toolchains/$(TOOLCHAIN_DIR)/prebuilt/$(HOST_TAG) - -OBJCOPY ?= $(GCC_TOOLCHAIN)/bin/$(TOOL_PREFIX)-objcopy -ARCHIVER ?= $(GCC_TOOLCHAIN)/bin/$(TOOL_PREFIX)-ar - -ifeq "$(findstring clang,$(CC))" "clang" - ARCH_CFLAGS += -target $(TRIPLE) --gcc-toolchain=$(GCC_TOOLCHAIN) - ARCH_LDFLAGS += -target $(TRIPLE) --gcc-toolchain=$(GCC_TOOLCHAIN) -endif - -ARCH_CFLAGS += --sysroot=$(NDK_ROOT)/sysroot \ - -isystem $(NDK_ROOT)/sysroot/usr/include/$(TOOL_PREFIX) \ - -D__ANDROID_API__=$(API_LEVEL) \ - -isystem $(NDK_ROOT)/platforms/android-$(API_LEVEL)/arch-$(SYSROOT_ARCH)/usr/include - -ARCH_LDFLAGS += --sysroot=$(NDK_ROOT)/platforms/android-$(API_LEVEL)/arch-$(SYSROOT_ARCH) -lm +ARCH_CFLAGS += \ + --target=$(TRIPLE) \ + --sysroot=$(TOOLCHAIN_SYSROOT) \ + -D__ANDROID_API__=$(API_LEVEL) ARCH_CXXFLAGS += \ - -isystem $(NDK_ROOT)/sources/cxx-stl/llvm-libc++/include \ - -isystem $(NDK_ROOT)/sources/android/support/include \ - -isystem $(NDK_ROOT)/sources/cxx-stl/llvm-libc++abi/include + -isystem $(TOOLCHAIN_SYSROOT)/usr/include/c++/v1 ARCH_LDFLAGS += \ - -L$(NDK_ROOT)/sources/cxx-stl/llvm-libc++/libs/$(STL_ARCH) \ - $(NDK_ROOT)/sources/cxx-stl/llvm-libc++/libs/$(STL_ARCH)/libc++_static.a \ + --target=$(TRIPLE) \ + --sysroot=$(TOOLCHAIN_SYSROOT) \ + --prefix=$(TOOLCHAIN_SYSROOT)/usr/lib/$(ARCH_DIR)/$(API_LEVEL) \ + -L$(TOOLCHAIN_SYSROOT)/usr/lib/$(ARCH_DIR)/$(API_LEVEL) \ + $(TOOLCHAIN_SYSROOT)/usr/lib/$(ARCH_DIR)/libc++_static.a \ + -lm \ -lc++abi \ -nostdlib++ From d50eaac12f0cdfe27e942290942b06889ab12a8c Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 24 Sep 2024 20:31:54 -0700 Subject: [PATCH 05/65] Revert "[clang][CodeGen] Zero init unspecified fields in initializers in C" (#109898) Reverts llvm/llvm-project#97121 Causing failures on LNT bots; log shows a crash in ConstStructBuilder::BuildStruct. --- clang/docs/LanguageExtensions.rst | 23 -- clang/lib/CodeGen/CGExprAgg.cpp | 40 +-- clang/lib/CodeGen/CGExprConstant.cpp | 93 +----- clang/lib/CodeGen/CodeGenModule.h | 51 ---- ...07-22-bitfield-init-after-zero-len-array.c | 2 +- clang/test/CodeGen/2008-08-07-AlignPadding1.c | 4 +- .../CodeGen/2009-06-14-anonymous-union-init.c | 4 +- clang/test/CodeGen/64bit-swiftcall.c | 12 +- clang/test/CodeGen/arm-swiftcall.c | 4 +- clang/test/CodeGen/const-init.c | 4 +- clang/test/CodeGen/decl.c | 4 +- clang/test/CodeGen/designated-initializers.c | 12 +- clang/test/CodeGen/ext-int.c | 18 +- clang/test/CodeGen/flexible-array-init.c | 24 +- clang/test/CodeGen/global-init.c | 2 +- clang/test/CodeGen/init.c | 19 ++ .../linux-kernel-struct-union-initializer.c | 267 ------------------ .../linux-kernel-struct-union-initializer2.c | 140 --------- clang/test/CodeGen/mingw-long-double.c | 9 +- clang/test/CodeGen/mms-bitfields.c | 4 +- clang/test/CodeGen/union-init2.c | 4 +- clang/test/CodeGen/windows-swiftcall.c | 12 +- .../CodeGenObjC/designated-initializers.m | 2 +- 23 files changed, 96 insertions(+), 658 deletions(-) delete mode 100644 clang/test/CodeGen/linux-kernel-struct-union-initializer.c delete mode 100644 clang/test/CodeGen/linux-kernel-struct-union-initializer2.c diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index f4be97047422fa..0c6b9b1b8f9ce4 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -5860,26 +5860,3 @@ specify the starting offset to begin embedding from. The resources is treated as being empty if the specified offset is larger than the number of bytes in the resource. The offset will be applied *before* any ``limit`` parameters are applied. - -Union and aggregate initialization in C -======================================= - -In C23 (N2900), when an object is initialized from initializer ``= {}``, all -elements of arrays, all members of structs, and the first members of unions are -empty-initialized recursively. In addition, all padding bits are initialized to -zero. - -Clang guarantees the following behaviors: - -* ``1:`` Clang supports initializer ``= {}`` mentioned above in all C - standards. - -* ``2:`` When unions are initialized from initializer ``= {}``, bytes outside - of the first members of unions are also initialized to zero. - -* ``3:`` When unions, structures and arrays are initialized from initializer - ``= { initializer-list }``, all members not explicitly initialized in - the initializer list are empty-initialized recursively. In addition, all - padding bits are initialized to zero. - -Currently, the above extension only applies to C source code, not C++. diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 43f3bcc95fe767..bbfc6672ecc25a 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -1698,17 +1698,6 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr( // Prepare a 'this' for CXXDefaultInitExprs. CodeGenFunction::FieldConstructionScope FCS(CGF, Dest.getAddress()); - const bool ZeroInitPadding = - CGF.CGM.shouldZeroInitPadding() && !Dest.isZeroed(); - const Address BaseLoc = Dest.getAddress().withElementType(CGF.Int8Ty); - auto DoZeroInitPadding = [&](CharUnits Offset, CharUnits Size) { - if (Size.isPositive()) { - Address Loc = CGF.Builder.CreateConstGEP(BaseLoc, Offset.getQuantity()); - llvm::Constant *SizeVal = CGF.Builder.getInt64(Size.getQuantity()); - CGF.Builder.CreateMemSet(Loc, CGF.Builder.getInt8(0), SizeVal, false); - } - }; - if (record->isUnion()) { // Only initialize one field of a union. The field itself is // specified by the initializer list. @@ -1733,37 +1722,17 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr( if (NumInitElements) { // Store the initializer into the field EmitInitializationToLValue(InitExprs[0], FieldLoc); - if (ZeroInitPadding) { - CharUnits TotalSize = - Dest.getPreferredSize(CGF.getContext(), DestLV.getType()); - CharUnits FieldSize = - CGF.getContext().getTypeSizeInChars(FieldLoc.getType()); - DoZeroInitPadding(FieldSize, TotalSize - FieldSize); - } } else { // Default-initialize to null. - if (ZeroInitPadding) - EmitNullInitializationToLValue(DestLV); - else - EmitNullInitializationToLValue(FieldLoc); + EmitNullInitializationToLValue(FieldLoc); } + return; } // Here we iterate over the fields; this makes it simpler to both // default-initialize fields and skip over unnamed fields. - const ASTRecordLayout &Layout = CGF.getContext().getASTRecordLayout(record); - CharUnits SizeSoFar = CharUnits::Zero(); for (const auto *field : record->fields()) { - if (ZeroInitPadding) { - unsigned FieldNo = field->getFieldIndex(); - CharUnits Offset = - CGF.getContext().toCharUnitsFromBits(Layout.getFieldOffset(FieldNo)); - DoZeroInitPadding(SizeSoFar, Offset - SizeSoFar); - CharUnits FieldSize = - CGF.getContext().getTypeSizeInChars(field->getType()); - SizeSoFar = Offset + FieldSize; - } // We're done once we hit the flexible array member. if (field->getType()->isIncompleteArrayType()) break; @@ -1805,11 +1774,6 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr( } } } - if (ZeroInitPadding) { - CharUnits TotalSize = - Dest.getPreferredSize(CGF.getContext(), DestLV.getType()); - DoZeroInitPadding(SizeSoFar, TotalSize - SizeSoFar); - } } void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E, diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index 66bc0640b632aa..dd65080a840446 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -42,16 +42,6 @@ using namespace CodeGen; namespace { class ConstExprEmitter; -llvm::Constant *getPadding(const CodeGenModule &CGM, CharUnits PadSize) { - llvm::Type *Ty = CGM.CharTy; - if (PadSize > CharUnits::One()) - Ty = llvm::ArrayType::get(Ty, PadSize.getQuantity()); - if (CGM.shouldZeroInitPadding()) { - return llvm::Constant::getNullValue(Ty); - } - return llvm::UndefValue::get(Ty); -} - struct ConstantAggregateBuilderUtils { CodeGenModule &CGM; @@ -71,7 +61,10 @@ struct ConstantAggregateBuilderUtils { } llvm::Constant *getPadding(CharUnits PadSize) const { - return ::getPadding(CGM, PadSize); + llvm::Type *Ty = CGM.CharTy; + if (PadSize > CharUnits::One()) + Ty = llvm::ArrayType::get(Ty, PadSize.getQuantity()); + return llvm::UndefValue::get(Ty); } llvm::Constant *getZeroes(CharUnits ZeroSize) const { @@ -598,11 +591,6 @@ class ConstStructBuilder { bool Build(const InitListExpr *ILE, bool AllowOverwrite); bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase, const CXXRecordDecl *VTableClass, CharUnits BaseOffset); - bool DoZeroInitPadding(const ASTRecordLayout &Layout, unsigned FieldNo, - const FieldDecl &Field, bool AllowOverwrite, - CharUnits &FieldSize, CharUnits &SizeSoFar); - bool DoZeroInitPadding(const ASTRecordLayout &Layout, bool AllowOverwrite, - CharUnits &SizeSoFar); llvm::Constant *Finalize(QualType Ty); }; @@ -727,10 +715,6 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) { if (CXXRD->getNumBases()) return false; - const bool ZeroInitPadding = CGM.shouldZeroInitPadding(); - CharUnits FieldSize = CharUnits::Zero(); - CharUnits SizeSoFar = CharUnits::Zero(); - for (FieldDecl *Field : RD->fields()) { ++FieldNo; @@ -748,13 +732,8 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) { const Expr *Init = nullptr; if (ElementNo < ILE->getNumInits()) Init = ILE->getInit(ElementNo++); - if (isa_and_nonnull(Init)) { - if (ZeroInitPadding && - !DoZeroInitPadding(Layout, FieldNo, *Field, AllowOverwrite, FieldSize, - SizeSoFar)) - return false; + if (isa_and_nonnull(Init)) continue; - } // Zero-sized fields are not emitted, but their initializers may still // prevent emission of this struct as a constant. @@ -764,11 +743,6 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) { continue; } - if (ZeroInitPadding && - !DoZeroInitPadding(Layout, FieldNo, *Field, AllowOverwrite, FieldSize, - SizeSoFar)) - return false; - // When emitting a DesignatedInitUpdateExpr, a nested InitListExpr // represents additional overwriting of our current constant value, and not // a new constant to emit independently. @@ -794,10 +768,6 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) { if (!EltInit) return false; - if (ZeroInitPadding && FieldSize.isZero()) - SizeSoFar += CharUnits::fromQuantity( - CGM.getDataLayout().getTypeAllocSize(EltInit->getType())); - if (!Field->isBitField()) { // Handle non-bitfield members. if (!AppendField(Field, Layout.getFieldOffset(FieldNo), EltInit, @@ -815,9 +785,6 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) { } } - if (ZeroInitPadding && !DoZeroInitPadding(Layout, AllowOverwrite, SizeSoFar)) - return false; - return true; } @@ -882,9 +849,6 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, unsigned FieldNo = 0; uint64_t OffsetBits = CGM.getContext().toBits(Offset); - const bool ZeroInitPadding = CGM.shouldZeroInitPadding(); - CharUnits FieldSize = CharUnits::Zero(); - CharUnits SizeSoFar = CharUnits::Zero(); bool AllowOverwrite = false; for (RecordDecl::field_iterator Field = RD->field_begin(), @@ -906,15 +870,6 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, if (!EltInit) return false; - if (ZeroInitPadding) { - if (!DoZeroInitPadding(Layout, FieldNo, **Field, AllowOverwrite, - FieldSize, SizeSoFar)) - return false; - if (FieldSize.isZero()) - SizeSoFar += CharUnits::fromQuantity( - CGM.getDataLayout().getTypeAllocSize(EltInit->getType())); - } - if (!Field->isBitField()) { // Handle non-bitfield members. if (!AppendField(*Field, Layout.getFieldOffset(FieldNo) + OffsetBits, @@ -931,35 +886,7 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, return false; } } - if (ZeroInitPadding && !DoZeroInitPadding(Layout, AllowOverwrite, SizeSoFar)) - return false; - - return true; -} -bool ConstStructBuilder::DoZeroInitPadding( - const ASTRecordLayout &Layout, unsigned FieldNo, const FieldDecl &Field, - bool AllowOverwrite, CharUnits &FieldSize, CharUnits &SizeSoFar) { - CharUnits Offset = - CGM.getContext().toCharUnitsFromBits(Layout.getFieldOffset(FieldNo)); - if (SizeSoFar < Offset) - if (!AppendBytes(SizeSoFar, getPadding(CGM, Offset - SizeSoFar), - AllowOverwrite)) - return false; - FieldSize = CGM.getContext().getTypeSizeInChars(Field.getType()); - SizeSoFar = Offset + FieldSize; - return true; -} - -bool ConstStructBuilder::DoZeroInitPadding(const ASTRecordLayout &Layout, - bool AllowOverwrite, - CharUnits &SizeSoFar) { - CharUnits TotalSize = Layout.getSize(); - if (SizeSoFar < TotalSize) - if (!AppendBytes(SizeSoFar, getPadding(CGM, TotalSize - SizeSoFar), - AllowOverwrite)) - return false; - SizeSoFar = TotalSize; return true; } @@ -1200,10 +1127,12 @@ class ConstExprEmitter assert(CurSize <= TotalSize && "Union size mismatch!"); if (unsigned NumPadBytes = TotalSize - CurSize) { - llvm::Constant *Padding = - getPadding(CGM, CharUnits::fromQuantity(NumPadBytes)); - Elts.push_back(Padding); - Types.push_back(Padding->getType()); + llvm::Type *Ty = CGM.CharTy; + if (NumPadBytes > 1) + Ty = llvm::ArrayType::get(Ty, NumPadBytes); + + Elts.push_back(llvm::UndefValue::get(Ty)); + Types.push_back(Ty); } llvm::StructType *STy = llvm::StructType::get(VMContext, Types, false); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index fcdfef03088da5..c58bb88035ca8a 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1676,57 +1676,6 @@ class CodeGenModule : public CodeGenTypeCache { MustTailCallUndefinedGlobals.insert(Global); } - bool shouldZeroInitPadding() const { - // In C23 (N3096) $6.7.10: - // """ - // If any object is initialized with an empty iniitializer, then it is - // subject to default initialization: - // - if it is an aggregate, every member is initialized (recursively) - // according to these rules, and any padding is initialized to zero bits; - // - if it is a union, the first named member is initialized (recursively) - // according to these rules, and any padding is initialized to zero bits. - // - // If the aggregate or union contains elements or members that are - // aggregates or unions, these rules apply recursively to the subaggregates - // or contained unions. - // - // If there are fewer initializers in a brace-enclosed list than there are - // elements or members of an aggregate, or fewer characters in a string - // literal used to initialize an array of known size than there are elements - // in the array, the remainder of the aggregate is subject to default - // initialization. - // """ - // - // From my understanding, the standard is ambiguous in the following two - // areas: - // 1. For a union type with empty initializer, if the first named member is - // not the largest member, then the bytes comes after the first named member - // but before padding are left unspecified. An example is: - // union U { int a; long long b;}; - // union U u = {}; // The first 4 bytes are 0, but 4-8 bytes are left - // unspecified. - // - // 2. It only mentions padding for empty initializer, but doesn't mention - // padding for a non empty initialization list. And if the aggregation or - // union contains elements or members that are aggregates or unions, and - // some are non empty initializers, while others are empty initiailizers, - // the padding initialization is unclear. An example is: - // struct S1 { int a; long long b; }; - // struct S2 { char c; struct S1 s1; }; - // // The values for paddings between s2.c and s2.s1.a, between s2.s1.a - // and s2.s1.b are unclear. - // struct S2 s2 = { 'c' }; - // - // Here we choose to zero initiailize left bytes of a union type. Because - // projects like the Linux kernel are relying on this behavior. If we don't - // explicitly zero initialize them, the undef values can be optimized to - // return gabage data. We also choose to zero initialize paddings for - // aggregates and unions, no matter they are initialized by empty - // initializers or non empty initializers. This can provide a consistent - // behavior. So projects like the Linux kernel can rely on it. - return !getLangOpts().CPlusPlus; - } - private: bool shouldDropDLLAttribute(const Decl *D, const llvm::GlobalValue *GV) const; diff --git a/clang/test/CodeGen/2008-07-22-bitfield-init-after-zero-len-array.c b/clang/test/CodeGen/2008-07-22-bitfield-init-after-zero-len-array.c index b639734ef5d4b7..b72d689659e602 100644 --- a/clang/test/CodeGen/2008-07-22-bitfield-init-after-zero-len-array.c +++ b/clang/test/CodeGen/2008-07-22-bitfield-init-after-zero-len-array.c @@ -8,4 +8,4 @@ struct et7 { 52, }; -// CHECK: @yv7 ={{.*}} global { [0 x float], i8, [3 x i8] } { [0 x float] zeroinitializer, i8 52, [3 x i8] zeroinitializer } +// CHECK: @yv7 ={{.*}} global %struct.et7 { [0 x float] zeroinitializer, i8 52 } diff --git a/clang/test/CodeGen/2008-08-07-AlignPadding1.c b/clang/test/CodeGen/2008-08-07-AlignPadding1.c index d69cbc22cc1dfb..17e88ce02659f0 100644 --- a/clang/test/CodeGen/2008-08-07-AlignPadding1.c +++ b/clang/test/CodeGen/2008-08-07-AlignPadding1.c @@ -20,9 +20,9 @@ struct gc_generation { #define GEN_HEAD(n) (&generations[n].head) -// The idea is that there are 6 zeroinitializers in this structure initializer to cover +// The idea is that there are 6 undefs in this structure initializer to cover // the padding between elements. -// CHECK: @generations ={{.*}} global [3 x %struct.gc_generation] [%struct.gc_generation { %union._gc_head { %struct.anon { ptr @generations, ptr @generations, i64 0 }, [8 x i8] zeroinitializer }, i32 700, i32 0, [8 x i8] zeroinitializer }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 48), ptr getelementptr (i8, ptr @generations, i64 48), i64 0 }, [8 x i8] zeroinitializer }, i32 10, i32 0, [8 x i8] zeroinitializer }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 96), ptr getelementptr (i8, ptr @generations, i64 96), i64 0 }, [8 x i8] zeroinitializer }, i32 10, i32 0, [8 x i8] zeroinitializer }] +// CHECK: @generations ={{.*}} global [3 x %struct.gc_generation] [%struct.gc_generation { %union._gc_head { %struct.anon { ptr @generations, ptr @generations, i64 0 }, [8 x i8] undef }, i32 700, i32 0, [8 x i8] undef }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 48), ptr getelementptr (i8, ptr @generations, i64 48), i64 0 }, [8 x i8] undef }, i32 10, i32 0, [8 x i8] undef }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 96), ptr getelementptr (i8, ptr @generations, i64 96), i64 0 }, [8 x i8] undef }, i32 10, i32 0, [8 x i8] undef }] /* linked lists of container objects */ struct gc_generation generations[3] = { /* PyGC_Head, threshold, count */ diff --git a/clang/test/CodeGen/2009-06-14-anonymous-union-init.c b/clang/test/CodeGen/2009-06-14-anonymous-union-init.c index a4375d7868f01d..13f6357f7966d9 100644 --- a/clang/test/CodeGen/2009-06-14-anonymous-union-init.c +++ b/clang/test/CodeGen/2009-06-14-anonymous-union-init.c @@ -7,7 +7,7 @@ struct sysfs_dirent { }; struct sysfs_dirent sysfs_root = { {}, 16877 }; -// CHECK: @sysfs_root = {{.*}}global { %union.anon, i16, [2 x i8] } { %union.anon zeroinitializer, i16 16877, [2 x i8] zeroinitializer } +// CHECK: @sysfs_root = {{.*}}global %struct.sysfs_dirent { %union.anon zeroinitializer, i16 16877 } struct Foo { union { struct empty {} x; }; @@ -16,4 +16,4 @@ struct Foo { struct Foo foo = { {}, 16877 }; // EMPTY: @foo = {{.*}}global %struct.Foo { i16 16877 } -// EMPTY-MSVC: @foo = {{.*}}global %struct.Foo { [4 x i8] zeroinitializer, i16 16877 } +// EMPTY-MSVC: @foo = {{.*}}global %struct.Foo { [4 x i8] undef, i16 16877 } diff --git a/clang/test/CodeGen/64bit-swiftcall.c b/clang/test/CodeGen/64bit-swiftcall.c index 7f8aa02d97ce1f..7af65ccf556a81 100644 --- a/clang/test/CodeGen/64bit-swiftcall.c +++ b/clang/test/CodeGen/64bit-swiftcall.c @@ -14,6 +14,8 @@ // CHECK-DAG: %struct.atomic_padded = type { { %struct.packed, [7 x i8] } } // CHECK-DAG: %struct.packed = type <{ i64, i8 }> +// +// CHECK: [[STRUCT2_RESULT:@.*]] = private {{.*}} constant [[STRUCT2_TYPE:%.*]] { i32 0, i8 0, i8 undef, i8 0, i32 0, i32 0 } /*****************************************************************************/ /****************************** PARAMETER ABIS *******************************/ @@ -160,8 +162,8 @@ typedef struct { } struct_2; TEST(struct_2); // CHECK-LABEL: define{{.*}} swiftcc { i64, i64 } @return_struct_2() {{.*}}{ -// CHECK: [[RET:%.*]] = alloca [[STRUCT2:%.*]], align 4 -// CHECK: call void @llvm.memset +// CHECK: [[RET:%.*]] = alloca [[STRUCT2_TYPE]], align 4 +// CHECK: call void @llvm.memcpy{{.*}}({{.*}}[[RET]], {{.*}}[[STRUCT2_RESULT]] // CHECK: [[GEP0:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[RET]], i32 0, i32 0 // CHECK: [[T0:%.*]] = load i64, ptr [[GEP0]], align 4 // CHECK: [[GEP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[RET]], i32 0, i32 1 @@ -171,7 +173,7 @@ TEST(struct_2); // CHECK: ret { i64, i64 } [[R1]] // CHECK: } // CHECK-LABEL: define{{.*}} swiftcc void @take_struct_2(i64 %0, i64 %1) {{.*}}{ -// CHECK: [[V:%.*]] = alloca [[STRUCT2]], align 4 +// CHECK: [[V:%.*]] = alloca [[STRUCT:%.*]], align 4 // CHECK: [[GEP0:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[V]], i32 0, i32 0 // CHECK: store i64 %0, ptr [[GEP0]], align 4 // CHECK: [[GEP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[V]], i32 0, i32 1 @@ -179,7 +181,7 @@ TEST(struct_2); // CHECK: ret void // CHECK: } // CHECK-LABEL: define{{.*}} void @test_struct_2() {{.*}} { -// CHECK: [[TMP:%.*]] = alloca [[STRUCT2]], align 4 +// CHECK: [[TMP:%.*]] = alloca [[STRUCT2_TYPE]], align 4 // CHECK: [[CALL:%.*]] = call swiftcc { i64, i64 } @return_struct_2() // CHECK: [[GEP:%.*]] = getelementptr inbounds nuw {{.*}} [[TMP]], i32 0, i32 0 // CHECK: [[T0:%.*]] = extractvalue { i64, i64 } [[CALL]], 0 @@ -252,7 +254,7 @@ typedef union { TEST(union_het_fp) // CHECK-LABEL: define{{.*}} swiftcc i64 @return_union_het_fp() // CHECK: [[RET:%.*]] = alloca [[UNION:%.*]], align 8 -// CHECK: call void @llvm.memset{{.*}}(ptr align 8 [[RET]] +// CHECK: call void @llvm.memcpy{{.*}}(ptr align 8 [[RET]] // CHECK: [[GEP:%.*]] = getelementptr inbounds nuw { i64 }, ptr [[RET]], i32 0, i32 0 // CHECK: [[R0:%.*]] = load i64, ptr [[GEP]], align 8 // CHECK: ret i64 [[R0]] diff --git a/clang/test/CodeGen/arm-swiftcall.c b/clang/test/CodeGen/arm-swiftcall.c index 677b878c6765da..ec0e3867909a86 100644 --- a/clang/test/CodeGen/arm-swiftcall.c +++ b/clang/test/CodeGen/arm-swiftcall.c @@ -172,7 +172,7 @@ typedef struct { TEST(struct_2); // CHECK-LABEL: define{{.*}} @return_struct_2() // CHECK: [[RET:%.*]] = alloca [[REC:%.*]], align 4 -// CHECK: @llvm.memset +// CHECK: @llvm.memcpy // CHECK: [[T0:%.*]] = getelementptr inbounds nuw [[AGG:{ i32, i32, float, float }]], ptr [[RET]], i32 0, i32 0 // CHECK: [[FIRST:%.*]] = load i32, ptr [[T0]], align 4 // CHECK: [[T0:%.*]] = getelementptr inbounds nuw [[AGG]], ptr [[RET]], i32 0, i32 1 @@ -274,7 +274,7 @@ typedef union { TEST(union_het_fp) // CHECK-LABEL: define{{.*}} @return_union_het_fp() // CHECK: [[RET:%.*]] = alloca [[REC:%.*]], align {{(4|8)}} -// CHECK: @llvm.memset +// CHECK: @llvm.memcpy // CHECK: [[T0:%.*]] = getelementptr inbounds nuw [[AGG:{ i32, i32 }]], ptr [[RET]], i32 0, i32 0 // CHECK: [[FIRST:%.*]] = load i32, ptr [[T0]], align {{(4|8)}} // CHECK: [[T0:%.*]] = getelementptr inbounds nuw [[AGG]], ptr [[RET]], i32 0, i32 1 diff --git a/clang/test/CodeGen/const-init.c b/clang/test/CodeGen/const-init.c index fc973cb983a80a..ad3e9551199ac2 100644 --- a/clang/test/CodeGen/const-init.c +++ b/clang/test/CodeGen/const-init.c @@ -170,7 +170,7 @@ void g30(void) { int : 1; int x; } a = {}; - // CHECK: @g30.a = internal global %struct.anon.1 zeroinitializer, align 1 + // CHECK: @g30.a = internal global %struct.anon.1 <{ i8 undef, i32 0 }>, align 1 #pragma pack() } @@ -182,7 +182,7 @@ void g31(void) { short z; } a = {23122, -12312731, -312}; #pragma pack() - // CHECK: @g31.a = internal global { i16, [2 x i8], i32, i16, [2 x i8] } { i16 23122, [2 x i8] zeroinitializer, i32 -12312731, i16 -312, [2 x i8] zeroinitializer }, align 4 + // CHECK: @g31.a = internal global %struct.anon.2 { i16 23122, i32 -12312731, i16 -312 }, align 4 } // Clang should evaluate this in constant context, so floating point mode should diff --git a/clang/test/CodeGen/decl.c b/clang/test/CodeGen/decl.c index 97446781fdbd2b..a63846b3223da4 100644 --- a/clang/test/CodeGen/decl.c +++ b/clang/test/CodeGen/decl.c @@ -2,10 +2,10 @@ // CHECK: @test1.x = internal constant [12 x i32] [i32 1 // CHECK: @__const.test2.x = private unnamed_addr constant [13 x i32] [i32 1, -// CHECK: @test5w = {{(dso_local )?}}global { i32, [4 x i8] } { i32 2, [4 x i8] zeroinitializer } +// CHECK: @test5w = {{(dso_local )?}}global { i32, [4 x i8] } { i32 2, [4 x i8] undef } // CHECK: @test5y = {{(dso_local )?}}global { double } { double 7.300000e+0{{[0]*}}1 } -// CHECK: @__const.test6.x = private unnamed_addr constant { i8, i8, [2 x i8], i32, i32 } { i8 1, i8 2, [2 x i8] zeroinitializer, i32 3, i32 0 } +// CHECK: @__const.test6.x = private unnamed_addr constant %struct.SelectDest { i8 1, i8 2, i32 3, i32 0 } // CHECK: @test7 = {{(dso_local )?}}global [2 x %struct.test7s] [%struct.test7s { i32 1, i32 2 }, %struct.test7s { i32 4, i32 0 }] diff --git a/clang/test/CodeGen/designated-initializers.c b/clang/test/CodeGen/designated-initializers.c index ac7860db43be77..620b1b90d25758 100644 --- a/clang/test/CodeGen/designated-initializers.c +++ b/clang/test/CodeGen/designated-initializers.c @@ -8,7 +8,7 @@ struct foo { // CHECK: @u ={{.*}} global %union.anon zeroinitializer union { int i; float f; } u = { }; -// CHECK: @u2 ={{.*}} global { i32, [4 x i8] } zeroinitializer +// CHECK: @u2 ={{.*}} global { i32, [4 x i8] } { i32 0, [4 x i8] undef } union { int i; double f; } u2 = { }; // CHECK: @u3 ={{.*}} global %union.anon.1 zeroinitializer @@ -62,22 +62,22 @@ struct overwrite_string_struct2 { char L[6]; int M; } overwrite_string2[] = { { { "foo" }, 1 }, [0].L[2] = 'x'}; -// CHECK: [6 x i8] c"fox\00\00\00", [2 x i8] zeroinitializer, i32 1 +// CHECK: [6 x i8] c"fox\00\00\00", i32 1 struct overwrite_string_struct3 { char L[3]; int M; } overwrite_string3[] = { { { "foo" }, 1 }, [0].L[2] = 'x'}; -// CHECK: [3 x i8] c"fox", i8 0, i32 1 +// CHECK: [3 x i8] c"fox", i32 1 struct overwrite_string_struct4 { char L[3]; int M; } overwrite_string4[] = { { { "foobar" }, 1 }, [0].L[2] = 'x'}; -// CHECK: [3 x i8] c"fox", i8 0, i32 1 +// CHECK: [3 x i8] c"fox", i32 1 struct overwrite_string_struct5 { char L[6]; int M; } overwrite_string5[] = { { { "foo" }, 1 }, [0].L[4] = 'y'}; -// CHECK: [6 x i8] c"foo\00y\00", [2 x i8] zeroinitializer, i32 1 +// CHECK: [6 x i8] c"foo\00y\00", i32 1 // CHECK: @u1 = {{.*}} { i32 65535 } @@ -138,7 +138,7 @@ union_16644_t union_16644_instance_4[2] = [1].b[1] = 4 }; -// CHECK: @lab ={{.*}} global { [4 x i8], i32 } { [4 x i8] zeroinitializer, i32 123 } +// CHECK: @lab ={{.*}} global { [4 x i8], i32 } { [4 x i8] undef, i32 123 } struct leading_anon_bitfield { int : 32; int n; } lab = { .n = 123 }; struct Base { diff --git a/clang/test/CodeGen/ext-int.c b/clang/test/CodeGen/ext-int.c index aebacd6f22ffc4..e3d609a4ba4a2e 100644 --- a/clang/test/CodeGen/ext-int.c +++ b/clang/test/CodeGen/ext-int.c @@ -16,7 +16,7 @@ unsigned _BitInt(1) GlobSize1 = 0; // CHECK: @GlobSize1 = {{.*}}global i8 0 -// CHECK64: @__const.foo.A = private unnamed_addr constant { i32, [4 x i8], <{ i8, [23 x i8] }> } { i32 1, [4 x i8] zeroinitializer, <{ i8, [23 x i8] }> <{ i8 -86, [23 x i8] zeroinitializer }> }, align 8 +// CHECK64: @__const.foo.A = private unnamed_addr constant { i32, [4 x i8], <{ i8, [23 x i8] }> } { i32 1, [4 x i8] undef, <{ i8, [23 x i8] }> <{ i8 -86, [23 x i8] zeroinitializer }> }, align 8 // @BigGlob = global [40 x i8] c"\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF", align 8 // CHECK64: @f.p = internal global <{ i8, i8, [22 x i8] }> <{ i8 16, i8 39, [22 x i8] zeroinitializer }>, align 8 @@ -91,8 +91,8 @@ int foo(int a) { // CHECK64: %B2 = getelementptr inbounds nuw %struct.S1, ptr %B, i32 0, i32 2 // WIN32: %B2 = getelementptr inbounds nuw %struct.S1, ptr %B, i32 0, i32 2 // LIN32: %B2 = getelementptr inbounds nuw %struct.S1, ptr %B, i32 0, i32 1 - // CHECK: %[[V1:.+]] = load i32, ptr %a.addr, align 4 - // CHECK: %conv = sext i32 %[[V1]] to i129 + // CHECK: %0 = load i32, ptr %a.addr, align 4 + // CHECK: %conv = sext i32 %0 to i129 // CHECK64: storedv = sext i129 %conv to i192 // WIN32: storedv = sext i129 %conv to i192 // LIN32: storedv = sext i129 %conv to i160 @@ -102,12 +102,12 @@ int foo(int a) { // CHECK64: %B3 = getelementptr inbounds nuw %struct.S1, ptr %A, i32 0, i32 2 // WIN32: %B3 = getelementptr inbounds nuw %struct.S1, ptr %A, i32 0, i32 2 // LIN32: %B3 = getelementptr inbounds nuw %struct.S1, ptr %A, i32 0, i32 1 - // CHECK64: %[[V2:.+]] = load i192, ptr %B3, align 8 - // WIN32: %[[V2:.+]] = load i192, ptr %B3, align 8 - // LIN32: %[[V2:.+]] = load i160, ptr %B3, align 4 - // CHECK64: %loadedv = trunc i192 %[[V2]] to i129 - // WIN32: %loadedv = trunc i192 %[[V2]] to i129 - // LIN32: %loadedv = trunc i160 %[[V2]] to i129 + // CHECK64: %1 = load i192, ptr %B3, align 8 + // WIN32: %1 = load i192, ptr %B3, align 8 + // LIN32: %1 = load i160, ptr %B3, align 4 + // CHECK64: %loadedv = trunc i192 %1 to i129 + // WIN32: %loadedv = trunc i192 %1 to i129 + // LIN32: %loadedv = trunc i160 %1 to i129 // CHECK: %conv4 = trunc i129 %loadedv to i32 struct S1 A = {1, 170}; struct S1 B = {1, a}; diff --git a/clang/test/CodeGen/flexible-array-init.c b/clang/test/CodeGen/flexible-array-init.c index 17b520fe830942..15a30c15ac966e 100644 --- a/clang/test/CodeGen/flexible-array-init.c +++ b/clang/test/CodeGen/flexible-array-init.c @@ -14,11 +14,11 @@ struct { int y[]; } b1 = { { 14, 16 } }; // sizeof(c) == 8, so this global should be at least 8 bytes. struct { int x; char c; char y[]; } c = { 1, 2, { 13, 15 } }; -// CHECK: @c ={{.*}} global { i32, i8, [2 x i8], i8 } { i32 1, i8 2, [2 x i8] c"\0D\0F", i8 0 } +// CHECK: @c ={{.*}} global { i32, i8, [2 x i8] } { i32 1, i8 2, [2 x i8] c"\0D\0F" } // sizeof(d) == 8, so this global should be at least 8 bytes. struct __attribute((packed, aligned(4))) { char a; int x; char z[]; } d = { 1, 2, { 13, 15 } }; -// CHECK: @d ={{.*}} <{ i8, i32, [2 x i8], i8 }> <{ i8 1, i32 2, [2 x i8] c"\0D\0F", i8 0 }>, +// CHECK: @d ={{.*}} <{ i8, i32, [2 x i8], i8 }> <{ i8 1, i32 2, [2 x i8] c"\0D\0F", i8 undef }>, // This global needs 9 bytes to hold all the flexible array members. struct __attribute((packed, aligned(4))) { char a; int x; char z[]; } e = { 1, 2, { 13, 15, 17, 19 } }; @@ -55,21 +55,21 @@ struct { int a; union { int b; short x[]; }; int c; int d; } hf = {1, 2, {}, 3}; // First member is the potential flexible array, initialization requires braces. struct { int a; union { short x; int b; }; int c; int d; } i = {1, 2, {}, 3}; -// CHECK: @i = global { i32, { i16, [2 x i8] }, i32, i32 } { i32 1, { i16, [2 x i8] } { i16 2, [2 x i8] zeroinitializer }, i32 0, i32 3 } +// CHECK: @i = global { i32, { i16, [2 x i8] }, i32, i32 } { i32 1, { i16, [2 x i8] } { i16 2, [2 x i8] undef }, i32 0, i32 3 } struct { int a; union { short x[0]; int b; }; int c; int d; } i0 = {1, {}, 2, 3}; -// CHECK: @i0 = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 1, { [0 x i16], [4 x i8] } zeroinitializer, i32 2, i32 3 } +// CHECK: @i0 = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 1, { [0 x i16], [4 x i8] } { [0 x i16] zeroinitializer, [4 x i8] undef }, i32 2, i32 3 } struct { int a; union { short x[1]; int b; }; int c; int d; } i1 = {1, {2}, {}, 3}; -// CHECK: @i1 = global { i32, { [1 x i16], [2 x i8] }, i32, i32 } { i32 1, { [1 x i16], [2 x i8] } { [1 x i16] [i16 2], [2 x i8] zeroinitializer }, i32 0, i32 3 } +// CHECK: @i1 = global { i32, { [1 x i16], [2 x i8] }, i32, i32 } { i32 1, { [1 x i16], [2 x i8] } { [1 x i16] [i16 2], [2 x i8] undef }, i32 0, i32 3 } struct { int a; union { short x[]; int b; }; int c; int d; } i_f = {4, {}, {}, 6}; -// CHECK: @i_f = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 4, { [0 x i16], [4 x i8] } zeroinitializer, i32 0, i32 6 } +// CHECK: @i_f = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 4, { [0 x i16], [4 x i8] } { [0 x i16] zeroinitializer, [4 x i8] undef }, i32 0, i32 6 } // Named initializers; order doesn't matter. struct { int a; union { int b; short x; }; int c; int d; } hn = {.a = 1, .x = 2, .c = 3}; -// CHECK: @hn = global { i32, { i16, [2 x i8] }, i32, i32 } { i32 1, { i16, [2 x i8] } { i16 2, [2 x i8] zeroinitializer }, i32 3, i32 0 } +// CHECK: @hn = global { i32, { i16, [2 x i8] }, i32, i32 } { i32 1, { i16, [2 x i8] } { i16 2, [2 x i8] undef }, i32 3, i32 0 } struct { int a; union { int b; short x[0]; }; int c; int d; } hn0 = {.a = 1, .x = {2}, .c = 3}; -// CHECK: @hn0 = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 1, { [0 x i16], [4 x i8] } zeroinitializer, i32 3, i32 0 } +// CHECK: @hn0 = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 1, { [0 x i16], [4 x i8] } { [0 x i16] zeroinitializer, [4 x i8] undef }, i32 3, i32 0 } struct { int a; union { int b; short x[1]; }; int c; int d; } hn1 = {.a = 1, .x = {2}, .c = 3}; -// CHECK: @hn1 = global { i32, { [1 x i16], [2 x i8] }, i32, i32 } { i32 1, { [1 x i16], [2 x i8] } { [1 x i16] [i16 2], [2 x i8] zeroinitializer }, i32 3, i32 0 } +// CHECK: @hn1 = global { i32, { [1 x i16], [2 x i8] }, i32, i32 } { i32 1, { [1 x i16], [2 x i8] } { [1 x i16] [i16 2], [2 x i8] undef }, i32 3, i32 0 } struct { char a[]; } empty_struct = {}; // CHECK: @empty_struct ={{.*}} global %struct.anon{{.*}} zeroinitializer, align 1 @@ -96,10 +96,10 @@ union { char a[]; } only_in_union0 = {0}; // CHECK: @only_in_union0 = global { [1 x i8] } zeroinitializer, align 1 union { char a[]; int b; } first_in_union = {}; -// CHECK: @first_in_union = global { [0 x i8], [4 x i8] } zeroinitializer, align 4 +// CHECK: @first_in_union = global { [0 x i8], [4 x i8] } { [0 x i8] zeroinitializer, [4 x i8] undef }, align 4 union { char a[]; int b; } first_in_union0 = {0}; -// CHECK: @first_in_union0 = global { [1 x i8], [3 x i8] } zeroinitializer, align 4 +// CHECK: @first_in_union0 = global { [1 x i8], [3 x i8] } { [1 x i8] zeroinitializer, [3 x i8] undef }, align 4 union { char a[]; int b; } first_in_union123 = { {1, 2, 3} }; -// CHECK: @first_in_union123 = global { [3 x i8], i8 } { [3 x i8] c"\01\02\03", i8 0 }, align 4 +// CHECK: @first_in_union123 = global { [3 x i8], i8 } { [3 x i8] c"\01\02\03", i8 undef }, align 4 diff --git a/clang/test/CodeGen/global-init.c b/clang/test/CodeGen/global-init.c index b156466dbaaffc..7f1d675b97c09e 100644 --- a/clang/test/CodeGen/global-init.c +++ b/clang/test/CodeGen/global-init.c @@ -33,7 +33,7 @@ struct ManyFields { int f; }; -// CHECK: global { i32, i32, i32, i8, [3 x i8], i32, i32 } { i32 1, i32 2, i32 0, i8 0, [3 x i8] zeroinitializer, i32 0, i32 0 } +// CHECK: global %struct.ManyFields { i32 1, i32 2, i32 0, i8 0, i32 0, i32 0 } struct ManyFields FewInits = {1, 2}; diff --git a/clang/test/CodeGen/init.c b/clang/test/CodeGen/init.c index 27f427dff8f79e..cbf615bb9ddfea 100644 --- a/clang/test/CodeGen/init.c +++ b/clang/test/CodeGen/init.c @@ -187,6 +187,25 @@ void nonzeroMemsetf64(void) { // CHECK: call void @llvm.memset.p0.i32(ptr {{.*}}, i8 68, i32 56, i1 false) } +void nonzeroPaddedUnionMemset(void) { + union U { char c; int i; }; + union U arr[9] = { 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, }; + // CHECK-LABEL: @nonzeroPaddedUnionMemset( + // CHECK-NOT: store + // CHECK-NOT: memcpy + // CHECK: call void @llvm.memset.p0.i32(ptr {{.*}}, i8 -16, i32 36, i1 false) +} + +void nonzeroNestedMemset(void) { + union U { char c; int i; }; + struct S { union U u; short i; }; + struct S arr[5] = { { {0xF0}, 0xF0F0 }, { {0xF0}, 0xF0F0 }, { {0xF0}, 0xF0F0 }, { {0xF0}, 0xF0F0 }, { {0xF0}, 0xF0F0 }, }; + // CHECK-LABEL: @nonzeroNestedMemset( + // CHECK-NOT: store + // CHECK-NOT: memcpy + // CHECK: call void @llvm.memset.p0.i32(ptr {{.*}}, i8 -16, i32 40, i1 false) +} + // PR9257 struct test11S { int A[10]; diff --git a/clang/test/CodeGen/linux-kernel-struct-union-initializer.c b/clang/test/CodeGen/linux-kernel-struct-union-initializer.c deleted file mode 100644 index dc68cc0f454c8c..00000000000000 --- a/clang/test/CodeGen/linux-kernel-struct-union-initializer.c +++ /dev/null @@ -1,267 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5 -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -std=gnu11 -verify -emit-llvm %s -o - | FileCheck %s -// expected-no-diagnostics - -union U1 { - int x; - char y[16]; -}; - -struct S1 { - int x; - union U1 y; -}; - -union U2 { - int x; - char y[16]; -} __attribute__((__aligned__(32))); - -struct S2 { - int x; - long long y; - char z[8]; -} __attribute__((__aligned__(32))); - -union U1 global_u1 = {}; - -union U1 global_u2 = {3}; - -union U1 global_u2_from_cast = (union U1)3; - -struct S1 global_s1 = {}; - -struct S1 global_s2 = { - .x = 3, -}; - -struct S1 global_s3 = {.x = 3, .y = {.x = 6}}; - -const union U1 global_const_u1 = {4}; -struct S1 global_s3_from_const_u1 = {.y = global_const_u1}; - -union U2 global_u3 = {}; - -struct S2 global_s4 = {}; - -struct S2 global_s5 = {.x = 1}; - - -// Test empty initializer for union. -//. -// CHECK: @global_u1 = global %union.U1 zeroinitializer, align 4 -// CHECK: @global_u2 = global %union.U1 { i32 3, [12 x i8] zeroinitializer }, align 4 -// CHECK: @global_u2_from_cast = global { i32, [12 x i8] } { i32 3, [12 x i8] zeroinitializer }, align 4 -// CHECK: @global_s1 = global %struct.S1 zeroinitializer, align 4 -// CHECK: @global_s2 = global %struct.S1 { i32 3, %union.U1 zeroinitializer }, align 4 -// CHECK: @global_s3 = global %struct.S1 { i32 3, %union.U1 { i32 6, [12 x i8] zeroinitializer } }, align 4 -// CHECK: @global_const_u1 = constant %union.U1 { i32 4, [12 x i8] zeroinitializer }, align 4 -// CHECK: @global_s3_from_const_u1 = global %struct.S1 { i32 0, %union.U1 { i32 4, [12 x i8] zeroinitializer } }, align 4 -// CHECK: @global_u3 = global %union.U2 zeroinitializer, align 32 -// CHECK: @global_s4 = global { i32, [4 x i8], i64, [8 x i8], [8 x i8] } zeroinitializer, align 32 -// CHECK: @global_s5 = global { i32, [4 x i8], i64, [8 x i8], [8 x i8] } { i32 1, [4 x i8] zeroinitializer, i64 0, [8 x i8] zeroinitializer, [8 x i8] zeroinitializer }, align 32 -// CHECK: @test2.a = internal global %union.U1 zeroinitializer, align 4 -// CHECK: @__const.test3.a = private unnamed_addr constant %union.U1 { i32 3, [12 x i8] zeroinitializer }, align 4 -// CHECK: @test4.a = internal global %union.U1 { i32 3, [12 x i8] zeroinitializer }, align 4 -// CHECK: @test6.s = internal global %struct.S1 zeroinitializer, align 4 -// CHECK: @__const.test7.s = private unnamed_addr constant %struct.S1 { i32 3, %union.U1 zeroinitializer }, align 4 -// CHECK: @test8.s = internal global %struct.S1 { i32 3, %union.U1 zeroinitializer }, align 4 -// CHECK: @__const.test9.s = private unnamed_addr constant %struct.S1 { i32 3, %union.U1 { i32 6, [12 x i8] zeroinitializer } }, align 4 -// CHECK: @test10.s = internal global %struct.S1 { i32 3, %union.U1 { i32 6, [12 x i8] zeroinitializer } }, align 4 -// CHECK: @test12.a = internal global %union.U2 zeroinitializer, align 32 -// CHECK: @test14.s = internal global { i32, [4 x i8], i64, [8 x i8], [8 x i8] } zeroinitializer, align 32 -// CHECK: @__const.test15.s = private unnamed_addr constant { i32, [4 x i8], i64, [8 x i8], [8 x i8] } { i32 1, [4 x i8] zeroinitializer, i64 0, [8 x i8] zeroinitializer, [8 x i8] zeroinitializer }, align 32 -// CHECK: @test16.s = internal global { i32, [4 x i8], i64, [8 x i8], [8 x i8] } { i32 1, [4 x i8] zeroinitializer, i64 0, [8 x i8] zeroinitializer, [8 x i8] zeroinitializer }, align 32 -//. -// CHECK-LABEL: define dso_local void @test1( -// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A:%.*]] = alloca [[UNION_U1:%.*]], align 4 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A]], i8 0, i64 16, i1 false) -// CHECK-NEXT: ret void -// -void test1() { - union U1 a = {}; -} - -// Test empty initializer for union. Use static variable. -// CHECK-LABEL: define dso_local void @test2( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret void -// -void test2() { - static union U1 a = {}; -} - -// Test only initializing a small field for union. -// CHECK-LABEL: define dso_local void @test3( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A:%.*]] = alloca [[UNION_U1:%.*]], align 4 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[A]], ptr align 4 @__const.test3.a, i64 16, i1 false) -// CHECK-NEXT: ret void -// -void test3() { - union U1 a = {3}; -} - -// Test only initializing a small field for union. Use static variable. -// CHECK-LABEL: define dso_local void @test4( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret void -// -void test4() { - static union U1 a = {3}; -} - -// Test union in struct. Use empty initializer for the struct. -// CHECK-LABEL: define dso_local void @test5( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[S]], i8 0, i64 20, i1 false) -// CHECK-NEXT: ret void -// -void test5() { - struct S1 s = {}; -} - -// Test union in struct. Use empty initializer for the struct. Use static variable. -// CHECK-LABEL: define dso_local void @test6( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret void -// -void test6() { - static struct S1 s = {}; -} - -// Test union in struct. Initialize other fields of the struct. -// CHECK-LABEL: define dso_local void @test7( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[S]], ptr align 4 @__const.test7.s, i64 20, i1 false) -// CHECK-NEXT: ret void -// -void test7() { - struct S1 s = { - .x = 3, - }; -} - -// Test union in struct. Initialize other fields of the struct. Use static variable. -// CHECK-LABEL: define dso_local void @test8( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret void -// -void test8() { - static struct S1 s = { - .x = 3, - }; -} - -// Test union in struct. Initialize a small field for union. -// CHECK-LABEL: define dso_local void @test9( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[S]], ptr align 4 @__const.test9.s, i64 20, i1 false) -// CHECK-NEXT: ret void -// -void test9() { - struct S1 s = {.x = 3, - .y = { - .x = 6, - }}; -} - -// Test union in struct. Initialize a small field for union. Use static variable. -// CHECK-LABEL: define dso_local void @test10( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret void -// -void test10() { - static struct S1 s = {.x = 3, - .y = { - .x = 6, - }}; -} - -// Test empty initializer for union with padding. -// CHECK-LABEL: define dso_local void @test11( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A:%.*]] = alloca [[UNION_U2:%.*]], align 32 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[A]], i8 0, i64 32, i1 false) -// CHECK-NEXT: ret void -// -void test11() { - union U2 a = {}; -} - -// Test empty initializer for union with padding. Use static variable. -// CHECK-LABEL: define dso_local void @test12( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret void -// -void test12() { - static union U2 a = {}; -} - -// Test empty initializer for struct with padding. -// CHECK-LABEL: define dso_local void @test13( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S2:%.*]], align 32 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[S]], i8 0, i64 32, i1 false) -// CHECK-NEXT: ret void -// -void test13() { - struct S2 s = {}; -} - -// Test empty initializer for struct with padding. Use static variable. -// CHECK-LABEL: define dso_local void @test14( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret void -// -void test14() { - static struct S2 s = {}; -} - -// Test partial initialization for struct with padding. -// CHECK-LABEL: define dso_local void @test15( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S2:%.*]], align 32 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 32 [[S]], ptr align 32 @__const.test15.s, i64 32, i1 false) -// CHECK-NEXT: ret void -// -void test15() { - struct S2 s = {.x = 1}; -} - -// Test partial initialization for struct with padding. Use static variable. -// CHECK-LABEL: define dso_local void @test16( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret void -// -void test16() { - static struct S2 s = {.x = 1}; -} -//. -// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } -// CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } -// CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -//. -// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -//. diff --git a/clang/test/CodeGen/linux-kernel-struct-union-initializer2.c b/clang/test/CodeGen/linux-kernel-struct-union-initializer2.c deleted file mode 100644 index 0a1ad3a369eacc..00000000000000 --- a/clang/test/CodeGen/linux-kernel-struct-union-initializer2.c +++ /dev/null @@ -1,140 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -std=gnu11 -verify -emit-llvm %s -o - | FileCheck %s -// expected-no-diagnostics - -union U1 { - int x; - char y[5]; -}; - -struct S1 { - int x; - long long y; -}; - -struct S2 { - unsigned char b1 : 3; // 1st 3 bits (in 1st byte) are b1 - unsigned char : 2; // next 2 bits (in 1st byte) are blocked out as unused - unsigned char b2 : 6; // 6 bits for b2 - doesn't fit into the 1st byte => starts a 2nd - unsigned char b3 : 2; // 2 bits for b3 - next (and final) bits in the 2nd byte - int i; -}; - -struct S3 { - int x; -} __attribute__((__aligned__(8))); - -struct S4 { - int a; - union U1 b; -}; - -// Test non-const initializer for union with padding. -// CHECK-LABEL: define dso_local void @test1( -// CHECK-SAME: i32 noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[A:%.*]] = alloca [[UNION_U1:%.*]], align 4 -// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4 -// CHECK-NEXT: store i32 [[TMP0]], ptr [[A]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 4 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 4, i1 false) -// CHECK-NEXT: ret void -// -void test1(int x) { - union U1 a = {x}; -} - -// Test non-const initializer for struct with padding. -// CHECK-LABEL: define dso_local void @test2( -// CHECK-SAME: i64 noundef [[Y:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[Y_ADDR:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 -// CHECK-NEXT: store i64 [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[S]], i32 0, i32 0 -// CHECK-NEXT: store i32 0, ptr [[X]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i64 4 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 4, i1 false) -// CHECK-NEXT: [[Y1:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[S]], i32 0, i32 1 -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[Y_ADDR]], align 8 -// CHECK-NEXT: store i64 [[TMP1]], ptr [[Y1]], align 8 -// CHECK-NEXT: ret void -// -void test2(long long y) { - struct S1 s = {.y = y}; -} - -// Test non-const initializer for struct with padding and bit fields. -// CHECK-LABEL: define dso_local void @test3( -// CHECK-SAME: i8 noundef zeroext [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S2:%.*]], align 4 -// CHECK-NEXT: store i8 [[B]], ptr [[B_ADDR]], align 1 -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[B_ADDR]], align 1 -// CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i16 -// CHECK-NEXT: [[BF_LOAD:%.*]] = load i16, ptr [[S]], align 4 -// CHECK-NEXT: [[BF_VALUE:%.*]] = and i16 [[TMP1]], 7 -// CHECK-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -8 -// CHECK-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[BF_VALUE]] -// CHECK-NEXT: store i16 [[BF_SET]], ptr [[S]], align 4 -// CHECK-NEXT: [[BF_LOAD1:%.*]] = load i16, ptr [[S]], align 4 -// CHECK-NEXT: [[BF_CLEAR2:%.*]] = and i16 [[BF_LOAD1]], -16129 -// CHECK-NEXT: [[BF_SET3:%.*]] = or i16 [[BF_CLEAR2]], 0 -// CHECK-NEXT: store i16 [[BF_SET3]], ptr [[S]], align 4 -// CHECK-NEXT: [[BF_LOAD4:%.*]] = load i16, ptr [[S]], align 4 -// CHECK-NEXT: [[BF_CLEAR5:%.*]] = and i16 [[BF_LOAD4]], 16383 -// CHECK-NEXT: [[BF_SET6:%.*]] = or i16 [[BF_CLEAR5]], 0 -// CHECK-NEXT: store i16 [[BF_SET6]], ptr [[S]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i64 2 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[TMP2]], i8 0, i64 2, i1 false) -// CHECK-NEXT: [[I:%.*]] = getelementptr inbounds nuw [[STRUCT_S2]], ptr [[S]], i32 0, i32 1 -// CHECK-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK-NEXT: ret void -// -void test3(unsigned char b) { - struct S2 s = {.b1 = b}; -} - -// Test non-const initializer for struct with padding at the end of the struct. -// CHECK-LABEL: define dso_local void @test4( -// CHECK-SAME: i32 noundef [[X:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S3:%.*]], align 8 -// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 -// CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_S3]], ptr [[S]], i32 0, i32 0 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4 -// CHECK-NEXT: store i32 [[TMP0]], ptr [[X1]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i64 4 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 4, i1 false) -// CHECK-NEXT: ret void -// -void test4(int x) { - struct S3 s = {x}; -} - -// Test non-const initializer for union in struct. -// CHECK-LABEL: define dso_local void @test5( -// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S4:%.*]], align 4 -// CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_S4]], ptr [[S]], i32 0, i32 0 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK-NEXT: store i32 [[TMP0]], ptr [[A1]], align 4 -// CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_S4]], ptr [[S]], i32 0, i32 1 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK-NEXT: store i32 [[TMP1]], ptr [[B2]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[B2]], i64 4 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP2]], i8 0, i64 4, i1 false) -// CHECK-NEXT: ret void -// -void test5(int a, int b) { - struct S4 s = {a, {b}}; -} diff --git a/clang/test/CodeGen/mingw-long-double.c b/clang/test/CodeGen/mingw-long-double.c index 0fc8f015096827..4be97526f96319 100644 --- a/clang/test/CodeGen/mingw-long-double.c +++ b/clang/test/CodeGen/mingw-long-double.c @@ -11,9 +11,12 @@ struct { char c; long double ldb; } agggregate_LD = {}; -// GNU32: @agggregate_LD = dso_local global { i8, [3 x i8], x86_fp80 } zeroinitializer, align 4 -// GNU64: @agggregate_LD = dso_local global { i8, [15 x i8], x86_fp80 } zeroinitializer, align 16 -// MSC64: @agggregate_LD = dso_local global { i8, [7 x i8], double } zeroinitializer, align 8 +// GNU32: %struct.anon = type { i8, x86_fp80 } +// GNU32: @agggregate_LD = dso_local global %struct.anon zeroinitializer, align 4 +// GNU64: %struct.anon = type { i8, x86_fp80 } +// GNU64: @agggregate_LD = dso_local global %struct.anon zeroinitializer, align 16 +// MSC64: %struct.anon = type { i8, double } +// MSC64: @agggregate_LD = dso_local global %struct.anon zeroinitializer, align 8 long double dataLD = 1.0L; // GNU32: @dataLD = dso_local global x86_fp80 0xK3FFF8000000000000000, align 4 diff --git a/clang/test/CodeGen/mms-bitfields.c b/clang/test/CodeGen/mms-bitfields.c index 2ccce326c7131d..49c5c1c3e7d40d 100644 --- a/clang/test/CodeGen/mms-bitfields.c +++ b/clang/test/CodeGen/mms-bitfields.c @@ -61,5 +61,5 @@ union HEADER { struct Inner variable = { 1,0,1, 21 }; union HEADER hdr = {{1,2,3,4}}; -// CHECK: @variable ={{.*}} global { i8, [3 x i8], i8, i8, i8, i8 } { i8 5, [3 x i8] zeroinitializer, i8 21, i8 0, i8 0, i8 0 }, align 1 -// CHECK: @hdr ={{.*}} global { { i8, i8, [2 x i8], i8, i8, i8, i8, i8, [3 x i8] } } { { i8, i8, [2 x i8], i8, i8, i8, i8, i8, [3 x i8] } { i8 8, i8 0, [2 x i8] zeroinitializer, i8 2, i8 0, i8 0, i8 3, i8 4, [3 x i8] zeroinitializer } }, align 1 +// CHECK: @variable ={{.*}} global { i8, [3 x i8], i8, i8, i8, i8 } { i8 5, [3 x i8] undef, i8 21, i8 0, i8 0, i8 0 }, align 1 +// CHECK: @hdr ={{.*}} global { { i8, i8, [2 x i8], i8, i8, i8, i8, i8, [3 x i8] } } { { i8, i8, [2 x i8], i8, i8, i8, i8, i8, [3 x i8] } { i8 8, i8 0, [2 x i8] undef, i8 2, i8 0, i8 0, i8 3, i8 4, [3 x i8] undef } }, align 1 diff --git a/clang/test/CodeGen/union-init2.c b/clang/test/CodeGen/union-init2.c index ee35e78a4f3010..048ff00517b4e8 100644 --- a/clang/test/CodeGen/union-init2.c +++ b/clang/test/CodeGen/union-init2.c @@ -2,11 +2,11 @@ // RUN: %clang_cc1 -x c++ %s -emit-llvm -triple x86_64-linux-gnu -o - | FileCheck %s --check-prefixes=CHECK-CXX // Make sure we generate something sane instead of a ptrtoint -// CHECK: @r, [4 x i8] zeroinitializer +// CHECK: @r, [4 x i8] undef union x {long long b;union x* a;} r = {.a = &r}; -// CHECK: global { [3 x i8], [5 x i8] } zeroinitializer +// CHECK: global { [3 x i8], [5 x i8] } { [3 x i8] zeroinitializer, [5 x i8] undef } union z { char a[3]; long long b; diff --git a/clang/test/CodeGen/windows-swiftcall.c b/clang/test/CodeGen/windows-swiftcall.c index 41569c2606622f..bc7832d9d3ac28 100644 --- a/clang/test/CodeGen/windows-swiftcall.c +++ b/clang/test/CodeGen/windows-swiftcall.c @@ -5,6 +5,8 @@ #define ERROR __attribute__((swift_error_result)) #define CONTEXT __attribute__((swift_context)) +// CHECK: [[STRUCT2_RESULT:@.*]] = private {{.*}} constant [[STRUCT2_TYPE:%.*]] { i32 0, i8 0, i8 undef, i8 0, i32 0, i32 0 } + /*****************************************************************************/ /****************************** PARAMETER ABIS *******************************/ /*****************************************************************************/ @@ -140,8 +142,8 @@ typedef struct { } struct_2; TEST(struct_2); // CHECK-LABEL: define dso_local swiftcc { i64, i64 } @return_struct_2() {{.*}}{ -// CHECK: [[RET:%.*]] = alloca [[STRUCT2:%.*]], align 4 -// CHECK: call void @llvm.memset +// CHECK: [[RET:%.*]] = alloca [[STRUCT2_TYPE]], align 4 +// CHECK: call void @llvm.memcpy{{.*}}({{.*}}[[RET]], {{.*}}[[STRUCT2_RESULT]] // CHECK: [[GEP0:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[RET]], i32 0, i32 0 // CHECK: [[T0:%.*]] = load i64, ptr [[GEP0]], align 4 // CHECK: [[GEP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[RET]], i32 0, i32 1 @@ -151,7 +153,7 @@ TEST(struct_2); // CHECK: ret { i64, i64 } [[R1]] // CHECK: } // CHECK-LABEL: define dso_local swiftcc void @take_struct_2(i64 %0, i64 %1) {{.*}}{ -// CHECK: [[V:%.*]] = alloca [[STRUCT2]], align 4 +// CHECK: [[V:%.*]] = alloca [[STRUCT:%.*]], align 4 // CHECK: [[GEP0:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[V]], i32 0, i32 0 // CHECK: store i64 %0, ptr [[GEP0]], align 4 // CHECK: [[GEP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[V]], i32 0, i32 1 @@ -159,7 +161,7 @@ TEST(struct_2); // CHECK: ret void // CHECK: } // CHECK-LABEL: define dso_local void @test_struct_2() {{.*}} { -// CHECK: [[TMP:%.*]] = alloca [[STRUCT2]], align 4 +// CHECK: [[TMP:%.*]] = alloca [[STRUCT2_TYPE]], align 4 // CHECK: [[CALL:%.*]] = call swiftcc { i64, i64 } @return_struct_2() // CHECK: [[GEP:%.*]] = getelementptr inbounds nuw {{.*}} [[TMP]], i32 0, i32 0 // CHECK: [[T0:%.*]] = extractvalue { i64, i64 } [[CALL]], 0 @@ -232,7 +234,7 @@ typedef union { TEST(union_het_fp) // CHECK-LABEL: define dso_local swiftcc i64 @return_union_het_fp() // CHECK: [[RET:%.*]] = alloca [[UNION:%.*]], align 8 -// CHECK: call void @llvm.memset{{.*}}(ptr align {{[0-9]+}} [[RET]] +// CHECK: call void @llvm.memcpy{{.*}}(ptr align {{[0-9]+}} [[RET]] // CHECK: [[GEP:%.*]] = getelementptr inbounds nuw { i64 }, ptr [[RET]], i32 0, i32 0 // CHECK: [[R0:%.*]] = load i64, ptr [[GEP]], align 8 // CHECK: ret i64 [[R0]] diff --git a/clang/test/CodeGenObjC/designated-initializers.m b/clang/test/CodeGenObjC/designated-initializers.m index ce58f6c367338e..a67f82e1afbea8 100644 --- a/clang/test/CodeGenObjC/designated-initializers.m +++ b/clang/test/CodeGenObjC/designated-initializers.m @@ -4,4 +4,4 @@ char L[3]; int M; } overwrite_string[] = { { { @encode(void**) }, 1 }, [0].L[1] = 'x'}; -// CHECK: [3 x i8] c"^xv", i8 0, i32 1 +// CHECK: [3 x i8] c"^xv", i32 1 From 81dac7d613c945cf56925162081eaf4a998cea8a Mon Sep 17 00:00:00 2001 From: harishch4 Date: Wed, 25 Sep 2024 09:29:44 +0530 Subject: [PATCH 06/65] [Flang][OpenMP] Add Semantic Checks for Atomic Capture Construct (#108516) This PR adds semantic checks to ensure the atomic capture construct conforms to one of the valid forms: [capture-stmt, update-stmt], [capture-stmt, write-stmt] or [update-stmt, capture-stmt]. Functions checkForSymbolMatch and checkForSingleVariableOnRHS are moved from flang/lib/Lower/DirectivesCommon.h to flang/Semantics/tools.h for reuse. --------- Co-authored-by: Kiran Chandramohan --- flang/include/flang/Semantics/tools.h | 29 +++++++++ flang/lib/Lower/DirectivesCommon.h | 34 +--------- flang/lib/Semantics/check-omp-structure.cpp | 64 +++++++++++++++++-- flang/lib/Semantics/check-omp-structure.h | 1 + .../OpenMP/omp-atomic-assignment-stmt.f90 | 64 +++++++++++++++++++ .../Semantics/OpenMP/requires-atomic01.f90 | 6 +- .../Semantics/OpenMP/requires-atomic02.f90 | 6 +- 7 files changed, 161 insertions(+), 43 deletions(-) diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h index 15c02ecc0058cc..96d4dbb2acaa11 100644 --- a/flang/include/flang/Semantics/tools.h +++ b/flang/include/flang/Semantics/tools.h @@ -736,5 +736,34 @@ std::string GetCommonBlockObjectName(const Symbol &, bool underscoring); // Check for ambiguous USE associations bool HadUseError(SemanticsContext &, SourceName at, const Symbol *); +/// Checks if the assignment statement has a single variable on the RHS. +inline bool checkForSingleVariableOnRHS( + const Fortran::parser::AssignmentStmt &assignmentStmt) { + const Fortran::parser::Expr &expr{ + std::get(assignmentStmt.t)}; + const Fortran::common::Indirection *designator = + std::get_if>( + &expr.u); + return designator != nullptr; +} + +/// Checks if the symbol on the LHS of the assignment statement is present in +/// the RHS expression. +inline bool checkForSymbolMatch( + const Fortran::parser::AssignmentStmt &assignmentStmt) { + const auto &var{std::get(assignmentStmt.t)}; + const auto &expr{std::get(assignmentStmt.t)}; + const auto *e{Fortran::semantics::GetExpr(expr)}; + const auto *v{Fortran::semantics::GetExpr(var)}; + auto varSyms{Fortran::evaluate::GetSymbolVector(*v)}; + const Fortran::semantics::Symbol &varSymbol{*varSyms.front()}; + for (const Fortran::semantics::Symbol &symbol : + Fortran::evaluate::GetSymbolVector(*e)) { + if (varSymbol == symbol) { + return true; + } + } + return false; +} } // namespace Fortran::semantics #endif // FORTRAN_SEMANTICS_TOOLS_H_ diff --git a/flang/lib/Lower/DirectivesCommon.h b/flang/lib/Lower/DirectivesCommon.h index d2060e77ce5305..a32f0b287e049a 100644 --- a/flang/lib/Lower/DirectivesCommon.h +++ b/flang/lib/Lower/DirectivesCommon.h @@ -74,34 +74,6 @@ struct AddrAndBoundsInfo { } }; -/// Checks if the assignment statement has a single variable on the RHS. -static inline bool checkForSingleVariableOnRHS( - const Fortran::parser::AssignmentStmt &assignmentStmt) { - const Fortran::parser::Expr &expr{ - std::get(assignmentStmt.t)}; - const Fortran::common::Indirection *designator = - std::get_if>( - &expr.u); - return designator != nullptr; -} - -/// Checks if the symbol on the LHS of the assignment statement is present in -/// the RHS expression. -static inline bool -checkForSymbolMatch(const Fortran::parser::AssignmentStmt &assignmentStmt) { - const auto &var{std::get(assignmentStmt.t)}; - const auto &expr{std::get(assignmentStmt.t)}; - const auto *e{Fortran::semantics::GetExpr(expr)}; - const auto *v{Fortran::semantics::GetExpr(var)}; - auto varSyms{Fortran::evaluate::GetSymbolVector(*v)}; - const Fortran::semantics::Symbol &varSymbol{*varSyms.front()}; - for (const Fortran::semantics::Symbol &symbol : - Fortran::evaluate::GetSymbolVector(*e)) - if (varSymbol == symbol) - return true; - return false; -} - /// Populates \p hint and \p memoryOrder with appropriate clause information /// if present on atomic construct. static inline void genOmpAtomicHintAndMemoryOrderClauses( @@ -537,7 +509,7 @@ void genOmpAccAtomicCapture(Fortran::lower::AbstractConverter &converter, stmt2LHSArg = fir::getBase(converter.genExprAddr(assign2.lhs, stmtCtx)); // Operation specific RHS evaluations - if (checkForSingleVariableOnRHS(stmt1)) { + if (Fortran::semantics::checkForSingleVariableOnRHS(stmt1)) { // Atomic capture construct is of the form [capture-stmt, update-stmt] or // of the form [capture-stmt, write-stmt] stmt1RHSArg = fir::getBase(converter.genExprAddr(assign1.rhs, stmtCtx)); @@ -573,8 +545,8 @@ void genOmpAccAtomicCapture(Fortran::lower::AbstractConverter &converter, firOpBuilder.createBlock(&(atomicCaptureOp->getRegion(0))); mlir::Block &block = atomicCaptureOp->getRegion(0).back(); firOpBuilder.setInsertionPointToStart(&block); - if (checkForSingleVariableOnRHS(stmt1)) { - if (checkForSymbolMatch(stmt2)) { + if (Fortran::semantics::checkForSingleVariableOnRHS(stmt1)) { + if (Fortran::semantics::checkForSymbolMatch(stmt2)) { // Atomic capture construct is of the form [capture-stmt, update-stmt] const Fortran::semantics::SomeExpr &fromExpr = *Fortran::semantics::GetExpr(stmt1Expr); diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 643b713b32e29d..dfc3f3290a81be 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -1977,6 +1977,58 @@ void OmpStructureChecker::CheckAtomicUpdateStmt( ErrIfAllocatableVariable(var); } +// TODO: Allow cond-update-stmt once compare clause is supported. +void OmpStructureChecker::CheckAtomicCaptureConstruct( + const parser::OmpAtomicCapture &atomicCaptureConstruct) { + const Fortran::parser::AssignmentStmt &stmt1 = + std::get( + atomicCaptureConstruct.t) + .v.statement; + const auto &stmt1Var{std::get(stmt1.t)}; + const auto &stmt1Expr{std::get(stmt1.t)}; + + const Fortran::parser::AssignmentStmt &stmt2 = + std::get( + atomicCaptureConstruct.t) + .v.statement; + const auto &stmt2Var{std::get(stmt2.t)}; + const auto &stmt2Expr{std::get(stmt2.t)}; + + if (Fortran::semantics::checkForSingleVariableOnRHS(stmt1)) { + CheckAtomicCaptureStmt(stmt1); + if (Fortran::semantics::checkForSymbolMatch(stmt2)) { + // ATOMIC CAPTURE construct is of the form [capture-stmt, update-stmt] + CheckAtomicUpdateStmt(stmt2); + } else { + // ATOMIC CAPTURE construct is of the form [capture-stmt, write-stmt] + CheckAtomicWriteStmt(stmt2); + } + auto *v{stmt2Var.typedExpr.get()}; + auto *e{stmt1Expr.typedExpr.get()}; + if (v && e && !(v->v == e->v)) { + context_.Say(stmt1Expr.source, + "Captured variable/array element/derived-type component %s expected to be assigned in the second statement of ATOMIC CAPTURE construct"_err_en_US, + stmt1Expr.source); + } + } else if (Fortran::semantics::checkForSymbolMatch(stmt1) && + Fortran::semantics::checkForSingleVariableOnRHS(stmt2)) { + // ATOMIC CAPTURE construct is of the form [update-stmt, capture-stmt] + CheckAtomicUpdateStmt(stmt1); + CheckAtomicCaptureStmt(stmt2); + // Variable updated in stmt1 should be captured in stmt2 + auto *v{stmt1Var.typedExpr.get()}; + auto *e{stmt2Expr.typedExpr.get()}; + if (v && e && !(v->v == e->v)) { + context_.Say(stmt1Var.GetSource(), + "Updated variable/array element/derived-type component %s expected to be captured in the second statement of ATOMIC CAPTURE construct"_err_en_US, + stmt1Var.GetSource()); + } + } else { + context_.Say(stmt1Expr.source, + "Invalid ATOMIC CAPTURE construct statements. Expected one of [update-stmt, capture-stmt], [capture-stmt, update-stmt], or [capture-stmt, write-stmt]"_err_en_US); + } +} + void OmpStructureChecker::CheckAtomicMemoryOrderClause( const parser::OmpAtomicClauseList *leftHandClauseList, const parser::OmpAtomicClauseList *rightHandClauseList) { @@ -2060,15 +2112,15 @@ void OmpStructureChecker::Enter(const parser::OpenMPAtomicConstruct &x) { atomicWrite.t) .statement); }, - [&](const auto &atomicConstruct) { - const auto &dir{std::get(atomicConstruct.t)}; + [&](const parser::OmpAtomicCapture &atomicCapture) { + const auto &dir{std::get(atomicCapture.t)}; PushContextAndClauseSets( dir.source, llvm::omp::Directive::OMPD_atomic); - CheckAtomicMemoryOrderClause(&std::get<0>(atomicConstruct.t), - &std::get<2>(atomicConstruct.t)); + CheckAtomicMemoryOrderClause( + &std::get<0>(atomicCapture.t), &std::get<2>(atomicCapture.t)); CheckHintClause( - &std::get<0>(atomicConstruct.t), - &std::get<2>(atomicConstruct.t)); + &std::get<0>(atomicCapture.t), &std::get<2>(atomicCapture.t)); + CheckAtomicCaptureConstruct(atomicCapture); }, }, x.u); diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 2cc1a78068f540..8bfd4d594b028e 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -193,6 +193,7 @@ class OmpStructureChecker void CheckAtomicUpdateStmt(const parser::AssignmentStmt &); void CheckAtomicCaptureStmt(const parser::AssignmentStmt &); void CheckAtomicWriteStmt(const parser::AssignmentStmt &); + void CheckAtomicCaptureConstruct(const parser::OmpAtomicCapture &); void CheckAtomicConstructStructure(const parser::OpenMPAtomicConstruct &); void CheckDistLinear(const parser::OpenMPLoopConstruct &x); void CheckSIMDNest(const parser::OpenMPConstruct &x); diff --git a/flang/test/Semantics/OpenMP/omp-atomic-assignment-stmt.f90 b/flang/test/Semantics/OpenMP/omp-atomic-assignment-stmt.f90 index a346056dee383b..0d4da5485af046 100644 --- a/flang/test/Semantics/OpenMP/omp-atomic-assignment-stmt.f90 +++ b/flang/test/Semantics/OpenMP/omp-atomic-assignment-stmt.f90 @@ -84,4 +84,68 @@ program sample !$omp atomic write !ERROR: Expected scalar variable on the LHS of atomic assignment statement a = x + + !$omp atomic capture + v = x + x = x + 1 + !$omp end atomic + + !$omp atomic release capture + v = x + !ERROR: Atomic update statement should be of form `x = x operator expr` OR `x = expr operator x` + x = b + (x*1) + !$omp end atomic + + !$omp atomic capture hint(0) + v = x + x = 1 + !$omp end atomic + + !$omp atomic capture + !ERROR: Captured variable/array element/derived-type component x expected to be assigned in the second statement of ATOMIC CAPTURE construct + v = x + b = b + 1 + !$omp end atomic + + !$omp atomic capture + !ERROR: Captured variable/array element/derived-type component x expected to be assigned in the second statement of ATOMIC CAPTURE construct + v = x + b = 10 + !$omp end atomic + + !$omp atomic capture + !ERROR: Updated variable/array element/derived-type component x expected to be captured in the second statement of ATOMIC CAPTURE construct + x = x + 10 + v = b + !$omp end atomic + + !$omp atomic capture + !ERROR: Invalid ATOMIC CAPTURE construct statements. Expected one of [update-stmt, capture-stmt], [capture-stmt, update-stmt], or [capture-stmt, write-stmt] + v = 1 + x = 4 + !$omp end atomic + + !$omp atomic capture + !ERROR: Captured variable/array element/derived-type component z%y expected to be assigned in the second statement of ATOMIC CAPTURE construct + x = z%y + z%m = z%m + 1.0 + !$omp end atomic + + !$omp atomic capture + !ERROR: Updated variable/array element/derived-type component z%m expected to be captured in the second statement of ATOMIC CAPTURE construct + z%m = z%m + 1.0 + x = z%y + !$omp end atomic + + !$omp atomic capture + !ERROR: Captured variable/array element/derived-type component y(2) expected to be assigned in the second statement of ATOMIC CAPTURE construct + x = y(2) + y(1) = y(1) + 1 + !$omp end atomic + + !$omp atomic capture + !ERROR: Updated variable/array element/derived-type component y(1) expected to be captured in the second statement of ATOMIC CAPTURE construct + y(1) = y(1) + 1 + x = y(2) + !$omp end atomic end program diff --git a/flang/test/Semantics/OpenMP/requires-atomic01.f90 b/flang/test/Semantics/OpenMP/requires-atomic01.f90 index b39c9cdcc0bb33..cb7b1bc1ac52ab 100644 --- a/flang/test/Semantics/OpenMP/requires-atomic01.f90 +++ b/flang/test/Semantics/OpenMP/requires-atomic01.f90 @@ -88,7 +88,7 @@ program requires ! CHECK: OmpMemoryOrderClause -> OmpClause -> SeqCst !$omp atomic capture i = j - i = j + j = j + 1 !$omp end atomic ! CHECK-LABEL: OpenMPAtomicConstruct -> OmpAtomicCapture @@ -96,7 +96,7 @@ program requires ! CHECK: OmpMemoryOrderClause -> OmpClause -> Relaxed !$omp atomic relaxed capture i = j - i = j + j = j + 1 !$omp end atomic ! CHECK-LABEL: OpenMPAtomicConstruct -> OmpAtomicCapture @@ -104,6 +104,6 @@ program requires ! CHECK: OmpMemoryOrderClause -> OmpClause -> Relaxed !$omp atomic capture relaxed i = j - i = j + j = j + 1 !$omp end atomic end program requires diff --git a/flang/test/Semantics/OpenMP/requires-atomic02.f90 b/flang/test/Semantics/OpenMP/requires-atomic02.f90 index 3af83970e7927a..5a4249794f7b50 100644 --- a/flang/test/Semantics/OpenMP/requires-atomic02.f90 +++ b/flang/test/Semantics/OpenMP/requires-atomic02.f90 @@ -88,7 +88,7 @@ program requires ! CHECK: OmpMemoryOrderClause -> OmpClause -> AcqRel !$omp atomic capture i = j - i = j + j = j + 1 !$omp end atomic ! CHECK-LABEL: OpenMPAtomicConstruct -> OmpAtomicCapture @@ -96,7 +96,7 @@ program requires ! CHECK: OmpMemoryOrderClause -> OmpClause -> Relaxed !$omp atomic relaxed capture i = j - i = j + j = j + 1 !$omp end atomic ! CHECK-LABEL: OpenMPAtomicConstruct -> OmpAtomicCapture @@ -104,6 +104,6 @@ program requires ! CHECK: OmpMemoryOrderClause -> OmpClause -> Relaxed !$omp atomic capture relaxed i = j - i = j + j = j + 1 !$omp end atomic end program requires From cfe1adc42a5e6a81741f868aff2cf06cec3c24c1 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Tue, 24 Sep 2024 21:06:13 -0700 Subject: [PATCH 07/65] Reland: [DirectX] Add atan2 intrinsic and expand for DXIL backend (p1) (#109878) This change is part of this proposal: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294 This preliminary work adds the intrinsic to llvm and expands using atan intrinsic for DXIL backend, since DXIL has no atan2 op. Part 1 for Implement the atan2 HLSL Function #70096. (reland #108865 reverted in #109842 due to doc build break) --- llvm/docs/LangRef.rst | 37 ++++++++ llvm/include/llvm/IR/Intrinsics.td | 1 + .../Target/DirectX/DXILIntrinsicExpansion.cpp | 52 +++++++++++ llvm/test/CodeGen/DirectX/atan2.ll | 87 +++++++++++++++++++ llvm/test/CodeGen/DirectX/atan2_error.ll | 11 +++ 5 files changed, 188 insertions(+) create mode 100644 llvm/test/CodeGen/DirectX/atan2.ll create mode 100644 llvm/test/CodeGen/DirectX/atan2_error.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 91c3e60bb0acb1..3b905c2788128c 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15583,6 +15583,43 @@ trapping or setting ``errno``. When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. +'``llvm.atan2.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.atan2`` on any +floating-point or vector of floating-point type. Not all targets support +all types however. + +:: + + declare float @llvm.atan2.f32(float %X, float %Y) + declare double @llvm.atan2.f64(double %X, double %Y) + declare x86_fp80 @llvm.atan2.f80(x86_fp80 %X, x86_fp80 %Y) + declare fp128 @llvm.atan2.f128(fp128 %X, fp128 %Y) + declare ppc_fp128 @llvm.atan2.ppcf128(ppc_fp128 %X, ppc_fp128 %Y) + +Overview: +""""""""" + +The '``llvm.atan2.*``' intrinsics return the arctangent of the operand. + +Arguments: +"""""""""" + +The arguments and return value are floating-point numbers of the same type. + +Semantics: +"""""""""" + +Return the same value as a corresponding libm '``atan2``' function but without +trapping or setting ``errno``. + +When specified with the fast-math-flag 'afn', the result may be approximated +using a less accurate calculation. + '``llvm.sinh.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 0a74a217a5f010..48d57907e6d0bc 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1016,6 +1016,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { def int_asin : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_acos : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_atan : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_atan2 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>; def int_sin : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_cos : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_tan : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index dd73b895b14d37..926cbe97f24fda 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -36,6 +36,7 @@ using namespace llvm; static bool isIntrinsicExpansion(Function &F) { switch (F.getIntrinsicID()) { case Intrinsic::abs: + case Intrinsic::atan2: case Intrinsic::exp: case Intrinsic::log: case Intrinsic::log10: @@ -307,6 +308,54 @@ static Value *expandNormalizeIntrinsic(CallInst *Orig) { return Builder.CreateFMul(X, MultiplicandVec); } +static Value *expandAtan2Intrinsic(CallInst *Orig) { + Value *Y = Orig->getOperand(0); + Value *X = Orig->getOperand(1); + Type *Ty = X->getType(); + IRBuilder<> Builder(Orig); + Builder.setFastMathFlags(Orig->getFastMathFlags()); + + Value *Tan = Builder.CreateFDiv(Y, X); + + CallInst *Atan = + Builder.CreateIntrinsic(Ty, Intrinsic::atan, {Tan}, nullptr, "Elt.Atan"); + Atan->setTailCall(Orig->isTailCall()); + Atan->setAttributes(Orig->getAttributes()); + + // Modify atan result based on https://en.wikipedia.org/wiki/Atan2. + Constant *Pi = ConstantFP::get(Ty, llvm::numbers::pi); + Constant *HalfPi = ConstantFP::get(Ty, llvm::numbers::pi / 2); + Constant *NegHalfPi = ConstantFP::get(Ty, -llvm::numbers::pi / 2); + Constant *Zero = ConstantFP::get(Ty, 0); + Value *AtanAddPi = Builder.CreateFAdd(Atan, Pi); + Value *AtanSubPi = Builder.CreateFSub(Atan, Pi); + + // x > 0 -> atan. + Value *Result = Atan; + Value *XLt0 = Builder.CreateFCmpOLT(X, Zero); + Value *XEq0 = Builder.CreateFCmpOEQ(X, Zero); + Value *YGe0 = Builder.CreateFCmpOGE(Y, Zero); + Value *YLt0 = Builder.CreateFCmpOLT(Y, Zero); + + // x < 0, y >= 0 -> atan + pi. + Value *XLt0AndYGe0 = Builder.CreateAnd(XLt0, YGe0); + Result = Builder.CreateSelect(XLt0AndYGe0, AtanAddPi, Result); + + // x < 0, y < 0 -> atan - pi. + Value *XLt0AndYLt0 = Builder.CreateAnd(XLt0, YLt0); + Result = Builder.CreateSelect(XLt0AndYLt0, AtanSubPi, Result); + + // x == 0, y < 0 -> -pi/2 + Value *XEq0AndYLt0 = Builder.CreateAnd(XEq0, YLt0); + Result = Builder.CreateSelect(XEq0AndYLt0, NegHalfPi, Result); + + // x == 0, y > 0 -> pi/2 + Value *XEq0AndYGe0 = Builder.CreateAnd(XEq0, YGe0); + Result = Builder.CreateSelect(XEq0AndYGe0, HalfPi, Result); + + return Result; +} + static Value *expandPowIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); @@ -418,6 +467,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { case Intrinsic::abs: Result = expandAbs(Orig); break; + case Intrinsic::atan2: + Result = expandAtan2Intrinsic(Orig); + break; case Intrinsic::exp: Result = expandExpIntrinsic(Orig); break; diff --git a/llvm/test/CodeGen/DirectX/atan2.ll b/llvm/test/CodeGen/DirectX/atan2.ll new file mode 100644 index 00000000000000..9d86f87f3ed50e --- /dev/null +++ b/llvm/test/CodeGen/DirectX/atan2.ll @@ -0,0 +1,87 @@ +; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK +; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK + +; Make sure correct dxil expansions for atan2 are generated for float and half. + +define noundef float @atan2_float(float noundef %y, float noundef %x) { +entry: +; CHECK: [[DIV:%.+]] = fdiv float %y, %x +; EXPCHECK: [[ATAN:%.+]] = call float @llvm.atan.f32(float [[DIV]]) +; DOPCHECK: [[ATAN:%.+]] = call float @dx.op.unary.f32(i32 17, float [[DIV]]) +; CHECK-DAG: [[ADD_PI:%.+]] = fadd float [[ATAN]], 0x400921FB60000000 +; CHECK-DAG: [[SUB_PI:%.+]] = fsub float [[ATAN]], 0x400921FB60000000 +; CHECK-DAG: [[X_LT_0:%.+]] = fcmp olt float %x, 0.000000e+00 +; CHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq float %x, 0.000000e+00 +; CHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge float %y, 0.000000e+00 +; CHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt float %y, 0.000000e+00 +; CHECK: [[XLT0_AND_YGE0:%.+]] = and i1 [[X_LT_0]], [[Y_GE_0]] +; CHECK: [[SELECT_ADD_PI:%.+]] = select i1 [[XLT0_AND_YGE0]], float [[ADD_PI]], float [[ATAN]] +; CHECK: [[XLT0_AND_YLT0:%.+]] = and i1 [[X_LT_0]], [[Y_LT_0]] +; CHECK: [[SELECT_SUB_PI:%.+]] = select i1 [[XLT0_AND_YLT0]], float [[SUB_PI]], float [[SELECT_ADD_PI]] +; CHECK: [[XEQ0_AND_YLT0:%.+]] = and i1 [[X_EQ_0]], [[Y_LT_0]] +; CHECK: [[SELECT_NEGHPI:%.+]] = select i1 [[XEQ0_AND_YLT0]], float 0xBFF921FB60000000, float [[SELECT_SUB_PI]] +; CHECK: [[XEQ0_AND_YGE0:%.+]] = and i1 [[X_EQ_0]], [[Y_GE_0]] +; CHECK: [[SELECT_HPI:%.+]] = select i1 [[XEQ0_AND_YGE0]], float 0x3FF921FB60000000, float [[SELECT_NEGHPI]] +; CHECK: ret float [[SELECT_HPI]] + %elt.atan2 = call float @llvm.atan2.f32(float %y, float %x) + ret float %elt.atan2 +} + +define noundef half @atan2_half(half noundef %y, half noundef %x) { +entry: +; CHECK: [[DIV:%.+]] = fdiv half %y, %x +; EXPCHECK: [[ATAN:%.+]] = call half @llvm.atan.f16(half [[DIV]]) +; DOPCHECK: [[ATAN:%.+]] = call half @dx.op.unary.f16(i32 17, half [[DIV]]) +; CHECK-DAG: [[ADD_PI:%.+]] = fadd half [[ATAN]], 0xH4248 +; CHECK-DAG: [[SUB_PI:%.+]] = fsub half [[ATAN]], 0xH4248 +; CHECK-DAG: [[X_LT_0:%.+]] = fcmp olt half %x, 0xH0000 +; CHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq half %x, 0xH0000 +; CHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge half %y, 0xH0000 +; CHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt half %y, 0xH0000 +; CHECK: [[XLT0_AND_YGE0:%.+]] = and i1 [[X_LT_0]], [[Y_GE_0]] +; CHECK: [[SELECT_ADD_PI:%.+]] = select i1 [[XLT0_AND_YGE0]], half [[ADD_PI]], half [[ATAN]] +; CHECK: [[XLT0_AND_YLT0:%.+]] = and i1 [[X_LT_0]], [[Y_LT_0]] +; CHECK: [[SELECT_SUB_PI:%.+]] = select i1 [[XLT0_AND_YLT0]], half [[SUB_PI]], half [[SELECT_ADD_PI]] +; CHECK: [[XEQ0_AND_YLT0:%.+]] = and i1 [[X_EQ_0]], [[Y_LT_0]] +; CHECK: [[SELECT_NEGHPI:%.+]] = select i1 [[XEQ0_AND_YLT0]], half 0xHBE48, half [[SELECT_SUB_PI]] +; CHECK: [[XEQ0_AND_YGE0:%.+]] = and i1 [[X_EQ_0]], [[Y_GE_0]] +; CHECK: [[SELECT_HPI:%.+]] = select i1 [[XEQ0_AND_YGE0]], half 0xH3E48, half [[SELECT_NEGHPI]] +; CHECK: ret half [[SELECT_HPI]] + %elt.atan2 = call half @llvm.atan2.f16(half %y, half %x) + ret half %elt.atan2 +} + +define noundef <4 x float> @atan2_float4(<4 x float> noundef %y, <4 x float> noundef %x) { +entry: +; Just Expansion, no scalarization or lowering: +; EXPCHECK: [[DIV:%.+]] = fdiv <4 x float> %y, %x +; EXPCHECK: [[ATAN:%.+]] = call <4 x float> @llvm.atan.v4f32(<4 x float> [[DIV]]) +; EXPCHECK-DAG: [[ADD_PI:%.+]] = fadd <4 x float> [[ATAN]], +; EXPCHECK-DAG: [[SUB_PI:%.+]] = fsub <4 x float> [[ATAN]], +; EXPCHECK-DAG: [[X_LT_0:%.+]] = fcmp olt <4 x float> %x, zeroinitializer +; EXPCHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq <4 x float> %x, zeroinitializer +; EXPCHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge <4 x float> %y, zeroinitializer +; EXPCHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt <4 x float> %y, zeroinitializer +; EXPCHECK: [[XLT0_AND_YGE0:%.+]] = and <4 x i1> [[X_LT_0]], [[Y_GE_0]] +; EXPCHECK: [[SELECT_ADD_PI:%.+]] = select <4 x i1> [[XLT0_AND_YGE0]], <4 x float> [[ADD_PI]], <4 x float> [[ATAN]] +; EXPCHECK: [[XLT0_AND_YLT0:%.+]] = and <4 x i1> [[X_LT_0]], [[Y_LT_0]] +; EXPCHECK: [[SELECT_SUB_PI:%.+]] = select <4 x i1> [[XLT0_AND_YLT0]], <4 x float> [[SUB_PI]], <4 x float> [[SELECT_ADD_PI]] +; EXPCHECK: [[XEQ0_AND_YLT0:%.+]] = and <4 x i1> [[X_EQ_0]], [[Y_LT_0]] +; EXPCHECK: [[SELECT_NEGHPI:%.+]] = select <4 x i1> [[XEQ0_AND_YLT0]], <4 x float> , <4 x float> [[SELECT_SUB_PI]] +; EXPCHECK: [[XEQ0_AND_YGE0:%.+]] = and <4 x i1> [[X_EQ_0]], [[Y_GE_0]] +; EXPCHECK: [[SELECT_HPI:%.+]] = select <4 x i1> [[XEQ0_AND_YGE0]], <4 x float> , <4 x float> [[SELECT_NEGHPI]] +; EXPCHECK: ret <4 x float> [[SELECT_HPI]] + +; Scalarization occurs after expansion, so atan scalarization is tested separately. +; Expansion, scalarization and lowering: +; Just make sure this expands to exactly 4 scalar DXIL atan (OpCode=17) calls. +; DOPCHECK-COUNT-4: call float @dx.op.unary.f32(i32 17, float %{{.*}}) +; DOPCHECK-NOT: call float @dx.op.unary.f32(i32 17, + + %elt.atan2 = call <4 x float> @llvm.atan2.v4f32(<4 x float> %y, <4 x float> %x) + ret <4 x float> %elt.atan2 +} + +declare half @llvm.atan2.f16(half, half) +declare float @llvm.atan2.f32(float, float) +declare <4 x float> @llvm.atan2.v4f32(<4 x float>, <4 x float>) diff --git a/llvm/test/CodeGen/DirectX/atan2_error.ll b/llvm/test/CodeGen/DirectX/atan2_error.ll new file mode 100644 index 00000000000000..5b3077f85f5d4e --- /dev/null +++ b/llvm/test/CodeGen/DirectX/atan2_error.ll @@ -0,0 +1,11 @@ +; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s + +; DXIL operation atan does not support double overload type +; CHECK: in function atan2_double +; CHECK-SAME: Cannot create ATan operation: Invalid overload type + +define noundef double @atan2_double(double noundef %a, double noundef %b) #0 { +entry: + %1 = call double @llvm.atan2.f64(double %a, double %b) + ret double %1 +} From b2180481ec2d12628c7d5a784ca4f015e971266c Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 24 Sep 2024 21:11:13 -0700 Subject: [PATCH 08/65] [hwasan] Consider order of mapping copts (#109621) Flags "-hwasan-mapping-offset" and "-hwasan-mapping-offset-dynamic" are mutually exclusive, use the last one. --- .../Instrumentation/HWAddressSanitizer.cpp | 12 +++-- .../HWAddressSanitizer/mapping-override.ll | 50 +++++++++++++++++++ 2 files changed, 58 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Instrumentation/HWAddressSanitizer/mapping-override.ll diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 7d3db5c2c8d5c3..669b63343e994e 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -1937,14 +1937,18 @@ void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple, // Fuchsia is always PIE, which means that the beginning of the address // space is always available. SetFixed(0); - } else if (ClMappingOffset.getNumOccurrences() > 0) { - SetFixed(ClMappingOffset); } else if (ClEnableKhwasan || InstrumentWithCalls) { SetFixed(0); WithFrameRecord = false; - } else if (ClMappingOffsetDynamic.getNumOccurrences() > 0) { - Kind = ClMappingOffsetDynamic; } WithFrameRecord = optOr(ClFrameRecords, WithFrameRecord); + + // Apply the last of ClMappingOffset and ClMappingOffsetDynamic. + Kind = optOr(ClMappingOffsetDynamic, Kind); + if (ClMappingOffset.getNumOccurrences() > 0 && + !(ClMappingOffsetDynamic.getNumOccurrences() > 0 && + ClMappingOffsetDynamic.getPosition() > ClMappingOffset.getPosition())) { + SetFixed(ClMappingOffset); + } } diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/mapping-override.ll b/llvm/test/Instrumentation/HWAddressSanitizer/mapping-override.ll new file mode 100644 index 00000000000000..5cd23f3ebe2b07 --- /dev/null +++ b/llvm/test/Instrumentation/HWAddressSanitizer/mapping-override.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 + +; RUN: opt < %s -passes=hwasan -S | FileCheck %s +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=global -S | FileCheck %s --check-prefixes=GLOBAL +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=567 -S | FileCheck %s --check-prefixes=FIXED +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=567 -hwasan-mapping-offset-dynamic=global -S | FileCheck %s --check-prefixes=FIXED-GLOBAL +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=global -hwasan-mapping-offset=567 -S | FileCheck %s --check-prefixes=GLOBAL-FIXED + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android" + +define i8 @test_load8(ptr %a) sanitize_hwaddress { +; CHECK-LABEL: define i8 @test_load8 +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: call void @llvm.hwasan.check.memaccess(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 0) +; CHECK-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 +; CHECK-NEXT: ret i8 [[B]] +; +; GLOBAL-LABEL: define i8 @test_load8 +; GLOBAL-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; GLOBAL-NEXT: [[TMP1:%.*]] = load ptr, ptr @__hwasan_shadow_memory_dynamic_address, align 8 +; GLOBAL-NEXT: call void @llvm.hwasan.check.memaccess(ptr [[TMP1]], ptr [[A]], i32 0) +; GLOBAL-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 +; GLOBAL-NEXT: ret i8 [[B]] +; +; FIXED-LABEL: define i8 @test_load8 +; FIXED-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; FIXED-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr inttoptr (i64 567 to ptr)) +; FIXED-NEXT: call void @llvm.hwasan.check.memaccess(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 0) +; FIXED-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 +; FIXED-NEXT: ret i8 [[B]] +; +; FIXED-GLOBAL-LABEL: define i8 @test_load8 +; FIXED-GLOBAL-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; FIXED-GLOBAL-NEXT: [[TMP1:%.*]] = load ptr, ptr @__hwasan_shadow_memory_dynamic_address, align 8 +; FIXED-GLOBAL-NEXT: call void @llvm.hwasan.check.memaccess(ptr [[TMP1]], ptr [[A]], i32 0) +; FIXED-GLOBAL-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 +; FIXED-GLOBAL-NEXT: ret i8 [[B]] +; +; GLOBAL-FIXED-LABEL: define i8 @test_load8 +; GLOBAL-FIXED-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; GLOBAL-FIXED-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr inttoptr (i64 567 to ptr)) +; GLOBAL-FIXED-NEXT: call void @llvm.hwasan.check.memaccess(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 0) +; GLOBAL-FIXED-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 +; GLOBAL-FIXED-NEXT: ret i8 [[B]] +; + %b = load i8, ptr %a, align 4 + ret i8 %b +} From 3b8c78a610451f250a57122cdac394c6c8a66189 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 24 Sep 2024 21:36:14 -0700 Subject: [PATCH 09/65] [RISCV] Enable f16 vget/vset/vcreate/vlmul_ext/vlmul_trunc/vundefined intrinsics with Zvfhmin. (#109889) These intrinsics don't produce any instructions so don't require Zvfh. This makes Zvfhmin consistent with Zvfbfmin. See also https://github.com/riscv-non-isa/rvv-intrinsic-doc/issues/351 --- clang/include/clang/Basic/riscv_vector.td | 44 ++++++++++++++----- .../non-policy/non-overloaded/vcreate.c | 2 +- .../non-policy/non-overloaded/vget.c | 2 +- .../non-policy/non-overloaded/vlmul_ext_v.c | 2 +- .../non-policy/non-overloaded/vlmul_trunc_v.c | 2 +- .../non-policy/non-overloaded/vset.c | 2 +- .../non-policy/non-overloaded/vundefined.c | 2 +- .../non-policy/overloaded/vget.c | 2 +- .../non-policy/overloaded/vlmul_ext_v.c | 2 +- .../non-policy/overloaded/vlmul_trunc_v.c | 2 +- .../non-policy/overloaded/vset.c | 2 +- 11 files changed, 44 insertions(+), 20 deletions(-) diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td index 5ef9602433697c..6e57e51793a712 100644 --- a/clang/include/clang/Basic/riscv_vector.td +++ b/clang/include/clang/Basic/riscv_vector.td @@ -361,7 +361,11 @@ multiclass RVVNonTupleVCreateBuiltin src_lmul_list> { defvar src_s = FixedVString.S; def vcreate # src_v # dst_v : RVVBuiltin; + "csilfd">; + let RequiredFeatures = ["Zvfhmin"] in + def vcreate_h # src_v # dst_v : RVVBuiltin; let RequiredFeatures = ["Zvfbfmin"] in def vcreate_bf16 # src_v # dst_v : RVVBuiltin; + def vundefined : RVVBuiltin<"v", "v", "csilfd">; + let RequiredFeatures = ["Zvfhmin"] in + def vundefined_h : RVVBuiltin<"v", "v", "x">; let RequiredFeatures = ["Zvfbfmin"] in def vundefined_bf16 : RVVBuiltin<"v", "v", "y">; def vundefined_u : RVVBuiltin<"Uv", "Uv", "csil">; @@ -2482,7 +2488,9 @@ let HasMasked = false, HasVL = false, IRName = "" in { foreach nf = NFList in { let NF = nf in { defvar T = "(Tuple:" # nf # ")"; - def : RVVBuiltin; + def : RVVBuiltin; + let RequiredFeatures = ["Zvfhmin"] in + def : RVVBuiltin; let RequiredFeatures = ["Zvfbfmin"] in def : RVVBuiltin; def : RVVBuiltin; @@ -2502,7 +2510,10 @@ let HasMasked = false, HasVL = false, IRName = "" in { foreach dst_lmul = ["(SFixedLog2LMUL:-3)", "(SFixedLog2LMUL:-2)", "(SFixedLog2LMUL:-1)", "(SFixedLog2LMUL:0)", "(SFixedLog2LMUL:1)", "(SFixedLog2LMUL:2)"] in { def vlmul_trunc # dst_lmul : RVVBuiltin<"v" # dst_lmul # "v", - dst_lmul # "vv", "csilxfd", dst_lmul # "v">; + dst_lmul # "vv", "csilfd", dst_lmul # "v">; + let RequiredFeatures = ["Zvfhmin"] in + def vlmul_trunc_h # dst_lmul : RVVBuiltin<"v" # dst_lmul # "v", + dst_lmul # "vv", "x", dst_lmul # "v">; let RequiredFeatures = ["Zvfbfmin"] in def vlmul_trunc_bf16 # dst_lmul : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vv", "y", dst_lmul # "v">; @@ -2523,7 +2534,10 @@ let HasMasked = false, HasVL = false, IRName = "" in { foreach dst_lmul = ["(LFixedLog2LMUL:-2)", "(LFixedLog2LMUL:-1)", "(LFixedLog2LMUL:-0)", "(LFixedLog2LMUL:1)", "(LFixedLog2LMUL:2)", "(LFixedLog2LMUL:3)"] in { def vlmul_ext # dst_lmul : RVVBuiltin<"v" # dst_lmul # "v", - dst_lmul # "vv", "csilxfd", dst_lmul # "v">; + dst_lmul # "vv", "csilfd", dst_lmul # "v">; + let RequiredFeatures = ["Zvfhmin"] in + def vlmul_ext_h # dst_lmul : RVVBuiltin<"v" # dst_lmul # "v", + dst_lmul # "vv", "x", dst_lmul # "v">; let RequiredFeatures = ["Zvfbfmin"] in def vlmul_ext_bf16 # dst_lmul : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vv", "y", dst_lmul # "v">; @@ -2555,14 +2569,18 @@ let HasMasked = false, HasVL = false, IRName = "" in { } }] in { foreach dst_lmul = ["(SFixedLog2LMUL:0)", "(SFixedLog2LMUL:1)", "(SFixedLog2LMUL:2)"] in { - def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vvKz", "csilxfd", dst_lmul # "v">; + def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vvKz", "csilfd", dst_lmul # "v">; + let RequiredFeatures = ["Zvfhmin"] in + def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vvKz", "x", dst_lmul # "v">; let RequiredFeatures = ["Zvfbfmin"] in def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vvKz", "y", dst_lmul # "v">; def : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "UvUvKz", "csil", dst_lmul # "Uv">; } foreach nf = NFList in { defvar T = "(Tuple:" # nf # ")"; - def : RVVBuiltin; + def : RVVBuiltin; + let RequiredFeatures = ["Zvfhmin"] in + def : RVVBuiltin; let RequiredFeatures = ["Zvfbfmin"] in def : RVVBuiltin; def : RVVBuiltin; @@ -2592,14 +2610,18 @@ let HasMasked = false, HasVL = false, IRName = "" in { } }] in { foreach dst_lmul = ["(LFixedLog2LMUL:1)", "(LFixedLog2LMUL:2)", "(LFixedLog2LMUL:3)"] in { - def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "csilxfd">; + def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "csilfd">; + let RequiredFeatures = ["Zvfhmin"] in + def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "x">; let RequiredFeatures = ["Zvfbfmin"] in def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "y">; def : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "Uv" # dst_lmul #"UvKzUv", "csil">; } foreach nf = NFList in { defvar T = "(Tuple:" # nf # ")"; - def : RVVBuiltin<"v" # T # "v", T # "v" # T # "vKzv", "csilxfd">; + def : RVVBuiltin<"v" # T # "v", T # "v" # T # "vKzv", "csilfd">; + let RequiredFeatures = ["Zvfhmin"] in + def : RVVBuiltin<"v" # T # "v", T # "v" # T # "vKzv", "x">; let RequiredFeatures = ["Zvfbfmin"] in def : RVVBuiltin<"v" # T # "v", T # "v" # T # "vKzv", "y">; def : RVVBuiltin<"Uv" # T # "Uv", T # "Uv" # T # "UvKzUv", "csil">; @@ -2646,7 +2668,9 @@ let HasMasked = false, HasVL = false, IRName = "" in { defvar T = "(Tuple:" # nf # ")"; defvar V = VString.S; defvar UV = VString.S; - def : RVVBuiltin; + def : RVVBuiltin; + let RequiredFeatures = ["Zvfhmin"] in + def : RVVBuiltin; let RequiredFeatures = ["Zvfbfmin"] in def : RVVBuiltin; def : RVVBuiltin; diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcreate.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcreate.c index 4cf8bbf6c61ee1..e2d493979732b4 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcreate.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcreate.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ -// RUN: -target-feature +zvfh -disable-O0-optnone \ +// RUN: -target-feature +zvfhmin -disable-O0-optnone \ // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vget.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vget.c index a0e6555c03913a..a1ddfc3a92c804 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vget.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vget.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ -// RUN: -target-feature +zvfh -disable-O0-optnone \ +// RUN: -target-feature +zvfhmin -disable-O0-optnone \ // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlmul_ext_v.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlmul_ext_v.c index e6287775ed419a..69471904720f8e 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlmul_ext_v.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlmul_ext_v.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ -// RUN: -target-feature +zvfh -disable-O0-optnone \ +// RUN: -target-feature +zvfhmin -disable-O0-optnone \ // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlmul_trunc_v.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlmul_trunc_v.c index dea288bdf4328b..a3e8ab87d06a8a 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlmul_trunc_v.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlmul_trunc_v.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ -// RUN: -target-feature +zvfh -disable-O0-optnone \ +// RUN: -target-feature +zvfhmin -disable-O0-optnone \ // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vset.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vset.c index 06ccd3125c083f..88b00653c56ebc 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vset.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vset.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ -// RUN: -target-feature +zvfh -disable-O0-optnone \ +// RUN: -target-feature +zvfhmin -disable-O0-optnone \ // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vundefined.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vundefined.c index 5950068181abfd..f18b999e892321 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vundefined.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vundefined.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ -// RUN: -target-feature +zvfh -disable-O0-optnone \ +// RUN: -target-feature +zvfhmin -disable-O0-optnone \ // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vget.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vget.c index e156ec91bfd2cd..afc9cff8dec04f 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vget.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vget.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ -// RUN: -target-feature +zvfh -disable-O0-optnone \ +// RUN: -target-feature +zvfhmin -disable-O0-optnone \ // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_ext_v.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_ext_v.c index 92b894f1f5ef54..8a01f5ebdbcfee 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_ext_v.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_ext_v.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ -// RUN: -target-feature +zvfh -disable-O0-optnone \ +// RUN: -target-feature +zvfhmin -disable-O0-optnone \ // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_trunc_v.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_trunc_v.c index fa923d87bd1ba9..18f6901073a1e5 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_trunc_v.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_trunc_v.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ -// RUN: -target-feature +zvfh -disable-O0-optnone \ +// RUN: -target-feature +zvfhmin -disable-O0-optnone \ // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vset.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vset.c index cc5a32878bd900..b63fa52fa3039b 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vset.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vset.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ -// RUN: -target-feature +zvfh -disable-O0-optnone \ +// RUN: -target-feature +zvfhmin -disable-O0-optnone \ // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s From d0878f13dffa406a267eb8d0b64f803951e997ea Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 24 Sep 2024 21:36:55 -0700 Subject: [PATCH 10/65] [RISCV] Use RVVBitsPerBlock in assignRVVStackObjectOffsets and adjustReg. NFC (#109848) I think the 8 here represents RVVBitsPerBlock / 8. --- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 7 ++++--- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 7abd5a49a1b5fc..22824b77c37dd6 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -1090,11 +1090,12 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const { for (int FI : ObjectsToAllocate) { // ObjectSize in bytes. int64_t ObjectSize = MFI.getObjectSize(FI); - auto ObjectAlign = std::max(Align(8), MFI.getObjectAlign(FI)); + auto ObjectAlign = + std::max(Align(RISCV::RVVBitsPerBlock / 8), MFI.getObjectAlign(FI)); // If the data type is the fractional vector type, reserve one vector // register for it. - if (ObjectSize < 8) - ObjectSize = 8; + if (ObjectSize < (RISCV::RVVBitsPerBlock / 8)) + ObjectSize = (RISCV::RVVBitsPerBlock / 8); Offset = alignTo(Offset + ObjectSize, ObjectAlign); MFI.setObjectOffset(FI, -Offset); // Update the maximum alignment of the RVV stack section diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 701594c0fb05dc..91d539a355ac25 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -200,11 +200,11 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB, ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); assert(ScalableValue > 0 && "There is no need to get VLEN scaled value."); - assert(ScalableValue % 8 == 0 && + assert(ScalableValue % (RISCV::RVVBitsPerBlock / 8) == 0 && "Reserve the stack by the multiple of one vector size."); - assert(isInt<32>(ScalableValue / 8) && + assert(isInt<32>(ScalableValue / (RISCV::RVVBitsPerBlock / 8)) && "Expect the number of vector registers within 32-bits."); - uint32_t NumOfVReg = ScalableValue / 8; + uint32_t NumOfVReg = ScalableValue / (RISCV::RVVBitsPerBlock / 8); BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), ScratchReg) .setMIFlag(Flag); From 74d9f7ce80e8d729c18f12bde73bdd8ea750b369 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 24 Sep 2024 21:49:06 -0700 Subject: [PATCH 11/65] [llvm] Use std::optional::value_or (NFC) (#109890) --- llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h | 3 +-- llvm/tools/llvm-cov/SourceCoverageViewText.cpp | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index b93bc594a82bf3..4bdfa1cf4c1490 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -916,8 +916,7 @@ bool ConstructDecompositionT::applyClause( /*ReductionIdentifiers=*/std::get(clause.t), /*List=*/objects}}); - ReductionModifier effective = - modifier.has_value() ? *modifier : ReductionModifier::Default; + ReductionModifier effective = modifier.value_or(ReductionModifier::Default); bool effectiveApplied = false; // Walk over the leaf constructs starting from the innermost, and apply // the clause as required by the spec. diff --git a/llvm/tools/llvm-cov/SourceCoverageViewText.cpp b/llvm/tools/llvm-cov/SourceCoverageViewText.cpp index cab60c2d9034ed..8b93b592910b3d 100644 --- a/llvm/tools/llvm-cov/SourceCoverageViewText.cpp +++ b/llvm/tools/llvm-cov/SourceCoverageViewText.cpp @@ -179,7 +179,7 @@ void SourceCoverageViewText::renderLine(raw_ostream &OS, LineRef L, unsigned Col = 1; for (const auto *S : Segments) { unsigned End = std::min(S->Col, static_cast(Line.size()) + 1); - colored_ostream(OS, Highlight ? *Highlight : raw_ostream::SAVEDCOLOR, + colored_ostream(OS, Highlight.value_or(raw_ostream::SAVEDCOLOR), getOptions().Colors && Highlight, /*Bold=*/false, /*BG=*/true) << Line.substr(Col - 1, End - Col); @@ -196,7 +196,7 @@ void SourceCoverageViewText::renderLine(raw_ostream &OS, LineRef L, } // Show the rest of the line. - colored_ostream(OS, Highlight ? *Highlight : raw_ostream::SAVEDCOLOR, + colored_ostream(OS, Highlight.value_or(raw_ostream::SAVEDCOLOR), getOptions().Colors && Highlight, /*Bold=*/false, /*BG=*/true) << Line.substr(Col - 1, Line.size() - Col + 1); OS << '\n'; From c92137e474d504159bd9d51f125c8a9ba9141109 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Tue, 24 Sep 2024 22:05:51 -0700 Subject: [PATCH 12/65] [NFC][TableGen] Adopt scaled indent in PredicateExpander (#109801) Adopt scaled indent in PredicateExpander. Added pre/post inc/dec operators to `indent` and related unit tests. Verified by comparing *.inc files generated by LLVM build with/without the change. --- llvm/include/llvm/Support/raw_ostream.h | 24 ++++ llvm/unittests/Support/raw_ostream_test.cpp | 20 ++++ .../TableGen/Common/PredicateExpander.cpp | 107 +++++++----------- .../utils/TableGen/Common/PredicateExpander.h | 19 ++-- llvm/utils/TableGen/InstrInfoEmitter.cpp | 4 +- .../TableGen/MacroFusionPredicatorEmitter.cpp | 4 +- llvm/utils/TableGen/SubtargetEmitter.cpp | 20 ++-- 7 files changed, 106 insertions(+), 92 deletions(-) diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h index c2f2299ed96455..d3b411590e7fd7 100644 --- a/llvm/include/llvm/Support/raw_ostream.h +++ b/llvm/include/llvm/Support/raw_ostream.h @@ -797,6 +797,30 @@ struct indent { assert(NumIndents >= N && "Indentation undeflow"); return indent(NumIndents - N, Scale); } + indent &operator++() { // Prefix ++. + ++NumIndents; + return *this; + } + indent operator++(int) { // Postfix ++. + indent Old = *this; + ++NumIndents; + return Old; + } + indent &operator--() { // Prefix --. + assert(NumIndents >= 1); + --NumIndents; + return *this; + } + indent operator--(int) { // Postfix --. + indent Old = *this; + assert(NumIndents >= 1); + --NumIndents; + return Old; + } + indent &operator=(unsigned N) { + NumIndents = N; + return *this; + } }; inline raw_ostream &operator<<(raw_ostream &OS, const indent &Indent) { diff --git a/llvm/unittests/Support/raw_ostream_test.cpp b/llvm/unittests/Support/raw_ostream_test.cpp index a35edd61685296..fbeff37d26a354 100644 --- a/llvm/unittests/Support/raw_ostream_test.cpp +++ b/llvm/unittests/Support/raw_ostream_test.cpp @@ -198,6 +198,26 @@ TEST(raw_ostreamTest, Indent) { EXPECT_EQ(Spaces(10), printToString(Scaled)); Scaled -= 1; EXPECT_EQ(Spaces(8), printToString(Scaled)); + + // Operators. + Indent = 10; + EXPECT_EQ(Spaces(10), printToString(Indent)); + + indent Temp = Indent++; + EXPECT_EQ(Spaces(11), printToString(Indent)); + EXPECT_EQ(Spaces(10), printToString(Temp)); + + Temp = Indent--; + EXPECT_EQ(Spaces(10), printToString(Indent)); + EXPECT_EQ(Spaces(11), printToString(Temp)); + + Temp = ++Indent; + EXPECT_EQ(Spaces(11), printToString(Indent)); + EXPECT_EQ(Spaces(11), printToString(Temp)); + + Temp = --Indent; + EXPECT_EQ(Spaces(10), printToString(Indent)); + EXPECT_EQ(Spaces(10), printToString(Temp)); } TEST(raw_ostreamTest, FormatHex) { diff --git a/llvm/utils/TableGen/Common/PredicateExpander.cpp b/llvm/utils/TableGen/Common/PredicateExpander.cpp index 2afaa8cc21aa66..314e563ba90bb4 100644 --- a/llvm/utils/TableGen/Common/PredicateExpander.cpp +++ b/llvm/utils/TableGen/Common/PredicateExpander.cpp @@ -153,10 +153,9 @@ void PredicateExpander::expandCheckOpcode(raw_ostream &OS, } OS << '('; - increaseIndentLevel(); + ++Indent; for (const Record *Rec : Opcodes) { - OS << '\n'; - OS.indent(getIndentLevel() * 2); + OS << '\n' << Indent; if (!First) OS << (shouldNegate() ? "&& " : "|| "); @@ -164,10 +163,8 @@ void PredicateExpander::expandCheckOpcode(raw_ostream &OS, First = false; } - OS << '\n'; - decreaseIndentLevel(); - OS.indent(getIndentLevel() * 2); - OS << ')'; + --Indent; + OS << '\n' << Indent << ')'; } void PredicateExpander::expandCheckPseudo(raw_ostream &OS, @@ -187,22 +184,19 @@ void PredicateExpander::expandPredicateSequence( // Okay, there is more than one predicate in the set. bool First = true; OS << (shouldNegate() ? "!(" : "("); - increaseIndentLevel(); + ++Indent; bool OldValue = shouldNegate(); setNegatePredicate(false); for (const Record *Rec : Sequence) { - OS << '\n'; - OS.indent(getIndentLevel() * 2); + OS << '\n' << Indent; if (!First) OS << (IsCheckAll ? "&& " : "|| "); expandPredicate(OS, Rec); First = false; } - OS << '\n'; - decreaseIndentLevel(); - OS.indent(getIndentLevel() * 2); - OS << ')'; + --Indent; + OS << '\n' << Indent << ')'; setNegatePredicate(OldValue); } @@ -269,15 +263,14 @@ void PredicateExpander::expandReturnStatement(raw_ostream &OS, void PredicateExpander::expandOpcodeSwitchCase(raw_ostream &OS, const Record *Rec) { for (const Record *Opcode : Rec->getValueAsListOfDefs("Opcodes")) { - OS.indent(getIndentLevel() * 2); - OS << "case " << Opcode->getValueAsString("Namespace") + OS << Indent << "case " << Opcode->getValueAsString("Namespace") << "::" << Opcode->getName() << ":\n"; } - increaseIndentLevel(); - OS.indent(getIndentLevel() * 2); + ++Indent; + OS << Indent; expandStatement(OS, Rec->getValueAsDef("CaseStmt")); - decreaseIndentLevel(); + --Indent; } void PredicateExpander::expandOpcodeSwitchStatement( @@ -292,17 +285,12 @@ void PredicateExpander::expandOpcodeSwitchStatement( } // Expand the default case. - SS.indent(getIndentLevel() * 2); - SS << "default:\n"; + SS << Indent << "default:\n"; - increaseIndentLevel(); - SS.indent(getIndentLevel() * 2); + ++Indent; + SS << Indent; expandStatement(SS, Default); - decreaseIndentLevel(); - SS << '\n'; - - SS.indent(getIndentLevel() * 2); - SS << "} // end of switch-stmt"; + SS << '\n' << Indent << "} // end of switch-stmt"; OS << Buffer; } @@ -436,8 +424,7 @@ void STIPredicateExpander::expandHeader(raw_ostream &OS, const Record *Rec = Fn.getDeclaration(); StringRef FunctionName = Rec->getValueAsString("Name"); - OS.indent(getIndentLevel() * 2); - OS << "bool "; + OS << Indent << "bool "; if (shouldExpandDefinition()) OS << getClassPrefix() << "::"; OS << FunctionName << "("; @@ -463,26 +450,22 @@ void STIPredicateExpander::expandPrologue(raw_ostream &OS, bool UpdatesOpcodeMask = Fn.getDeclaration()->getValueAsBit("UpdatesOpcodeMask"); - increaseIndentLevel(); - unsigned IndentLevel = getIndentLevel(); + ++Indent; for (const Record *Delegate : Fn.getDeclaration()->getValueAsListOfDefs("Delegates")) { - OS.indent(IndentLevel * 2); - OS << "if (" << Delegate->getValueAsString("Name") << "(MI"; + OS << Indent << "if (" << Delegate->getValueAsString("Name") << "(MI"; if (UpdatesOpcodeMask) OS << ", Mask"; if (shouldExpandForMC()) OS << ", ProcessorID"; OS << "))\n"; - OS.indent((1 + IndentLevel) * 2); - OS << "return true;\n\n"; + OS << Indent + 1 << "return true;\n\n"; } if (shouldExpandForMC()) return; - OS.indent(IndentLevel * 2); - OS << "unsigned ProcessorID = getSchedModel().getProcessorID();\n"; + OS << Indent << "unsigned ProcessorID = getSchedModel().getProcessorID();\n"; } void STIPredicateExpander::expandOpcodeGroup(raw_ostream &OS, @@ -497,8 +480,7 @@ void STIPredicateExpander::expandOpcodeGroup(raw_ostream &OS, continue; if (FirstProcID) { - OS.indent(getIndentLevel() * 2); - OS << "if (ProcessorID == " << I; + OS << Indent << "if (ProcessorID == " << I; } else { OS << " || ProcessorID == " << I; } @@ -507,21 +489,20 @@ void STIPredicateExpander::expandOpcodeGroup(raw_ostream &OS, OS << ") {\n"; - increaseIndentLevel(); - OS.indent(getIndentLevel() * 2); + ++Indent; + OS << Indent; if (ShouldUpdateOpcodeMask) { if (PI.OperandMask.isZero()) OS << "Mask.clearAllBits();\n"; else OS << "Mask = " << PI.OperandMask << ";\n"; - OS.indent(getIndentLevel() * 2); + OS << Indent; } OS << "return "; expandPredicate(OS, PI.Predicate); OS << ";\n"; - decreaseIndentLevel(); - OS.indent(getIndentLevel() * 2); - OS << "}\n"; + --Indent; + OS << Indent << "}\n"; } } @@ -530,46 +511,38 @@ void STIPredicateExpander::expandBody(raw_ostream &OS, bool UpdatesOpcodeMask = Fn.getDeclaration()->getValueAsBit("UpdatesOpcodeMask"); - unsigned IndentLevel = getIndentLevel(); - OS.indent(IndentLevel * 2); - OS << "switch(MI" << (isByRef() ? "." : "->") << "getOpcode()) {\n"; - OS.indent(IndentLevel * 2); - OS << "default:\n"; - OS.indent(IndentLevel * 2); - OS << " break;"; + OS << Indent << "switch(MI" << (isByRef() ? "." : "->") << "getOpcode()) {\n"; + OS << Indent << "default:\n"; + OS << Indent << " break;"; for (const OpcodeGroup &Group : Fn.getGroups()) { for (const Record *Opcode : Group.getOpcodes()) { - OS << '\n'; - OS.indent(IndentLevel * 2); - OS << "case " << getTargetName() << "::" << Opcode->getName() << ":"; + OS << '\n' + << Indent << "case " << getTargetName() << "::" << Opcode->getName() + << ":"; } OS << '\n'; - increaseIndentLevel(); + ++Indent; expandOpcodeGroup(OS, Group, UpdatesOpcodeMask); - OS.indent(getIndentLevel() * 2); - OS << "break;\n"; - decreaseIndentLevel(); + OS << Indent << "break;\n"; + --Indent; } - OS.indent(IndentLevel * 2); - OS << "}\n"; + OS << Indent << "}\n"; } void STIPredicateExpander::expandEpilogue(raw_ostream &OS, const STIPredicateFunction &Fn) { - OS << '\n'; - OS.indent(getIndentLevel() * 2); + OS << '\n' << Indent; OS << "return "; expandPredicate(OS, Fn.getDefaultReturnPredicate()); OS << ";\n"; - decreaseIndentLevel(); - OS.indent(getIndentLevel() * 2); + --Indent; StringRef FunctionName = Fn.getDeclaration()->getValueAsString("Name"); - OS << "} // " << ClassPrefix << "::" << FunctionName << "\n\n"; + OS << Indent << "} // " << ClassPrefix << "::" << FunctionName << "\n\n"; } void STIPredicateExpander::expandSTIPredicate(raw_ostream &OS, diff --git a/llvm/utils/TableGen/Common/PredicateExpander.h b/llvm/utils/TableGen/Common/PredicateExpander.h index c0cd69e3cb1f85..0c3a8718a473f1 100644 --- a/llvm/utils/TableGen/Common/PredicateExpander.h +++ b/llvm/utils/TableGen/Common/PredicateExpander.h @@ -18,39 +18,38 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { -class raw_ostream; class Record; class PredicateExpander { bool EmitCallsByRef; bool NegatePredicate; bool ExpandForMC; - unsigned IndentLevel; StringRef TargetName; PredicateExpander(const PredicateExpander &) = delete; PredicateExpander &operator=(const PredicateExpander &) = delete; +protected: + indent Indent; + public: - PredicateExpander(StringRef Target) + explicit PredicateExpander(StringRef Target, unsigned Indent = 1) : EmitCallsByRef(true), NegatePredicate(false), ExpandForMC(false), - IndentLevel(1U), TargetName(Target) {} + TargetName(Target), Indent(Indent, 2) {} bool isByRef() const { return EmitCallsByRef; } bool shouldNegate() const { return NegatePredicate; } bool shouldExpandForMC() const { return ExpandForMC; } - unsigned getIndentLevel() const { return IndentLevel; } + indent &getIndent() { return Indent; } StringRef getTargetName() const { return TargetName; } void setByRef(bool Value) { EmitCallsByRef = Value; } void flipNegatePredicate() { NegatePredicate = !NegatePredicate; } void setNegatePredicate(bool Value) { NegatePredicate = Value; } void setExpandForMC(bool Value) { ExpandForMC = Value; } - void setIndentLevel(unsigned Level) { IndentLevel = Level; } - void increaseIndentLevel() { ++IndentLevel; } - void decreaseIndentLevel() { --IndentLevel; } void expandTrue(raw_ostream &OS); void expandFalse(raw_ostream &OS); @@ -116,8 +115,8 @@ class STIPredicateExpander : public PredicateExpander { void expandEpilogue(raw_ostream &OS, const STIPredicateFunction &Fn); public: - STIPredicateExpander(StringRef Target) - : PredicateExpander(Target), ExpandDefinition(false) {} + explicit STIPredicateExpander(StringRef Target, unsigned Indent = 1) + : PredicateExpander(Target, Indent), ExpandDefinition(false) {} bool shouldExpandDefinition() const { return ExpandDefinition; } StringRef getClassPrefix() const { return ClassPrefix; } diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index cc5ef49385bb86..46605095ba85f8 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -711,7 +711,7 @@ void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS, OS << "bool " << Rec->getValueAsString("FunctionName"); OS << "(const MCInst &MI) {\n"; - OS.indent(PE.getIndentLevel() * 2); + OS << PE.getIndent(); PE.expandStatement(OS, Rec->getValueAsDef("Body")); OS << "\n}\n\n"; } @@ -914,7 +914,7 @@ void InstrInfoEmitter::emitTIIHelperMethods(raw_ostream &OS, } OS << " {\n"; - OS.indent(PE.getIndentLevel() * 2); + OS << PE.getIndent(); PE.expandStatement(OS, Rec->getValueAsDef("Body")); OS << "\n}\n\n"; } diff --git a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp index c4f238b67476a7..6ca2fea41230b8 100644 --- a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp +++ b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp @@ -160,7 +160,7 @@ void MacroFusionPredicatorEmitter::emitFirstPredicate(const Record *Predicate, OS.indent(4) << "const MachineInstr *MI = FirstMI;\n"; OS.indent(4) << "if ("; PE.setNegatePredicate(true); - PE.setIndentLevel(3); + PE.getIndent() = 3; PE.expandPredicate(OS, Predicate->getValueAsDef("Predicate")); OS << ")\n"; OS.indent(4) << " return false;\n"; @@ -181,7 +181,7 @@ void MacroFusionPredicatorEmitter::emitSecondPredicate(const Record *Predicate, OS.indent(4) << "const MachineInstr *MI = &SecondMI;\n"; OS.indent(4) << "if ("; PE.setNegatePredicate(true); - PE.setIndentLevel(3); + PE.getIndent() = 3; PE.expandPredicate(OS, Predicate->getValueAsDef("Predicate")); OS << ")\n"; OS.indent(4) << " return false;\n"; diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp index 78d80ff82d6a4f..d21e19c060afc5 100644 --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -1576,13 +1576,13 @@ static void emitPredicates(const CodeGenSchedTransition &T, unsigned NumNonTruePreds = T.PredTerm.size() - count_if(T.PredTerm, isTruePredicate); - SS.indent(PE.getIndentLevel() * 2); + SS << PE.getIndent(); if (NumNonTruePreds) { bool FirstNonTruePredicate = true; SS << "if ("; - PE.setIndentLevel(PE.getIndentLevel() + 2); + PE.getIndent() += 2; for (const Record *Rec : T.PredTerm) { // Skip predicates that evaluate to "true". @@ -1593,7 +1593,7 @@ static void emitPredicates(const CodeGenSchedTransition &T, FirstNonTruePredicate = false; } else { SS << "\n"; - SS.indent(PE.getIndentLevel() * 2); + SS << PE.getIndent(); SS << "&& "; } @@ -1610,9 +1610,9 @@ static void emitPredicates(const CodeGenSchedTransition &T, } SS << ")\n"; // end of if-stmt - PE.decreaseIndentLevel(); - SS.indent(PE.getIndentLevel() * 2); - PE.decreaseIndentLevel(); + --PE.getIndent(); + SS << PE.getIndent(); + --PE.getIndent(); } SS << "return " << T.ToClassIdx << "; // " << SC.Name << '\n'; @@ -1736,7 +1736,7 @@ void SubtargetEmitter::emitSchedModelHelpersImpl( FinalT = &T; continue; } - PE.setIndentLevel(3); + PE.getIndent() = 3; emitPredicates(T, SchedModels.getSchedClass(T.ToClassIdx), PE, OS); } if (FinalT) @@ -1780,11 +1780,10 @@ void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName, << "::resolveVariantSchedClassImpl(SchedClass, MI, MCII, CPUID);\n" << "} // " << ClassName << "::resolveVariantSchedClass\n\n"; - STIPredicateExpander PE(Target); + STIPredicateExpander PE(Target, /*Indent=*/0); PE.setClassPrefix(ClassName); PE.setExpandDefinition(true); PE.setByRef(false); - PE.setIndentLevel(0); for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates()) PE.expandSTIPredicate(OS, Fn); @@ -1962,7 +1961,7 @@ void SubtargetEmitter::EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS) { OS << "\n#ifdef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n"; OS << "#undef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n\n"; - STIPredicateExpander PE(Target); + STIPredicateExpander PE(Target, /*Indent=*/0); PE.setExpandForMC(true); PE.setByRef(true); for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates()) @@ -1976,7 +1975,6 @@ void SubtargetEmitter::EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS) { std::string ClassPrefix = Target + "MCInstrAnalysis"; PE.setExpandDefinition(true); PE.setClassPrefix(ClassPrefix); - PE.setIndentLevel(0); for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates()) PE.expandSTIPredicate(OS, Fn); From 614aeda93b2225c6eb42b00ba189ba7ca2585c60 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Wed, 25 Sep 2024 06:14:43 +0100 Subject: [PATCH 13/65] [RISCV] Mark Zacas as non-experimental (#109651) The extension has been ratified for some time, but we kept it experimental (see #99898) due to . The ABI issue has been resolved by #101023 so I believe there's no known barrier to moving Zacas to non-experimental. --- .../Driver/print-supported-extensions-riscv.c | 2 +- .../test/Preprocessor/riscv-target-features.c | 18 +++++++++--------- llvm/docs/RISCVUsage.rst | 9 ++++++--- llvm/docs/ReleaseNotes.rst | 1 + llvm/lib/Target/RISCV/RISCVFeatures.td | 4 ++-- llvm/lib/TargetParser/Host.cpp | 3 +-- .../RISCV/atomic-cmpxchg-branch-on-result.ll | 6 +++--- llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll | 12 ++++++------ llvm/test/CodeGen/RISCV/atomic-rmw.ll | 12 ++++++------ llvm/test/CodeGen/RISCV/atomic-signext.ll | 4 ++-- llvm/test/CodeGen/RISCV/attributes.ll | 4 ++-- llvm/test/MC/RISCV/rv32zacas-invalid.s | 2 +- llvm/test/MC/RISCV/rv32zacas-valid.s | 12 ++++++------ llvm/test/MC/RISCV/rv64zacas-invalid.s | 2 +- llvm/test/MC/RISCV/rv64zacas-valid.s | 6 +++--- llvm/test/MC/RISCV/rvzabha-zacas-valid.s | 12 ++++++------ .../TargetParser/RISCVISAInfoTest.cpp | 2 +- 17 files changed, 57 insertions(+), 54 deletions(-) diff --git a/clang/test/Driver/print-supported-extensions-riscv.c b/clang/test/Driver/print-supported-extensions-riscv.c index 312c462f715d5e..a39c1ab36b1db0 100644 --- a/clang/test/Driver/print-supported-extensions-riscv.c +++ b/clang/test/Driver/print-supported-extensions-riscv.c @@ -35,6 +35,7 @@ // CHECK-NEXT: za64rs 1.0 'Za64rs' (Reservation Set Size of at Most 64 Bytes) // CHECK-NEXT: zaamo 1.0 'Zaamo' (Atomic Memory Operations) // CHECK-NEXT: zabha 1.0 'Zabha' (Byte and Halfword Atomic Memory Operations) +// CHECK-NEXT: zacas 1.0 'Zacas' (Atomic Compare-And-Swap Instructions) // CHECK-NEXT: zalrsc 1.0 'Zalrsc' (Load-Reserved/Store-Conditional) // CHECK-NEXT: zama16b 1.0 'Zama16b' (Atomic 16-byte misaligned loads, stores and AMOs) // CHECK-NEXT: zawrs 1.0 'Zawrs' (Wait on Reservation Set) @@ -171,7 +172,6 @@ // CHECK-NEXT: Experimental extensions // CHECK-NEXT: zicfilp 1.0 'Zicfilp' (Landing pad) // CHECK-NEXT: zicfiss 1.0 'Zicfiss' (Shadow stack) -// CHECK-NEXT: zacas 1.0 'Zacas' (Atomic Compare-And-Swap Instructions) // CHECK-NEXT: zalasr 0.1 'Zalasr' (Load-Acquire and Store-Release Instructions) // CHECK-NEXT: zvbc32e 0.7 'Zvbc32e' (Vector Carryless Multiplication with 32-bits elements) // CHECK-NEXT: zvkgs 0.7 'Zvkgs' (Vector-Scalar GCM instructions for Cryptography) diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c index 60675065495bba..05a8534ba13da1 100644 --- a/clang/test/Preprocessor/riscv-target-features.c +++ b/clang/test/Preprocessor/riscv-target-features.c @@ -87,6 +87,7 @@ // CHECK-NOT: __riscv_za64rs {{.*$}} // CHECK-NOT: __riscv_zaamo {{.*$}} // CHECK-NOT: __riscv_zabha {{.*$}} +// CHECK-NOT: __riscv_zacas {{.*$}} // CHECK-NOT: __riscv_zalrsc {{.*$}} // CHECK-NOT: __riscv_zama16b {{.*$}} // CHECK-NOT: __riscv_zawrs {{.*$}} @@ -183,7 +184,6 @@ // CHECK-NOT: __riscv_ssnpm{{.*$}} // CHECK-NOT: __riscv_sspm{{.*$}} // CHECK-NOT: __riscv_supm{{.*$}} -// CHECK-NOT: __riscv_zacas {{.*$}} // CHECK-NOT: __riscv_zalasr {{.*$}} // CHECK-NOT: __riscv_zfbfmin {{.*$}} // CHECK-NOT: __riscv_zicfilp {{.*$}} @@ -751,6 +751,14 @@ // RUN: -o - | FileCheck --check-prefix=CHECK-ZABHA-EXT %s // CHECK-ZABHA-EXT: __riscv_zabha 1000000{{$}} +// RUN: %clang --target=riscv32 \ +// RUN: -march=rv32ia_zacas1p0 -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-ZACAS-EXT %s +// RUN: %clang --target=riscv64 \ +// RUN: -march=rv64ia_zacas1p0 -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-ZACAS-EXT %s +// CHECK-ZACAS-EXT: __riscv_zacas 1000000{{$}} + // RUN: %clang --target=riscv32 \ // RUN: -march=rv32i_zalrsc1p0 -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-ZALRSC-EXT %s @@ -1630,14 +1638,6 @@ // CHECK-ZVKT-EXT: __riscv_zvkt 1000000{{$}} // Experimental extensions -// RUN: %clang --target=riscv32 -menable-experimental-extensions \ -// RUN: -march=rv32ia_zacas1p0 -E -dM %s \ -// RUN: -o - | FileCheck --check-prefix=CHECK-ZACAS-EXT %s -// RUN: %clang --target=riscv64 -menable-experimental-extensions \ -// RUN: -march=rv64ia_zacas1p0 -E -dM %s \ -// RUN: -o - | FileCheck --check-prefix=CHECK-ZACAS-EXT %s -// CHECK-ZACAS-EXT: __riscv_zacas 1000000{{$}} - // RUN: %clang --target=riscv32 -menable-experimental-extensions \ // RUN: -march=rv32i_zalasr0p1 -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-ZALASR-EXT %s diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index f4240048574541..7bea64792d472c 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -154,6 +154,7 @@ on support follow. ``Za64rs`` Supported (`See note <#riscv-profiles-extensions-note>`__) ``Zaamo`` Assembly Support ``Zabha`` Supported + ``Zacas`` Supported (`See note<#riscv-zacas-note>`__) ``Zalrsc`` Assembly Support ``Zama16b`` Supported (`See note <#riscv-profiles-extensions-note>`__) ``Zawrs`` Assembly Support @@ -287,6 +288,11 @@ Supported ``Za128rs``, ``Za64rs``, ``Zama16b``, ``Zic64b``, ``Ziccamoa``, ``Ziccif``, ``Zicclsm``, ``Ziccrse``, ``Shcounterenvw``, ``Shgatpa``, ``Shtvala``, ``Shvsatpa``, ``Shvstvala``, ``Shvstvecd``, ``Ssccptr``, ``Sscounterenw``, ``Ssstateen``, ``Ssstrict``, ``Sstvala``, ``Sstvecd``, ``Ssu64xl``, ``Svade``, ``Svbare`` These extensions are defined as part of the `RISC-V Profiles specification `__. They do not introduce any new features themselves, but instead describe existing hardware features. +.. _riscv-zacas-note: + +``Zacas`` + The compiler will not generate amocas.d on RV32 or amocas.q on RV64 due to ABI compatibilty. These can only be used in the assembler. + Atomics ABIs ============ @@ -304,9 +310,6 @@ The primary goal of experimental support is to assist in the process of ratifica ``experimental-ssnpm``, ``experimental-smnpm``, ``experimental-smmpm``, ``experimental-sspm``, ``experimental-supm`` LLVM implements the `v1.0.0-rc2 specification `__. -``experimental-zacas`` - LLVM implements the `1.0 release specification `__. amocas.w will be used for i32 cmpxchg. amocas.d will be used i64 cmpxchg on RV64. The compiler will not generate amocas.d on RV32 or amocas.q on RV64 due to ABI compatibilty. These can only be used in the assembler. The extension will be left as experimental until `an ABI issue `__ is resolved. - ``experimental-zalasr`` LLVM implements the `0.0.5 draft specification `__. diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index c85ea28ad9f8c7..9bf838c39643d6 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -149,6 +149,7 @@ Changes to the RISC-V Backend * The ``Zvbc32e`` and ``Zvkgs`` extensions are now supported experimentally. * Added ``Smctr`` and ``Ssctr`` extensions. * ``-mcpu=syntacore-scr7`` was added. +* The ``Zacas`` extension is no longer marked as experimental. Changes to the WebAssembly Backend ---------------------------------- diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 52f5a637eb740d..3d0e1dae801d39 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -243,8 +243,8 @@ def HasStdExtZabha : Predicate<"Subtarget->hasStdExtZabha()">, "'Zabha' (Byte and Halfword Atomic Memory Operations)">; def FeatureStdExtZacas - : RISCVExperimentalExtension<"zacas", 1, 0, - "'Zacas' (Atomic Compare-And-Swap Instructions)">, + : RISCVExtension<"zacas", 1, 0, + "'Zacas' (Atomic Compare-And-Swap Instructions)">, RISCVExtensionBitmask<0, 26>; def HasStdExtZacas : Predicate<"Subtarget->hasStdExtZacas()">, AssemblerPredicate<(all_of FeatureStdExtZacas), diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index b2c4f9ee002930..616e4eda1dd29d 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -2040,8 +2040,7 @@ const StringMap sys::getHostCPUFeatures() { Features["zvfhmin"] = ExtMask & (1ULL << 31); // RISCV_HWPROBE_EXT_ZVFHMIN Features["zfa"] = ExtMask & (1ULL << 32); // RISCV_HWPROBE_EXT_ZFA Features["ztso"] = ExtMask & (1ULL << 33); // RISCV_HWPROBE_EXT_ZTSO - // TODO: Re-enable zacas when it is marked non-experimental again. - // Features["zacas"] = ExtMask & (1ULL << 34); // RISCV_HWPROBE_EXT_ZACAS + Features["zacas"] = ExtMask & (1ULL << 34); // RISCV_HWPROBE_EXT_ZACAS Features["zicond"] = ExtMask & (1ULL << 35); // RISCV_HWPROBE_EXT_ZICOND Features["zihintpause"] = ExtMask & (1ULL << 36); // RISCV_HWPROBE_EXT_ZIHINTPAUSE diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll index e70ba93de75e01..234a956be809ed 100644 --- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll +++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll @@ -1,13 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=NOZACAS,RV32IA %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=ZACAS,RV32IA-ZACAS %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=NOZACAS,RV64IA %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=ZACAS,RV64IA-ZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas,+zabha -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas,+zabha -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=ZACAS,RV64IA-ZABHA %s ; Test cmpxchg followed by a branch on the cmpxchg success value to see if the diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll index acd6e8f9afe2a0..9908503adb9c30 100644 --- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll @@ -3,25 +3,25 @@ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS,RV32IA-WMO-ZACAS %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS,RV32IA-TSO-ZACAS %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS,RV64IA-WMO-ZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas,+zabha -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas,+zabha -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZABHA,RV64IA-WMO-ZABHA %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS,RV64IA-TSO-ZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+experimental-zacas,+zabha -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zacas,+zabha -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZABHA,RV64IA-TSO-ZABHA %s define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll index 03157e13bff787..f50744fc3c1f32 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -12,22 +12,22 @@ ; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-NOZACAS,RV64IA-TSO,RV64IA-TSO-NOZACAS %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS,RV32IA-WMO,RV32IA-WMO-ZACAS %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS,RV32IA-TSO,RV32IA-TSO-ZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS,RV64IA-WMO,RV64IA-WMO-ZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS,RV64IA-TSO,RV64IA-TSO-ZACAS %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO,RV64IA-WMO-ZABHA,RV64IA-WMO-ZABHA-NOZACAS %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zabha -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO,RV64IA-TSO-ZABHA,RV64IA-TSO-ZABHA-NOZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO,RV64IA-WMO-ZABHA,RV64IA-WMO-ZABHA-ZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zabha,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zabha,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO,RV64IA-TSO-ZABHA,RV64IA-TSO-ZABHA-ZACAS %s define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll index c143be478948ed..ed0a160d3f58ad 100644 --- a/llvm/test/CodeGen/RISCV/atomic-signext.ll +++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll @@ -3,13 +3,13 @@ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-NOZACAS %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-NOZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS %s define signext i8 @atomic_load_i8_unordered(ptr %a) nounwind { diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index 1d4a634c89a22f..86ce368bc1db66 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -121,7 +121,7 @@ ; RUN: llc -mtriple=riscv32 -mattr=+zfbfmin %s -o - | FileCheck --check-prefixes=CHECK,RV32ZFBFMIN %s ; RUN: llc -mtriple=riscv32 -mattr=+zvfbfmin %s -o - | FileCheck --check-prefixes=CHECK,RV32ZVFBFMIN %s ; RUN: llc -mtriple=riscv32 -mattr=+zvfbfwma %s -o - | FileCheck --check-prefixes=CHECK,RV32ZVFBFWMA %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas %s -o - | FileCheck --check-prefix=RV32ZACAS %s +; RUN: llc -mtriple=riscv32 -mattr=+a,+zacas %s -o - | FileCheck --check-prefix=RV32ZACAS %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zalasr %s -o - | FileCheck --check-prefix=RV32ZALASR %s ; RUN: llc -mtriple=riscv32 -mattr=+zama16b %s -o - | FileCheck --check-prefixes=CHECK,RV32ZAMA16B %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zicfilp %s -o - | FileCheck --check-prefix=RV32ZICFILP %s @@ -264,7 +264,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+zfbfmin %s -o - | FileCheck --check-prefixes=CHECK,RV64ZFBFMIN %s ; RUN: llc -mtriple=riscv64 -mattr=+zvfbfmin %s -o - | FileCheck --check-prefixes=CHECK,RV64ZVFBFMIN %s ; RUN: llc -mtriple=riscv64 -mattr=+zvfbfwma %s -o - | FileCheck --check-prefixes=CHECK,RV64ZVFBFWMA %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas %s -o - | FileCheck --check-prefix=RV64ZACAS %s +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas %s -o - | FileCheck --check-prefix=RV64ZACAS %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zalasr %s -o - | FileCheck --check-prefix=RV64ZALASR %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zicfilp %s -o - | FileCheck --check-prefix=RV64ZICFILP %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha %s -o - | FileCheck --check-prefix=RV64ZABHA %s diff --git a/llvm/test/MC/RISCV/rv32zacas-invalid.s b/llvm/test/MC/RISCV/rv32zacas-invalid.s index bad2edcaaa9156..6927a2733b8e64 100644 --- a/llvm/test/MC/RISCV/rv32zacas-invalid.s +++ b/llvm/test/MC/RISCV/rv32zacas-invalid.s @@ -1,4 +1,4 @@ -# RUN: not llvm-mc -triple riscv32 -mattr=+a,+experimental-zacas < %s 2>&1 | FileCheck %s +# RUN: not llvm-mc -triple riscv32 -mattr=+a,+zacas < %s 2>&1 | FileCheck %s # Non-zero offsets not supported for the third operand (rs1). amocas.w a1, a3, 1(a5) # CHECK: :[[@LINE]]:18: error: optional integer offset must be 0 diff --git a/llvm/test/MC/RISCV/rv32zacas-valid.s b/llvm/test/MC/RISCV/rv32zacas-valid.s index 8ba2b02542bc0b..0e76f023994833 100644 --- a/llvm/test/MC/RISCV/rv32zacas-valid.s +++ b/llvm/test/MC/RISCV/rv32zacas-valid.s @@ -1,12 +1,12 @@ -# RUN: llvm-mc %s -triple=riscv32 -mattr=+a,+experimental-zacas -riscv-no-aliases -show-encoding \ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+a,+zacas -riscv-no-aliases -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+experimental-zacas -riscv-no-aliases -show-encoding \ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+zacas -riscv-no-aliases -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+a,+experimental-zacas < %s \ -# RUN: | llvm-objdump --mattr=+a,+experimental-zacas -M no-aliases -d -r - \ +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+a,+zacas < %s \ +# RUN: | llvm-objdump --mattr=+a,+zacas -M no-aliases -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+a,+experimental-zacas < %s \ -# RUN: | llvm-objdump --mattr=+a,+experimental-zacas -M no-aliases -d -r - \ +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+a,+zacas < %s \ +# RUN: | llvm-objdump --mattr=+a,+zacas -M no-aliases -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s # RUN: not llvm-mc -triple=riscv32 -mattr=+a -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/RISCV/rv64zacas-invalid.s b/llvm/test/MC/RISCV/rv64zacas-invalid.s index 854e6fe308b0a7..e75ff9e9f94ca0 100644 --- a/llvm/test/MC/RISCV/rv64zacas-invalid.s +++ b/llvm/test/MC/RISCV/rv64zacas-invalid.s @@ -1,4 +1,4 @@ -# RUN: not llvm-mc -triple riscv64 -mattr=+a,+experimental-zacas < %s 2>&1 | FileCheck %s +# RUN: not llvm-mc -triple riscv64 -mattr=+a,+zacas < %s 2>&1 | FileCheck %s # Non-zero offsets not supported for the third operand (rs1). amocas.w a1, a3, 1(a5) # CHECK: :[[@LINE]]:18: error: optional integer offset must be 0 diff --git a/llvm/test/MC/RISCV/rv64zacas-valid.s b/llvm/test/MC/RISCV/rv64zacas-valid.s index d5044a0e0671de..595c70b6e3f5b1 100644 --- a/llvm/test/MC/RISCV/rv64zacas-valid.s +++ b/llvm/test/MC/RISCV/rv64zacas-valid.s @@ -1,7 +1,7 @@ -# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+experimental-zacas -riscv-no-aliases -show-encoding \ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+zacas -riscv-no-aliases -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+a,+experimental-zacas < %s \ -# RUN: | llvm-objdump --mattr=+a,+experimental-zacas -M no-aliases -d -r - \ +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+a,+zacas < %s \ +# RUN: | llvm-objdump --mattr=+a,+zacas -M no-aliases -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s # RUN: not llvm-mc -triple=riscv64 -mattr=+a -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/RISCV/rvzabha-zacas-valid.s b/llvm/test/MC/RISCV/rvzabha-zacas-valid.s index f8aa6867aedc6c..97afb9d6563e56 100644 --- a/llvm/test/MC/RISCV/rvzabha-zacas-valid.s +++ b/llvm/test/MC/RISCV/rvzabha-zacas-valid.s @@ -1,12 +1,12 @@ -# RUN: llvm-mc %s -triple=riscv32 -mattr=+a,+zabha,+experimental-zacas -riscv-no-aliases -show-encoding \ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+a,+zabha,+zacas -riscv-no-aliases -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+zabha,+experimental-zacas -riscv-no-aliases -show-encoding \ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+zabha,+zacas -riscv-no-aliases -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+a,+zabha,+experimental-zacas < %s \ -# RUN: | llvm-objdump --mattr=+a,+zabha,+experimental-zacas -M no-aliases -d -r - \ +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+a,+zabha,+zacas < %s \ +# RUN: | llvm-objdump --mattr=+a,+zabha,+zacas -M no-aliases -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+a,+zabha,+experimental-zacas < %s \ -# RUN: | llvm-objdump --mattr=+a,+zabha,+experimental-zacas -M no-aliases -d -r - \ +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+a,+zabha,+zacas < %s \ +# RUN: | llvm-objdump --mattr=+a,+zabha,+zacas -M no-aliases -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s # RUN: not llvm-mc -triple=riscv32 -mattr=+a,+zabha -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index 6662421eb26d9d..33944b64dc1577 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -980,6 +980,7 @@ R"(All available -march extensions for RISC-V za64rs 1.0 zaamo 1.0 zabha 1.0 + zacas 1.0 zalrsc 1.0 zama16b 1.0 zawrs 1.0 @@ -1116,7 +1117,6 @@ R"(All available -march extensions for RISC-V Experimental extensions zicfilp 1.0 This is a long dummy description zicfiss 1.0 - zacas 1.0 zalasr 0.1 zvbc32e 0.7 zvkgs 0.7 From f586b1e3f42788025aa6f55be70c5e361cc8b529 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 25 Sep 2024 01:27:21 -0400 Subject: [PATCH 14/65] [MLIR] Make `OneShotModuleBufferize` use `OpInterface` (#107295) **Description:** `OneShotModuleBufferize` deals with the bufferization of `FuncOp`, `CallOp` and `ReturnOp` but they are hard-coded. Any custom function-like operations will not be handled. The PR replaces a part of `FuncOp` and `CallOp` with `FunctionOpInterface` and `CallOpInterface` in `OneShotModuleBufferize` so that custom function ops and call ops can be bufferized. **Related Discord Discussion:** [Link](https://discord.com/channels/636084430946959380/642426447167881246/1280556809911799900) --------- Co-authored-by: erick-xanadu <110487834+erick-xanadu@users.noreply.github.com> --- .../IR/BufferizableOpInterface.h | 7 +- .../FuncBufferizableOpInterfaceImpl.h | 12 +- .../IR/BufferizableOpInterface.cpp | 5 +- .../FuncBufferizableOpInterfaceImpl.cpp | 2 +- .../Transforms/OneShotModuleBufferize.cpp | 111 +++++++------ .../Transforms/transform-ops.mlir | 142 +++++++++-------- mlir/test/Dialect/LLVM/transform-e2e.mlir | 22 +-- .../Linalg/matmul-shared-memory-padding.mlir | 52 +++--- .../Linalg/pad-to-specific-memory-space.mlir | 148 +++++++++--------- .../test/Dialect/Vector/transform-vector.mlir | 84 +++++----- mlir/test/Examples/transform/ChH/full.mlir | 11 +- 11 files changed, 315 insertions(+), 281 deletions(-) diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h index aceb9d059b95f3..d19687ec9afee1 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h @@ -11,6 +11,7 @@ #include "mlir/IR/Operation.h" #include "mlir/IR/PatternMatch.h" +#include "mlir/Interfaces/FunctionInterfaces.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/DenseMapInfoVariant.h" #include "llvm/ADT/SetVector.h" @@ -260,9 +261,9 @@ struct BufferizationOptions { using AnalysisStateInitFn = std::function; /// Tensor -> MemRef type converter. /// Parameters: Value, memory space, func op, bufferization options - using FunctionArgTypeConverterFn = - std::function; + using FunctionArgTypeConverterFn = std::function; /// Tensor -> MemRef type converter. /// Parameters: Value, memory space, bufferization options using UnknownTypeConverterFn = std::function equivalentFuncArgs; + DenseMap equivalentFuncArgs; /// A mapping of FuncOp BBArg indices to aliasing ReturnOp OpOperand indices. - DenseMap aliasingReturnVals; + DenseMap aliasingReturnVals; /// A set of all read BlockArguments of FuncOps. - DenseMap readBbArgs; + DenseMap readBbArgs; /// A set of all written-to BlockArguments of FuncOps. - DenseMap writtenBbArgs; + DenseMap writtenBbArgs; /// Keep track of which FuncOps are fully analyzed or currently being /// analyzed. - DenseMap analyzedFuncOps; + DenseMap analyzedFuncOps; /// This function is called right before analyzing the given FuncOp. It /// initializes the data structures for the FuncOp in this state object. - void startFunctionAnalysis(FuncOp funcOp); + void startFunctionAnalysis(FunctionOpInterface funcOp); }; void registerBufferizableOpInterfaceExternalModels(DialectRegistry ®istry); diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp index 85604eef2f2830..92f757111cbaf7 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp @@ -18,6 +18,7 @@ #include "mlir/IR/TypeUtilities.h" #include "mlir/IR/Value.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" +#include "mlir/Interfaces/FunctionInterfaces.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/Support/Debug.h" @@ -314,7 +315,7 @@ namespace { /// Default function arg type converter: Use a fully dynamic layout map. BaseMemRefType defaultFunctionArgTypeConverter(TensorType type, Attribute memorySpace, - func::FuncOp funcOp, + FunctionOpInterface funcOp, const BufferizationOptions &options) { return getMemRefTypeWithFullyDynamicLayout(type, memorySpace); } @@ -361,7 +362,7 @@ BufferizationOptions::dynCastBufferizableOp(Value value) const { void BufferizationOptions::setFunctionBoundaryTypeConversion( LayoutMapOption layoutMapOption) { functionArgTypeConverterFn = [=](TensorType tensorType, Attribute memorySpace, - func::FuncOp funcOp, + FunctionOpInterface funcOp, const BufferizationOptions &options) { if (layoutMapOption == LayoutMapOption::IdentityLayoutMap) return bufferization::getMemRefTypeWithStaticIdentityLayout(tensorType, diff --git a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp index 9fbe574ec392dc..9749a71f3514bc 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp @@ -22,7 +22,7 @@ namespace mlir { namespace bufferization { namespace func_ext { -void FuncAnalysisState::startFunctionAnalysis(FuncOp funcOp) { +void FuncAnalysisState::startFunctionAnalysis(FunctionOpInterface funcOp) { analyzedFuncOps[funcOp] = FuncOpAnalysisState::InProgress; auto createdEquiv = equivalentFuncArgs.try_emplace(funcOp, IndexMapping()); auto createdAliasingResults = diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp index 0a4072605c265f..f934ae88277b63 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp @@ -75,7 +75,7 @@ using namespace mlir::bufferization; using namespace mlir::bufferization::func_ext; /// A mapping of FuncOps to their callers. -using FuncCallerMap = DenseMap>; +using FuncCallerMap = DenseMap>; /// Get or create FuncAnalysisState. static FuncAnalysisState & @@ -88,10 +88,11 @@ getOrCreateFuncAnalysisState(OneShotAnalysisState &state) { /// Return the unique ReturnOp that terminates `funcOp`. /// Return nullptr if there is no such unique ReturnOp. -static func::ReturnOp getAssumedUniqueReturnOp(func::FuncOp funcOp) { - func::ReturnOp returnOp; - for (Block &b : funcOp.getBody()) { - if (auto candidateOp = dyn_cast(b.getTerminator())) { +static Operation *getAssumedUniqueReturnOp(FunctionOpInterface funcOp) { + Operation *returnOp = nullptr; + for (Block &b : funcOp.getFunctionBody()) { + auto candidateOp = b.getTerminator(); + if (candidateOp && candidateOp->hasTrait()) { if (returnOp) return nullptr; returnOp = candidateOp; @@ -126,16 +127,16 @@ static void annotateEquivalentReturnBbArg(OpOperand &returnVal, /// Store function BlockArguments that are equivalent to/aliasing a returned /// value in FuncAnalysisState. static LogicalResult -aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, OneShotAnalysisState &state, +aliasingFuncOpBBArgsAnalysis(FunctionOpInterface funcOp, + OneShotAnalysisState &state, FuncAnalysisState &funcState) { - if (funcOp.getBody().empty()) { + if (funcOp.getFunctionBody().empty()) { // No function body available. Conservatively assume that every tensor // return value may alias with any tensor bbArg. - FunctionType type = funcOp.getFunctionType(); - for (const auto &inputIt : llvm::enumerate(type.getInputs())) { + for (const auto &inputIt : llvm::enumerate(funcOp.getArgumentTypes())) { if (!isa(inputIt.value())) continue; - for (const auto &resultIt : llvm::enumerate(type.getResults())) { + for (const auto &resultIt : llvm::enumerate(funcOp.getResultTypes())) { if (!isa(resultIt.value())) continue; int64_t returnIdx = resultIt.index(); @@ -147,7 +148,7 @@ aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, OneShotAnalysisState &state, } // Support only single return-terminated block in the function. - func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); + Operation *returnOp = getAssumedUniqueReturnOp(funcOp); assert(returnOp && "expected func with single return op"); for (OpOperand &returnVal : returnOp->getOpOperands()) @@ -168,8 +169,8 @@ aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, OneShotAnalysisState &state, return success(); } -static void annotateFuncArgAccess(func::FuncOp funcOp, int64_t idx, bool isRead, - bool isWritten) { +static void annotateFuncArgAccess(FunctionOpInterface funcOp, int64_t idx, + bool isRead, bool isWritten) { OpBuilder b(funcOp.getContext()); Attribute accessType; if (isRead && isWritten) { @@ -189,12 +190,12 @@ static void annotateFuncArgAccess(func::FuncOp funcOp, int64_t idx, bool isRead, /// function with unknown ops, we conservatively assume that such ops bufferize /// to a read + write. static LogicalResult -funcOpBbArgReadWriteAnalysis(FuncOp funcOp, OneShotAnalysisState &state, +funcOpBbArgReadWriteAnalysis(FunctionOpInterface funcOp, + OneShotAnalysisState &state, FuncAnalysisState &funcState) { - for (int64_t idx = 0, e = funcOp.getFunctionType().getNumInputs(); idx < e; - ++idx) { + for (int64_t idx = 0, e = funcOp.getNumArguments(); idx < e; ++idx) { // Skip non-tensor arguments. - if (!isa(funcOp.getFunctionType().getInput(idx))) + if (!isa(funcOp.getArgumentTypes()[idx])) continue; bool isRead; bool isWritten; @@ -204,7 +205,7 @@ funcOpBbArgReadWriteAnalysis(FuncOp funcOp, OneShotAnalysisState &state, StringRef str = accessAttr.getValue(); isRead = str == "read" || str == "read-write"; isWritten = str == "write" || str == "read-write"; - } else if (funcOp.getBody().empty()) { + } else if (funcOp.getFunctionBody().empty()) { // If the function has no body, conservatively assume that all args are // read + written. isRead = true; @@ -230,20 +231,19 @@ funcOpBbArgReadWriteAnalysis(FuncOp funcOp, OneShotAnalysisState &state, /// Remove bufferization attributes on FuncOp arguments. static void removeBufferizationAttributes(BlockArgument bbArg) { - auto funcOp = cast(bbArg.getOwner()->getParentOp()); + auto funcOp = cast(bbArg.getOwner()->getParentOp()); funcOp.removeArgAttr(bbArg.getArgNumber(), BufferizationDialect::kBufferLayoutAttrName); funcOp.removeArgAttr(bbArg.getArgNumber(), BufferizationDialect::kWritableAttrName); } -/// Return the func::FuncOp called by `callOp`. -static func::FuncOp getCalledFunction(func::CallOp callOp) { +static FunctionOpInterface getCalledFunction(CallOpInterface callOp) { SymbolRefAttr sym = llvm::dyn_cast_if_present(callOp.getCallableForCallee()); if (!sym) return nullptr; - return dyn_cast_or_null( + return dyn_cast_or_null( SymbolTable::lookupNearestSymbolFrom(callOp, sym)); } @@ -251,12 +251,12 @@ static func::FuncOp getCalledFunction(func::CallOp callOp) { /// Note: This only adds new equivalence info if the called function was already /// analyzed. // TODO: This does not handle cyclic function call graphs etc. -static void equivalenceAnalysis(func::FuncOp funcOp, +static void equivalenceAnalysis(FunctionOpInterface funcOp, OneShotAnalysisState &state, FuncAnalysisState &funcState) { - funcOp->walk([&](func::CallOp callOp) { - func::FuncOp calledFunction = getCalledFunction(callOp); - assert(calledFunction && "could not retrieved called func::FuncOp"); + funcOp->walk([&](CallOpInterface callOp) { + FunctionOpInterface calledFunction = getCalledFunction(callOp); + assert(calledFunction && "could not retrieved called FunctionOpInterface"); // No equivalence info available for the called function. if (!funcState.equivalentFuncArgs.count(calledFunction)) @@ -267,7 +267,7 @@ static void equivalenceAnalysis(func::FuncOp funcOp, int64_t bbargIdx = it.second; if (!state.isInPlace(callOp->getOpOperand(bbargIdx))) continue; - Value returnVal = callOp.getResult(returnIdx); + Value returnVal = callOp->getResult(returnIdx); Value argVal = callOp->getOperand(bbargIdx); state.unionEquivalenceClasses(returnVal, argVal); } @@ -277,11 +277,9 @@ static void equivalenceAnalysis(func::FuncOp funcOp, } /// Return "true" if the given function signature has tensor semantics. -static bool hasTensorSignature(func::FuncOp funcOp) { - return llvm::any_of(funcOp.getFunctionType().getInputs(), - llvm::IsaPred) || - llvm::any_of(funcOp.getFunctionType().getResults(), - llvm::IsaPred); +static bool hasTensorSignature(FunctionOpInterface funcOp) { + return llvm::any_of(funcOp.getArgumentTypes(), llvm::IsaPred) || + llvm::any_of(funcOp.getResultTypes(), llvm::IsaPred); } /// Store all functions of the `moduleOp` in `orderedFuncOps`, sorted by @@ -291,16 +289,16 @@ static bool hasTensorSignature(func::FuncOp funcOp) { /// retrieve the called FuncOp from any func::CallOp. static LogicalResult getFuncOpsOrderedByCalls(ModuleOp moduleOp, - SmallVectorImpl &orderedFuncOps, + SmallVectorImpl &orderedFuncOps, FuncCallerMap &callerMap) { // For each FuncOp, the set of functions called by it (i.e. the union of // symbols of all nested func::CallOp). - DenseMap> calledBy; + DenseMap> calledBy; // For each FuncOp, the number of func::CallOp it contains. - DenseMap numberCallOpsContainedInFuncOp; - WalkResult res = moduleOp.walk([&](func::FuncOp funcOp) -> WalkResult { - if (!funcOp.getBody().empty()) { - func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); + DenseMap numberCallOpsContainedInFuncOp; + WalkResult res = moduleOp.walk([&](FunctionOpInterface funcOp) -> WalkResult { + if (!funcOp.getFunctionBody().empty()) { + Operation *returnOp = getAssumedUniqueReturnOp(funcOp); if (!returnOp) return funcOp->emitError() << "cannot bufferize a FuncOp with tensors and " @@ -309,9 +307,10 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, // Collect function calls and populate the caller map. numberCallOpsContainedInFuncOp[funcOp] = 0; - return funcOp.walk([&](func::CallOp callOp) -> WalkResult { - func::FuncOp calledFunction = getCalledFunction(callOp); - assert(calledFunction && "could not retrieved called func::FuncOp"); + return funcOp.walk([&](CallOpInterface callOp) -> WalkResult { + FunctionOpInterface calledFunction = getCalledFunction(callOp); + assert(calledFunction && + "could not retrieved called FunctionOpInterface"); // If the called function does not have any tensors in its signature, then // it is not necessary to bufferize the callee before the caller. if (!hasTensorSignature(calledFunction)) @@ -349,11 +348,11 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, /// most generic layout map as function return types. After bufferizing the /// entire function body, a more concise memref type can potentially be used for /// the return type of the function. -static void foldMemRefCasts(func::FuncOp funcOp) { - if (funcOp.getBody().empty()) +static void foldMemRefCasts(FunctionOpInterface funcOp) { + if (funcOp.getFunctionBody().empty()) return; - func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); + Operation *returnOp = getAssumedUniqueReturnOp(funcOp); SmallVector resultTypes; for (OpOperand &operand : returnOp->getOpOperands()) { @@ -365,8 +364,8 @@ static void foldMemRefCasts(func::FuncOp funcOp) { } } - auto newFuncType = FunctionType::get( - funcOp.getContext(), funcOp.getFunctionType().getInputs(), resultTypes); + auto newFuncType = FunctionType::get(funcOp.getContext(), + funcOp.getArgumentTypes(), resultTypes); funcOp.setType(newFuncType); } @@ -379,7 +378,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, FuncAnalysisState &funcState = getOrCreateFuncAnalysisState(state); // A list of functions in the order in which they are analyzed + bufferized. - SmallVector orderedFuncOps; + SmallVector orderedFuncOps; // A mapping of FuncOps to their callers. FuncCallerMap callerMap; @@ -388,7 +387,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, return failure(); // Analyze ops. - for (func::FuncOp funcOp : orderedFuncOps) { + for (FunctionOpInterface funcOp : orderedFuncOps) { if (!state.getOptions().isOpAllowed(funcOp)) continue; @@ -416,7 +415,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, void mlir::bufferization::removeBufferizationAttributesInModule( ModuleOp moduleOp) { - moduleOp.walk([&](func::FuncOp op) { + moduleOp.walk([&](FunctionOpInterface op) { for (BlockArgument bbArg : op.getArguments()) removeBufferizationAttributes(bbArg); }); @@ -430,7 +429,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( IRRewriter rewriter(moduleOp.getContext()); // A list of functions in the order in which they are analyzed + bufferized. - SmallVector orderedFuncOps; + SmallVector orderedFuncOps; // A mapping of FuncOps to their callers. FuncCallerMap callerMap; @@ -439,11 +438,11 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( return failure(); // Bufferize functions. - for (func::FuncOp funcOp : orderedFuncOps) { + for (FunctionOpInterface funcOp : orderedFuncOps) { // Note: It would be good to apply cleanups here but we cannot as aliasInfo // would be invalidated. - if (llvm::is_contained(options.noAnalysisFuncFilter, funcOp.getSymName())) { + if (llvm::is_contained(options.noAnalysisFuncFilter, funcOp.getName())) { // This function was not analyzed and RaW conflicts were not resolved. // Buffer copies must be inserted before every write. OneShotBufferizationOptions updatedOptions = options; @@ -463,7 +462,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( // Bufferize all other ops. for (Operation &op : llvm::make_early_inc_range(moduleOp.getOps())) { // Functions were already bufferized. - if (isa(&op)) + if (isa(&op)) continue; if (failed(bufferizeOp(&op, options, statistics))) return failure(); @@ -490,12 +489,12 @@ LogicalResult mlir::bufferization::runOneShotModuleBufferize( // FuncOps whose names are specified in options.noAnalysisFuncFilter will // not be analyzed. Ops in these FuncOps will not be analyzed as well. OpFilter::Entry::FilterFn analysisFilterFn = [=](Operation *op) { - auto func = dyn_cast(op); + auto func = dyn_cast(op); if (!func) - func = op->getParentOfType(); + func = op->getParentOfType(); if (func) return llvm::is_contained(options.noAnalysisFuncFilter, - func.getSymName()); + func.getName()); return false; }; OneShotBufferizationOptions updatedOptions(options); diff --git a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir index 3c50a9e72d9d9b..588aa8a85a84e6 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --transform-interpreter %s -split-input-file -verify-diagnostics | FileCheck %s +// RUN: mlir-opt --transform-interpreter="debug-payload-root-tag=payload" %s -split-input-file -verify-diagnostics | FileCheck %s // Test One-Shot Bufferize. @@ -12,19 +12,21 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func @test_function( // CHECK-SAME: %[[A:.*]]: tensor -func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { - %c0 = arith.constant 0 : index +module @payload attributes { transform.target_tag = "payload" } { + func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { + %c0 = arith.constant 0 : index - // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] - // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] - // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) - // CHECK: memref.copy %[[A_memref]], %[[alloc]] - // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] - // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] - %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor + // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] + // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] + // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) + // CHECK: memref.copy %[[A_memref]], %[[alloc]] + // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] + // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] + %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor - // CHECK: return %[[res_tensor]] - return %0 : tensor + // CHECK: return %[[res_tensor]] + return %0 : tensor + } } // ----- @@ -42,19 +44,21 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func @test_function( // CHECK-SAME: %[[A:.*]]: tensor // CHECK-NOT: memref.copy -func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { - %c0 = arith.constant 0 : index +module @payload attributes { transform.target_tag = "payload" } { + func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { + %c0 = arith.constant 0 : index - // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] - // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] - // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) - // CHECK: linalg.copy ins(%[[A_memref]] : memref<{{.*}}>) outs(%[[alloc]] - // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] - // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] - %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor + // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] + // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] + // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) + // CHECK: linalg.copy ins(%[[A_memref]] : memref<{{.*}}>) outs(%[[alloc]] + // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] + // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] + %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor - // CHECK: return %[[res_tensor]] - return %0 : tensor + // CHECK: return %[[res_tensor]] + return %0 : tensor + } } // ----- @@ -72,13 +76,15 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func @test_function_analysis( // CHECK-SAME: %[[A:.*]]: tensor -func.func @test_function_analysis(%A : tensor, %v : vector<4xf32>) -> (tensor) { - %c0 = arith.constant 0 : index - // CHECK: vector.transfer_write - // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]} - // CHECK-SAME: tensor - %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor - return %0 : tensor +module @payload attributes { transform.target_tag = "payload" } { + func.func @test_function_analysis(%A : tensor, %v : vector<4xf32>) -> (tensor) { + %c0 = arith.constant 0 : index + // CHECK: vector.transfer_write + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]} + // CHECK-SAME: tensor + %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor + return %0 : tensor + } } // ----- @@ -95,10 +101,12 @@ module attributes {transform.with_named_sequence} { } } -func.func @test_unknown_op_failure() -> (tensor) { - // expected-error @+1 {{op was not bufferized}} - %0 = "test.dummy_op"() : () -> (tensor) - return %0 : tensor +module @payload attributes { transform.target_tag = "payload" } { + func.func @test_unknown_op_failure() -> (tensor) { + // expected-error @+1 {{op was not bufferized}} + %0 = "test.dummy_op"() : () -> (tensor) + return %0 : tensor + } } // ----- @@ -111,7 +119,7 @@ module attributes {transform.with_named_sequence} { } } -module { +module @payload attributes { transform.target_tag = "payload" } { // CHECK-LABEL: func @test_function( // CHECK-SAME: %[[A:.*]]: tensor func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { @@ -146,11 +154,13 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[A:.*]]: memref<12x9xf32>, // CHECK-SAME: %[[B:.*]]: memref<9x6xf32>, // CHECK-SAME: %[[C:.*]]: memref<12x6xf32>) -> memref<12x6xf32> { -func.func @matmul(%A: tensor<12x9xf32>, %B: tensor<9x6xf32>, %C: tensor<12x6xf32>) -> tensor<12x6xf32> { - // CHECK: linalg.matmul ins(%[[A]], %[[B]] : memref<12x9xf32>, memref<9x6xf32>) outs(%[[C]] : memref<12x6xf32>) - %D = linalg.matmul ins(%A, %B: tensor<12x9xf32>, tensor<9x6xf32>) outs(%C: tensor<12x6xf32>) -> tensor<12x6xf32> - // CHECK: return %[[C]] : memref<12x6xf32> - return %D : tensor<12x6xf32> +module @payload attributes { transform.target_tag = "payload" } { + func.func @matmul(%A: tensor<12x9xf32>, %B: tensor<9x6xf32>, %C: tensor<12x6xf32>) -> tensor<12x6xf32> { + // CHECK: linalg.matmul ins(%[[A]], %[[B]] : memref<12x9xf32>, memref<9x6xf32>) outs(%[[C]] : memref<12x6xf32>) + %D = linalg.matmul ins(%A, %B: tensor<12x9xf32>, tensor<9x6xf32>) outs(%C: tensor<12x6xf32>) -> tensor<12x6xf32> + // CHECK: return %[[C]] : memref<12x6xf32> + return %D : tensor<12x6xf32> + } } // ----- @@ -165,10 +175,12 @@ module attributes {transform.with_named_sequence} { } // Expect `bufferization.empty_tensor_to_alloc_tensor` to replace the tensor.empty. -func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> { - // CHECK: bufferization.alloc_tensor - %0 = tensor.empty() : tensor<2x2xf32> - return %0 : tensor<2x2xf32> +module @payload attributes { transform.target_tag = "payload" } { + func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> { + // CHECK: bufferization.alloc_tensor + %0 = tensor.empty() : tensor<2x2xf32> + return %0 : tensor<2x2xf32> + } } // ----- @@ -185,13 +197,15 @@ module attributes {transform.with_named_sequence} { // CHECK: tensor.extract_slice // CHECK: linalg.fill // CHECK: tensor.insert_slice -func.func @empty_tensor_elimination( - %t: tensor<10xf32>, %f: f32) -> tensor<10xf32> { - %0 = tensor.empty() : tensor<5xf32> - %1 = linalg.fill ins(%f : f32) outs(%0 : tensor<5xf32>) -> tensor<5xf32> - %2 = tensor.insert_slice %1 into %t [1][5][1] - : tensor<5xf32> into tensor<10xf32> - return %2 : tensor<10xf32> +module @payload attributes { transform.target_tag = "payload" } { + func.func @empty_tensor_elimination( + %t: tensor<10xf32>, %f: f32) -> tensor<10xf32> { + %0 = tensor.empty() : tensor<5xf32> + %1 = linalg.fill ins(%f : f32) outs(%0 : tensor<5xf32>) -> tensor<5xf32> + %2 = tensor.insert_slice %1 into %t [1][5][1] + : tensor<5xf32> into tensor<10xf32> + return %2 : tensor<10xf32> + } } // ----- @@ -208,12 +222,14 @@ module attributes {transform.with_named_sequence} { // CHECK: memref.alloca // CHECK: scf.for // CHECK: memref.store -func.func @buffer_loop_hoisting(%lb: index, %ub: index, %step: index, %f: f32, %pos: index) { - scf.for %iv = %lb to %ub step %step { - %0 = memref.alloca() : memref<5xf32> - memref.store %f, %0[%pos] : memref<5xf32> +module @payload attributes { transform.target_tag = "payload" } { + func.func @buffer_loop_hoisting(%lb: index, %ub: index, %step: index, %f: f32, %pos: index) { + scf.for %iv = %lb to %ub step %step { + %0 = memref.alloca() : memref<5xf32> + memref.store %f, %0[%pos] : memref<5xf32> + } + return } - return } // ----- @@ -231,10 +247,12 @@ module attributes {transform.with_named_sequence} { // Expect `bufferization.bufferize_to_allocation` to create an alloc. // CHECK-LABEL: func.func @empty_to_tensor_alloc() -func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> { - // CHECK-NEXT: %[[alloca:.*]] = memref.alloca() : memref<2x2xf32> - // CHECK-NEXT: %[[tensor:.*]] = bufferization.to_tensor %[[alloca]] restrict writable : memref<2x2xf32> - // CHECK-NEXT: return %[[tensor]] : tensor<2x2xf32> - %0 = bufferization.alloc_tensor() : tensor<2x2xf32> - return %0 : tensor<2x2xf32> +module @payload attributes { transform.target_tag = "payload" } { + func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> { + // CHECK-NEXT: %[[alloca:.*]] = memref.alloca() : memref<2x2xf32> + // CHECK-NEXT: %[[tensor:.*]] = bufferization.to_tensor %[[alloca]] restrict writable : memref<2x2xf32> + // CHECK-NEXT: return %[[tensor]] : tensor<2x2xf32> + %0 = bufferization.alloc_tensor() : tensor<2x2xf32> + return %0 : tensor<2x2xf32> + } } diff --git a/mlir/test/Dialect/LLVM/transform-e2e.mlir b/mlir/test/Dialect/LLVM/transform-e2e.mlir index c00b47fb936e97..3e637a3ec49a42 100644 --- a/mlir/test/Dialect/LLVM/transform-e2e.mlir +++ b/mlir/test/Dialect/LLVM/transform-e2e.mlir @@ -1,15 +1,17 @@ -// RUN: mlir-opt %s --transform-interpreter -test-transform-dialect-erase-schedule --test-lower-to-llvm --split-input-file | FileCheck %s +// RUN: mlir-opt %s --transform-interpreter="debug-payload-root-tag=payload" -test-transform-dialect-erase-schedule --test-lower-to-llvm --split-input-file | FileCheck %s // CHECK-LABEL: llvm.func @matmul_tensors -func.func @matmul_tensors( - %arg0: tensor<2x4xf32>, %arg1: tensor<4x6xf32>, %arg2: tensor<2x6xf32>) - -> tensor<2x6xf32> { -// CHECK-NOT: linalg -// CHECK: llvm.intr.fmuladd{{.*}} - %0 = linalg.matmul ins(%arg0, %arg1: tensor<2x4xf32>, tensor<4x6xf32>) - outs(%arg2: tensor<2x6xf32>) - -> tensor<2x6xf32> - return %0 : tensor<2x6xf32> +module @payload attributes { transform.target_tag = "payload" } { + func.func @matmul_tensors( + %arg0: tensor<2x4xf32>, %arg1: tensor<4x6xf32>, %arg2: tensor<2x6xf32>) + -> tensor<2x6xf32> { + // CHECK-NOT: linalg + // CHECK: llvm.intr.fmuladd{{.*}} + %0 = linalg.matmul ins(%arg0, %arg1: tensor<2x4xf32>, tensor<4x6xf32>) + outs(%arg2: tensor<2x6xf32>) + -> tensor<2x6xf32> + return %0 : tensor<2x6xf32> + } } module attributes {transform.with_named_sequence} { diff --git a/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir b/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir index 3f8d2ea06641e1..9c223737750a9b 100644 --- a/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir +++ b/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --split-input-file --transform-interpreter %s | FileCheck %s +// RUN: mlir-opt --split-input-file --transform-interpreter="debug-payload-root-tag=payload" %s | FileCheck %s // CHECK-LABEL: func @matmul_divisible // CHECK: scf.forall @@ -24,19 +24,21 @@ // CHECK: scf.forall // CHECK: vector.transfer_read // CHECK: vector.transfer_write -func.func @matmul_divisible(%A: tensor<1024x1024xf32>, - %B: tensor<1024x1024xf32>, - %C: tensor<1024x1024xf32>) - -> tensor<1024x1024xf32> -{ - %cst = arith.constant 0.000000e+00 : f32 - %0 = linalg.fill ins(%cst : f32) - outs(%C : tensor<1024x1024xf32>) +module @payload attributes { transform.target_tag = "payload" } { + func.func @matmul_divisible(%A: tensor<1024x1024xf32>, + %B: tensor<1024x1024xf32>, + %C: tensor<1024x1024xf32>) -> tensor<1024x1024xf32> - %1 = linalg.matmul ins(%A, %B : tensor<1024x1024xf32>, tensor<1024x1024xf32>) - outs(%0 : tensor<1024x1024xf32>) - -> tensor<1024x1024xf32> - return %1 : tensor<1024x1024xf32> + { + %cst = arith.constant 0.000000e+00 : f32 + %0 = linalg.fill ins(%cst : f32) + outs(%C : tensor<1024x1024xf32>) + -> tensor<1024x1024xf32> + %1 = linalg.matmul ins(%A, %B : tensor<1024x1024xf32>, tensor<1024x1024xf32>) + outs(%0 : tensor<1024x1024xf32>) + -> tensor<1024x1024xf32> + return %1 : tensor<1024x1024xf32> + } } module attributes {transform.with_named_sequence} { @@ -143,19 +145,21 @@ module attributes {transform.with_named_sequence} { // CHECK: linalg.matmul // CHECK: vector.transfer_read // CHECK: vector.transfer_write +module @payload attributes { transform.target_tag = "payload" } { func.func @matmul_not_divisible(%A: tensor<1023x1023xf32>, - %B: tensor<1023x1023xf32>, - %C: tensor<1023x1023xf32>) - -> tensor<1023x1023xf32> -{ - %cst = arith.constant 0.000000e+00 : f32 - %0 = linalg.fill ins(%cst : f32) - outs(%C : tensor<1023x1023xf32>) + %B: tensor<1023x1023xf32>, + %C: tensor<1023x1023xf32>) -> tensor<1023x1023xf32> - %1 = linalg.matmul ins(%A, %B : tensor<1023x1023xf32>, tensor<1023x1023xf32>) - outs(%0 : tensor<1023x1023xf32>) - -> tensor<1023x1023xf32> - return %1 : tensor<1023x1023xf32> + { + %cst = arith.constant 0.000000e+00 : f32 + %0 = linalg.fill ins(%cst : f32) + outs(%C : tensor<1023x1023xf32>) + -> tensor<1023x1023xf32> + %1 = linalg.matmul ins(%A, %B : tensor<1023x1023xf32>, tensor<1023x1023xf32>) + outs(%0 : tensor<1023x1023xf32>) + -> tensor<1023x1023xf32> + return %1 : tensor<1023x1023xf32> + } } module attributes {transform.with_named_sequence} { diff --git a/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir b/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir index f2e9e839b7c46b..5e5657980ba120 100644 --- a/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir +++ b/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt --transform-interpreter -cse -canonicalize -split-input-file -verify-diagnostics %s | FileCheck %s +// RUN: mlir-opt --transform-interpreter="debug-payload-root-tag=payload" -cse -canonicalize -split-input-file -verify-diagnostics %s | FileCheck %s #map = affine_map<()[s0] -> (-s0 + 12, 7)> @@ -7,43 +7,45 @@ // CHECK-SAME: %[[arg0:.*]]: memref<24x12xf32, strided<[?, ?], offset: ?>>, // CHECK-SAME: %[[arg1:.*]]: memref<12x25xf32, strided<[?, ?], offset: ?>>, // CHECK-SAME: %[[arg2:.*]]: memref<24x25xf32, strided<[?, ?], offset: ?>>, -func.func @pad_to_memory_space(%arg0: tensor<24x12xf32>, - %arg1: tensor<12x25xf32>, - %arg2: tensor<24x25xf32>, - %iv0 : index, %iv1 : index, - %iv2 : index) -> tensor<24x25xf32> { - %0 = affine.min #map()[%iv2] - - // CHECK: %[[s0:.*]] = memref.subview %[[arg0]] - %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> - // CHECK: %[[s1:.*]] = memref.subview %[[arg1]] - %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor - // CHECK: %[[s2:.*]] = memref.subview %[[arg2]] - %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> - - // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3> - // CHECK: linalg.fill {{.*}} outs(%[[alloc0]] - // CHECK: %[[alloc0_view:.*]] = memref.subview %[[alloc0]][0, 0] [4, %{{.*}}] [1, 1] - // CHECK: memref.copy %[[s0]], %[[alloc0_view]] - - // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3> - // CHECK: linalg.fill {{.*}} outs(%[[alloc1]] - // CHECK: %[[alloc1_view:.*]] = memref.subview %[[alloc1]][0, 0] [%{{.*}}, 5] [1, 1] - // CHECK: memref.copy %[[s1]], %[[alloc1_view]] - - // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3> - // CHECK-NOT: linalg.fill {{.*}} outs(%[[alloc2]] - // No subview because there is 0 padding - // CHECK: memref.copy %[[s2]], %[[alloc2]] - - // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}}) - // Copy back result. - // CHECK: memref.copy %[[alloc2]], %[[s2]] - %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> - - // insert_slice bufferizes to a no-op. - %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> - func.return %5 : tensor<24x25xf32> +module @payload attributes { transform.target_tag = "payload" } { + func.func @pad_to_memory_space(%arg0: tensor<24x12xf32>, + %arg1: tensor<12x25xf32>, + %arg2: tensor<24x25xf32>, + %iv0 : index, %iv1 : index, + %iv2 : index) -> tensor<24x25xf32> { + %0 = affine.min #map()[%iv2] + + // CHECK: %[[s0:.*]] = memref.subview %[[arg0]] + %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> + // CHECK: %[[s1:.*]] = memref.subview %[[arg1]] + %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor + // CHECK: %[[s2:.*]] = memref.subview %[[arg2]] + %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> + + // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3> + // CHECK: linalg.fill {{.*}} outs(%[[alloc0]] + // CHECK: %[[alloc0_view:.*]] = memref.subview %[[alloc0]][0, 0] [4, %{{.*}}] [1, 1] + // CHECK: memref.copy %[[s0]], %[[alloc0_view]] + + // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3> + // CHECK: linalg.fill {{.*}} outs(%[[alloc1]] + // CHECK: %[[alloc1_view:.*]] = memref.subview %[[alloc1]][0, 0] [%{{.*}}, 5] [1, 1] + // CHECK: memref.copy %[[s1]], %[[alloc1_view]] + + // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3> + // CHECK-NOT: linalg.fill {{.*}} outs(%[[alloc2]] + // No subview because there is 0 padding + // CHECK: memref.copy %[[s2]], %[[alloc2]] + + // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}}) + // Copy back result. + // CHECK: memref.copy %[[alloc2]], %[[s2]] + %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> + + // insert_slice bufferizes to a no-op. + %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> + func.return %5 : tensor<24x25xf32> + } } module attributes {transform.with_named_sequence} { @@ -69,40 +71,42 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[arg0:.*]]: memref<24x12xf32, strided<[?, ?], offset: ?>>, // CHECK-SAME: %[[arg1:.*]]: memref<12x25xf32, strided<[?, ?], offset: ?>>, // CHECK-SAME: %[[arg2:.*]]: memref<24x25xf32, strided<[?, ?], offset: ?>>, -func.func @vectorize_and_bufferize_pad(%arg0: tensor<24x12xf32>, - %arg1: tensor<12x25xf32>, - %arg2: tensor<24x25xf32>, - %iv0 : index, %iv1 : index, - %iv2 : index) -> tensor<24x25xf32> { - %0 = affine.min #map()[%iv2] - - // CHECK: %[[s0:.*]] = memref.subview %[[arg0]] - %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> - // CHECK: %[[s1:.*]] = memref.subview %[[arg1]] - %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor - // CHECK: %[[s2:.*]] = memref.subview %[[arg2]] - %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> - - // CHECK: %[[v0:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s0]] - // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3> - // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v0]], %[[alloc0]] - - // CHECK: %[[v1:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s1]] - // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3> - // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v1]], %[[alloc1]] - - // CHECK: %[[v2:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s2]] - // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3> - // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v2]], %[[alloc0]] - - // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}}) - // Copy back result. - // CHECK: memref.copy %[[alloc2]], %[[s2]] - %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> - - // insert_slice bufferizes to a no-op. - %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> - func.return %5 : tensor<24x25xf32> +module @payload attributes { transform.target_tag = "payload" } { + func.func @vectorize_and_bufferize_pad(%arg0: tensor<24x12xf32>, + %arg1: tensor<12x25xf32>, + %arg2: tensor<24x25xf32>, + %iv0 : index, %iv1 : index, + %iv2 : index) -> tensor<24x25xf32> { + %0 = affine.min #map()[%iv2] + + // CHECK: %[[s0:.*]] = memref.subview %[[arg0]] + %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> + // CHECK: %[[s1:.*]] = memref.subview %[[arg1]] + %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor + // CHECK: %[[s2:.*]] = memref.subview %[[arg2]] + %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> + + // CHECK: %[[v0:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s0]] + // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3> + // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v0]], %[[alloc0]] + + // CHECK: %[[v1:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s1]] + // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3> + // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v1]], %[[alloc1]] + + // CHECK: %[[v2:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s2]] + // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3> + // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v2]], %[[alloc0]] + + // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}}) + // Copy back result. + // CHECK: memref.copy %[[alloc2]], %[[s2]] + %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> + + // insert_slice bufferizes to a no-op. + %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> + func.return %5 : tensor<24x25xf32> + } } module attributes {transform.with_named_sequence} { diff --git a/mlir/test/Dialect/Vector/transform-vector.mlir b/mlir/test/Dialect/Vector/transform-vector.mlir index 4b38db79bff3e1..0439844dc66cad 100644 --- a/mlir/test/Dialect/Vector/transform-vector.mlir +++ b/mlir/test/Dialect/Vector/transform-vector.mlir @@ -1,16 +1,18 @@ -// RUN: mlir-opt %s --transform-interpreter --split-input-file | FileCheck %s +// RUN: mlir-opt --transform-interpreter="debug-payload-root-tag=payload" %s --split-input-file | FileCheck %s // CHECK-LABEL: func @matmul_tensors -func.func @matmul_tensors( - %arg0: tensor<8x16xf32>, %arg1: tensor<16x32xf32>, %arg2: tensor<8x32xf32>) - -> tensor<8x32xf32> { -// CHECK-NOT: linalg -// CHECK: vector.extract {{.*}} : vector<4xf32> from vector<8x4xf32> -// CHECK: vector.store {{.*}} : memref<8x32xf32>, vector<4xf32> - %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x16xf32>, tensor<16x32xf32>) - outs(%arg2: tensor<8x32xf32>) - -> tensor<8x32xf32> - return %0 : tensor<8x32xf32> +module @payload attributes { transform.target_tag = "payload" } { + func.func @matmul_tensors( + %arg0: tensor<8x16xf32>, %arg1: tensor<16x32xf32>, %arg2: tensor<8x32xf32>) + -> tensor<8x32xf32> { + // CHECK-NOT: linalg + // CHECK: vector.extract {{.*}} : vector<4xf32> from vector<8x4xf32> + // CHECK: vector.store {{.*}} : memref<8x32xf32>, vector<4xf32> + %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x16xf32>, tensor<16x32xf32>) + outs(%arg2: tensor<8x32xf32>) + -> tensor<8x32xf32> + return %0 : tensor<8x32xf32> + } } module attributes {transform.with_named_sequence} { @@ -76,11 +78,13 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]] : vector<64x64xf16>, vector<64x64xf16> into vector<64x64xf32> // CHECK-NEXT: return %[[R]] : vector<64x64xf32> -func.func @fold_arith_extf_into_contract(%arg0: vector<64x64xf16>, %arg1: vector<64x64xf16>, %arg2: vector<64x64xf32>) -> vector<64x64xf32> { - %lhs_f32 = arith.extf %arg0 : vector<64x64xf16> to vector<64x64xf32> - %rhs_f32 = arith.extf %arg1 : vector<64x64xf16> to vector<64x64xf32> - %result = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %lhs_f32, %rhs_f32, %arg2 : vector<64x64xf32>, vector<64x64xf32> into vector<64x64xf32> - return %result : vector<64x64xf32> +module @payload attributes { transform.target_tag = "payload" } { + func.func @fold_arith_extf_into_contract(%arg0: vector<64x64xf16>, %arg1: vector<64x64xf16>, %arg2: vector<64x64xf32>) -> vector<64x64xf32> { + %lhs_f32 = arith.extf %arg0 : vector<64x64xf16> to vector<64x64xf32> + %rhs_f32 = arith.extf %arg1 : vector<64x64xf16> to vector<64x64xf32> + %result = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %lhs_f32, %rhs_f32, %arg2 : vector<64x64xf32>, vector<64x64xf32> into vector<64x64xf32> + return %result : vector<64x64xf32> + } } module attributes {transform.with_named_sequence} { @@ -95,30 +99,32 @@ module attributes {transform.with_named_sequence} { // ----- -// CHECK-LABEL: func.func @arith_to_outerproduct_scalable_i32 -// CHECK-SAME: %[[LHS:.*]]: vector<[4]xi32>, -// CHECK-SAME: %[[RHS:.*]]: vector<[4]xi32>) -> vector<[4]x[4]xi32> { -// CHECK: %[[RES:.*]] = vector.outerproduct %[[LHS]], %[[RHS]] : vector<[4]xi32>, vector<[4]xi32> -// CHECK: return %[[RES]] : vector<[4]x[4]xi32> -func.func @arith_to_outerproduct_scalable_i32(%lhs: vector<[4]xi32>, %rhs: vector<[4]xi32>) -> vector<[4]x[4]xi32> { - %lhsBcast = vector.broadcast %lhs : vector<[4]xi32> to vector<[4]x[4]xi32> - %lhsT = vector.transpose %lhsBcast, [1, 0] : vector<[4]x[4]xi32> to vector<[4]x[4]xi32> - %rhsBcast = vector.broadcast %rhs : vector<[4]xi32> to vector<[4]x[4]xi32> - %mul = arith.muli %lhsT, %rhsBcast : vector<[4]x[4]xi32> - return %mul: vector<[4]x[4]xi32> -} +module @payload attributes { transform.target_tag = "payload" } { + // CHECK-LABEL: func.func @arith_to_outerproduct_scalable_i32 + // CHECK-SAME: %[[LHS:.*]]: vector<[4]xi32>, + // CHECK-SAME: %[[RHS:.*]]: vector<[4]xi32>) -> vector<[4]x[4]xi32> { + // CHECK: %[[RES:.*]] = vector.outerproduct %[[LHS]], %[[RHS]] : vector<[4]xi32>, vector<[4]xi32> + // CHECK: return %[[RES]] : vector<[4]x[4]xi32> + func.func @arith_to_outerproduct_scalable_i32(%lhs: vector<[4]xi32>, %rhs: vector<[4]xi32>) -> vector<[4]x[4]xi32> { + %lhsBcast = vector.broadcast %lhs : vector<[4]xi32> to vector<[4]x[4]xi32> + %lhsT = vector.transpose %lhsBcast, [1, 0] : vector<[4]x[4]xi32> to vector<[4]x[4]xi32> + %rhsBcast = vector.broadcast %rhs : vector<[4]xi32> to vector<[4]x[4]xi32> + %mul = arith.muli %lhsT, %rhsBcast : vector<[4]x[4]xi32> + return %mul: vector<[4]x[4]xi32> + } -// CHECK-LABEL: func.func @arith_to_outerproduct_trans_rhs_f32 -// CHECK-SAME: %[[LHS:.*]]: vector<16xf32>, -// CHECK-SAME: %[[RHS:.*]]: vector<8xf32>) -> vector<8x16xf32> { -// CHECK: %[[RES:.*]] = vector.outerproduct %[[RHS]], %[[LHS]] : vector<8xf32>, vector<16xf32> -// CHECK: return %[[RES]] : vector<8x16xf32> -func.func @arith_to_outerproduct_trans_rhs_f32(%lhs: vector<16xf32>, %rhs: vector<8xf32>) -> vector<8x16xf32> { - %rhsBcast = vector.broadcast %rhs : vector<8xf32> to vector<16x8xf32> - %rhsT = vector.transpose %rhsBcast, [1, 0] : vector<16x8xf32> to vector<8x16xf32> - %lhsBcast = vector.broadcast %lhs : vector<16xf32> to vector<8x16xf32> - %mul = arith.mulf %lhsBcast, %rhsT : vector<8x16xf32> - return %mul: vector<8x16xf32> + // CHECK-LABEL: func.func @arith_to_outerproduct_trans_rhs_f32 + // CHECK-SAME: %[[LHS:.*]]: vector<16xf32>, + // CHECK-SAME: %[[RHS:.*]]: vector<8xf32>) -> vector<8x16xf32> { + // CHECK: %[[RES:.*]] = vector.outerproduct %[[RHS]], %[[LHS]] : vector<8xf32>, vector<16xf32> + // CHECK: return %[[RES]] : vector<8x16xf32> + func.func @arith_to_outerproduct_trans_rhs_f32(%lhs: vector<16xf32>, %rhs: vector<8xf32>) -> vector<8x16xf32> { + %rhsBcast = vector.broadcast %rhs : vector<8xf32> to vector<16x8xf32> + %rhsT = vector.transpose %rhsBcast, [1, 0] : vector<16x8xf32> to vector<8x16xf32> + %lhsBcast = vector.broadcast %lhs : vector<16xf32> to vector<8x16xf32> + %mul = arith.mulf %lhsBcast, %rhsT : vector<8x16xf32> + return %mul: vector<8x16xf32> + } } module attributes {transform.with_named_sequence} { diff --git a/mlir/test/Examples/transform/ChH/full.mlir b/mlir/test/Examples/transform/ChH/full.mlir index 259475ebdbf49e..85dbf670233232 100644 --- a/mlir/test/Examples/transform/ChH/full.mlir +++ b/mlir/test/Examples/transform/ChH/full.mlir @@ -1,8 +1,6 @@ -// RUN: mlir-opt %s --transform-interpreter \ -// RUN: --test-transform-dialect-erase-schedule \ -// RUN: --math-uplift-to-fma \ -// RUN: --convert-bufferization-to-memref \ -// RUN: --test-lower-to-llvm |\ +// RUN: mlir-opt %s --transform-interpreter="debug-payload-root-tag=payload" \ +// RUN: --test-transform-dialect-erase-schedule |\ +// RUN: mlir-opt -pass-pipeline='builtin.module(builtin.module(math-uplift-to-fma,convert-bufferization-to-memref,test-lower-to-llvm))' - |\ // RUN: FileCheck %s // Fixed-size tensor types to be used in convolution. @@ -19,6 +17,7 @@ // tensors annotated with attributes from the `bufferization` dialect. These // attributes hint the bufferization pass to assume buffers can be directly // used for these tensors without reshaping. +module @payload attributes { transform.target_tag = "payload" } { func.func @conv( %input: !tinput {bufferization.writable = false, bufferization.access = "read", @@ -84,7 +83,7 @@ func.func @conv( return %relued : !toutput } - +} // Module containing the transformation script to be applied. The attribute // is required to correctly verify the use of named (macro-like) sequences. module attributes { transform.with_named_sequence } { From 470e5afe6969530f39eb46c129bfb52b2729ed37 Mon Sep 17 00:00:00 2001 From: Keith Smiley Date: Tue, 24 Sep 2024 22:35:50 -0700 Subject: [PATCH 15/65] [bazel] Port f586b1e3f42788025aa6f55be70c5e361cc8b529 (#109908) --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 304bee99b323f2..f5437245e8e135 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -12974,6 +12974,7 @@ cc_library( ":ControlFlowInterfaces", ":ConvertToLLVMInterface", ":DestinationStyleOpInterface", + ":FunctionInterfaces", ":IR", ":InferIntRangeCommon", ":InferIntRangeInterface", From e1365ce2220dade4e771881dcde9a7e0511ce451 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Wed, 25 Sep 2024 07:44:05 +0200 Subject: [PATCH 16/65] [clang][bytecode][NFC] Add type assertions to ArrayElem{,Pop} (#109829) So we don't accidentally try to use those with the wrong type. --- clang/lib/AST/ByteCode/Interp.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index 1f4c302b26197f..b029399a1554b8 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -2573,6 +2573,7 @@ inline bool ArrayElem(InterpState &S, CodePtr OpPC, uint32_t Index) { if (!CheckLoad(S, OpPC, Ptr)) return false; + assert(Ptr.atIndex(Index).getFieldDesc()->getPrimType() == Name); S.Stk.push(Ptr.atIndex(Index).deref()); return true; } @@ -2584,6 +2585,7 @@ inline bool ArrayElemPop(InterpState &S, CodePtr OpPC, uint32_t Index) { if (!CheckLoad(S, OpPC, Ptr)) return false; + assert(Ptr.atIndex(Index).getFieldDesc()->getPrimType() == Name); S.Stk.push(Ptr.atIndex(Index).deref()); return true; } From 915fe84c6d78f3c0268f3de571eb323304089d23 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 24 Sep 2024 22:53:16 -0700 Subject: [PATCH 17/65] [ctx_prof] Simplify ingestContext (NFC) (#109902) try_emplace can default-construct the value, so: try_emplace(CSId, CallTargetMapTy()) try_emplace(CSId) are equivalent to each other. We can further simplify the function using the fact that Map.try_emplace(Key).first->second is the same as Map[Key]. --- llvm/include/llvm/ProfileData/PGOCtxProfReader.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h index beda07d7b8286c..a00c21ddc7d7a1 100644 --- a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h +++ b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h @@ -68,8 +68,7 @@ class PGOCtxProfContext final { CallsiteMapTy &callsites() { return Callsites; } void ingestContext(uint32_t CSId, PGOCtxProfContext &&Other) { - auto [Iter, _] = callsites().try_emplace(CSId, CallTargetMapTy()); - Iter->second.emplace(Other.guid(), std::move(Other)); + callsites()[CSId].emplace(Other.guid(), std::move(Other)); } void ingestAllContexts(uint32_t CSId, CallTargetMapTy &&Other) { From fa824dc0dd960214865b03d8f56b18bb93e4a88b Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Wed, 25 Sep 2024 13:58:23 +0800 Subject: [PATCH 18/65] [LLVM][IR] Add constant range support for floating-point types (#86483) This patch adds basic constant range support for floating-point types to enable range-based optimizations. --- llvm/include/llvm/ADT/APFloat.h | 5 + llvm/include/llvm/IR/ConstantFPRange.h | 204 ++++++++++ llvm/lib/IR/CMakeLists.txt | 1 + llvm/lib/IR/ConstantFPRange.cpp | 249 +++++++++++++ llvm/lib/Support/APFloat.cpp | 7 +- llvm/unittests/IR/CMakeLists.txt | 1 + llvm/unittests/IR/ConstantFPRangeTest.cpp | 433 ++++++++++++++++++++++ 7 files changed, 898 insertions(+), 2 deletions(-) create mode 100644 llvm/include/llvm/IR/ConstantFPRange.h create mode 100644 llvm/lib/IR/ConstantFPRange.cpp create mode 100644 llvm/unittests/IR/ConstantFPRangeTest.cpp diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h index 7039e961bff82d..9cc8369a0bf52b 100644 --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -1534,6 +1534,11 @@ inline APFloat maximumnum(const APFloat &A, const APFloat &B) { return A < B ? B : A; } +inline raw_ostream &operator<<(raw_ostream &OS, const APFloat &V) { + V.print(OS); + return OS; +} + // We want the following functions to be available in the header for inlining. // We cannot define them inline in the class definition of `DoubleAPFloat` // because doing so would instantiate `std::unique_ptr` before diff --git a/llvm/include/llvm/IR/ConstantFPRange.h b/llvm/include/llvm/IR/ConstantFPRange.h new file mode 100644 index 00000000000000..23f0e8b8e0d134 --- /dev/null +++ b/llvm/include/llvm/IR/ConstantFPRange.h @@ -0,0 +1,204 @@ +//===- ConstantFPRange.h - Represent a range for floating-point -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Represent a range of possible values that may occur when the program is run +// for a floating-point value. This keeps track of a lower and upper bound for +// the constant. +// +// Range = [Lower, Upper] U (MayBeQNaN ? QNaN : {}) U (MayBeSNaN ? SNaN : {}) +// Specifically, [inf, -inf] represents an empty set. +// Note: +// 1. Bounds are inclusive. +// 2. -0 is considered to be less than 0. That is, range [0, 0] doesn't contain +// -0. +// 3. Currently wrapping ranges are not supported. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_CONSTANTFPRANGE_H +#define LLVM_IR_CONSTANTFPRANGE_H + +#include "llvm/ADT/APFloat.h" +#include "llvm/IR/Instructions.h" +#include + +namespace llvm { + +class raw_ostream; +struct KnownFPClass; + +/// This class represents a range of floating-point values. +class [[nodiscard]] ConstantFPRange { + APFloat Lower, Upper; + bool MayBeQNaN : 1; + bool MayBeSNaN : 1; + + /// Create empty constant range with same semantics. + ConstantFPRange getEmpty() const { + return ConstantFPRange(getSemantics(), /*IsFullSet=*/false); + } + + /// Create full constant range with same semantics. + ConstantFPRange getFull() const { + return ConstantFPRange(getSemantics(), /*IsFullSet=*/true); + } + + void makeEmpty(); + void makeFull(); + bool isNaNOnly() const; + + /// Initialize a full or empty set for the specified semantics. + explicit ConstantFPRange(const fltSemantics &Sem, bool IsFullSet); + +public: + /// Initialize a range to hold the single specified value. + explicit ConstantFPRange(const APFloat &Value); + + /// Initialize a range of values explicitly. + /// Note: If \p LowerVal is greater than \p UpperVal, please use the canonical + /// form [Inf, -Inf]. + ConstantFPRange(APFloat LowerVal, APFloat UpperVal, bool MayBeQNaN, + bool MayBeSNaN); + + /// Create empty constant range with the given semantics. + static ConstantFPRange getEmpty(const fltSemantics &Sem) { + return ConstantFPRange(Sem, /*IsFullSet=*/false); + } + + /// Create full constant range with the given semantics. + static ConstantFPRange getFull(const fltSemantics &Sem) { + return ConstantFPRange(Sem, /*IsFullSet=*/true); + } + + /// Helper for (-inf, inf) to represent all finite values. + static ConstantFPRange getFinite(const fltSemantics &Sem); + + /// Create a range which doesn't contain NaNs. + static ConstantFPRange getNonNaN(APFloat LowerVal, APFloat UpperVal) { + return ConstantFPRange(std::move(LowerVal), std::move(UpperVal), + /*MayBeQNaN=*/false, /*MayBeSNaN=*/false); + } + + /// Create a range which may contain NaNs. + static ConstantFPRange getMayBeNaN(APFloat LowerVal, APFloat UpperVal) { + return ConstantFPRange(std::move(LowerVal), std::move(UpperVal), + /*MayBeQNaN=*/true, /*MayBeSNaN=*/true); + } + + /// Create a range which only contains NaNs. + static ConstantFPRange getNaNOnly(const fltSemantics &Sem, bool MayBeQNaN, + bool MayBeSNaN); + + /// Produce the smallest range such that all values that may satisfy the given + /// predicate with any value contained within Other is contained in the + /// returned range. Formally, this returns a superset of + /// 'union over all y in Other . { x : fcmp op x y is true }'. If the exact + /// answer is not representable as a ConstantFPRange, the return value will be + /// a proper superset of the above. + /// + /// Example: Pred = ole and Other = float [2, 5] returns Result = [-inf, 5] + static ConstantFPRange makeAllowedFCmpRegion(FCmpInst::Predicate Pred, + const ConstantFPRange &Other); + + /// Produce the largest range such that all values in the returned range + /// satisfy the given predicate with all values contained within Other. + /// Formally, this returns a subset of + /// 'intersection over all y in Other . { x : fcmp op x y is true }'. If the + /// exact answer is not representable as a ConstantFPRange, the return value + /// will be a proper subset of the above. + /// + /// Example: Pred = ole and Other = float [2, 5] returns [-inf, 2] + static ConstantFPRange makeSatisfyingFCmpRegion(FCmpInst::Predicate Pred, + const ConstantFPRange &Other); + + /// Produce the exact range such that all values in the returned range satisfy + /// the given predicate with any value contained within Other. Formally, this + /// returns the exact answer when the superset of 'union over all y in Other + /// is exactly same as the subset of intersection over all y in Other. + /// { x : fcmp op x y is true}'. + /// + /// Example: Pred = olt and Other = float 3 returns [-inf, 3) + static ConstantFPRange makeExactFCmpRegion(FCmpInst::Predicate Pred, + const APFloat &Other); + + /// Does the predicate \p Pred hold between ranges this and \p Other? + /// NOTE: false does not mean that inverse predicate holds! + bool fcmp(FCmpInst::Predicate Pred, const ConstantFPRange &Other) const; + + /// Return the lower value for this range. + const APFloat &getLower() const { return Lower; } + + /// Return the upper value for this range. + const APFloat &getUpper() const { return Upper; } + + bool containsNaN() const { return MayBeQNaN || MayBeSNaN; } + bool containsQNaN() const { return MayBeQNaN; } + bool containsSNaN() const { return MayBeSNaN; } + + /// Get the semantics of this ConstantFPRange. + const fltSemantics &getSemantics() const { return Lower.getSemantics(); } + + /// Return true if this set contains all of the elements possible + /// for this data-type. + bool isFullSet() const; + + /// Return true if this set contains no members. + bool isEmptySet() const; + + /// Return true if the specified value is in the set. + bool contains(const APFloat &Val) const; + + /// Return true if the other range is a subset of this one. + bool contains(const ConstantFPRange &CR) const; + + /// If this set contains a single element, return it, otherwise return null. + const APFloat *getSingleElement() const; + + /// Return true if this set contains exactly one member. + bool isSingleElement() const { return getSingleElement() != nullptr; } + + /// Return true if the sign bit of all values in this range is 1. + /// Return false if the sign bit of all values in this range is 0. + /// Otherwise, return std::nullopt. + std::optional getSignBit() const; + + /// Return true if this range is equal to another range. + bool operator==(const ConstantFPRange &CR) const; + /// Return true if this range is not equal to another range. + bool operator!=(const ConstantFPRange &CR) const { return !operator==(CR); } + + /// Return the FPClassTest which will return true for the value. + FPClassTest classify() const; + + /// Return known floating-point classes for values in this range. + KnownFPClass toKnownFPClass() const; + + /// Print out the bounds to a stream. + void print(raw_ostream &OS) const; + + /// Allow printing from a debugger easily. + void dump() const; + + /// Return the range that results from the intersection of this range with + /// another range. + ConstantFPRange intersectWith(const ConstantFPRange &CR) const; + + /// Return the range that results from the union of this range + /// with another range. The resultant range is guaranteed to include the + /// elements of both sets, but may contain more. + ConstantFPRange unionWith(const ConstantFPRange &CR) const; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const ConstantFPRange &CR) { + CR.print(OS); + return OS; +} + +} // end namespace llvm + +#endif // LLVM_IR_CONSTANTFPRANGE_H diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt index 91e0e0cc65f36b..e5756940dd5a03 100644 --- a/llvm/lib/IR/CMakeLists.txt +++ b/llvm/lib/IR/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_component_library(LLVMCore BuiltinGCs.cpp Comdat.cpp ConstantFold.cpp + ConstantFPRange.cpp ConstantRange.cpp ConstantRangeList.cpp Constants.cpp diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp new file mode 100644 index 00000000000000..58aab353b43939 --- /dev/null +++ b/llvm/lib/IR/ConstantFPRange.cpp @@ -0,0 +1,249 @@ +//===- ConstantFPRange.cpp - ConstantFPRange implementation ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/ConstantFPRange.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +void ConstantFPRange::makeEmpty() { + auto &Sem = Lower.getSemantics(); + Lower = APFloat::getInf(Sem, /*Negative=*/false); + Upper = APFloat::getInf(Sem, /*Negative=*/true); + MayBeQNaN = false; + MayBeSNaN = false; +} + +void ConstantFPRange::makeFull() { + auto &Sem = Lower.getSemantics(); + Lower = APFloat::getInf(Sem, /*Negative=*/true); + Upper = APFloat::getInf(Sem, /*Negative=*/false); + MayBeQNaN = true; + MayBeSNaN = true; +} + +bool ConstantFPRange::isNaNOnly() const { + return Lower.isPosInfinity() && Upper.isNegInfinity(); +} + +ConstantFPRange::ConstantFPRange(const fltSemantics &Sem, bool IsFullSet) + : Lower(Sem, APFloat::uninitialized), Upper(Sem, APFloat::uninitialized) { + Lower = APFloat::getInf(Sem, /*Negative=*/IsFullSet); + Upper = APFloat::getInf(Sem, /*Negative=*/!IsFullSet); + MayBeQNaN = IsFullSet; + MayBeSNaN = IsFullSet; +} + +ConstantFPRange::ConstantFPRange(const APFloat &Value) + : Lower(Value.getSemantics(), APFloat::uninitialized), + Upper(Value.getSemantics(), APFloat::uninitialized) { + if (Value.isNaN()) { + makeEmpty(); + bool IsSNaN = Value.isSignaling(); + MayBeQNaN = !IsSNaN; + MayBeSNaN = IsSNaN; + } else { + Lower = Upper = Value; + MayBeQNaN = MayBeSNaN = false; + } +} + +// We treat that -0 is less than 0 here. +static APFloat::cmpResult strictCompare(const APFloat &LHS, + const APFloat &RHS) { + assert(!LHS.isNaN() && !RHS.isNaN() && "Unordered compare"); + if (LHS.isZero() && RHS.isZero()) { + if (LHS.isNegative() == RHS.isNegative()) + return APFloat::cmpEqual; + return LHS.isNegative() ? APFloat::cmpLessThan : APFloat::cmpGreaterThan; + } + return LHS.compare(RHS); +} + +static bool isNonCanonicalEmptySet(const APFloat &Lower, const APFloat &Upper) { + return strictCompare(Lower, Upper) == APFloat::cmpGreaterThan && + !(Lower.isInfinity() && Upper.isInfinity()); +} + +static void canonicalizeRange(APFloat &Lower, APFloat &Upper) { + if (isNonCanonicalEmptySet(Lower, Upper)) { + Lower = APFloat::getInf(Lower.getSemantics(), /*Negative=*/false); + Upper = APFloat::getInf(Upper.getSemantics(), /*Negative=*/true); + } +} + +ConstantFPRange::ConstantFPRange(APFloat LowerVal, APFloat UpperVal, + bool MayBeQNaN, bool MayBeSNaN) + : Lower(std::move(LowerVal)), Upper(std::move(UpperVal)) { + assert(&Lower.getSemantics() == &Upper.getSemantics() && + "Should only use the same semantics"); + assert(!isNonCanonicalEmptySet(Lower, Upper) && "Non-canonical form"); + this->MayBeQNaN = MayBeQNaN; + this->MayBeSNaN = MayBeSNaN; +} + +ConstantFPRange ConstantFPRange::getFinite(const fltSemantics &Sem) { + return ConstantFPRange(APFloat::getLargest(Sem, /*Negative=*/true), + APFloat::getLargest(Sem, /*Negative=*/false), + /*MayBeQNaN=*/false, /*MayBeSNaN=*/false); +} + +ConstantFPRange ConstantFPRange::getNaNOnly(const fltSemantics &Sem, + bool MayBeQNaN, bool MayBeSNaN) { + return ConstantFPRange(APFloat::getInf(Sem, /*Negative=*/false), + APFloat::getInf(Sem, /*Negative=*/true), MayBeQNaN, + MayBeSNaN); +} + +ConstantFPRange +ConstantFPRange::makeAllowedFCmpRegion(FCmpInst::Predicate Pred, + const ConstantFPRange &Other) { + // TODO + return getFull(Other.getSemantics()); +} + +ConstantFPRange +ConstantFPRange::makeSatisfyingFCmpRegion(FCmpInst::Predicate Pred, + const ConstantFPRange &Other) { + // TODO + return getEmpty(Other.getSemantics()); +} + +ConstantFPRange ConstantFPRange::makeExactFCmpRegion(FCmpInst::Predicate Pred, + const APFloat &Other) { + return makeAllowedFCmpRegion(Pred, ConstantFPRange(Other)); +} + +bool ConstantFPRange::fcmp(FCmpInst::Predicate Pred, + const ConstantFPRange &Other) const { + return makeSatisfyingFCmpRegion(Pred, Other).contains(*this); +} + +bool ConstantFPRange::isFullSet() const { + return Lower.isNegInfinity() && Upper.isPosInfinity() && MayBeQNaN && + MayBeSNaN; +} + +bool ConstantFPRange::isEmptySet() const { + return Lower.isPosInfinity() && Upper.isNegInfinity() && !MayBeQNaN && + !MayBeSNaN; +} + +bool ConstantFPRange::contains(const APFloat &Val) const { + assert(&getSemantics() == &Val.getSemantics() && + "Should only use the same semantics"); + + if (Val.isNaN()) + return Val.isSignaling() ? MayBeSNaN : MayBeQNaN; + return strictCompare(Lower, Val) != APFloat::cmpGreaterThan && + strictCompare(Val, Upper) != APFloat::cmpGreaterThan; +} + +bool ConstantFPRange::contains(const ConstantFPRange &CR) const { + assert(&getSemantics() == &CR.getSemantics() && + "Should only use the same semantics"); + + if (CR.MayBeQNaN && !MayBeQNaN) + return false; + + if (CR.MayBeSNaN && !MayBeSNaN) + return false; + + return strictCompare(Lower, CR.Lower) != APFloat::cmpGreaterThan && + strictCompare(CR.Upper, Upper) != APFloat::cmpGreaterThan; +} + +const APFloat *ConstantFPRange::getSingleElement() const { + if (MayBeSNaN || MayBeQNaN) + return nullptr; + return Lower.bitwiseIsEqual(Upper) ? &Lower : nullptr; +} + +std::optional ConstantFPRange::getSignBit() const { + if (!MayBeSNaN && !MayBeQNaN && Lower.isNegative() == Upper.isNegative()) + return Lower.isNegative(); + return std::nullopt; +} + +bool ConstantFPRange::operator==(const ConstantFPRange &CR) const { + if (MayBeSNaN != CR.MayBeSNaN || MayBeQNaN != CR.MayBeQNaN) + return false; + return Lower.bitwiseIsEqual(CR.Lower) && Upper.bitwiseIsEqual(CR.Upper); +} + +FPClassTest ConstantFPRange::classify() const { + uint32_t Mask = fcNone; + if (MayBeSNaN) + Mask |= fcSNan; + if (MayBeQNaN) + Mask |= fcQNan; + if (!isNaNOnly()) { + FPClassTest LowerMask = Lower.classify(); + FPClassTest UpperMask = Upper.classify(); + assert(LowerMask <= UpperMask && "Range is nan-only."); + for (uint32_t I = LowerMask; I <= UpperMask; I <<= 1) + Mask |= I; + } + return static_cast(Mask); +} + +KnownFPClass ConstantFPRange::toKnownFPClass() const { + KnownFPClass Result; + Result.KnownFPClasses = classify(); + Result.SignBit = getSignBit(); + return Result; +} + +void ConstantFPRange::print(raw_ostream &OS) const { + if (isFullSet()) + OS << "full-set"; + else if (isEmptySet()) + OS << "empty-set"; + else { + bool NaNOnly = isNaNOnly(); + if (!NaNOnly) + OS << '[' << Lower << ", " << Upper << ']'; + + if (MayBeSNaN || MayBeQNaN) { + if (!NaNOnly) + OS << " with "; + if (MayBeSNaN && MayBeQNaN) + OS << "NaN"; + else if (MayBeSNaN) + OS << "SNaN"; + else if (MayBeQNaN) + OS << "QNaN"; + } + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void ConstantFPRange::dump() const { print(dbgs()); } +#endif + +ConstantFPRange +ConstantFPRange::intersectWith(const ConstantFPRange &CR) const { + assert(&getSemantics() == &CR.getSemantics() && + "Should only use the same semantics"); + APFloat NewLower = maxnum(Lower, CR.Lower); + APFloat NewUpper = minnum(Upper, CR.Upper); + canonicalizeRange(NewLower, NewUpper); + return ConstantFPRange(std::move(NewLower), std::move(NewUpper), + MayBeQNaN & CR.MayBeQNaN, MayBeSNaN & CR.MayBeSNaN); +} + +ConstantFPRange ConstantFPRange::unionWith(const ConstantFPRange &CR) const { + assert(&getSemantics() == &CR.getSemantics() && + "Should only use the same semantics"); + return ConstantFPRange(minnum(Lower, CR.Lower), maxnum(Upper, CR.Upper), + MayBeQNaN | CR.MayBeQNaN, MayBeSNaN | CR.MayBeSNaN); +} diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp index 7f68c5ab9b7cf7..dee917fd56104c 100644 --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -5366,11 +5366,14 @@ APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) { void APFloat::print(raw_ostream &OS) const { SmallVector Buffer; toString(Buffer); - OS << Buffer << "\n"; + OS << Buffer; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); } +LLVM_DUMP_METHOD void APFloat::dump() const { + print(dbgs()); + dbgs() << '\n'; +} #endif void APFloat::Profile(FoldingSetNodeID &NID) const { diff --git a/llvm/unittests/IR/CMakeLists.txt b/llvm/unittests/IR/CMakeLists.txt index 633166221c6907..e5c8630f3eed77 100644 --- a/llvm/unittests/IR/CMakeLists.txt +++ b/llvm/unittests/IR/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_unittest(IRTests BasicBlockTest.cpp BasicBlockDbgInfoTest.cpp CFGBuilder.cpp + ConstantFPRangeTest.cpp ConstantRangeTest.cpp ConstantRangeListTest.cpp ConstantsTest.cpp diff --git a/llvm/unittests/IR/ConstantFPRangeTest.cpp b/llvm/unittests/IR/ConstantFPRangeTest.cpp new file mode 100644 index 00000000000000..dbb6c4545a1831 --- /dev/null +++ b/llvm/unittests/IR/ConstantFPRangeTest.cpp @@ -0,0 +1,433 @@ +//===- ConstantRangeTest.cpp - ConstantRange tests ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/ConstantFPRange.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/KnownBits.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +class ConstantFPRangeTest : public ::testing::Test { +protected: + static const fltSemantics &Sem; + static ConstantFPRange Full; + static ConstantFPRange Empty; + static ConstantFPRange Finite; + static ConstantFPRange One; + static ConstantFPRange PosZero; + static ConstantFPRange NegZero; + static ConstantFPRange Zero; + static ConstantFPRange PosInf; + static ConstantFPRange NegInf; + static ConstantFPRange Denormal; + static ConstantFPRange NaN; + static ConstantFPRange SNaN; + static ConstantFPRange QNaN; + static ConstantFPRange Some; + static ConstantFPRange SomePos; + static ConstantFPRange SomeNeg; +}; + +const fltSemantics &ConstantFPRangeTest::Sem = APFloat::IEEEdouble(); +ConstantFPRange ConstantFPRangeTest::Full = + ConstantFPRange::getFull(APFloat::IEEEdouble()); +ConstantFPRange ConstantFPRangeTest::Empty = + ConstantFPRange::getEmpty(APFloat::IEEEdouble()); +ConstantFPRange ConstantFPRangeTest::Finite = + ConstantFPRange::getFinite(APFloat::IEEEdouble()); +ConstantFPRange ConstantFPRangeTest::One = ConstantFPRange(APFloat(1.0)); +ConstantFPRange ConstantFPRangeTest::PosZero = ConstantFPRange( + APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/false)); +ConstantFPRange ConstantFPRangeTest::NegZero = + ConstantFPRange(APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/true)); +ConstantFPRange ConstantFPRangeTest::Zero = ConstantFPRange::getNonNaN( + APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/true), + APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/false)); +ConstantFPRange ConstantFPRangeTest::Denormal = + ConstantFPRange(APFloat::getSmallest(APFloat::IEEEdouble())); +ConstantFPRange ConstantFPRangeTest::PosInf = + ConstantFPRange(APFloat::getInf(APFloat::IEEEdouble(), /*Negative=*/false)); +ConstantFPRange ConstantFPRangeTest::NegInf = + ConstantFPRange(APFloat::getInf(APFloat::IEEEdouble(), /*Negative=*/true)); +ConstantFPRange ConstantFPRangeTest::NaN = ConstantFPRange::getNaNOnly( + APFloat::IEEEdouble(), /*MayBeQNaN=*/true, /*MayBeSNaN=*/true); +ConstantFPRange ConstantFPRangeTest::SNaN = + ConstantFPRange(APFloat::getSNaN(APFloat::IEEEdouble())); +ConstantFPRange ConstantFPRangeTest::QNaN = + ConstantFPRange(APFloat::getQNaN(APFloat::IEEEdouble())); +ConstantFPRange ConstantFPRangeTest::Some = + ConstantFPRange::getNonNaN(APFloat(-3.0), APFloat(3.0)); +ConstantFPRange ConstantFPRangeTest::SomePos = ConstantFPRange::getNonNaN( + APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/false), APFloat(3.0)); +ConstantFPRange ConstantFPRangeTest::SomeNeg = ConstantFPRange::getNonNaN( + APFloat(-3.0), APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/true)); + +static void strictNext(APFloat &V) { + // Note: nextUp(+/-0) is smallest. + if (V.isNegZero()) + V = APFloat::getZero(V.getSemantics(), /*Negative=*/false); + else + V.next(/*nextDown=*/false); +} + +template +static void EnumerateConstantFPRangesImpl(Fn TestFn, bool Exhaustive, + bool MayBeQNaN, bool MayBeSNaN) { + const fltSemantics &Sem = APFloat::Float8E4M3(); + APFloat PosInf = APFloat::getInf(Sem, /*Negative=*/false); + APFloat NegInf = APFloat::getInf(Sem, /*Negative=*/true); + TestFn(ConstantFPRange(PosInf, NegInf, MayBeQNaN, MayBeSNaN)); + + if (!Exhaustive) { + SmallVector Values; + Values.push_back(APFloat::getInf(Sem, /*Negative=*/true)); + Values.push_back(APFloat::getLargest(Sem, /*Negative=*/true)); + unsigned BitWidth = APFloat::semanticsSizeInBits(Sem); + unsigned Exponents = APFloat::semanticsMaxExponent(Sem) - + APFloat::semanticsMinExponent(Sem) + 3; + unsigned MantissaBits = APFloat::semanticsPrecision(Sem) - 1; + // Add -2^(max exponent), -2^(max exponent-1), ..., -2^(min exponent) + for (unsigned M = Exponents - 2; M != 0; --M) + Values.push_back( + APFloat(Sem, APInt(BitWidth, (M + Exponents) << MantissaBits))); + Values.push_back(APFloat::getSmallest(Sem, /*Negative=*/true)); + Values.push_back(APFloat::getZero(Sem, /*Negative=*/true)); + size_t E = Values.size(); + for (size_t I = 1; I <= E; ++I) + Values.push_back(-Values[E - I]); + for (size_t I = 0; I != Values.size(); ++I) + for (size_t J = I; J != Values.size(); ++J) + TestFn(ConstantFPRange(Values[I], Values[J], MayBeQNaN, MayBeSNaN)); + return; + } + + auto Next = [&](APFloat &V) { + if (V.isPosInfinity()) + return false; + strictNext(V); + return true; + }; + + APFloat Lower = NegInf; + do { + APFloat Upper = Lower; + do { + TestFn(ConstantFPRange(Lower, Upper, MayBeQNaN, MayBeSNaN)); + } while (Next(Upper)); + } while (Next(Lower)); +} + +template +static void EnumerateConstantFPRanges(Fn TestFn, bool Exhaustive) { + EnumerateConstantFPRangesImpl(TestFn, Exhaustive, /*MayBeQNaN=*/false, + /*MayBeSNaN=*/false); + EnumerateConstantFPRangesImpl(TestFn, Exhaustive, /*MayBeQNaN=*/false, + /*MayBeSNaN=*/true); + EnumerateConstantFPRangesImpl(TestFn, Exhaustive, /*MayBeQNaN=*/true, + /*MayBeSNaN=*/false); + EnumerateConstantFPRangesImpl(TestFn, Exhaustive, /*MayBeQNaN=*/true, + /*MayBeSNaN=*/true); +} + +template +static void EnumerateTwoInterestingConstantFPRanges(Fn TestFn, + bool Exhaustive) { + EnumerateConstantFPRanges( + [&](const ConstantFPRange &CR1) { + EnumerateConstantFPRanges( + [&](const ConstantFPRange &CR2) { TestFn(CR1, CR2); }, Exhaustive); + }, + Exhaustive); +} + +template +static void EnumerateValuesInConstantFPRange(const ConstantFPRange &CR, + Fn TestFn) { + const fltSemantics &Sem = CR.getSemantics(); + unsigned Bits = APFloat::semanticsSizeInBits(Sem); + assert(Bits < 32 && "Too many bits"); + for (unsigned I = 0, E = (1U << Bits) - 1; I != E; ++I) { + APFloat V(Sem, APInt(Bits, I)); + if (CR.contains(V)) + TestFn(V); + } +} + +TEST_F(ConstantFPRangeTest, Basics) { + EXPECT_TRUE(Full.isFullSet()); + EXPECT_FALSE(Full.isEmptySet()); + EXPECT_TRUE(Full.contains(APFloat::getNaN(Sem))); + EXPECT_TRUE(Full.contains(APFloat::getInf(Sem, /*Negative=*/false))); + EXPECT_TRUE(Full.contains(APFloat::getInf(Sem, /*Negative=*/true))); + EXPECT_TRUE(Full.contains(APFloat::getZero(Sem, /*Negative=*/false))); + EXPECT_TRUE(Full.contains(APFloat::getZero(Sem, /*Negative=*/true))); + EXPECT_TRUE(Full.contains(APFloat::getSmallest(Sem))); + EXPECT_TRUE(Full.contains(APFloat(2.0))); + EXPECT_TRUE(Full.contains(Full)); + EXPECT_TRUE(Full.contains(Empty)); + EXPECT_TRUE(Full.contains(Finite)); + EXPECT_TRUE(Full.contains(Zero)); + EXPECT_TRUE(Full.contains(Some)); + + EXPECT_FALSE(Empty.isFullSet()); + EXPECT_TRUE(Empty.isEmptySet()); + EXPECT_FALSE(Empty.contains(APFloat::getNaN(Sem))); + EXPECT_FALSE(Empty.contains(APFloat::getInf(Sem, /*Negative=*/false))); + EXPECT_FALSE(Empty.contains(APFloat::getZero(Sem, /*Negative=*/true))); + EXPECT_FALSE(Empty.contains(APFloat(2.0))); + EXPECT_TRUE(Empty.contains(Empty)); + + EXPECT_FALSE(Finite.isFullSet()); + EXPECT_FALSE(Finite.isEmptySet()); + EXPECT_FALSE(Finite.contains(APFloat::getNaN(Sem))); + EXPECT_FALSE(Finite.contains(APFloat::getInf(Sem, /*Negative=*/false))); + EXPECT_FALSE(Finite.contains(APFloat::getInf(Sem, /*Negative=*/true))); + EXPECT_TRUE(Finite.contains(APFloat::getLargest(Sem, /*Negative=*/false))); + EXPECT_TRUE(Finite.contains(APFloat::getLargest(Sem, /*Negative=*/true))); + EXPECT_TRUE(Finite.contains(Finite)); + EXPECT_TRUE(Finite.contains(Some)); + EXPECT_TRUE(Finite.contains(Denormal)); + EXPECT_TRUE(Finite.contains(Zero)); + EXPECT_FALSE(Finite.contains(PosInf)); + EXPECT_FALSE(Finite.contains(NaN)); + + EXPECT_TRUE(One.contains(APFloat(1.0))); + EXPECT_FALSE(One.contains(APFloat(1.1))); + + EXPECT_TRUE(PosZero.contains(APFloat::getZero(Sem, /*Negative=*/false))); + EXPECT_FALSE(PosZero.contains(APFloat::getZero(Sem, /*Negative=*/true))); + EXPECT_TRUE(NegZero.contains(APFloat::getZero(Sem, /*Negative=*/true))); + EXPECT_FALSE(NegZero.contains(APFloat::getZero(Sem, /*Negative=*/false))); + EXPECT_TRUE(Zero.contains(PosZero)); + EXPECT_TRUE(Zero.contains(NegZero)); + EXPECT_TRUE(Denormal.contains(APFloat::getSmallest(Sem))); + EXPECT_FALSE(Denormal.contains(APFloat::getSmallestNormalized(Sem))); + EXPECT_TRUE(PosInf.contains(APFloat::getInf(Sem, /*Negative=*/false))); + EXPECT_TRUE(NegInf.contains(APFloat::getInf(Sem, /*Negative=*/true))); + EXPECT_TRUE(NaN.contains(APFloat::getQNaN(Sem))); + EXPECT_TRUE(NaN.contains(APFloat::getSNaN(Sem))); + EXPECT_TRUE(NaN.contains(SNaN)); + EXPECT_TRUE(NaN.contains(QNaN)); + + EXPECT_TRUE(Some.contains(APFloat(3.0))); + EXPECT_TRUE(Some.contains(APFloat(-3.0))); + EXPECT_FALSE(Some.contains(APFloat(4.0))); + APFloat Next1(3.0); + Next1.next(/*nextDown=*/true); + EXPECT_TRUE(Some.contains(Next1)); + APFloat Next2(3.0); + Next2.next(/*nextDown=*/false); + EXPECT_FALSE(Some.contains(Next2)); + EXPECT_TRUE(Some.contains(Zero)); + EXPECT_TRUE(Some.contains(Some)); + EXPECT_TRUE(Some.contains(One)); + EXPECT_FALSE(Some.contains(NaN)); + EXPECT_FALSE(Some.contains(PosInf)); + EXPECT_TRUE(SomePos.contains(APFloat(3.0))); + EXPECT_FALSE(SomeNeg.contains(APFloat(3.0))); + EXPECT_TRUE(SomeNeg.contains(APFloat(-3.0))); + EXPECT_FALSE(SomePos.contains(APFloat(-3.0))); + EXPECT_TRUE(Some.contains(SomePos)); + EXPECT_TRUE(Some.contains(SomeNeg)); +} + +TEST_F(ConstantFPRangeTest, Equality) { + EXPECT_EQ(Full, Full); + EXPECT_EQ(Empty, Empty); + EXPECT_EQ(One, One); + EXPECT_EQ(Some, Some); + EXPECT_NE(Full, Empty); + EXPECT_NE(Zero, PosZero); + EXPECT_NE(One, NaN); + EXPECT_NE(Some, One); + EXPECT_NE(SNaN, QNaN); +} + +TEST_F(ConstantFPRangeTest, SingleElement) { + EXPECT_EQ(Full.getSingleElement(), static_cast(nullptr)); + EXPECT_EQ(Empty.getSingleElement(), static_cast(nullptr)); + EXPECT_EQ(Finite.getSingleElement(), static_cast(nullptr)); + EXPECT_EQ(Zero.getSingleElement(), static_cast(nullptr)); + EXPECT_EQ(NaN.getSingleElement(), static_cast(nullptr)); + EXPECT_EQ(SNaN.getSingleElement(), static_cast(nullptr)); + EXPECT_EQ(QNaN.getSingleElement(), static_cast(nullptr)); + + EXPECT_EQ(*One.getSingleElement(), APFloat(1.0)); + EXPECT_EQ(*PosZero.getSingleElement(), APFloat::getZero(Sem)); + EXPECT_EQ(*PosInf.getSingleElement(), APFloat::getInf(Sem)); + + EXPECT_FALSE(Full.isSingleElement()); + EXPECT_FALSE(Empty.isSingleElement()); + EXPECT_TRUE(One.isSingleElement()); + EXPECT_FALSE(Some.isSingleElement()); + EXPECT_FALSE(Zero.isSingleElement()); +} + +TEST_F(ConstantFPRangeTest, ExhaustivelyEnumerate) { + constexpr unsigned NNaNValues = (1 << 8) - 2 * ((1 << 3) - 1); + constexpr unsigned Expected = 4 * ((NNaNValues + 1) * NNaNValues / 2 + 1); + unsigned Count = 0; + EnumerateConstantFPRanges([&](const ConstantFPRange &) { ++Count; }, + /*Exhaustive=*/true); + EXPECT_EQ(Expected, Count); +} + +TEST_F(ConstantFPRangeTest, Enumerate) { + constexpr unsigned NNaNValues = 2 * ((1 << 4) - 2 + 4); + constexpr unsigned Expected = 4 * ((NNaNValues + 1) * NNaNValues / 2 + 1); + unsigned Count = 0; + EnumerateConstantFPRanges([&](const ConstantFPRange &) { ++Count; }, + /*Exhaustive=*/false); + EXPECT_EQ(Expected, Count); +} + +TEST_F(ConstantFPRangeTest, IntersectWith) { + EXPECT_EQ(Empty.intersectWith(Full), Empty); + EXPECT_EQ(Empty.intersectWith(Empty), Empty); + EXPECT_EQ(Empty.intersectWith(One), Empty); + EXPECT_EQ(Empty.intersectWith(Some), Empty); + EXPECT_EQ(Full.intersectWith(Full), Full); + EXPECT_EQ(Some.intersectWith(Some), Some); + EXPECT_EQ(Some.intersectWith(One), One); + EXPECT_EQ(Full.intersectWith(One), One); + EXPECT_EQ(Full.intersectWith(Some), Some); + EXPECT_EQ(Some.intersectWith(SomePos), SomePos); + EXPECT_EQ(Some.intersectWith(SomeNeg), SomeNeg); + EXPECT_EQ(NaN.intersectWith(Finite), Empty); + EXPECT_EQ(NaN.intersectWith(SNaN), SNaN); + EXPECT_EQ(NaN.intersectWith(QNaN), QNaN); + EXPECT_EQ(Finite.intersectWith(One), One); + EXPECT_EQ(Some.intersectWith(Zero), Zero); + EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(4.0)) + .intersectWith( + ConstantFPRange::getNonNaN(APFloat(3.0), APFloat(6.0))), + ConstantFPRange::getNonNaN(APFloat(3.0), APFloat(4.0))); + EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(2.0)) + .intersectWith( + ConstantFPRange::getNonNaN(APFloat(5.0), APFloat(6.0))), + Empty); +} + +TEST_F(ConstantFPRangeTest, UnionWith) { + EXPECT_EQ(Empty.unionWith(Full), Full); + EXPECT_EQ(Empty.unionWith(Empty), Empty); + EXPECT_EQ(Empty.unionWith(One), One); + EXPECT_EQ(Empty.unionWith(Some), Some); + EXPECT_EQ(Full.unionWith(Full), Full); + EXPECT_EQ(Some.unionWith(Some), Some); + EXPECT_EQ(Some.unionWith(One), Some); + EXPECT_EQ(Full.unionWith(Some), Full); + EXPECT_EQ(Some.unionWith(SomePos), Some); + EXPECT_EQ(Some.unionWith(SomeNeg), Some); + EXPECT_EQ(Finite.unionWith(One), Finite); + EXPECT_EQ(Some.unionWith(Zero), Some); + EXPECT_EQ(Finite.unionWith(PosInf).unionWith(NegInf).unionWith(NaN), Full); + EXPECT_EQ(PosZero.unionWith(NegZero), Zero); + EXPECT_EQ(NaN.unionWith(SNaN), NaN); + EXPECT_EQ(NaN.unionWith(QNaN), NaN); + EXPECT_EQ(SNaN.unionWith(QNaN), NaN); + EXPECT_EQ( + ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(4.0)) + .unionWith(ConstantFPRange::getNonNaN(APFloat(3.0), APFloat(6.0))), + ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(6.0))); + EXPECT_EQ( + ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(2.0)) + .unionWith(ConstantFPRange::getNonNaN(APFloat(5.0), APFloat(6.0))), + ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(6.0))); +} + +TEST_F(ConstantFPRangeTest, FPClassify) { + EXPECT_EQ(Empty.classify(), fcNone); + EXPECT_EQ(Full.classify(), fcAllFlags); + EXPECT_EQ(Finite.classify(), fcFinite); + EXPECT_EQ(Zero.classify(), fcZero); + EXPECT_EQ(NaN.classify(), fcNan); + EXPECT_EQ(SNaN.classify(), fcSNan); + EXPECT_EQ(QNaN.classify(), fcQNan); + EXPECT_EQ(One.classify(), fcPosNormal); + EXPECT_EQ(Some.classify(), fcFinite); + EXPECT_EQ(SomePos.classify(), fcPosFinite); + EXPECT_EQ(SomeNeg.classify(), fcNegFinite); + EXPECT_EQ(PosInf.classify(), fcPosInf); + EXPECT_EQ(NegInf.classify(), fcNegInf); + EXPECT_TRUE(SomePos.toKnownFPClass().cannotBeOrderedLessThanZero()); + EXPECT_EQ(Finite.getSignBit(), std::nullopt); + EXPECT_EQ(PosZero.getSignBit(), false); + EXPECT_EQ(NegZero.getSignBit(), true); + EXPECT_EQ(SomePos.getSignBit(), false); + EXPECT_EQ(SomeNeg.getSignBit(), true); + EXPECT_EQ(SomePos.toKnownFPClass().SignBit, false); + EXPECT_EQ(SomeNeg.toKnownFPClass().SignBit, true); + + EnumerateConstantFPRanges( + [](const ConstantFPRange &CR) { + unsigned Mask = fcNone; + bool HasPos = false, HasNeg = false; + EnumerateValuesInConstantFPRange(CR, [&](const APFloat &V) { + Mask |= V.classify(); + if (V.isNegative()) + HasNeg = true; + else + HasPos = true; + }); + + std::optional SignBit = std::nullopt; + if (HasPos != HasNeg) + SignBit = HasNeg; + + EXPECT_EQ(SignBit, CR.getSignBit()) << CR; + EXPECT_EQ(Mask, CR.classify()) << CR; + }, + /*Exhaustive=*/true); +} + +TEST_F(ConstantFPRangeTest, Print) { + auto ToString = [](const ConstantFPRange &CR) { + std::string Str; + raw_string_ostream OS(Str); + CR.print(OS); + return Str; + }; + + EXPECT_EQ(ToString(Full), "full-set"); + EXPECT_EQ(ToString(Empty), "empty-set"); + EXPECT_EQ(ToString(NaN), "NaN"); + EXPECT_EQ(ToString(SNaN), "SNaN"); + EXPECT_EQ(ToString(QNaN), "QNaN"); + EXPECT_EQ(ToString(One), "[1, 1]"); + EXPECT_EQ(ToString(Some.unionWith(SNaN)), "[-3, 3] with SNaN"); +} + +#ifdef GTEST_HAS_DEATH_TEST +#ifndef NDEBUG +TEST_F(ConstantFPRangeTest, NonCanonicalEmptySet) { + EXPECT_DEATH(ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(0.0)), + "Non-canonical form"); +} +TEST_F(ConstantFPRangeTest, MismatchedSemantics) { + EXPECT_DEATH(ConstantFPRange::getNonNaN(APFloat(0.0), APFloat(1.0f)), + "Should only use the same semantics"); + EXPECT_DEATH(One.contains(APFloat(1.0f)), + "Should only use the same semantics"); + ConstantFPRange OneF32 = ConstantFPRange(APFloat(1.0f)); + EXPECT_DEATH(One.contains(OneF32), "Should only use the same semantics"); + EXPECT_DEATH(One.intersectWith(OneF32), "Should only use the same semantics"); + EXPECT_DEATH(One.unionWith(OneF32), "Should only use the same semantics"); +} +#endif +#endif + +} // anonymous namespace From d8f555d62546fc1fbee3d2a48b37779c629bf87b Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Wed, 25 Sep 2024 13:59:10 +0800 Subject: [PATCH 19/65] [UBSan] Diagnose assumption violation (#104741) This patch extends [D34590](https://reviews.llvm.org/D34590) to check assumption violations. --------- Co-authored-by: Vitaly Buka --- clang/lib/CodeGen/CGBuiltin.cpp | 21 ++++++++++++++++--- clang/lib/CodeGen/CGStmt.cpp | 2 +- clang/lib/CodeGen/CodeGenFunction.h | 5 +++++ clang/test/CodeGen/ubsan-builtin-checks.c | 17 +++++++++++++++ compiler-rt/lib/ubsan/ubsan_handlers.cpp | 9 +++++--- compiler-rt/lib/ubsan/ubsan_handlers.h | 1 + .../test/ubsan/TestCases/Misc/builtins.cpp | 16 ++++++++++++-- 7 files changed, 62 insertions(+), 9 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 942468204f054c..7bbf22569e73ca 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1997,8 +1997,8 @@ struct CallObjCArcUse final : EHScopeStack::Cleanup { Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind) { - assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) - && "Unsupported builtin check kind"); + assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) && + "Unsupported builtin check kind"); Value *ArgValue = EmitScalarExpr(E); if (!SanOpts.has(SanitizerKind::Builtin)) @@ -2015,6 +2015,21 @@ Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E, return ArgValue; } +Value *CodeGenFunction::EmitCheckedArgForAssume(const Expr *E) { + Value *ArgValue = EvaluateExprAsBool(E); + if (!SanOpts.has(SanitizerKind::Builtin)) + return ArgValue; + + SanitizerScope SanScope(this); + EmitCheck( + std::make_pair(ArgValue, SanitizerKind::Builtin), + SanitizerHandler::InvalidBuiltin, + {EmitCheckSourceLocation(E->getExprLoc()), + llvm::ConstantInt::get(Builder.getInt8Ty(), BCK_AssumePassedFalse)}, + std::nullopt); + return ArgValue; +} + static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) { return CGF.Builder.CreateBinaryIntrinsic( Intrinsic::abs, ArgValue, @@ -3428,7 +3443,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, if (E->getArg(0)->HasSideEffects(getContext())) return RValue::get(nullptr); - Value *ArgValue = EmitScalarExpr(E->getArg(0)); + Value *ArgValue = EmitCheckedArgForAssume(E->getArg(0)); Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume); Builder.CreateCall(FnAssume, ArgValue); return RValue::get(nullptr); diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 623857b43a5575..9bf15fca0de489 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -754,7 +754,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) { const Expr *Assumption = cast(A)->getAssumption(); if (getLangOpts().CXXAssumptions && Builder.GetInsertBlock() && !Assumption->HasSideEffects(getContext())) { - llvm::Value *AssumptionVal = EvaluateExprAsBool(Assumption); + llvm::Value *AssumptionVal = EmitCheckedArgForAssume(Assumption); Builder.CreateAssumption(AssumptionVal); } } break; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 2df17e83bae2ee..d4e46f77868eca 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -5084,12 +5084,17 @@ class CodeGenFunction : public CodeGenTypeCache { enum BuiltinCheckKind { BCK_CTZPassedZero, BCK_CLZPassedZero, + BCK_AssumePassedFalse, }; /// Emits an argument for a call to a builtin. If the builtin sanitizer is /// enabled, a runtime check specified by \p Kind is also emitted. llvm::Value *EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind); + /// Emits an argument for a call to a `__builtin_assume`. If the builtin + /// sanitizer is enabled, a runtime check is also emitted. + llvm::Value *EmitCheckedArgForAssume(const Expr *E); + /// Emit a description of a type in a format suitable for passing to /// a runtime sanitizer handler. llvm::Constant *EmitCheckTypeDescriptor(QualType T); diff --git a/clang/test/CodeGen/ubsan-builtin-checks.c b/clang/test/CodeGen/ubsan-builtin-checks.c index c7f6078f903bad..8535ec915ac346 100644 --- a/clang/test/CodeGen/ubsan-builtin-checks.c +++ b/clang/test/CodeGen/ubsan-builtin-checks.c @@ -51,3 +51,20 @@ void check_clz(int n) { // CHECK: call void @__ubsan_handle_invalid_builtin __builtin_clzg((unsigned int)n); } + +// CHECK: define{{.*}} void @check_assume +void check_assume(int n) { + // CHECK: [[TOBOOL:%.*]] = icmp ne i32 [[N:%.*]], 0 + // CHECK-NEXT: br i1 [[TOBOOL]] + // + // Handler block: + // CHECK: call void @__ubsan_handle_invalid_builtin + // CHECK-NEXT: unreachable + // + // Continuation block: + // CHECK: call void @llvm.assume(i1 [[TOBOOL]]) + __builtin_assume(n); + + // CHECK: call void @__ubsan_handle_invalid_builtin + __attribute__((assume(n))); +} diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.cpp b/compiler-rt/lib/ubsan/ubsan_handlers.cpp index 9dbe8e6c0c1745..a419cf0b2b5557 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers.cpp +++ b/compiler-rt/lib/ubsan/ubsan_handlers.cpp @@ -633,9 +633,12 @@ static void handleInvalidBuiltin(InvalidBuiltinData *Data, ReportOptions Opts) { ScopedReport R(Opts, Loc, ET); - Diag(Loc, DL_Error, ET, - "passing zero to __builtin_%0(), which is not a valid argument") - << ((Data->Kind == BCK_CTZPassedZero) ? "ctz" : "clz"); + if (Data->Kind == BCK_AssumePassedFalse) + Diag(Loc, DL_Error, ET, "assumption is violated during execution"); + else + Diag(Loc, DL_Error, ET, + "passing zero to __builtin_%0(), which is not a valid argument") + << ((Data->Kind == BCK_CTZPassedZero) ? "ctz" : "clz"); } void __ubsan::__ubsan_handle_invalid_builtin(InvalidBuiltinData *Data) { diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.h b/compiler-rt/lib/ubsan/ubsan_handlers.h index bae661a56833dd..4ffa1439a1323f 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers.h +++ b/compiler-rt/lib/ubsan/ubsan_handlers.h @@ -159,6 +159,7 @@ RECOVERABLE(implicit_conversion, ImplicitConversionData *Data, ValueHandle Src, enum BuiltinCheckKind : unsigned char { BCK_CTZPassedZero, BCK_CLZPassedZero, + BCK_AssumePassedFalse, }; struct InvalidBuiltinData { diff --git a/compiler-rt/test/ubsan/TestCases/Misc/builtins.cpp b/compiler-rt/test/ubsan/TestCases/Misc/builtins.cpp index a635f7fcc686ed..2702065bce0678 100644 --- a/compiler-rt/test/ubsan/TestCases/Misc/builtins.cpp +++ b/compiler-rt/test/ubsan/TestCases/Misc/builtins.cpp @@ -1,8 +1,8 @@ // REQUIRES: target={{x86_64.*}} // -// RUN: %clangxx -fsanitize=builtin -w %s -O3 -o %t +// RUN: %clangxx -fsanitize=builtin -fno-inline -w %s -O3 -o %t // RUN: %run %t 2>&1 | FileCheck %s --check-prefix=RECOVER -// RUN: %clangxx -fsanitize=builtin -fno-sanitize-recover=builtin -w %s -O3 -o %t.abort +// RUN: %clangxx -fsanitize=builtin -fno-inline -fno-sanitize-recover=builtin -w %s -O3 -o %t.abort // RUN: not %run %t.abort 2>&1 | FileCheck %s --check-prefix=ABORT void check_ctz(int n) { @@ -28,8 +28,20 @@ void check_clz(int n) { __builtin_clzll(n); } +void check_assume(int n) { + // RECOVER: builtins.cpp:[[@LINE+1]]:20: runtime error: assumption is violated during execution + __builtin_assume(n); +} + +void check_assume_attr(int n) { + // RECOVER: builtins.cpp:[[@LINE+1]]:25: runtime error: assumption is violated during execution + __attribute__((assume(n))); +} + int main() { check_ctz(0); check_clz(0); + check_assume(0); + check_assume_attr(0); return 0; } From 4bd3a62cd600c607e7cb3d69dd75ac4069e3eef9 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Wed, 25 Sep 2024 08:00:32 +0200 Subject: [PATCH 20/65] [clang][bytecode] Fix diagnosing std::construct_at with wrong type (#109828) We can't make the assumption that types are always fine in std functions. --- clang/lib/AST/ByteCode/Descriptor.cpp | 11 ++++++++--- clang/lib/AST/ByteCode/Interp.cpp | 4 ---- clang/test/AST/ByteCode/placement-new.cpp | 12 +++++++++++- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/clang/lib/AST/ByteCode/Descriptor.cpp b/clang/lib/AST/ByteCode/Descriptor.cpp index 170203fe818775..05ece907af42f4 100644 --- a/clang/lib/AST/ByteCode/Descriptor.cpp +++ b/clang/lib/AST/ByteCode/Descriptor.cpp @@ -389,12 +389,17 @@ Descriptor::Descriptor(const DeclTy &D) } QualType Descriptor::getType() const { - if (const auto *E = asExpr()) - return E->getType(); if (const auto *D = asValueDecl()) return D->getType(); - if (const auto *T = dyn_cast(asDecl())) + if (const auto *T = dyn_cast_if_present(asDecl())) return QualType(T->getTypeForDecl(), 0); + + // The Source sometimes has a different type than the once + // we really save. Try to consult the Record first. + if (isRecord()) + return QualType(ElemRecord->getDecl()->getTypeForDecl(), 0); + if (const auto *E = asExpr()) + return E->getType(); llvm_unreachable("Invalid descriptor type"); } diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index 8b578ccbeb6792..b9c85626ffa990 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -1296,10 +1296,6 @@ bool CheckNewTypeMismatch(InterpState &S, CodePtr OpPC, const Expr *E, if (!InvalidNewDeleteExpr(S, OpPC, E)) return false; - // Assume proper types in std functions. - if (S.Current->isStdFunction()) - return true; - const auto *NewExpr = cast(E); QualType StorageType = Ptr.getType(); diff --git a/clang/test/AST/ByteCode/placement-new.cpp b/clang/test/AST/ByteCode/placement-new.cpp index 7a562adae02a6f..1ff6ff3ac19223 100644 --- a/clang/test/AST/ByteCode/placement-new.cpp +++ b/clang/test/AST/ByteCode/placement-new.cpp @@ -13,7 +13,8 @@ namespace std { }; template constexpr void construct_at(void *p, Args &&...args) { - new (p) T((Args&&)args...); // both-note {{in call to}} + new (p) T((Args&&)args...); // both-note {{in call to}} \ + // both-note {{placement new would change type of storage from 'int' to 'float'}} } } @@ -260,4 +261,13 @@ namespace ConstructAt { static_assert(ctorFail()); // both-error {{not an integral constant expression}} \ // both-note {{in call to 'ctorFail()'}} + + constexpr bool bad_construct_at_type() { + int a; + std::construct_at(&a, 1.0f); // both-note {{in call to}} + return true; + } + static_assert(bad_construct_at_type()); // both-error {{not an integral constant expression}} \ + // both-note {{in call}} + } From 416f101111e06339387ac456c288cb5e60f41bc1 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 24 Sep 2024 23:01:45 -0700 Subject: [PATCH 21/65] [clang] Use std::optional::value_or (NFC) (#109894) --- clang/include/clang/AST/PropertiesBase.td | 2 +- clang/include/clang/Basic/FileManager.h | 2 +- clang/lib/APINotes/APINotesYAMLCompiler.cpp | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/AST/PropertiesBase.td b/clang/include/clang/AST/PropertiesBase.td index 9b934b20cf2559..3057669e3758b5 100644 --- a/clang/include/clang/AST/PropertiesBase.td +++ b/clang/include/clang/AST/PropertiesBase.td @@ -39,7 +39,7 @@ class EnumPropertyType : PropertyType {} /// Supports optional values by using the null representation. class RefPropertyType : PropertyType { let PackOptional = - "value ? *value : nullptr"; + "value.value_or(nullptr)"; let UnpackOptional = "value ? std::optional<" # CXXName # ">(value) : std::nullopt"; } diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h index 527bbef24793ee..74029a91d1a6d0 100644 --- a/clang/include/clang/Basic/FileManager.h +++ b/clang/include/clang/Basic/FileManager.h @@ -293,7 +293,7 @@ class FileManager : public RefCountedBase { bool RequiresNullTerminator = true, std::optional MaybeLimit = std::nullopt) const { return getBufferForFileImpl(Filename, - /*FileSize=*/(MaybeLimit ? *MaybeLimit : -1), + /*FileSize=*/MaybeLimit.value_or(-1), isVolatile, RequiresNullTerminator); } diff --git a/clang/lib/APINotes/APINotesYAMLCompiler.cpp b/clang/lib/APINotes/APINotesYAMLCompiler.cpp index 16fd59244086fd..f72a1d65b5456f 100644 --- a/clang/lib/APINotes/APINotesYAMLCompiler.cpp +++ b/clang/lib/APINotes/APINotesYAMLCompiler.cpp @@ -757,8 +757,8 @@ class YAMLConverter { OutInfo.addTypeInfo(idx++, N); audited = Nullability.size() > 0 || ReturnNullability; if (audited) - OutInfo.addTypeInfo(0, ReturnNullability ? *ReturnNullability - : NullabilityKind::NonNull); + OutInfo.addTypeInfo(0, + ReturnNullability.value_or(NullabilityKind::NonNull)); if (!audited) return; OutInfo.NullabilityAudited = audited; From c2fd3b76f21632fa617a18c50dc9dff84b1a6538 Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Wed, 25 Sep 2024 08:03:12 +0200 Subject: [PATCH 22/65] [ASan][test] XFAIL stack overflow tests on Linux/sparc64 (#109773) When enabling ASan SPARC testing as per PR #107405, 3 stack overflow tests `FAIL` on Linux/sparc64: ``` AddressSanitizer-sparc-linux :: TestCases/Linux/stack-overflow-recovery-mode.cpp AddressSanitizer-sparc-linux :: TestCases/Linux/stack-overflow-sigbus.cpp AddressSanitizer-sparc-linux :: TestCases/Posix/stack-overflow.cpp AddressSanitizer-sparc-linux-dynamic :: TestCases/Linux/stack-overflow-recovery-mode.cpp AddressSanitizer-sparc-linux-dynamic :: TestCases/Linux/stack-overflow-sigbus.cpp AddressSanitizer-sparc-linux-dynamic :: TestCases/Posix/stack-overflow.cpp ``` However, as detailed in Issue #109771, even a Linux equivalent of the Solaris/sparcv9 fix (PR #109101) doesn't improve the situation. Therefore this patch `XFAIL`s the tests until the root cause can be figured out. Tested on `sparc64-unknown-linux-gnu`, `sparcv9-sun-solaris2.11`, and `x86_64-pc-linux-gnu`. --- .../test/asan/TestCases/Linux/stack-overflow-recovery-mode.cpp | 3 +++ .../test/asan/TestCases/Linux/stack-overflow-sigbus.cpp | 3 +++ compiler-rt/test/asan/TestCases/Posix/stack-overflow.cpp | 3 +++ 3 files changed, 9 insertions(+) diff --git a/compiler-rt/test/asan/TestCases/Linux/stack-overflow-recovery-mode.cpp b/compiler-rt/test/asan/TestCases/Linux/stack-overflow-recovery-mode.cpp index e99665953784ab..461702a0ea7a96 100644 --- a/compiler-rt/test/asan/TestCases/Linux/stack-overflow-recovery-mode.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/stack-overflow-recovery-mode.cpp @@ -3,6 +3,9 @@ // RUN: %clang_asan -O0 -fsanitize-recover=address %s -o %t // RUN: %env_asan_opts=halt_on_error=false not %run %t 2>&1 | FileCheck %s +// Issue #109771 +// XFAIL: target={{sparc.*-.*-linux.*}} + #include #include #include diff --git a/compiler-rt/test/asan/TestCases/Linux/stack-overflow-sigbus.cpp b/compiler-rt/test/asan/TestCases/Linux/stack-overflow-sigbus.cpp index 8c9599c9f61108..f6c95318238af4 100644 --- a/compiler-rt/test/asan/TestCases/Linux/stack-overflow-sigbus.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/stack-overflow-sigbus.cpp @@ -2,6 +2,9 @@ // RUN: %clangxx_asan -O0 %s -o %t && %env_asan_opts=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s +// Issue #109771 +// XFAIL: target={{sparc.*-.*-linux.*}} + #include #include #include diff --git a/compiler-rt/test/asan/TestCases/Posix/stack-overflow.cpp b/compiler-rt/test/asan/TestCases/Posix/stack-overflow.cpp index 06057250f87599..3d95a4ba273db5 100644 --- a/compiler-rt/test/asan/TestCases/Posix/stack-overflow.cpp +++ b/compiler-rt/test/asan/TestCases/Posix/stack-overflow.cpp @@ -16,6 +16,9 @@ // RUN: not %run %t 2>&1 | FileCheck %s // REQUIRES: stable-runtime +// Issue #109771 +// XFAIL: target={{sparc.*-.*-linux.*}} + // UNSUPPORTED: ios #include From 1cb12fa9edd744fe72897660541f7841401823c6 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 25 Sep 2024 07:04:09 +0100 Subject: [PATCH 23/65] [GlobalISel] Combine unmerge(unmerge()) if the result is legal. (#109606) This attempts to fold: ``` %1:_(<2 x s32>), %2:_(<2 x s32>) = G_UNMERGE_VALUES %0:_(<4 x s32>) %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %1 ``` Into a single UNMERGE: ``` %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %0 ``` This transform already exists, this patch alters it to occur when the result UNMERGE is considered legal. It does not try to transform where the result would be extracting a subelement from a vector at the moment, as the code is not setup to handle it. ``` %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0:_(<2 x s32>) %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1 ``` This helps us reduce the amount of legalization artefacts, especially from widened vectors padded with undef. --- .../GlobalISel/LegalizationArtifactCombiner.h | 4 + .../AArch64/GlobalISel/legalize-cmp.mir | 9 +- .../AArch64/GlobalISel/legalize-select.mir | 27 ++- llvm/test/CodeGen/AArch64/bswap.ll | 2 - .../AArch64/fixed-vector-deinterleave.ll | 5 - llvm/test/CodeGen/AArch64/fpext.ll | 35 ++-- llvm/test/CodeGen/AArch64/fptoi.ll | 132 +++++--------- .../test/CodeGen/AArch64/fptosi-sat-vector.ll | 47 +++-- .../test/CodeGen/AArch64/fptoui-sat-vector.ll | 47 +++-- llvm/test/CodeGen/AArch64/fptrunc.ll | 3 - llvm/test/CodeGen/AArch64/itofp.ll | 119 +++---------- llvm/test/CodeGen/AArch64/neon-perm.ll | 10 +- llvm/test/CodeGen/AArch64/sadd_sat_vec.ll | 59 +++--- llvm/test/CodeGen/AArch64/shift.ll | 33 +--- llvm/test/CodeGen/AArch64/shufflevector.ll | 73 +++++--- llvm/test/CodeGen/AArch64/ssub_sat_vec.ll | 59 +++--- llvm/test/CodeGen/AArch64/uadd_sat_vec.ll | 61 ++++--- llvm/test/CodeGen/AArch64/usub_sat_vec.ll | 57 +++--- .../artifact-combiner-unmerge-values.mir | 39 ++-- .../GlobalISel/legalize-store-global.mir | 168 +++++++++--------- .../AMDGPU/GlobalISel/legalize-trunc.mir | 12 +- 21 files changed, 439 insertions(+), 562 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index bc83f19dc581fa..0dc827b9ad43eb 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -1090,6 +1090,10 @@ class LegalizationArtifactCombiner { LegalizeActionStep ActionStep = LI.getAction( {TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}}); switch (ActionStep.Action) { + case LegalizeActions::Legal: + if (!OpTy.isVector() || !LI.isLegal({TargetOpcode::G_UNMERGE_VALUES, + {DestTy, SrcUnmergeSrcTy}})) + return false; case LegalizeActions::Lower: case LegalizeActions::Unsupported: break; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir index 89750c90fc1cbe..bd80a892e239e4 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir @@ -412,12 +412,11 @@ body: | ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8) ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s8>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s8>), [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s8>), [[UV1:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[ICMP]](<8 x s8>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV]](<4 x s8>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[ICMP]](<8 x s8>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[DEF]](s32) ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<4 x s32>), [[C]](s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir index 92f8e524dbb312..52a28ad37e362e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir @@ -307,29 +307,24 @@ body: | ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s16) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR]](<8 x s8>), [[BUILD_VECTOR1]], shufflemask(0, 0, 0, 0, undef, undef, undef, undef) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s8>), [[UV9:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[SHUF]](<8 x s8>) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV8]](<4 x s8>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[UV10]](s8), [[UV11]](s8), [[UV12]](s8), [[UV13]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR2]](<8 x s8>) - ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<4 x s16>), [[UV15:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR3]](<8 x s8>) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x s16>), [[UV17:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[UV14]], [[UV16]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[SHUF]](<8 x s8>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR2]](<8 x s8>) + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<4 x s16>), [[UV11:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[UV8]], [[UV10]] ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>) - ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV8]](<4 x s8>) - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[UV18]](s8), [[UV19]](s8), [[UV20]](s8), [[UV21]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR4]](<8 x s8>) - ; CHECK-NEXT: [[UV22:%[0-9]+]]:_(<4 x s16>), [[UV23:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC9]], [[UV22]] + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[SHUF]](<8 x s8>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<4 x s16>), [[UV13:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC9]], [[UV12]] ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC10]], [[XOR]] ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND]], [[AND1]] ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR]](<4 x s16>) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32) - ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_AND [[ANYEXT3]], [[BUILD_VECTOR5]] + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32) + ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_AND [[ANYEXT3]], [[BUILD_VECTOR3]] ; CHECK-NEXT: $q0 = COPY %zext_select(<4 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %w0:_(s32) = COPY $w0 diff --git a/llvm/test/CodeGen/AArch64/bswap.ll b/llvm/test/CodeGen/AArch64/bswap.ll index b14f1a43b7dcfd..74e4a167ae14ca 100644 --- a/llvm/test/CodeGen/AArch64/bswap.ll +++ b/llvm/test/CodeGen/AArch64/bswap.ll @@ -179,8 +179,6 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %a){ ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: rev16 v0.8b, v0.8b -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll index 5bd680ed489389..bbfec8c7c33617 100644 --- a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll +++ b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll @@ -18,11 +18,6 @@ define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: uzp1 v2.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: uzp2 v1.4h, v0.4h, v0.4h -; CHECK-GI-NEXT: mov h0, v2.h[1] -; CHECK-GI-NEXT: mov h3, v1.h[1] -; CHECK-GI-NEXT: mov v2.h[1], v0.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-GI-NEXT: fmov d0, d2 ; CHECK-GI-NEXT: ret %retval = call {<2 x half>, <2 x half>} @llvm.vector.deinterleave2.v4f16(<4 x half> %vec) diff --git a/llvm/test/CodeGen/AArch64/fpext.ll b/llvm/test/CodeGen/AArch64/fpext.ll index d942839c577d2b..df90f9d5f09109 100644 --- a/llvm/test/CodeGen/AArch64/fpext.ll +++ b/llvm/test/CodeGen/AArch64/fpext.ll @@ -376,15 +376,15 @@ define <4 x double> @fpext_v4f16_v4f64(<4 x half> %a) { ; CHECK-GI-LABEL: fpext_v4f16_v4f64: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov h2, v0.h[1] +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov h2, v0.h[2] +; CHECK-GI-NEXT: mov h3, v0.h[3] ; CHECK-GI-NEXT: fcvt d0, h0 -; CHECK-GI-NEXT: mov h3, v1.h[1] -; CHECK-GI-NEXT: fcvt d2, h2 -; CHECK-GI-NEXT: fcvt d1, h1 -; CHECK-GI-NEXT: fcvt d3, h3 -; CHECK-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-GI-NEXT: mov v1.d[1], v3.d[0] +; CHECK-GI-NEXT: fcvt d4, h1 +; CHECK-GI-NEXT: fcvt d1, h2 +; CHECK-GI-NEXT: fcvt d2, h3 +; CHECK-GI-NEXT: mov v0.d[1], v4.d[0] +; CHECK-GI-NEXT: mov v1.d[1], v2.d[0] ; CHECK-GI-NEXT: ret entry: %c = fpext <4 x half> %a to <4 x double> @@ -392,20 +392,11 @@ entry: } define <2 x float> @fpext_v2f16_v2f32(<2 x half> %a) { -; CHECK-SD-LABEL: fpext_v2f16_v2f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fcvtl v0.4s, v0.4h -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fpext_v2f16_v2f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fpext_v2f16_v2f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret entry: %c = fpext <2 x half> %a to <2 x float> ret <2 x float> %c diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll index f72a49f6ab7c89..c436c410a4e397 100644 --- a/llvm/test/CodeGen/AArch64/fptoi.ll +++ b/llvm/test/CodeGen/AArch64/fptoi.ll @@ -3961,9 +3961,6 @@ define <2 x i64> @fptos_v2f16_v2i64(<2 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i64: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v0.2d, v0.2s ; CHECK-GI-NOFP16-NEXT: fcvtzs v0.2d, v0.2d @@ -4008,9 +4005,6 @@ define <2 x i64> @fptou_v2f16_v2i64(<2 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i64: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v0.2d, v0.2s ; CHECK-GI-NOFP16-NEXT: fcvtzu v0.2d, v0.2d @@ -4207,17 +4201,17 @@ define <4 x i64> @fptos_v4f16_v4i64(<4 x half> %a) { ; CHECK-GI-FP16-LABEL: fptos_v4f16_v4i64: ; CHECK-GI-FP16: // %bb.0: // %entry ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 +; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v2.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] ; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-GI-FP16-NEXT: ret entry: %c = fptosi <4 x half> %a to <4 x i64> @@ -4273,17 +4267,17 @@ define <4 x i64> @fptou_v4f16_v4i64(<4 x half> %a) { ; CHECK-GI-FP16-LABEL: fptou_v4f16_v4i64: ; CHECK-GI-FP16: // %bb.0: // %entry ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 +; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v2.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] ; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d ; CHECK-GI-FP16-NEXT: ret entry: %c = fptoui <4 x half> %a to <4 x i64> @@ -4369,29 +4363,29 @@ define <8 x i64> @fptos_v8f16_v8i64(<8 x half> %a) { ; ; CHECK-GI-FP16-LABEL: fptos_v8f16_v8i64: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov s2, v0.s[2] -; CHECK-GI-FP16-NEXT: mov s3, v0.s[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov h7, v0.h[7] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h5, v1.h[1] -; CHECK-GI-FP16-NEXT: mov h6, v2.h[1] -; CHECK-GI-FP16-NEXT: mov h7, v3.h[1] -; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 ; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 +; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d5, h5 ; CHECK-GI-FP16-NEXT: fcvt d6, h6 ; CHECK-GI-FP16-NEXT: fcvt d7, h7 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v4.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v5.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v6.d[0] -; CHECK-GI-FP16-NEXT: mov v3.d[1], v7.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0] +; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0] ; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-GI-FP16-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-GI-FP16-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d +; CHECK-GI-FP16-NEXT: fcvtzs v2.2d, v4.2d +; CHECK-GI-FP16-NEXT: fcvtzs v3.2d, v6.2d ; CHECK-GI-FP16-NEXT: ret entry: %c = fptosi <8 x half> %a to <8 x i64> @@ -4477,29 +4471,29 @@ define <8 x i64> @fptou_v8f16_v8i64(<8 x half> %a) { ; ; CHECK-GI-FP16-LABEL: fptou_v8f16_v8i64: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov s2, v0.s[2] -; CHECK-GI-FP16-NEXT: mov s3, v0.s[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov h7, v0.h[7] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h5, v1.h[1] -; CHECK-GI-FP16-NEXT: mov h6, v2.h[1] -; CHECK-GI-FP16-NEXT: mov h7, v3.h[1] -; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 ; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 +; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d5, h5 ; CHECK-GI-FP16-NEXT: fcvt d6, h6 ; CHECK-GI-FP16-NEXT: fcvt d7, h7 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v4.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v5.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v6.d[0] -; CHECK-GI-FP16-NEXT: mov v3.d[1], v7.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0] +; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0] ; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v1.2d -; CHECK-GI-FP16-NEXT: fcvtzu v2.2d, v2.2d -; CHECK-GI-FP16-NEXT: fcvtzu v3.2d, v3.2d +; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d +; CHECK-GI-FP16-NEXT: fcvtzu v2.2d, v4.2d +; CHECK-GI-FP16-NEXT: fcvtzu v3.2d, v6.2d ; CHECK-GI-FP16-NEXT: ret entry: %c = fptoui <8 x half> %a to <8 x i64> @@ -5708,9 +5702,6 @@ define <2 x i32> @fptos_v2f16_v2i32(<2 x half> %a) { ; ; CHECK-GI-LABEL: fptos_v2f16_v2i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NEXT: fcvtzs v0.2s, v0.2s ; CHECK-GI-NEXT: ret @@ -5729,9 +5720,6 @@ define <2 x i32> @fptou_v2f16_v2i32(<2 x half> %a) { ; ; CHECK-GI-LABEL: fptou_v2f16_v2i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NEXT: fcvtzu v0.2s, v0.2s ; CHECK-GI-NEXT: ret @@ -5984,21 +5972,13 @@ define <2 x i16> @fptos_v2f16_v2i16(<2 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NOFP16-NEXT: fcvtzs v0.2s, v0.2s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: fcvtzs v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret @@ -6017,21 +5997,13 @@ define <2 x i16> @fptou_v2f16_v2i16(<2 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NOFP16-NEXT: fcvtzu v0.2s, v0.2s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: fcvtzu v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret @@ -6460,21 +6432,13 @@ define <2 x i8> @fptos_v2f16_v2i8(<2 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i8: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NOFP16-NEXT: fcvtzs v0.2s, v0.2s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i8: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: fcvtzs v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret @@ -6493,21 +6457,13 @@ define <2 x i8> @fptou_v2f16_v2i8(<2 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i8: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NOFP16-NEXT: fcvtzu v0.2s, v0.2s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i8: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: fcvtzu v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index ed7814938da25f..2d568e858c36b7 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -1270,9 +1270,6 @@ define <2 x i32> @test_signed_v2f16_v2i32(<2 x half> %f) { ; ; CHECK-GI-LABEL: test_signed_v2f16_v2i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NEXT: fcvtzs v0.2s, v0.2s ; CHECK-GI-NEXT: ret @@ -3301,17 +3298,17 @@ define <4 x i64> @test_signed_v4f16_v4i64(<4 x half> %f) { ; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i64: ; CHECK-GI-FP16: // %bb.0: ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 +; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v2.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] ; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-GI-FP16-NEXT: ret %x = call <4 x i64> @llvm.fptosi.sat.v4f16.v4i64(<4 x half> %f) ret <4 x i64> %x @@ -4167,29 +4164,29 @@ define <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) { ; ; CHECK-GI-FP16-LABEL: test_signed_v8f16_v8i64: ; CHECK-GI-FP16: // %bb.0: -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov s2, v0.s[2] -; CHECK-GI-FP16-NEXT: mov s3, v0.s[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov h7, v0.h[7] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h5, v1.h[1] -; CHECK-GI-FP16-NEXT: mov h6, v2.h[1] -; CHECK-GI-FP16-NEXT: mov h7, v3.h[1] -; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 ; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 +; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d5, h5 ; CHECK-GI-FP16-NEXT: fcvt d6, h6 ; CHECK-GI-FP16-NEXT: fcvt d7, h7 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v4.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v5.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v6.d[0] -; CHECK-GI-FP16-NEXT: mov v3.d[1], v7.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0] +; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0] ; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-GI-FP16-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-GI-FP16-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d +; CHECK-GI-FP16-NEXT: fcvtzs v2.2d, v4.2d +; CHECK-GI-FP16-NEXT: fcvtzs v3.2d, v6.2d ; CHECK-GI-FP16-NEXT: ret %x = call <8 x i64> @llvm.fptosi.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index 40a865338cd85d..f63fba9dab6c63 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -1098,9 +1098,6 @@ define <2 x i32> @test_unsigned_v2f16_v2i32(<2 x half> %f) { ; ; CHECK-GI-LABEL: test_unsigned_v2f16_v2i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NEXT: fcvtzu v0.2s, v0.2s ; CHECK-GI-NEXT: ret @@ -2711,17 +2708,17 @@ define <4 x i64> @test_unsigned_v4f16_v4i64(<4 x half> %f) { ; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i64: ; CHECK-GI-FP16: // %bb.0: ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 +; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v2.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] ; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d ; CHECK-GI-FP16-NEXT: ret %x = call <4 x i64> @llvm.fptoui.sat.v4f16.v4i64(<4 x half> %f) ret <4 x i64> %x @@ -3433,29 +3430,29 @@ define <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) { ; ; CHECK-GI-FP16-LABEL: test_unsigned_v8f16_v8i64: ; CHECK-GI-FP16: // %bb.0: -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov s2, v0.s[2] -; CHECK-GI-FP16-NEXT: mov s3, v0.s[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov h7, v0.h[7] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h5, v1.h[1] -; CHECK-GI-FP16-NEXT: mov h6, v2.h[1] -; CHECK-GI-FP16-NEXT: mov h7, v3.h[1] -; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 ; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 +; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d5, h5 ; CHECK-GI-FP16-NEXT: fcvt d6, h6 ; CHECK-GI-FP16-NEXT: fcvt d7, h7 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v4.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v5.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v6.d[0] -; CHECK-GI-FP16-NEXT: mov v3.d[1], v7.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0] +; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0] ; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v1.2d -; CHECK-GI-FP16-NEXT: fcvtzu v2.2d, v2.2d -; CHECK-GI-FP16-NEXT: fcvtzu v3.2d, v3.2d +; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d +; CHECK-GI-FP16-NEXT: fcvtzu v2.2d, v4.2d +; CHECK-GI-FP16-NEXT: fcvtzu v3.2d, v6.2d ; CHECK-GI-FP16-NEXT: ret %x = call <8 x i64> @llvm.fptoui.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x diff --git a/llvm/test/CodeGen/AArch64/fptrunc.ll b/llvm/test/CodeGen/AArch64/fptrunc.ll index c0d4ddef23132d..2187717c4148ae 100644 --- a/llvm/test/CodeGen/AArch64/fptrunc.ll +++ b/llvm/test/CodeGen/AArch64/fptrunc.ll @@ -366,9 +366,6 @@ define <2 x half> @fptrunc_v2f32_v2f16(<2 x float> %a) { ; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret entry: %c = fptrunc <2 x float> %a to <2 x half> diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll index f70ec0f35cb586..c5bde81ba4a5ea 100644 --- a/llvm/test/CodeGen/AArch64/itofp.ll +++ b/llvm/test/CodeGen/AArch64/itofp.ll @@ -3313,24 +3313,17 @@ define <3 x double> @stofp_v3i8_v3f64(<3 x i8> %a) { ; CHECK-GI-NEXT: mov v0.h[2], w2 ; CHECK-GI-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-GI-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov h2, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NEXT: mov h2, v1.h[1] -; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NEXT: smov x8, v0.s[0] -; CHECK-GI-NEXT: smov x9, v0.s[1] -; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: smov x8, v1.s[0] -; CHECK-GI-NEXT: mov v0.d[1], x9 -; CHECK-GI-NEXT: smov x9, v1.s[1] +; CHECK-GI-NEXT: smov x8, v0.h[0] +; CHECK-GI-NEXT: smov x9, v0.h[1] ; CHECK-GI-NEXT: mov v1.d[0], x8 +; CHECK-GI-NEXT: smov x8, v0.h[2] ; CHECK-GI-NEXT: mov v1.d[1], x9 -; CHECK-GI-NEXT: scvtf v0.2d, v0.2d -; CHECK-GI-NEXT: scvtf v2.2d, v1.2d +; CHECK-GI-NEXT: smov x9, v0.h[3] +; CHECK-GI-NEXT: mov v2.d[0], x8 +; CHECK-GI-NEXT: scvtf v0.2d, v1.2d +; CHECK-GI-NEXT: mov v2.d[1], x9 ; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: scvtf v2.2d, v2.2d ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 ; CHECK-GI-NEXT: ret @@ -3365,24 +3358,17 @@ define <3 x double> @utofp_v3i8_v3f64(<3 x i8> %a) { ; CHECK-GI-NEXT: mov v0.h[1], w1 ; CHECK-GI-NEXT: mov v0.h[2], w2 ; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov h2, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NEXT: mov h2, v1.h[1] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NEXT: mov w8, v0.s[0] -; CHECK-GI-NEXT: mov w9, v0.s[1] -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: mov w8, v1.s[0] -; CHECK-GI-NEXT: mov v0.d[1], x9 -; CHECK-GI-NEXT: mov w9, v1.s[1] +; CHECK-GI-NEXT: umov w8, v0.h[0] +; CHECK-GI-NEXT: umov w9, v0.h[1] ; CHECK-GI-NEXT: mov v1.d[0], x8 +; CHECK-GI-NEXT: umov w8, v0.h[2] ; CHECK-GI-NEXT: mov v1.d[1], x9 -; CHECK-GI-NEXT: ucvtf v0.2d, v0.2d -; CHECK-GI-NEXT: ucvtf v2.2d, v1.2d +; CHECK-GI-NEXT: umov w9, v0.h[3] +; CHECK-GI-NEXT: mov v2.d[0], x8 +; CHECK-GI-NEXT: ucvtf v0.2d, v1.2d +; CHECK-GI-NEXT: mov v2.d[1], x9 ; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: ucvtf v2.2d, v2.2d ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 ; CHECK-GI-NEXT: ret @@ -5267,11 +5253,8 @@ define <3 x float> @stofp_v3i8_v3f32(<3 x i8> %a) { ; CHECK-GI-NEXT: mov v0.h[2], w2 ; CHECK-GI-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-GI-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov h2, v0.h[1] -; CHECK-GI-NEXT: mov h3, v1.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] +; CHECK-GI-NEXT: mov v1.h[0], v0.h[2] +; CHECK-GI-NEXT: mov v1.h[1], v0.h[3] ; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] @@ -5300,11 +5283,8 @@ define <3 x float> @utofp_v3i8_v3f32(<3 x i8> %a) { ; CHECK-GI-NEXT: mov v0.h[1], w1 ; CHECK-GI-NEXT: mov v0.h[2], w2 ; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov h2, v0.h[1] -; CHECK-GI-NEXT: mov h3, v1.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] +; CHECK-GI-NEXT: mov v1.h[0], v0.h[2] +; CHECK-GI-NEXT: mov v1.h[1], v0.h[3] ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] @@ -6222,9 +6202,6 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) { ; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: stofp_v2i64_v2f16: @@ -6271,9 +6248,6 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) { ; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: utofp_v2i64_v2f16: @@ -7210,9 +7184,6 @@ define <2 x half> @stofp_v2i32_v2f16(<2 x i32> %a) { ; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret entry: %c = sitofp <2 x i32> %a to <2 x half> @@ -7233,9 +7204,6 @@ define <2 x half> @utofp_v2i32_v2f16(<2 x i32> %a) { ; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret entry: %c = uitofp <2 x i32> %a to <2 x half> @@ -7443,18 +7411,12 @@ define <2 x half> @stofp_v2i16_v2f16(<2 x i16> %a) { ; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: stofp_v2i16_v2f16: ; CHECK-GI-FP16: // %bb.0: // %entry ; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-FP16-NEXT: scvtf v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret entry: %c = sitofp <2 x i16> %a to <2 x half> @@ -7484,18 +7446,12 @@ define <2 x half> @utofp_v2i16_v2f16(<2 x i16> %a) { ; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: utofp_v2i16_v2f16: ; CHECK-GI-FP16: // %bb.0: // %entry ; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-FP16-NEXT: ucvtf v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret entry: %c = uitofp <2 x i16> %a to <2 x half> @@ -7977,9 +7933,6 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) { ; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: stofp_v2i8_v2f16: @@ -7990,12 +7943,7 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) { ; CHECK-GI-FP16-NEXT: xtn v0.4h, v1.4s ; CHECK-GI-FP16-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-GI-FP16-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: scvtf v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret entry: %c = sitofp <2 x i8> %a to <2 x half> @@ -8039,27 +7987,14 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) { ; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: utofp_v2i8_v2f16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov v1.s[0], v0.s[0] -; CHECK-GI-FP16-NEXT: mov v1.s[1], v0.s[1] -; CHECK-GI-FP16-NEXT: xtn v0.4h, v1.4s -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: movi d1, #0x0000ff000000ff -; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-FP16-NEXT: ucvtf v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret entry: %c = uitofp <2 x i8> %a to <2 x half> @@ -8096,11 +8031,8 @@ define <3 x half> @stofp_v3i8_v3f16(<3 x i8> %a) { ; CHECK-GI-NOFP16-NEXT: mov v0.h[2], w2 ; CHECK-GI-NOFP16-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-GI-NOFP16-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v0.h[3] ; CHECK-GI-NOFP16-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-GI-NOFP16-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-GI-NOFP16-NEXT: mov v0.d[1], v1.d[0] @@ -8149,11 +8081,8 @@ define <3 x half> @utofp_v3i8_v3f16(<3 x i8> %a) { ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], w1 ; CHECK-GI-NOFP16-NEXT: mov v0.h[2], w2 ; CHECK-GI-NOFP16-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v0.h[3] ; CHECK-GI-NOFP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NOFP16-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-GI-NOFP16-NEXT: mov v0.d[1], v1.d[0] diff --git a/llvm/test/CodeGen/AArch64/neon-perm.ll b/llvm/test/CodeGen/AArch64/neon-perm.ll index 2897741780f602..7b85924ce1e323 100644 --- a/llvm/test/CodeGen/AArch64/neon-perm.ll +++ b/llvm/test/CodeGen/AArch64/neon-perm.ll @@ -1739,15 +1739,7 @@ define <4 x i8> @test_vzip1_v4i8(<8 x i8> %p) { ; ; CHECK-GI-LABEL: test_vzip1_v4i8: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[2] -; CHECK-GI-NEXT: mov b0, v0.b[3] -; CHECK-GI-NEXT: mov v2.b[1], v1.b[0] -; CHECK-GI-NEXT: mov v2.b[2], v3.b[0] -; CHECK-GI-NEXT: mov v2.b[3], v0.b[0] -; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0 +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %lo = shufflevector <8 x i8> %p, <8 x i8> undef, <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll index adac75758220e2..6d331d9413f913 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -2,11 +2,6 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 - declare <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>) declare <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8>, <4 x i8>) @@ -190,23 +185,39 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-NEXT: ld1 { v1.b }[0], [x1] -; CHECK-NEXT: add x8, x0, #1 -; CHECK-NEXT: add x9, x1, #1 -; CHECK-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-NEXT: ld1 { v1.b }[4], [x9] -; CHECK-NEXT: shl v1.2s, v1.2s, #24 -; CHECK-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-NEXT: sqadd v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushr v0.2s, v0.2s, #24 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ld1 { v0.b }[0], [x0] +; CHECK-SD-NEXT: ld1 { v1.b }[0], [x1] +; CHECK-SD-NEXT: add x8, x0, #1 +; CHECK-SD-NEXT: add x9, x1, #1 +; CHECK-SD-NEXT: ld1 { v0.b }[4], [x8] +; CHECK-SD-NEXT: ld1 { v1.b }[4], [x9] +; CHECK-SD-NEXT: shl v1.2s, v1.2s, #24 +; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24 +; CHECK-SD-NEXT: sqadd v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #24 +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strb w9, [x2] +; CHECK-SD-NEXT: strb w8, [x2, #1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: ldr b1, [x1] +; CHECK-GI-NEXT: add x8, x2, #1 +; CHECK-GI-NEXT: ldr b2, [x0, #1] +; CHECK-GI-NEXT: ldr b3, [x1, #1] +; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[0], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: sqadd v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: st1 { v0.b }[0], [x2] +; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ret %x = load <2 x i8>, ptr %px %y = load <2 x i8>, ptr %py %z = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %x, <2 x i8> %y) @@ -256,10 +267,10 @@ define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-GI-NEXT: add x9, x1, #2 ; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] +; CHECK-GI-NEXT: add x8, x2, #2 ; CHECK-GI-NEXT: sqadd v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] ; CHECK-GI-NEXT: str h0, [x2] -; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: st1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py diff --git a/llvm/test/CodeGen/AArch64/shift.ll b/llvm/test/CodeGen/AArch64/shift.ll index 54f7887aee8d3e..066928687cc02d 100644 --- a/llvm/test/CodeGen/AArch64/shift.ll +++ b/llvm/test/CodeGen/AArch64/shift.ll @@ -534,14 +534,7 @@ define <4 x i8> @shl_v4i8(<4 x i8> %0, <4 x i8> %1){ ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b ; CHECK-GI-NEXT: ushl v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[2] -; CHECK-GI-NEXT: mov b0, v0.b[3] -; CHECK-GI-NEXT: mov v2.b[1], v1.b[0] -; CHECK-GI-NEXT: mov v2.b[2], v3.b[0] -; CHECK-GI-NEXT: mov v2.b[3], v0.b[0] -; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0 +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %3 = shl <4 x i8> %0, %1 @@ -577,8 +570,6 @@ define <2 x i16> @shl_v2i16(<2 x i16> %0, <2 x i16> %1){ ; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h ; CHECK-GI-NEXT: ushl v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret @@ -723,14 +714,7 @@ define <4 x i8> @ashr_v4i8(<4 x i8> %0, <4 x i8> %1){ ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: neg v1.8b, v1.8b ; CHECK-GI-NEXT: sshl v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[2] -; CHECK-GI-NEXT: mov b0, v0.b[3] -; CHECK-GI-NEXT: mov v2.b[1], v1.b[0] -; CHECK-GI-NEXT: mov v2.b[2], v3.b[0] -; CHECK-GI-NEXT: mov v2.b[3], v0.b[0] -; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0 +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %3 = ashr <4 x i8> %0, %1 @@ -766,8 +750,6 @@ define <2 x i16> @ashr_v2i16(<2 x i16> %0, <2 x i16> %1){ ; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: neg v1.4h, v1.4h ; CHECK-GI-NEXT: sshl v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret @@ -906,14 +888,7 @@ define <4 x i8> @lshr_v4i8(<4 x i8> %0, <4 x i8> %1){ ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: neg v1.8b, v1.8b ; CHECK-GI-NEXT: ushl v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[2] -; CHECK-GI-NEXT: mov b0, v0.b[3] -; CHECK-GI-NEXT: mov v2.b[1], v1.b[0] -; CHECK-GI-NEXT: mov v2.b[2], v3.b[0] -; CHECK-GI-NEXT: mov v2.b[3], v0.b[0] -; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0 +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %3 = lshr <4 x i8> %0, %1 @@ -948,8 +923,6 @@ define <2 x i16> @lshr_v2i16(<2 x i16> %0, <2 x i16> %1){ ; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: neg v1.4h, v1.4h ; CHECK-GI-NEXT: ushl v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll index 5f4ff1e64673bb..6b5951551c3a54 100644 --- a/llvm/test/CodeGen/AArch64/shufflevector.ll +++ b/llvm/test/CodeGen/AArch64/shufflevector.ll @@ -2,9 +2,6 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for shufflevector_v2i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2i1_zeroes - ; ===== Legal Vector Types ===== define <8 x i8> @shufflevector_v8i8(<8 x i8> %a, <8 x i8> %b) { @@ -183,13 +180,30 @@ define <2 x i64> @shufflevector_v2i64_zeroes(<2 x i64> %a, <2 x i64> %b) { ; ===== Smaller/Larger Width Vectors with Legal Element Sizes ===== define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){ -; CHECK-LABEL: shufflevector_v2i1: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: mov v0.s[1], v1.s[1] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shufflevector_v2i1: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: mov v0.s[1], v1.s[1] +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v2i1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov w8, v1.s[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov w9, v0.s[1] +; CHECK-GI-NEXT: mov v1.b[1], w8 +; CHECK-GI-NEXT: mov v0.b[1], w9 +; CHECK-GI-NEXT: mov b1, v1.b[1] +; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] +; CHECK-GI-NEXT: umov w8, v0.b[0] +; CHECK-GI-NEXT: umov w9, v0.b[1] +; CHECK-GI-NEXT: mov v0.s[0], w8 +; CHECK-GI-NEXT: mov v0.s[1], w9 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> ret <2 x i1> %c } @@ -358,11 +372,24 @@ define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) { ; ===== Smaller/Larger Width Vectors with Zero Masks ===== define <2 x i1> @shufflevector_v2i1_zeroes(<2 x i1> %a, <2 x i1> %b){ -; CHECK-LABEL: shufflevector_v2i1_zeroes: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: dup v0.2s, v0.s[0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shufflevector_v2i1_zeroes: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: dup v0.2s, v0.s[0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v2i1_zeroes: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov w8, v0.s[1] +; CHECK-GI-NEXT: mov v0.b[1], w8 +; CHECK-GI-NEXT: dup v0.8b, v0.b[0] +; CHECK-GI-NEXT: umov w8, v0.b[0] +; CHECK-GI-NEXT: umov w9, v0.b[1] +; CHECK-GI-NEXT: mov v0.s[0], w8 +; CHECK-GI-NEXT: mov v0.s[1], w9 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> ret <2 x i1> %c } @@ -486,11 +513,9 @@ define <3 x i8> @shufflevector_v3i8(<3 x i8> %a, <3 x i8> %b) { ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI30_0] ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b -; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov b2, v0.b[2] -; CHECK-GI-NEXT: fmov w0, s0 -; CHECK-GI-NEXT: fmov w1, s1 -; CHECK-GI-NEXT: fmov w2, s2 +; CHECK-GI-NEXT: umov w0, v0.b[0] +; CHECK-GI-NEXT: umov w1, v0.b[1] +; CHECK-GI-NEXT: umov w2, v0.b[2] ; CHECK-GI-NEXT: ret %c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> ret <3 x i8> %c @@ -598,11 +623,9 @@ define <3 x i8> @shufflevector_v3i8_zeroes(<3 x i8> %a, <3 x i8> %b) { ; CHECK-GI-NEXT: mov v0.b[1], w1 ; CHECK-GI-NEXT: mov v0.b[2], w2 ; CHECK-GI-NEXT: dup v0.8b, v0.b[0] -; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov b2, v0.b[2] -; CHECK-GI-NEXT: fmov w0, s0 -; CHECK-GI-NEXT: fmov w1, s1 -; CHECK-GI-NEXT: fmov w2, s2 +; CHECK-GI-NEXT: umov w0, v0.b[0] +; CHECK-GI-NEXT: umov w1, v0.b[1] +; CHECK-GI-NEXT: umov w2, v0.b[2] ; CHECK-GI-NEXT: ret %c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> ret <3 x i8> %c diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll index 12371ef2c0021b..dddda7e9ba64cd 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -2,11 +2,6 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 - declare <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>) declare <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8>, <4 x i8>) @@ -191,23 +186,39 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-NEXT: ld1 { v1.b }[0], [x1] -; CHECK-NEXT: add x8, x0, #1 -; CHECK-NEXT: add x9, x1, #1 -; CHECK-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-NEXT: ld1 { v1.b }[4], [x9] -; CHECK-NEXT: shl v1.2s, v1.2s, #24 -; CHECK-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-NEXT: sqsub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushr v0.2s, v0.2s, #24 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ld1 { v0.b }[0], [x0] +; CHECK-SD-NEXT: ld1 { v1.b }[0], [x1] +; CHECK-SD-NEXT: add x8, x0, #1 +; CHECK-SD-NEXT: add x9, x1, #1 +; CHECK-SD-NEXT: ld1 { v0.b }[4], [x8] +; CHECK-SD-NEXT: ld1 { v1.b }[4], [x9] +; CHECK-SD-NEXT: shl v1.2s, v1.2s, #24 +; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24 +; CHECK-SD-NEXT: sqsub v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #24 +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strb w9, [x2] +; CHECK-SD-NEXT: strb w8, [x2, #1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: ldr b1, [x1] +; CHECK-GI-NEXT: add x8, x2, #1 +; CHECK-GI-NEXT: ldr b2, [x0, #1] +; CHECK-GI-NEXT: ldr b3, [x1, #1] +; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[0], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: sqsub v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: st1 { v0.b }[0], [x2] +; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ret %x = load <2 x i8>, ptr %px %y = load <2 x i8>, ptr %py %z = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %x, <2 x i8> %y) @@ -257,10 +268,10 @@ define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-GI-NEXT: add x9, x1, #2 ; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] +; CHECK-GI-NEXT: add x8, x2, #2 ; CHECK-GI-NEXT: sqsub v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] ; CHECK-GI-NEXT: str h0, [x2] -; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: st1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll index e99935e8677fc7..badd31c1c561c5 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -2,11 +2,6 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 - declare <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>) declare <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8>, <4 x i8>) @@ -187,24 +182,40 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ldrb w9, [x1] -; CHECK-NEXT: movi d2, #0x0000ff000000ff -; CHECK-NEXT: ldrb w10, [x0, #1] -; CHECK-NEXT: ldrb w11, [x1, #1] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: mov v1.s[1], w11 -; CHECK-NEXT: add v0.2s, v0.2s, v1.2s -; CHECK-NEXT: umin v0.2s, v0.2s, v2.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldrb w8, [x0] +; CHECK-SD-NEXT: ldrb w9, [x1] +; CHECK-SD-NEXT: movi d2, #0x0000ff000000ff +; CHECK-SD-NEXT: ldrb w10, [x0, #1] +; CHECK-SD-NEXT: ldrb w11, [x1, #1] +; CHECK-SD-NEXT: fmov s0, w8 +; CHECK-SD-NEXT: fmov s1, w9 +; CHECK-SD-NEXT: mov v0.s[1], w10 +; CHECK-SD-NEXT: mov v1.s[1], w11 +; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: umin v0.2s, v0.2s, v2.2s +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strb w9, [x2] +; CHECK-SD-NEXT: strb w8, [x2, #1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: ldr b1, [x1] +; CHECK-GI-NEXT: add x8, x2, #1 +; CHECK-GI-NEXT: ldr b2, [x0, #1] +; CHECK-GI-NEXT: ldr b3, [x1, #1] +; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[0], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: uqadd v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: st1 { v0.b }[0], [x2] +; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ret %x = load <2 x i8>, ptr %px %y = load <2 x i8>, ptr %py %z = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %x, <2 x i8> %y) @@ -255,10 +266,10 @@ define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-GI-NEXT: add x9, x1, #2 ; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] +; CHECK-GI-NEXT: add x8, x2, #2 ; CHECK-GI-NEXT: uqadd v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] ; CHECK-GI-NEXT: str h0, [x2] -; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: st1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll index cdba9625431a58..45418b5c648fa3 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -2,11 +2,6 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 - declare <1 x i8> @llvm.usub.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>) declare <4 x i8> @llvm.usub.sat.v4i8(<4 x i8>, <4 x i8>) @@ -188,22 +183,38 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ldrb w9, [x1] -; CHECK-NEXT: ldrb w10, [x0, #1] -; CHECK-NEXT: ldrb w11, [x1, #1] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: mov v1.s[1], w11 -; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldrb w8, [x0] +; CHECK-SD-NEXT: ldrb w9, [x1] +; CHECK-SD-NEXT: ldrb w10, [x0, #1] +; CHECK-SD-NEXT: ldrb w11, [x1, #1] +; CHECK-SD-NEXT: fmov s0, w8 +; CHECK-SD-NEXT: fmov s1, w9 +; CHECK-SD-NEXT: mov v0.s[1], w10 +; CHECK-SD-NEXT: mov v1.s[1], w11 +; CHECK-SD-NEXT: uqsub v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strb w9, [x2] +; CHECK-SD-NEXT: strb w8, [x2, #1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: ldr b1, [x1] +; CHECK-GI-NEXT: add x8, x2, #1 +; CHECK-GI-NEXT: ldr b2, [x0, #1] +; CHECK-GI-NEXT: ldr b3, [x1, #1] +; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[0], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: uqsub v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: st1 { v0.b }[0], [x2] +; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ret %x = load <2 x i8>, ptr %px %y = load <2 x i8>, ptr %py %z = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %x, <2 x i8> %y) @@ -252,10 +263,10 @@ define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-GI-NEXT: add x9, x1, #2 ; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] +; CHECK-GI-NEXT: add x8, x2, #2 ; CHECK-GI-NEXT: uqsub v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] ; CHECK-GI-NEXT: str h0, [x2] -; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: st1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir index 3b456ed248b3af..8300b2bc05e96d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir @@ -1151,14 +1151,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV5]](<2 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>), [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV6]](<2 x s32>) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV7]](<2 x s32>) ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>), implicit [[CONCAT_VECTORS1]](<4 x s16>) %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -1250,14 +1249,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s64>), [[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV4]](s64) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV5]](s64) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV6]](s64) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV7]](s64) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC2]](s32), [[TRUNC3]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>), implicit [[BUILD_VECTOR1]](<2 x s32>) %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -1300,21 +1298,20 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s64>), [[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV4]](s64) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV6]](s64) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV5]](s64) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV7]](s64) ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir index 31f28b50462b78..f2a88a21a286ef 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -7418,9 +7418,8 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -7440,7 +7439,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7456,7 +7455,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7472,7 +7471,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7488,8 +7487,8 @@ body: | ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7504,7 +7503,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY16]], [[C]](s32) ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7519,7 +7518,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY19]], [[C]](s32) ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7534,7 +7533,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) - ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY22]], [[C]](s32) ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) ; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7565,9 +7564,8 @@ body: | ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -7588,7 +7586,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) @@ -7605,7 +7603,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) @@ -7622,7 +7620,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) @@ -7639,8 +7637,8 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; VI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) @@ -7656,7 +7654,7 @@ body: | ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) @@ -7672,7 +7670,7 @@ body: | ; VI-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) ; VI-NEXT: G_STORE [[ANYEXT11]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) @@ -7688,7 +7686,7 @@ body: | ; VI-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR20]](s16) ; VI-NEXT: G_STORE [[ANYEXT13]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) @@ -7730,9 +7728,8 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -7741,47 +7738,47 @@ body: | ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) @@ -7803,9 +7800,8 @@ body: | ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -7814,47 +7810,47 @@ body: | ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; VI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) @@ -8107,9 +8103,8 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -8129,7 +8124,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8145,7 +8140,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8161,7 +8156,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8177,8 +8172,8 @@ body: | ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8193,7 +8188,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY16]], [[C]](s32) ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8208,7 +8203,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY19]], [[C]](s32) ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8223,7 +8218,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) - ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY22]], [[C]](s32) ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) ; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8256,9 +8251,8 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -8279,7 +8273,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) @@ -8296,7 +8290,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) @@ -8313,7 +8307,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) @@ -8330,8 +8324,8 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; VI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) @@ -8347,7 +8341,7 @@ body: | ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) @@ -8363,7 +8357,7 @@ body: | ; VI-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) ; VI-NEXT: G_STORE [[ANYEXT11]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) @@ -8379,7 +8373,7 @@ body: | ; VI-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR20]](s16) ; VI-NEXT: G_STORE [[ANYEXT13]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) @@ -8423,9 +8417,8 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -8434,47 +8427,47 @@ body: | ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) @@ -8498,9 +8491,8 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -8509,47 +8501,47 @@ body: | ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; VI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir index 5205386c8ea713..282550830442cc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir @@ -151,21 +151,19 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s64>), [[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV4]](s64) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV5]](s64) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] From 1cdcec5884fb99d921800197e0863329de6b10ee Mon Sep 17 00:00:00 2001 From: yronglin Date: Wed, 25 Sep 2024 14:09:06 +0800 Subject: [PATCH 24/65] [clang][bytecode] Handle vector comma op (#109827) Currently we were not handle comma op for vector operands, after this patch, we handle the comma op for vector operands with common way. Signed-off-by: yronglin --- clang/lib/AST/ByteCode/Compiler.cpp | 8 +++++--- clang/test/Sema/fp16vec-sema.c | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 785918846976d4..e54b6568d7060b 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -725,9 +725,7 @@ bool Compiler::VisitParenExpr(const ParenExpr *E) { template bool Compiler::VisitBinaryOperator(const BinaryOperator *BO) { // Need short-circuiting for these. - if (BO->getType()->isVectorType()) - return this->VisitVectorBinOp(BO); - if (BO->isLogicalOp()) + if (BO->isLogicalOp() && !BO->getType()->isVectorType()) return this->VisitLogicalBinOp(BO); const Expr *LHS = BO->getLHS(); @@ -746,6 +744,8 @@ bool Compiler::VisitBinaryOperator(const BinaryOperator *BO) { if (BO->getType()->isAnyComplexType()) return this->VisitComplexBinOp(BO); + if (BO->getType()->isVectorType()) + return this->VisitVectorBinOp(BO); if ((LHS->getType()->isAnyComplexType() || RHS->getType()->isAnyComplexType()) && BO->isComparisonOp()) @@ -1264,6 +1264,8 @@ bool Compiler::VisitComplexBinOp(const BinaryOperator *E) { template bool Compiler::VisitVectorBinOp(const BinaryOperator *E) { + assert(!E->isCommaOp() && + "Comma op should be handled in VisitBinaryOperator"); assert(E->getType()->isVectorType()); assert(E->getLHS()->getType()->isVectorType()); assert(E->getRHS()->getType()->isVectorType()); diff --git a/clang/test/Sema/fp16vec-sema.c b/clang/test/Sema/fp16vec-sema.c index 80936cd622f7cd..89f01c6dcf47b6 100644 --- a/clang/test/Sema/fp16vec-sema.c +++ b/clang/test/Sema/fp16vec-sema.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -fsyntax-only -Wno-unused-value -verify %s +// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -fsyntax-only -Wno-unused-value -verify %s typedef __fp16 half4 __attribute__ ((vector_size (8))); typedef float float4 __attribute__ ((vector_size (16))); From 832297ca32f4fba75fea68efc93a3e6c222c89ff Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Tue, 24 Sep 2024 23:22:20 -0700 Subject: [PATCH 25/65] Fix compatibility version in test (#97128) The mangling compatibility being tested here changed between Clang 17 and 18, not between 16 and 17, so change the "old" version to 17. As requested by @ahatanak in [post-commit review](https://github.com/llvm/llvm-project/commit/7421dd55a16f18919a568499e4c0888ed3a5e8b5#commitcomment-128651446). --- clang/test/CodeGenCXX/mangle-concept.cpp | 52 ++++++++++++------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/clang/test/CodeGenCXX/mangle-concept.cpp b/clang/test/CodeGenCXX/mangle-concept.cpp index e9c46d87635abb..91dc1b0e688e0d 100644 --- a/clang/test/CodeGenCXX/mangle-concept.cpp +++ b/clang/test/CodeGenCXX/mangle-concept.cpp @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -verify -std=c++20 -emit-llvm -triple %itanium_abi_triple -o - %s -fclang-abi-compat=latest | FileCheck %s -// RUN: %clang_cc1 -verify -std=c++20 -emit-llvm -triple %itanium_abi_triple -o - %s -fclang-abi-compat=16 | FileCheck %s --check-prefix=CLANG16 +// RUN: %clang_cc1 -verify -std=c++20 -emit-llvm -triple %itanium_abi_triple -o - %s -fclang-abi-compat=17 | FileCheck %s --check-prefix=CLANG17 // expected-no-diagnostics namespace test1 { @@ -8,7 +8,7 @@ template concept C = true; template S> f0() { return S>{}; } template S> f0<>(); // CHECK: @_ZN5test12f0IiEENS_1SIX1CIT_EEEEv( -// CLANG16: @_ZN5test12f0IiEENS_1SIL_ZNS_1CIT_EEEEEv( +// CLANG17: @_ZN5test12f0IiEENS_1SIL_ZNS_1CIT_EEEEEv( } template struct S {}; @@ -18,12 +18,12 @@ template concept D = true; template S> f0a() { return S>{}; } template S> f0a<>(); // CHECK: @_Z3f0aIiE1SIXsr5test1E1CIT_EEEv( -// CLANG16: @_Z3f0aIiE1SIL_ZN5test11CIT_EEEEv( +// CLANG17: @_Z3f0aIiE1SIL_ZN5test11CIT_EEEEv( template S> f0() { return S>{}; } template S> f0<>(); // CHECK: @_Z2f0IiE1SIX1CIT_EEEv( -// CLANG16: @_Z2f0IiE1SIL_Z1CIT_EEEv( +// CLANG17: @_Z2f0IiE1SIL_Z1CIT_EEEv( template concept True = true; @@ -56,25 +56,25 @@ namespace test2 { // CHECK-LABEL: define {{.*}}@{{.*}}test2{{.*}}use void use() { // CHECK: call {{.*}}@_ZN5test21AIiEF1fEzQ4TrueIT_E( - // CLANG16: call {{.*}}@_ZN5test21fEz( + // CLANG17: call {{.*}}@_ZN5test21fEz( f(ai); // CHECK: call {{.*}}@_ZN5test2F1gIvEEvzQaa4TrueIT_E4TrueITL0__E( - // CLANG16: call {{.*}}@_ZN5test21gIvEEvz( + // CLANG17: call {{.*}}@_ZN5test21gIvEEvz( g(ai); // CHECK: call {{.*}}@_ZN5test21hIvEEvzQ4TrueITL0__E( - // CLANG16: call {{.*}}@_ZN5test21hIvEEvz( + // CLANG17: call {{.*}}@_ZN5test21hIvEEvz( h(ai); // CHECK: call {{.*}}@_ZN5test2F1iIvQaa4TrueIT_E4TrueITL0__EEEvz( - // CLANG16: call {{.*}}@_ZN5test21iIvEEvz( + // CLANG17: call {{.*}}@_ZN5test21iIvEEvz( i(ai); // CHECK: call {{.*}}@_ZN5test21jIvQ4TrueITL0__EEEvz( - // CLANG16: call {{.*}}@_ZN5test21jIvEEvz( + // CLANG17: call {{.*}}@_ZN5test21jIvEEvz( j(ai); // CHECK: call {{.*}}@_ZN5test2F1kITk4TruevQ4TrueIT_EEEvz( - // CLANG16: call {{.*}}@_ZN5test21kIvEEvz( + // CLANG17: call {{.*}}@_ZN5test21kIvEEvz( k(ai); // CHECK: call {{.*}}@_ZN5test21lITk4TruevEEvz( - // CLANG16: call {{.*}}@_ZN5test21lIvEEvz( + // CLANG17: call {{.*}}@_ZN5test21lIvEEvz( l(ai); } } @@ -84,38 +84,38 @@ namespace test3 { template void d() {} template void d<0>(); // CHECK: define {{.*}}@_ZN5test31dITnDaLi0EEEvv( - // CLANG16: define {{.*}}@_ZN5test31dILi0EEEvv( + // CLANG17: define {{.*}}@_ZN5test31dILi0EEEvv( template void e() {} template void e<0>(); // CHECK: define {{.*}}@_ZN5test31eITnDcLi0EEEvv( - // CLANG16: define {{.*}}@_ZN5test31eILi0EEEvv( + // CLANG17: define {{.*}}@_ZN5test31eILi0EEEvv( // Constrained auto. template void f() {} template void f<0>(); // CHECK: define {{.*}}@_ZN5test31fITnDk1CLi0EEEvv( - // CLANG16: define {{.*}}@_ZN5test31fILi0EEEvv( + // CLANG17: define {{.*}}@_ZN5test31fILi0EEEvv( template auto> void g() {} template void g<0>(); // CHECK: define {{.*}}@_ZN5test31gITnDk1DIiELi0EEEvv( - // CLANG16: define {{.*}}@_ZN5test31gILi0EEEvv( + // CLANG17: define {{.*}}@_ZN5test31gILi0EEEvv( template auto> void h() {} template void h(); // CHECK: define {{.*}}@_ZN5test31hIiTnDk1DIT_ELi0EEEvv( - // CLANG16: define {{.*}}@_ZN5test31hIiLi0EEEvv( + // CLANG17: define {{.*}}@_ZN5test31hIiLi0EEEvv( template void i(decltype(new C auto(T()))) {} template void i(int*); // CHECK: define {{.*}}@_ZN5test31iIiEEvDTnw_Dk1CpicvT__EEE( - // CLANG16: define {{.*}}@_ZN5test31iIiEEvDTnw_DapicvT__EEE( + // CLANG17: define {{.*}}@_ZN5test31iIiEEvDTnw_DapicvT__EEE( template void j(decltype(new C decltype(auto)(T()))) {} template void j(int*); // CHECK: define {{.*}}@_ZN5test31jIiEEvDTnw_DK1CpicvT__EEE( - // CLANG16: define {{.*}}@_ZN5test31jIiEEvDTnw_DcpicvT__EEE( + // CLANG17: define {{.*}}@_ZN5test31jIiEEvDTnw_DcpicvT__EEE( } namespace test4 { @@ -123,12 +123,12 @@ namespace test4 { template void f() {} template void f(); // CHECK: define {{.*}}@_ZN5test41fITk1CiEEvv( - // CLANG16: define {{.*}}@_ZN5test41fIiEEvv( + // CLANG17: define {{.*}}@_ZN5test41fIiEEvv( template> void g() {} template void g(); // CHECK: define {{.*}}@_ZN5test41gITk1DIiEiEEvv( - // CLANG16: define {{.*}}@_ZN5test41gIiEEvv( + // CLANG17: define {{.*}}@_ZN5test41gIiEEvv( } namespace test5 { @@ -175,18 +175,18 @@ namespace test5 { template typename> void p() {} // CHECK: define {{.*}}@_ZN5test51pINS_1AEEEvv( - // CLANG16: define {{.*}}@_ZN5test51pINS_1AEEEvv( + // CLANG17: define {{.*}}@_ZN5test51pINS_1AEEEvv( template void p(); // CHECK: define {{.*}}@_ZN5test51pITtTpTyENS_1BEEEvv( - // CLANG16: define {{.*}}@_ZN5test51pINS_1BEEEvv( + // CLANG17: define {{.*}}@_ZN5test51pINS_1BEEEvv( template void p(); template typename> void q() {} // CHECK: define {{.*}}@_ZN5test51qITtTyTyENS_1AEEEvv( - // CLANG16: define {{.*}}@_ZN5test51qINS_1AEEEvv( + // CLANG17: define {{.*}}@_ZN5test51qINS_1AEEEvv( template void q(); // CHECK: define {{.*}}@_ZN5test51qINS_1BEEEvv( - // CLANG16: define {{.*}}@_ZN5test51qINS_1BEEEvv( + // CLANG17: define {{.*}}@_ZN5test51qINS_1BEEEvv( template void q(); } @@ -194,13 +194,13 @@ namespace test6 { // Abbreviated function templates. void f(C auto) {} // CHECK: define {{.*}}@_ZN5test61fITk1CiEEvT_( - // CLANG16: define {{.*}}@_ZN5test61fIiEEvT_( + // CLANG17: define {{.*}}@_ZN5test61fIiEEvT_( template void f(int); template void g(D auto) {} // CHECK: define {{.*}}@_ZN5test61gIiTk1DIT_EiEEvT0_( - // CLANG16: define {{.*}}@_ZN5test61gIiiEEvT0_( + // CLANG17: define {{.*}}@_ZN5test61gIiiEEvT0_( template void g(int); } From c08a4376442aad58a1c58d3ff95fa74417d24353 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 24 Sep 2024 23:24:31 -0700 Subject: [PATCH 26/65] [GlobalISel] Fix a warning This patch fixes: llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h:1097:7: error: unannotated fall-through between switch labels [-Werror,-Wimplicit-fallthrough] --- .../llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 0dc827b9ad43eb..471a7f70dd546c 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -1094,6 +1094,7 @@ class LegalizationArtifactCombiner { if (!OpTy.isVector() || !LI.isLegal({TargetOpcode::G_UNMERGE_VALUES, {DestTy, SrcUnmergeSrcTy}})) return false; + break; case LegalizeActions::Lower: case LegalizeActions::Unsupported: break; From a6ea0b0d2973d3d94f9ed7aac6db9ca722664774 Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Wed, 25 Sep 2024 07:28:07 +0100 Subject: [PATCH 27/65] [compiler-rt] intercept macOs's freadlink call. (#83679) available since macOs Ventura. --- .../sanitizer_common_interceptors.inc | 19 ++++++++++++ .../sanitizer_platform_interceptors.h | 1 + .../TestCases/Darwin/freadlink.c | 29 +++++++++++++++++++ 3 files changed, 49 insertions(+) create mode 100644 compiler-rt/test/sanitizer_common/TestCases/Darwin/freadlink.c diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index b382e7a61950c7..7898af4a335e3a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -10342,6 +10342,24 @@ INTERCEPTOR(SSIZE_T, pwritev2, int fd, __sanitizer_iovec *iov, int iovcnt, #define INIT_PWRITEV2 #endif +#if SANITIZER_INTERCEPT_FREADLINK +INTERCEPTOR(SSIZE_T, freadlink, int fd, char *buf, SIZE_T bufsiz) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, freadlink, fd, buf, bufsiz); + COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd); + SSIZE_T res = REAL(freadlink)(fd, buf, bufsiz); + if (res > 0) + COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, res); + if (res >= 0 && fd > 0) + COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd); + return res; +} + +# define INIT_FREADLINK COMMON_INTERCEPT_FUNCTION(freadlink) +#else +# define INIT_FREADLINK +#endif + #include "sanitizer_common_interceptors_netbsd_compat.inc" namespace __sanitizer { @@ -10663,6 +10681,7 @@ static void InitializeCommonInterceptors() { INIT_CPUSET_GETAFFINITY; INIT_PREADV2; INIT_PWRITEV2; + INIT_FREADLINK; INIT___PRINTF_CHK; } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index e71a6bcd6a8371..05fa7e63268f26 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -606,6 +606,7 @@ // FIXME: also available from musl 1.2.5 #define SANITIZER_INTERCEPT_PREADV2 (SI_LINUX && __GLIBC_PREREQ(2, 26)) #define SANITIZER_INTERCEPT_PWRITEV2 (SI_LINUX && __GLIBC_PREREQ(2, 26)) +#define SANITIZER_INTERCEPT_FREADLINK SI_MAC // This macro gives a way for downstream users to override the above // interceptor macros irrespective of the platform they are on. They have diff --git a/compiler-rt/test/sanitizer_common/TestCases/Darwin/freadlink.c b/compiler-rt/test/sanitizer_common/TestCases/Darwin/freadlink.c new file mode 100644 index 00000000000000..53658cdb66aa3d --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/Darwin/freadlink.c @@ -0,0 +1,29 @@ +// RUN: %clang -O0 %s -o %t && %run %t + +#include +#include +#include +#include +#include +#include +#include + +int main(int argc, char **argv) { + char symlink_path[PATH_MAX]; + snprintf(symlink_path, sizeof(symlink_path), "%s_%d.symlink", argv[0], + getpid()); + remove(symlink_path); + int res = symlink(argv[0], symlink_path); + assert(!res); + + int fd; + char readlink_path[PATH_MAX]; + fd = open(symlink_path, O_RDONLY); + ssize_t res2 = freadlink(fd, readlink_path, sizeof(readlink_path)); + assert(res2 >= 0); + readlink_path[res2] = '\0'; + assert(!strcmp(readlink_path, argv[0])); + close(fd); + + return 0; +} From df6822f4eb81638e724f09b948b83fe678412d25 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Wed, 25 Sep 2024 08:31:42 +0200 Subject: [PATCH 28/65] [lldb] Fix error reporting in SBTarget::ReadMemory (#109764) The function should use the by-ref SBError argument instead of creating a new one. This code has been here since ~forever, and was probably copied from methods which return an SBError result (where one needs to create a local variable). --- lldb/source/API/SBTarget.cpp | 5 ++--- lldb/test/API/python_api/target/TestTargetAPI.py | 5 +++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp index 1c1f7e2a03def8..d5017ad6bff166 100644 --- a/lldb/source/API/SBTarget.cpp +++ b/lldb/source/API/SBTarget.cpp @@ -662,15 +662,14 @@ size_t SBTarget::ReadMemory(const SBAddress addr, void *buf, size_t size, lldb::SBError &error) { LLDB_INSTRUMENT_VA(this, addr, buf, size, error); - SBError sb_error; size_t bytes_read = 0; TargetSP target_sp(GetSP()); if (target_sp) { std::lock_guard guard(target_sp->GetAPIMutex()); bytes_read = - target_sp->ReadMemory(addr.ref(), buf, size, sb_error.ref(), true); + target_sp->ReadMemory(addr.ref(), buf, size, error.ref(), true); } else { - sb_error.SetErrorString("invalid target"); + error.SetErrorString("invalid target"); } return bytes_read; diff --git a/lldb/test/API/python_api/target/TestTargetAPI.py b/lldb/test/API/python_api/target/TestTargetAPI.py index 2e8d6a5b1e53f6..155a25b576b03a 100644 --- a/lldb/test/API/python_api/target/TestTargetAPI.py +++ b/lldb/test/API/python_api/target/TestTargetAPI.py @@ -153,6 +153,11 @@ def test_read_memory(self): self.assertSuccess(error, "Make sure memory read succeeded") self.assertEqual(len(content), 1) + # Make sure reading from 0x0 fails + sb_addr = lldb.SBAddress(0, target) + self.assertIsNone(target.ReadMemory(sb_addr, 1, error)) + self.assertTrue(error.Fail()) + @skipIfWindows # stdio manipulation unsupported on Windows @skipIfRemote # stdio manipulation unsupported on remote iOS devices @skipIf(oslist=["linux"], archs=["arm", "aarch64"]) From 7c825f0d6a6db097e364bd747f8421518d34093a Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 25 Sep 2024 06:37:47 +0000 Subject: [PATCH 29/65] [gn build] Port fa824dc0dd96 --- llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn | 1 + llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn index 0f6e345b9d1754..3fecf9477ee761 100644 --- a/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn @@ -21,6 +21,7 @@ static_library("IR") { "BasicBlock.cpp", "BuiltinGCs.cpp", "Comdat.cpp", + "ConstantFPRange.cpp", "ConstantFold.cpp", "ConstantRange.cpp", "ConstantRangeList.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn index ba897a679db461..b19d54d7ed92f1 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn @@ -17,6 +17,7 @@ unittest("IRTests") { "BasicBlockDbgInfoTest.cpp", "BasicBlockTest.cpp", "CFGBuilder.cpp", + "ConstantFPRangeTest.cpp", "ConstantRangeListTest.cpp", "ConstantRangeTest.cpp", "ConstantsTest.cpp", From d7c6e943836368c17467f2d0dcf1e8caddbd9b6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= Date: Wed, 25 Sep 2024 08:57:18 +0200 Subject: [PATCH 30/65] [AMDGPU][StructurizeCFG] pre-commit tests: maintain branch_weights metadata (#109812) --- .../structurizer-keep-perf-md.ll | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll diff --git a/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll b/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll new file mode 100644 index 00000000000000..862c50c6183f16 --- /dev/null +++ b/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=amdgcn-- -passes=structurizecfg %s | FileCheck -check-prefix=OPT %s + +define amdgpu_ps i32 @if_else(i32 %0) { +; OPT-LABEL: define amdgpu_ps i32 @if_else( +; OPT-SAME: i32 [[TMP0:%.*]]) { +; OPT-NEXT: [[C:%.*]] = icmp ne i32 [[TMP0]], 0 +; OPT-NEXT: br i1 [[C]], label %[[FALSE:.*]], label %[[FLOW:.*]] +; OPT: [[FLOW]]: +; OPT-NEXT: [[TMP2:%.*]] = phi i32 [ 33, %[[FALSE]] ], [ undef, [[TMP1:%.*]] ] +; OPT-NEXT: [[TMP3:%.*]] = phi i1 [ false, %[[FALSE]] ], [ true, [[TMP1]] ] +; OPT-NEXT: br i1 [[TMP3]], label %[[TRUE:.*]], label %[[EXIT:.*]] +; OPT: [[TRUE]]: +; OPT-NEXT: br label %[[EXIT]] +; OPT: [[FALSE]]: +; OPT-NEXT: br label %[[FLOW]] +; OPT: [[EXIT]]: +; OPT-NEXT: [[RET:%.*]] = phi i32 [ [[TMP2]], %[[FLOW]] ], [ 42, %[[TRUE]] ] +; OPT-NEXT: ret i32 [[RET]] +; + %c = icmp eq i32 %0, 0 + br i1 %c, label %true, label %false, !prof !0 + +true: ; preds = %1 + br label %exit + +false: ; preds = %1 + br label %exit + +exit: ; preds = %false, %true + %ret = phi i32 [ 42, %true ], [ 33, %false ] + ret i32 %ret +} + +define amdgpu_ps void @loop_if_break(i32 %n) { +; OPT-LABEL: define amdgpu_ps void @loop_if_break( +; OPT-SAME: i32 [[N:%.*]]) { +; OPT-NEXT: [[ENTRY:.*]]: +; OPT-NEXT: br label %[[LOOP:.*]] +; OPT: [[LOOP]]: +; OPT-NEXT: [[I:%.*]] = phi i32 [ [[N]], %[[ENTRY]] ], [ [[TMP0:%.*]], %[[FLOW:.*]] ] +; OPT-NEXT: [[C:%.*]] = icmp ugt i32 [[I]], 0 +; OPT-NEXT: br i1 [[C]], label %[[LOOP_BODY:.*]], label %[[FLOW]] +; OPT: [[LOOP_BODY]]: +; OPT-NEXT: [[I_NEXT:%.*]] = sub i32 [[I]], 1 +; OPT-NEXT: br label %[[FLOW]] +; OPT: [[FLOW]]: +; OPT-NEXT: [[TMP0]] = phi i32 [ [[I_NEXT]], %[[LOOP_BODY]] ], [ undef, %[[LOOP]] ] +; OPT-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[LOOP_BODY]] ], [ true, %[[LOOP]] ] +; OPT-NEXT: br i1 [[TMP1]], label %[[EXIT:.*]], label %[[LOOP]] +; OPT: [[EXIT]]: +; OPT-NEXT: ret void +; +entry: + br label %loop + +loop: ; preds = %loop_body, %entry + %i = phi i32 [ %n, %entry ], [ %i.next, %loop_body ] + %c = icmp ugt i32 %i, 0 + br i1 %c, label %loop_body, label %exit, !prof !0 + +loop_body: ; preds = %loop + %i.next = sub i32 %i, 1 + br label %loop + +exit: ; preds = %loop + ret void +} + +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + +!0 = !{!"branch_weights", i32 1000, i32 1} From 90c14748638f1e10e31173b145fdbb5c4529c922 Mon Sep 17 00:00:00 2001 From: Timothy Pearson <162513562+tpearson-ssc@users.noreply.github.com> Date: Wed, 25 Sep 2024 02:09:50 -0500 Subject: [PATCH 31/65] [SDAG] Honor signed arguments in floating point libcalls (#109134) In ExpandFPLibCall, an assumption is made that all floating point libcalls that take integer arguments use unsigned integers. In the case of ldexp and frexp, this assumption is incorrect, leading to miscompilation and subsequent target-dependent incorrect operation. Indicate that ldexp and frexp utilize signed arguments in ExpandFPLibCall. Fixes #108904 Signed-off-by: Timothy Pearson --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 3 +- llvm/test/CodeGen/PowerPC/ldexp-libcall.ll | 4 +- llvm/test/CodeGen/PowerPC/ldexp.ll | 36 ++++++---- .../PowerPC/negative-integer-fp-libcall.ll | 26 +++++++ .../X86/fold-int-pow2-with-fmul-or-fdiv.ll | 69 ++++++++++++------- 5 files changed, 96 insertions(+), 42 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 3c087727a80126..04eb891f719d28 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2205,7 +2205,8 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, Results.push_back(Tmp.first); Results.push_back(Tmp.second); } else { - SDValue Tmp = ExpandLibCall(LC, Node, false).first; + bool IsSignedArgument = Node->getOpcode() == ISD::FLDEXP; + SDValue Tmp = ExpandLibCall(LC, Node, IsSignedArgument).first; Results.push_back(Tmp); } } diff --git a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll index 6144a9d9203651..e531516c37e87e 100644 --- a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll +++ b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll @@ -10,7 +10,7 @@ define float @call_ldexpf(float %a, i32 %b) { ; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: extsw r4, r4 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 @@ -29,7 +29,7 @@ define double @call_ldexp(double %a, i32 %b) { ; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: extsw r4, r4 ; CHECK-NEXT: bl ldexp ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll b/llvm/test/CodeGen/PowerPC/ldexp.ll index 151df6096b30bd..ffc826cc86de59 100644 --- a/llvm/test/CodeGen/PowerPC/ldexp.ll +++ b/llvm/test/CodeGen/PowerPC/ldexp.ll @@ -57,22 +57,24 @@ define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) { ; CHECK-NEXT: .cfi_offset v29, -48 ; CHECK-NEXT: .cfi_offset v30, -32 ; CHECK-NEXT: .cfi_offset v31, -16 -; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT: stxv v29, 32(r1) # 16-byte Folded Spill ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: vextuwrx r4, r3, v3 +; CHECK-NEXT: vextuwrx r3, r3, v3 ; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill ; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: vmr v31, v3 ; CHECK-NEXT: vmr v30, v2 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop -; CHECK-NEXT: xxswapd vs0, v30 ; CHECK-NEXT: li r3, 4 +; CHECK-NEXT: xxswapd vs0, v30 ; CHECK-NEXT: xscvdpspn v29, f1 ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: vextuwrx r4, r3, v31 +; CHECK-NEXT: vextuwrx r3, r3, v31 +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop ; CHECK-NEXT: xscvdpspn vs0, f1 @@ -100,35 +102,39 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) { ; CHECK-NEXT: .cfi_offset v29, -48 ; CHECK-NEXT: .cfi_offset v30, -32 ; CHECK-NEXT: .cfi_offset v31, -16 -; CHECK-NEXT: li r3, 12 -; CHECK-NEXT: xscvspdpn f1, v2 +; CHECK-NEXT: li r3, 4 +; CHECK-NEXT: xxswapd vs0, v2 ; CHECK-NEXT: stxv v28, 32(r1) # 16-byte Folded Spill +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: vextuwrx r3, r3, v3 ; CHECK-NEXT: stxv v29, 48(r1) # 16-byte Folded Spill ; CHECK-NEXT: stxv v30, 64(r1) # 16-byte Folded Spill ; CHECK-NEXT: stxv v31, 80(r1) # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v3 +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: vmr v30, v2 -; CHECK-NEXT: vextuwrx r4, r3, v3 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop -; CHECK-NEXT: xxswapd vs0, v30 -; CHECK-NEXT: li r3, 4 +; CHECK-NEXT: li r3, 12 ; CHECK-NEXT: xscpsgndp v29, f1, f1 -; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: vextuwrx r4, r3, v31 +; CHECK-NEXT: xscvspdpn f1, v30 +; CHECK-NEXT: vextuwrx r3, r3, v31 +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop -; CHECK-NEXT: xxmrghd vs0, v29, vs1 +; CHECK-NEXT: xxmrghd vs0, vs1, v29 ; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: vextuwrx r4, r3, v31 +; CHECK-NEXT: vextuwrx r3, r3, v31 ; CHECK-NEXT: xvcvdpsp v28, vs0 ; CHECK-NEXT: xxsldwi vs0, v30, v30, 3 +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: xscvspdpn f1, vs0 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop ; CHECK-NEXT: xxsldwi vs0, v30, v30, 1 +; CHECK-NEXT: mfvsrwz r3, v31 ; CHECK-NEXT: xscpsgndp v29, f1, f1 -; CHECK-NEXT: mfvsrwz r4, v31 +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: xscvspdpn f1, vs0 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop @@ -156,7 +162,7 @@ define half @ldexp_f16(half %arg0, i32 %arg1) { ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: extsw r4, r4 ; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: clrlwi r3, r3, 16 ; CHECK-NEXT: mtfprwz f0, r3 diff --git a/llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll b/llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll new file mode 100644 index 00000000000000..010ee6ef043e71 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -O1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s + +; Test that a negative parameter smaller than 64 bits (e.g., int) +; is correctly implemented with sign-extension when passed to +; a floating point libcall. + +define double @ldexp_test(ptr %a, ptr %b) nounwind { +; CHECK-LABEL: ldexp_test: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stdu 1, -112(1) +; CHECK-NEXT: std 0, 128(1) +; CHECK-NEXT: lfd 1, 0(3) +; CHECK-NEXT: lwa 4, 0(4) +; CHECK-NEXT: bl ldexp +; CHECK-NEXT: nop +; CHECK-NEXT: addi 1, 1, 112 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr + %base = load double, ptr %a + %exp = load i32, ptr %b + %call = call double @llvm.ldexp.f64.i32(double %base, i32 signext %exp) + ret double %call +} diff --git a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll index 5a051a9c499e47..332fbf7188af81 100644 --- a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll +++ b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll @@ -406,13 +406,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-SSE-NEXT: subq $72, %rsp ; CHECK-SSE-NEXT: .cfi_def_cfa_offset 80 ; CHECK-SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill -; CHECK-SSE-NEXT: pextrw $7, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $7, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $6, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $6, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT @@ -420,13 +422,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $5, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $5, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $4, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $4, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT @@ -436,13 +440,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $3, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $3, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $2, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $2, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT @@ -450,14 +456,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $1, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $1, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-SSE-NEXT: movd %xmm0, %eax -; CHECK-SSE-NEXT: movzwl %ax, %edi +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT @@ -476,13 +483,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX2-NEXT: subq $72, %rsp ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 80 ; CHECK-AVX2-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill -; CHECK-AVX2-NEXT: vpextrw $7, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $7, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $6, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $6, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT @@ -490,13 +499,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $5, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $5, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $4, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $4, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT @@ -506,13 +517,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $3, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $3, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $2, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $2, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT @@ -520,14 +533,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $1, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $1, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX2-NEXT: vmovd %xmm0, %eax -; CHECK-AVX2-NEXT: movzwl %ax, %edi +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT @@ -546,7 +560,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: subq $72, %rsp ; CHECK-AVX512F-NEXT: .cfi_def_cfa_offset 80 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill -; CHECK-AVX512F-NEXT: vpextrw $7, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $7, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -554,7 +569,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $6, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $6, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -564,7 +580,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $5, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $5, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -572,7 +589,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $4, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $4, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -584,7 +602,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $3, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $3, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -592,7 +611,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $2, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $2, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -602,7 +622,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $1, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $1, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -611,7 +632,7 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX512F-NEXT: vmovd %xmm0, %eax -; CHECK-AVX512F-NEXT: movzwl %ax, %edi +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 From ae7b454f98ba002ba1cc4cf19fac55f629cfc52f Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Wed, 25 Sep 2024 09:17:49 +0200 Subject: [PATCH 32/65] Revert "[MLIR] Make `OneShotModuleBufferize` use `OpInterface`" (#109919) Reverts llvm/llvm-project#107295 This commit breaks an integration test: ``` build/bin/mlir-opt mlir/test/Integration/Dialect/Complex/CPU/correctness.mlir -one-shot-bufferize="bufferize-function-boundaries" ``` --- .../IR/BufferizableOpInterface.h | 7 +- .../FuncBufferizableOpInterfaceImpl.h | 12 +- .../IR/BufferizableOpInterface.cpp | 5 +- .../FuncBufferizableOpInterfaceImpl.cpp | 2 +- .../Transforms/OneShotModuleBufferize.cpp | 111 ++++++------- .../Transforms/transform-ops.mlir | 142 ++++++++--------- mlir/test/Dialect/LLVM/transform-e2e.mlir | 22 ++- .../Linalg/matmul-shared-memory-padding.mlir | 52 +++--- .../Linalg/pad-to-specific-memory-space.mlir | 148 +++++++++--------- .../test/Dialect/Vector/transform-vector.mlir | 84 +++++----- mlir/test/Examples/transform/ChH/full.mlir | 11 +- 11 files changed, 281 insertions(+), 315 deletions(-) diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h index d19687ec9afee1..aceb9d059b95f3 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h @@ -11,7 +11,6 @@ #include "mlir/IR/Operation.h" #include "mlir/IR/PatternMatch.h" -#include "mlir/Interfaces/FunctionInterfaces.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/DenseMapInfoVariant.h" #include "llvm/ADT/SetVector.h" @@ -261,9 +260,9 @@ struct BufferizationOptions { using AnalysisStateInitFn = std::function; /// Tensor -> MemRef type converter. /// Parameters: Value, memory space, func op, bufferization options - using FunctionArgTypeConverterFn = std::function; + using FunctionArgTypeConverterFn = + std::function; /// Tensor -> MemRef type converter. /// Parameters: Value, memory space, bufferization options using UnknownTypeConverterFn = std::function equivalentFuncArgs; + DenseMap equivalentFuncArgs; /// A mapping of FuncOp BBArg indices to aliasing ReturnOp OpOperand indices. - DenseMap aliasingReturnVals; + DenseMap aliasingReturnVals; /// A set of all read BlockArguments of FuncOps. - DenseMap readBbArgs; + DenseMap readBbArgs; /// A set of all written-to BlockArguments of FuncOps. - DenseMap writtenBbArgs; + DenseMap writtenBbArgs; /// Keep track of which FuncOps are fully analyzed or currently being /// analyzed. - DenseMap analyzedFuncOps; + DenseMap analyzedFuncOps; /// This function is called right before analyzing the given FuncOp. It /// initializes the data structures for the FuncOp in this state object. - void startFunctionAnalysis(FunctionOpInterface funcOp); + void startFunctionAnalysis(FuncOp funcOp); }; void registerBufferizableOpInterfaceExternalModels(DialectRegistry ®istry); diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp index 92f757111cbaf7..85604eef2f2830 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp @@ -18,7 +18,6 @@ #include "mlir/IR/TypeUtilities.h" #include "mlir/IR/Value.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" -#include "mlir/Interfaces/FunctionInterfaces.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/Support/Debug.h" @@ -315,7 +314,7 @@ namespace { /// Default function arg type converter: Use a fully dynamic layout map. BaseMemRefType defaultFunctionArgTypeConverter(TensorType type, Attribute memorySpace, - FunctionOpInterface funcOp, + func::FuncOp funcOp, const BufferizationOptions &options) { return getMemRefTypeWithFullyDynamicLayout(type, memorySpace); } @@ -362,7 +361,7 @@ BufferizationOptions::dynCastBufferizableOp(Value value) const { void BufferizationOptions::setFunctionBoundaryTypeConversion( LayoutMapOption layoutMapOption) { functionArgTypeConverterFn = [=](TensorType tensorType, Attribute memorySpace, - FunctionOpInterface funcOp, + func::FuncOp funcOp, const BufferizationOptions &options) { if (layoutMapOption == LayoutMapOption::IdentityLayoutMap) return bufferization::getMemRefTypeWithStaticIdentityLayout(tensorType, diff --git a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp index 9749a71f3514bc..9fbe574ec392dc 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp @@ -22,7 +22,7 @@ namespace mlir { namespace bufferization { namespace func_ext { -void FuncAnalysisState::startFunctionAnalysis(FunctionOpInterface funcOp) { +void FuncAnalysisState::startFunctionAnalysis(FuncOp funcOp) { analyzedFuncOps[funcOp] = FuncOpAnalysisState::InProgress; auto createdEquiv = equivalentFuncArgs.try_emplace(funcOp, IndexMapping()); auto createdAliasingResults = diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp index f934ae88277b63..0a4072605c265f 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp @@ -75,7 +75,7 @@ using namespace mlir::bufferization; using namespace mlir::bufferization::func_ext; /// A mapping of FuncOps to their callers. -using FuncCallerMap = DenseMap>; +using FuncCallerMap = DenseMap>; /// Get or create FuncAnalysisState. static FuncAnalysisState & @@ -88,11 +88,10 @@ getOrCreateFuncAnalysisState(OneShotAnalysisState &state) { /// Return the unique ReturnOp that terminates `funcOp`. /// Return nullptr if there is no such unique ReturnOp. -static Operation *getAssumedUniqueReturnOp(FunctionOpInterface funcOp) { - Operation *returnOp = nullptr; - for (Block &b : funcOp.getFunctionBody()) { - auto candidateOp = b.getTerminator(); - if (candidateOp && candidateOp->hasTrait()) { +static func::ReturnOp getAssumedUniqueReturnOp(func::FuncOp funcOp) { + func::ReturnOp returnOp; + for (Block &b : funcOp.getBody()) { + if (auto candidateOp = dyn_cast(b.getTerminator())) { if (returnOp) return nullptr; returnOp = candidateOp; @@ -127,16 +126,16 @@ static void annotateEquivalentReturnBbArg(OpOperand &returnVal, /// Store function BlockArguments that are equivalent to/aliasing a returned /// value in FuncAnalysisState. static LogicalResult -aliasingFuncOpBBArgsAnalysis(FunctionOpInterface funcOp, - OneShotAnalysisState &state, +aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, OneShotAnalysisState &state, FuncAnalysisState &funcState) { - if (funcOp.getFunctionBody().empty()) { + if (funcOp.getBody().empty()) { // No function body available. Conservatively assume that every tensor // return value may alias with any tensor bbArg. - for (const auto &inputIt : llvm::enumerate(funcOp.getArgumentTypes())) { + FunctionType type = funcOp.getFunctionType(); + for (const auto &inputIt : llvm::enumerate(type.getInputs())) { if (!isa(inputIt.value())) continue; - for (const auto &resultIt : llvm::enumerate(funcOp.getResultTypes())) { + for (const auto &resultIt : llvm::enumerate(type.getResults())) { if (!isa(resultIt.value())) continue; int64_t returnIdx = resultIt.index(); @@ -148,7 +147,7 @@ aliasingFuncOpBBArgsAnalysis(FunctionOpInterface funcOp, } // Support only single return-terminated block in the function. - Operation *returnOp = getAssumedUniqueReturnOp(funcOp); + func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); assert(returnOp && "expected func with single return op"); for (OpOperand &returnVal : returnOp->getOpOperands()) @@ -169,8 +168,8 @@ aliasingFuncOpBBArgsAnalysis(FunctionOpInterface funcOp, return success(); } -static void annotateFuncArgAccess(FunctionOpInterface funcOp, int64_t idx, - bool isRead, bool isWritten) { +static void annotateFuncArgAccess(func::FuncOp funcOp, int64_t idx, bool isRead, + bool isWritten) { OpBuilder b(funcOp.getContext()); Attribute accessType; if (isRead && isWritten) { @@ -190,12 +189,12 @@ static void annotateFuncArgAccess(FunctionOpInterface funcOp, int64_t idx, /// function with unknown ops, we conservatively assume that such ops bufferize /// to a read + write. static LogicalResult -funcOpBbArgReadWriteAnalysis(FunctionOpInterface funcOp, - OneShotAnalysisState &state, +funcOpBbArgReadWriteAnalysis(FuncOp funcOp, OneShotAnalysisState &state, FuncAnalysisState &funcState) { - for (int64_t idx = 0, e = funcOp.getNumArguments(); idx < e; ++idx) { + for (int64_t idx = 0, e = funcOp.getFunctionType().getNumInputs(); idx < e; + ++idx) { // Skip non-tensor arguments. - if (!isa(funcOp.getArgumentTypes()[idx])) + if (!isa(funcOp.getFunctionType().getInput(idx))) continue; bool isRead; bool isWritten; @@ -205,7 +204,7 @@ funcOpBbArgReadWriteAnalysis(FunctionOpInterface funcOp, StringRef str = accessAttr.getValue(); isRead = str == "read" || str == "read-write"; isWritten = str == "write" || str == "read-write"; - } else if (funcOp.getFunctionBody().empty()) { + } else if (funcOp.getBody().empty()) { // If the function has no body, conservatively assume that all args are // read + written. isRead = true; @@ -231,19 +230,20 @@ funcOpBbArgReadWriteAnalysis(FunctionOpInterface funcOp, /// Remove bufferization attributes on FuncOp arguments. static void removeBufferizationAttributes(BlockArgument bbArg) { - auto funcOp = cast(bbArg.getOwner()->getParentOp()); + auto funcOp = cast(bbArg.getOwner()->getParentOp()); funcOp.removeArgAttr(bbArg.getArgNumber(), BufferizationDialect::kBufferLayoutAttrName); funcOp.removeArgAttr(bbArg.getArgNumber(), BufferizationDialect::kWritableAttrName); } -static FunctionOpInterface getCalledFunction(CallOpInterface callOp) { +/// Return the func::FuncOp called by `callOp`. +static func::FuncOp getCalledFunction(func::CallOp callOp) { SymbolRefAttr sym = llvm::dyn_cast_if_present(callOp.getCallableForCallee()); if (!sym) return nullptr; - return dyn_cast_or_null( + return dyn_cast_or_null( SymbolTable::lookupNearestSymbolFrom(callOp, sym)); } @@ -251,12 +251,12 @@ static FunctionOpInterface getCalledFunction(CallOpInterface callOp) { /// Note: This only adds new equivalence info if the called function was already /// analyzed. // TODO: This does not handle cyclic function call graphs etc. -static void equivalenceAnalysis(FunctionOpInterface funcOp, +static void equivalenceAnalysis(func::FuncOp funcOp, OneShotAnalysisState &state, FuncAnalysisState &funcState) { - funcOp->walk([&](CallOpInterface callOp) { - FunctionOpInterface calledFunction = getCalledFunction(callOp); - assert(calledFunction && "could not retrieved called FunctionOpInterface"); + funcOp->walk([&](func::CallOp callOp) { + func::FuncOp calledFunction = getCalledFunction(callOp); + assert(calledFunction && "could not retrieved called func::FuncOp"); // No equivalence info available for the called function. if (!funcState.equivalentFuncArgs.count(calledFunction)) @@ -267,7 +267,7 @@ static void equivalenceAnalysis(FunctionOpInterface funcOp, int64_t bbargIdx = it.second; if (!state.isInPlace(callOp->getOpOperand(bbargIdx))) continue; - Value returnVal = callOp->getResult(returnIdx); + Value returnVal = callOp.getResult(returnIdx); Value argVal = callOp->getOperand(bbargIdx); state.unionEquivalenceClasses(returnVal, argVal); } @@ -277,9 +277,11 @@ static void equivalenceAnalysis(FunctionOpInterface funcOp, } /// Return "true" if the given function signature has tensor semantics. -static bool hasTensorSignature(FunctionOpInterface funcOp) { - return llvm::any_of(funcOp.getArgumentTypes(), llvm::IsaPred) || - llvm::any_of(funcOp.getResultTypes(), llvm::IsaPred); +static bool hasTensorSignature(func::FuncOp funcOp) { + return llvm::any_of(funcOp.getFunctionType().getInputs(), + llvm::IsaPred) || + llvm::any_of(funcOp.getFunctionType().getResults(), + llvm::IsaPred); } /// Store all functions of the `moduleOp` in `orderedFuncOps`, sorted by @@ -289,16 +291,16 @@ static bool hasTensorSignature(FunctionOpInterface funcOp) { /// retrieve the called FuncOp from any func::CallOp. static LogicalResult getFuncOpsOrderedByCalls(ModuleOp moduleOp, - SmallVectorImpl &orderedFuncOps, + SmallVectorImpl &orderedFuncOps, FuncCallerMap &callerMap) { // For each FuncOp, the set of functions called by it (i.e. the union of // symbols of all nested func::CallOp). - DenseMap> calledBy; + DenseMap> calledBy; // For each FuncOp, the number of func::CallOp it contains. - DenseMap numberCallOpsContainedInFuncOp; - WalkResult res = moduleOp.walk([&](FunctionOpInterface funcOp) -> WalkResult { - if (!funcOp.getFunctionBody().empty()) { - Operation *returnOp = getAssumedUniqueReturnOp(funcOp); + DenseMap numberCallOpsContainedInFuncOp; + WalkResult res = moduleOp.walk([&](func::FuncOp funcOp) -> WalkResult { + if (!funcOp.getBody().empty()) { + func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); if (!returnOp) return funcOp->emitError() << "cannot bufferize a FuncOp with tensors and " @@ -307,10 +309,9 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, // Collect function calls and populate the caller map. numberCallOpsContainedInFuncOp[funcOp] = 0; - return funcOp.walk([&](CallOpInterface callOp) -> WalkResult { - FunctionOpInterface calledFunction = getCalledFunction(callOp); - assert(calledFunction && - "could not retrieved called FunctionOpInterface"); + return funcOp.walk([&](func::CallOp callOp) -> WalkResult { + func::FuncOp calledFunction = getCalledFunction(callOp); + assert(calledFunction && "could not retrieved called func::FuncOp"); // If the called function does not have any tensors in its signature, then // it is not necessary to bufferize the callee before the caller. if (!hasTensorSignature(calledFunction)) @@ -348,11 +349,11 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, /// most generic layout map as function return types. After bufferizing the /// entire function body, a more concise memref type can potentially be used for /// the return type of the function. -static void foldMemRefCasts(FunctionOpInterface funcOp) { - if (funcOp.getFunctionBody().empty()) +static void foldMemRefCasts(func::FuncOp funcOp) { + if (funcOp.getBody().empty()) return; - Operation *returnOp = getAssumedUniqueReturnOp(funcOp); + func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); SmallVector resultTypes; for (OpOperand &operand : returnOp->getOpOperands()) { @@ -364,8 +365,8 @@ static void foldMemRefCasts(FunctionOpInterface funcOp) { } } - auto newFuncType = FunctionType::get(funcOp.getContext(), - funcOp.getArgumentTypes(), resultTypes); + auto newFuncType = FunctionType::get( + funcOp.getContext(), funcOp.getFunctionType().getInputs(), resultTypes); funcOp.setType(newFuncType); } @@ -378,7 +379,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, FuncAnalysisState &funcState = getOrCreateFuncAnalysisState(state); // A list of functions in the order in which they are analyzed + bufferized. - SmallVector orderedFuncOps; + SmallVector orderedFuncOps; // A mapping of FuncOps to their callers. FuncCallerMap callerMap; @@ -387,7 +388,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, return failure(); // Analyze ops. - for (FunctionOpInterface funcOp : orderedFuncOps) { + for (func::FuncOp funcOp : orderedFuncOps) { if (!state.getOptions().isOpAllowed(funcOp)) continue; @@ -415,7 +416,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, void mlir::bufferization::removeBufferizationAttributesInModule( ModuleOp moduleOp) { - moduleOp.walk([&](FunctionOpInterface op) { + moduleOp.walk([&](func::FuncOp op) { for (BlockArgument bbArg : op.getArguments()) removeBufferizationAttributes(bbArg); }); @@ -429,7 +430,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( IRRewriter rewriter(moduleOp.getContext()); // A list of functions in the order in which they are analyzed + bufferized. - SmallVector orderedFuncOps; + SmallVector orderedFuncOps; // A mapping of FuncOps to their callers. FuncCallerMap callerMap; @@ -438,11 +439,11 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( return failure(); // Bufferize functions. - for (FunctionOpInterface funcOp : orderedFuncOps) { + for (func::FuncOp funcOp : orderedFuncOps) { // Note: It would be good to apply cleanups here but we cannot as aliasInfo // would be invalidated. - if (llvm::is_contained(options.noAnalysisFuncFilter, funcOp.getName())) { + if (llvm::is_contained(options.noAnalysisFuncFilter, funcOp.getSymName())) { // This function was not analyzed and RaW conflicts were not resolved. // Buffer copies must be inserted before every write. OneShotBufferizationOptions updatedOptions = options; @@ -462,7 +463,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( // Bufferize all other ops. for (Operation &op : llvm::make_early_inc_range(moduleOp.getOps())) { // Functions were already bufferized. - if (isa(&op)) + if (isa(&op)) continue; if (failed(bufferizeOp(&op, options, statistics))) return failure(); @@ -489,12 +490,12 @@ LogicalResult mlir::bufferization::runOneShotModuleBufferize( // FuncOps whose names are specified in options.noAnalysisFuncFilter will // not be analyzed. Ops in these FuncOps will not be analyzed as well. OpFilter::Entry::FilterFn analysisFilterFn = [=](Operation *op) { - auto func = dyn_cast(op); + auto func = dyn_cast(op); if (!func) - func = op->getParentOfType(); + func = op->getParentOfType(); if (func) return llvm::is_contained(options.noAnalysisFuncFilter, - func.getName()); + func.getSymName()); return false; }; OneShotBufferizationOptions updatedOptions(options); diff --git a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir index 588aa8a85a84e6..3c50a9e72d9d9b 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --transform-interpreter="debug-payload-root-tag=payload" %s -split-input-file -verify-diagnostics | FileCheck %s +// RUN: mlir-opt --transform-interpreter %s -split-input-file -verify-diagnostics | FileCheck %s // Test One-Shot Bufferize. @@ -12,21 +12,19 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func @test_function( // CHECK-SAME: %[[A:.*]]: tensor -module @payload attributes { transform.target_tag = "payload" } { - func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { - %c0 = arith.constant 0 : index +func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { + %c0 = arith.constant 0 : index - // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] - // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] - // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) - // CHECK: memref.copy %[[A_memref]], %[[alloc]] - // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] - // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] - %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor + // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] + // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] + // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) + // CHECK: memref.copy %[[A_memref]], %[[alloc]] + // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] + // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] + %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor - // CHECK: return %[[res_tensor]] - return %0 : tensor - } + // CHECK: return %[[res_tensor]] + return %0 : tensor } // ----- @@ -44,21 +42,19 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func @test_function( // CHECK-SAME: %[[A:.*]]: tensor // CHECK-NOT: memref.copy -module @payload attributes { transform.target_tag = "payload" } { - func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { - %c0 = arith.constant 0 : index +func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { + %c0 = arith.constant 0 : index - // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] - // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] - // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) - // CHECK: linalg.copy ins(%[[A_memref]] : memref<{{.*}}>) outs(%[[alloc]] - // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] - // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] - %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor + // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] + // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] + // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) + // CHECK: linalg.copy ins(%[[A_memref]] : memref<{{.*}}>) outs(%[[alloc]] + // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] + // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] + %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor - // CHECK: return %[[res_tensor]] - return %0 : tensor - } + // CHECK: return %[[res_tensor]] + return %0 : tensor } // ----- @@ -76,15 +72,13 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func @test_function_analysis( // CHECK-SAME: %[[A:.*]]: tensor -module @payload attributes { transform.target_tag = "payload" } { - func.func @test_function_analysis(%A : tensor, %v : vector<4xf32>) -> (tensor) { - %c0 = arith.constant 0 : index - // CHECK: vector.transfer_write - // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]} - // CHECK-SAME: tensor - %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor - return %0 : tensor - } +func.func @test_function_analysis(%A : tensor, %v : vector<4xf32>) -> (tensor) { + %c0 = arith.constant 0 : index + // CHECK: vector.transfer_write + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]} + // CHECK-SAME: tensor + %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor + return %0 : tensor } // ----- @@ -101,12 +95,10 @@ module attributes {transform.with_named_sequence} { } } -module @payload attributes { transform.target_tag = "payload" } { - func.func @test_unknown_op_failure() -> (tensor) { - // expected-error @+1 {{op was not bufferized}} - %0 = "test.dummy_op"() : () -> (tensor) - return %0 : tensor - } +func.func @test_unknown_op_failure() -> (tensor) { + // expected-error @+1 {{op was not bufferized}} + %0 = "test.dummy_op"() : () -> (tensor) + return %0 : tensor } // ----- @@ -119,7 +111,7 @@ module attributes {transform.with_named_sequence} { } } -module @payload attributes { transform.target_tag = "payload" } { +module { // CHECK-LABEL: func @test_function( // CHECK-SAME: %[[A:.*]]: tensor func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { @@ -154,13 +146,11 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[A:.*]]: memref<12x9xf32>, // CHECK-SAME: %[[B:.*]]: memref<9x6xf32>, // CHECK-SAME: %[[C:.*]]: memref<12x6xf32>) -> memref<12x6xf32> { -module @payload attributes { transform.target_tag = "payload" } { - func.func @matmul(%A: tensor<12x9xf32>, %B: tensor<9x6xf32>, %C: tensor<12x6xf32>) -> tensor<12x6xf32> { - // CHECK: linalg.matmul ins(%[[A]], %[[B]] : memref<12x9xf32>, memref<9x6xf32>) outs(%[[C]] : memref<12x6xf32>) - %D = linalg.matmul ins(%A, %B: tensor<12x9xf32>, tensor<9x6xf32>) outs(%C: tensor<12x6xf32>) -> tensor<12x6xf32> - // CHECK: return %[[C]] : memref<12x6xf32> - return %D : tensor<12x6xf32> - } +func.func @matmul(%A: tensor<12x9xf32>, %B: tensor<9x6xf32>, %C: tensor<12x6xf32>) -> tensor<12x6xf32> { + // CHECK: linalg.matmul ins(%[[A]], %[[B]] : memref<12x9xf32>, memref<9x6xf32>) outs(%[[C]] : memref<12x6xf32>) + %D = linalg.matmul ins(%A, %B: tensor<12x9xf32>, tensor<9x6xf32>) outs(%C: tensor<12x6xf32>) -> tensor<12x6xf32> + // CHECK: return %[[C]] : memref<12x6xf32> + return %D : tensor<12x6xf32> } // ----- @@ -175,12 +165,10 @@ module attributes {transform.with_named_sequence} { } // Expect `bufferization.empty_tensor_to_alloc_tensor` to replace the tensor.empty. -module @payload attributes { transform.target_tag = "payload" } { - func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> { - // CHECK: bufferization.alloc_tensor - %0 = tensor.empty() : tensor<2x2xf32> - return %0 : tensor<2x2xf32> - } +func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> { + // CHECK: bufferization.alloc_tensor + %0 = tensor.empty() : tensor<2x2xf32> + return %0 : tensor<2x2xf32> } // ----- @@ -197,15 +185,13 @@ module attributes {transform.with_named_sequence} { // CHECK: tensor.extract_slice // CHECK: linalg.fill // CHECK: tensor.insert_slice -module @payload attributes { transform.target_tag = "payload" } { - func.func @empty_tensor_elimination( - %t: tensor<10xf32>, %f: f32) -> tensor<10xf32> { - %0 = tensor.empty() : tensor<5xf32> - %1 = linalg.fill ins(%f : f32) outs(%0 : tensor<5xf32>) -> tensor<5xf32> - %2 = tensor.insert_slice %1 into %t [1][5][1] - : tensor<5xf32> into tensor<10xf32> - return %2 : tensor<10xf32> - } +func.func @empty_tensor_elimination( + %t: tensor<10xf32>, %f: f32) -> tensor<10xf32> { + %0 = tensor.empty() : tensor<5xf32> + %1 = linalg.fill ins(%f : f32) outs(%0 : tensor<5xf32>) -> tensor<5xf32> + %2 = tensor.insert_slice %1 into %t [1][5][1] + : tensor<5xf32> into tensor<10xf32> + return %2 : tensor<10xf32> } // ----- @@ -222,14 +208,12 @@ module attributes {transform.with_named_sequence} { // CHECK: memref.alloca // CHECK: scf.for // CHECK: memref.store -module @payload attributes { transform.target_tag = "payload" } { - func.func @buffer_loop_hoisting(%lb: index, %ub: index, %step: index, %f: f32, %pos: index) { - scf.for %iv = %lb to %ub step %step { - %0 = memref.alloca() : memref<5xf32> - memref.store %f, %0[%pos] : memref<5xf32> - } - return +func.func @buffer_loop_hoisting(%lb: index, %ub: index, %step: index, %f: f32, %pos: index) { + scf.for %iv = %lb to %ub step %step { + %0 = memref.alloca() : memref<5xf32> + memref.store %f, %0[%pos] : memref<5xf32> } + return } // ----- @@ -247,12 +231,10 @@ module attributes {transform.with_named_sequence} { // Expect `bufferization.bufferize_to_allocation` to create an alloc. // CHECK-LABEL: func.func @empty_to_tensor_alloc() -module @payload attributes { transform.target_tag = "payload" } { - func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> { - // CHECK-NEXT: %[[alloca:.*]] = memref.alloca() : memref<2x2xf32> - // CHECK-NEXT: %[[tensor:.*]] = bufferization.to_tensor %[[alloca]] restrict writable : memref<2x2xf32> - // CHECK-NEXT: return %[[tensor]] : tensor<2x2xf32> - %0 = bufferization.alloc_tensor() : tensor<2x2xf32> - return %0 : tensor<2x2xf32> - } +func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> { + // CHECK-NEXT: %[[alloca:.*]] = memref.alloca() : memref<2x2xf32> + // CHECK-NEXT: %[[tensor:.*]] = bufferization.to_tensor %[[alloca]] restrict writable : memref<2x2xf32> + // CHECK-NEXT: return %[[tensor]] : tensor<2x2xf32> + %0 = bufferization.alloc_tensor() : tensor<2x2xf32> + return %0 : tensor<2x2xf32> } diff --git a/mlir/test/Dialect/LLVM/transform-e2e.mlir b/mlir/test/Dialect/LLVM/transform-e2e.mlir index 3e637a3ec49a42..c00b47fb936e97 100644 --- a/mlir/test/Dialect/LLVM/transform-e2e.mlir +++ b/mlir/test/Dialect/LLVM/transform-e2e.mlir @@ -1,17 +1,15 @@ -// RUN: mlir-opt %s --transform-interpreter="debug-payload-root-tag=payload" -test-transform-dialect-erase-schedule --test-lower-to-llvm --split-input-file | FileCheck %s +// RUN: mlir-opt %s --transform-interpreter -test-transform-dialect-erase-schedule --test-lower-to-llvm --split-input-file | FileCheck %s // CHECK-LABEL: llvm.func @matmul_tensors -module @payload attributes { transform.target_tag = "payload" } { - func.func @matmul_tensors( - %arg0: tensor<2x4xf32>, %arg1: tensor<4x6xf32>, %arg2: tensor<2x6xf32>) - -> tensor<2x6xf32> { - // CHECK-NOT: linalg - // CHECK: llvm.intr.fmuladd{{.*}} - %0 = linalg.matmul ins(%arg0, %arg1: tensor<2x4xf32>, tensor<4x6xf32>) - outs(%arg2: tensor<2x6xf32>) - -> tensor<2x6xf32> - return %0 : tensor<2x6xf32> - } +func.func @matmul_tensors( + %arg0: tensor<2x4xf32>, %arg1: tensor<4x6xf32>, %arg2: tensor<2x6xf32>) + -> tensor<2x6xf32> { +// CHECK-NOT: linalg +// CHECK: llvm.intr.fmuladd{{.*}} + %0 = linalg.matmul ins(%arg0, %arg1: tensor<2x4xf32>, tensor<4x6xf32>) + outs(%arg2: tensor<2x6xf32>) + -> tensor<2x6xf32> + return %0 : tensor<2x6xf32> } module attributes {transform.with_named_sequence} { diff --git a/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir b/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir index 9c223737750a9b..3f8d2ea06641e1 100644 --- a/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir +++ b/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --split-input-file --transform-interpreter="debug-payload-root-tag=payload" %s | FileCheck %s +// RUN: mlir-opt --split-input-file --transform-interpreter %s | FileCheck %s // CHECK-LABEL: func @matmul_divisible // CHECK: scf.forall @@ -24,21 +24,19 @@ // CHECK: scf.forall // CHECK: vector.transfer_read // CHECK: vector.transfer_write -module @payload attributes { transform.target_tag = "payload" } { - func.func @matmul_divisible(%A: tensor<1024x1024xf32>, - %B: tensor<1024x1024xf32>, - %C: tensor<1024x1024xf32>) +func.func @matmul_divisible(%A: tensor<1024x1024xf32>, + %B: tensor<1024x1024xf32>, + %C: tensor<1024x1024xf32>) + -> tensor<1024x1024xf32> +{ + %cst = arith.constant 0.000000e+00 : f32 + %0 = linalg.fill ins(%cst : f32) + outs(%C : tensor<1024x1024xf32>) -> tensor<1024x1024xf32> - { - %cst = arith.constant 0.000000e+00 : f32 - %0 = linalg.fill ins(%cst : f32) - outs(%C : tensor<1024x1024xf32>) - -> tensor<1024x1024xf32> - %1 = linalg.matmul ins(%A, %B : tensor<1024x1024xf32>, tensor<1024x1024xf32>) - outs(%0 : tensor<1024x1024xf32>) - -> tensor<1024x1024xf32> - return %1 : tensor<1024x1024xf32> - } + %1 = linalg.matmul ins(%A, %B : tensor<1024x1024xf32>, tensor<1024x1024xf32>) + outs(%0 : tensor<1024x1024xf32>) + -> tensor<1024x1024xf32> + return %1 : tensor<1024x1024xf32> } module attributes {transform.with_named_sequence} { @@ -145,21 +143,19 @@ module attributes {transform.with_named_sequence} { // CHECK: linalg.matmul // CHECK: vector.transfer_read // CHECK: vector.transfer_write -module @payload attributes { transform.target_tag = "payload" } { func.func @matmul_not_divisible(%A: tensor<1023x1023xf32>, - %B: tensor<1023x1023xf32>, - %C: tensor<1023x1023xf32>) + %B: tensor<1023x1023xf32>, + %C: tensor<1023x1023xf32>) + -> tensor<1023x1023xf32> +{ + %cst = arith.constant 0.000000e+00 : f32 + %0 = linalg.fill ins(%cst : f32) + outs(%C : tensor<1023x1023xf32>) -> tensor<1023x1023xf32> - { - %cst = arith.constant 0.000000e+00 : f32 - %0 = linalg.fill ins(%cst : f32) - outs(%C : tensor<1023x1023xf32>) - -> tensor<1023x1023xf32> - %1 = linalg.matmul ins(%A, %B : tensor<1023x1023xf32>, tensor<1023x1023xf32>) - outs(%0 : tensor<1023x1023xf32>) - -> tensor<1023x1023xf32> - return %1 : tensor<1023x1023xf32> - } + %1 = linalg.matmul ins(%A, %B : tensor<1023x1023xf32>, tensor<1023x1023xf32>) + outs(%0 : tensor<1023x1023xf32>) + -> tensor<1023x1023xf32> + return %1 : tensor<1023x1023xf32> } module attributes {transform.with_named_sequence} { diff --git a/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir b/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir index 5e5657980ba120..f2e9e839b7c46b 100644 --- a/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir +++ b/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt --transform-interpreter="debug-payload-root-tag=payload" -cse -canonicalize -split-input-file -verify-diagnostics %s | FileCheck %s +// RUN: mlir-opt --transform-interpreter -cse -canonicalize -split-input-file -verify-diagnostics %s | FileCheck %s #map = affine_map<()[s0] -> (-s0 + 12, 7)> @@ -7,45 +7,43 @@ // CHECK-SAME: %[[arg0:.*]]: memref<24x12xf32, strided<[?, ?], offset: ?>>, // CHECK-SAME: %[[arg1:.*]]: memref<12x25xf32, strided<[?, ?], offset: ?>>, // CHECK-SAME: %[[arg2:.*]]: memref<24x25xf32, strided<[?, ?], offset: ?>>, -module @payload attributes { transform.target_tag = "payload" } { - func.func @pad_to_memory_space(%arg0: tensor<24x12xf32>, - %arg1: tensor<12x25xf32>, - %arg2: tensor<24x25xf32>, - %iv0 : index, %iv1 : index, - %iv2 : index) -> tensor<24x25xf32> { - %0 = affine.min #map()[%iv2] - - // CHECK: %[[s0:.*]] = memref.subview %[[arg0]] - %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> - // CHECK: %[[s1:.*]] = memref.subview %[[arg1]] - %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor - // CHECK: %[[s2:.*]] = memref.subview %[[arg2]] - %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> - - // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3> - // CHECK: linalg.fill {{.*}} outs(%[[alloc0]] - // CHECK: %[[alloc0_view:.*]] = memref.subview %[[alloc0]][0, 0] [4, %{{.*}}] [1, 1] - // CHECK: memref.copy %[[s0]], %[[alloc0_view]] - - // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3> - // CHECK: linalg.fill {{.*}} outs(%[[alloc1]] - // CHECK: %[[alloc1_view:.*]] = memref.subview %[[alloc1]][0, 0] [%{{.*}}, 5] [1, 1] - // CHECK: memref.copy %[[s1]], %[[alloc1_view]] - - // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3> - // CHECK-NOT: linalg.fill {{.*}} outs(%[[alloc2]] - // No subview because there is 0 padding - // CHECK: memref.copy %[[s2]], %[[alloc2]] - - // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}}) - // Copy back result. - // CHECK: memref.copy %[[alloc2]], %[[s2]] - %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> - - // insert_slice bufferizes to a no-op. - %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> - func.return %5 : tensor<24x25xf32> - } +func.func @pad_to_memory_space(%arg0: tensor<24x12xf32>, + %arg1: tensor<12x25xf32>, + %arg2: tensor<24x25xf32>, + %iv0 : index, %iv1 : index, + %iv2 : index) -> tensor<24x25xf32> { + %0 = affine.min #map()[%iv2] + + // CHECK: %[[s0:.*]] = memref.subview %[[arg0]] + %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> + // CHECK: %[[s1:.*]] = memref.subview %[[arg1]] + %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor + // CHECK: %[[s2:.*]] = memref.subview %[[arg2]] + %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> + + // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3> + // CHECK: linalg.fill {{.*}} outs(%[[alloc0]] + // CHECK: %[[alloc0_view:.*]] = memref.subview %[[alloc0]][0, 0] [4, %{{.*}}] [1, 1] + // CHECK: memref.copy %[[s0]], %[[alloc0_view]] + + // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3> + // CHECK: linalg.fill {{.*}} outs(%[[alloc1]] + // CHECK: %[[alloc1_view:.*]] = memref.subview %[[alloc1]][0, 0] [%{{.*}}, 5] [1, 1] + // CHECK: memref.copy %[[s1]], %[[alloc1_view]] + + // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3> + // CHECK-NOT: linalg.fill {{.*}} outs(%[[alloc2]] + // No subview because there is 0 padding + // CHECK: memref.copy %[[s2]], %[[alloc2]] + + // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}}) + // Copy back result. + // CHECK: memref.copy %[[alloc2]], %[[s2]] + %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> + + // insert_slice bufferizes to a no-op. + %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> + func.return %5 : tensor<24x25xf32> } module attributes {transform.with_named_sequence} { @@ -71,42 +69,40 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[arg0:.*]]: memref<24x12xf32, strided<[?, ?], offset: ?>>, // CHECK-SAME: %[[arg1:.*]]: memref<12x25xf32, strided<[?, ?], offset: ?>>, // CHECK-SAME: %[[arg2:.*]]: memref<24x25xf32, strided<[?, ?], offset: ?>>, -module @payload attributes { transform.target_tag = "payload" } { - func.func @vectorize_and_bufferize_pad(%arg0: tensor<24x12xf32>, - %arg1: tensor<12x25xf32>, - %arg2: tensor<24x25xf32>, - %iv0 : index, %iv1 : index, - %iv2 : index) -> tensor<24x25xf32> { - %0 = affine.min #map()[%iv2] - - // CHECK: %[[s0:.*]] = memref.subview %[[arg0]] - %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> - // CHECK: %[[s1:.*]] = memref.subview %[[arg1]] - %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor - // CHECK: %[[s2:.*]] = memref.subview %[[arg2]] - %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> - - // CHECK: %[[v0:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s0]] - // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3> - // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v0]], %[[alloc0]] - - // CHECK: %[[v1:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s1]] - // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3> - // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v1]], %[[alloc1]] - - // CHECK: %[[v2:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s2]] - // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3> - // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v2]], %[[alloc0]] - - // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}}) - // Copy back result. - // CHECK: memref.copy %[[alloc2]], %[[s2]] - %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> - - // insert_slice bufferizes to a no-op. - %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> - func.return %5 : tensor<24x25xf32> - } +func.func @vectorize_and_bufferize_pad(%arg0: tensor<24x12xf32>, + %arg1: tensor<12x25xf32>, + %arg2: tensor<24x25xf32>, + %iv0 : index, %iv1 : index, + %iv2 : index) -> tensor<24x25xf32> { + %0 = affine.min #map()[%iv2] + + // CHECK: %[[s0:.*]] = memref.subview %[[arg0]] + %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> + // CHECK: %[[s1:.*]] = memref.subview %[[arg1]] + %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor + // CHECK: %[[s2:.*]] = memref.subview %[[arg2]] + %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> + + // CHECK: %[[v0:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s0]] + // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3> + // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v0]], %[[alloc0]] + + // CHECK: %[[v1:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s1]] + // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3> + // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v1]], %[[alloc1]] + + // CHECK: %[[v2:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s2]] + // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3> + // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v2]], %[[alloc0]] + + // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}}) + // Copy back result. + // CHECK: memref.copy %[[alloc2]], %[[s2]] + %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> + + // insert_slice bufferizes to a no-op. + %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> + func.return %5 : tensor<24x25xf32> } module attributes {transform.with_named_sequence} { diff --git a/mlir/test/Dialect/Vector/transform-vector.mlir b/mlir/test/Dialect/Vector/transform-vector.mlir index 0439844dc66cad..4b38db79bff3e1 100644 --- a/mlir/test/Dialect/Vector/transform-vector.mlir +++ b/mlir/test/Dialect/Vector/transform-vector.mlir @@ -1,18 +1,16 @@ -// RUN: mlir-opt --transform-interpreter="debug-payload-root-tag=payload" %s --split-input-file | FileCheck %s +// RUN: mlir-opt %s --transform-interpreter --split-input-file | FileCheck %s // CHECK-LABEL: func @matmul_tensors -module @payload attributes { transform.target_tag = "payload" } { - func.func @matmul_tensors( - %arg0: tensor<8x16xf32>, %arg1: tensor<16x32xf32>, %arg2: tensor<8x32xf32>) - -> tensor<8x32xf32> { - // CHECK-NOT: linalg - // CHECK: vector.extract {{.*}} : vector<4xf32> from vector<8x4xf32> - // CHECK: vector.store {{.*}} : memref<8x32xf32>, vector<4xf32> - %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x16xf32>, tensor<16x32xf32>) - outs(%arg2: tensor<8x32xf32>) - -> tensor<8x32xf32> - return %0 : tensor<8x32xf32> - } +func.func @matmul_tensors( + %arg0: tensor<8x16xf32>, %arg1: tensor<16x32xf32>, %arg2: tensor<8x32xf32>) + -> tensor<8x32xf32> { +// CHECK-NOT: linalg +// CHECK: vector.extract {{.*}} : vector<4xf32> from vector<8x4xf32> +// CHECK: vector.store {{.*}} : memref<8x32xf32>, vector<4xf32> + %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x16xf32>, tensor<16x32xf32>) + outs(%arg2: tensor<8x32xf32>) + -> tensor<8x32xf32> + return %0 : tensor<8x32xf32> } module attributes {transform.with_named_sequence} { @@ -78,13 +76,11 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]] : vector<64x64xf16>, vector<64x64xf16> into vector<64x64xf32> // CHECK-NEXT: return %[[R]] : vector<64x64xf32> -module @payload attributes { transform.target_tag = "payload" } { - func.func @fold_arith_extf_into_contract(%arg0: vector<64x64xf16>, %arg1: vector<64x64xf16>, %arg2: vector<64x64xf32>) -> vector<64x64xf32> { - %lhs_f32 = arith.extf %arg0 : vector<64x64xf16> to vector<64x64xf32> - %rhs_f32 = arith.extf %arg1 : vector<64x64xf16> to vector<64x64xf32> - %result = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %lhs_f32, %rhs_f32, %arg2 : vector<64x64xf32>, vector<64x64xf32> into vector<64x64xf32> - return %result : vector<64x64xf32> - } +func.func @fold_arith_extf_into_contract(%arg0: vector<64x64xf16>, %arg1: vector<64x64xf16>, %arg2: vector<64x64xf32>) -> vector<64x64xf32> { + %lhs_f32 = arith.extf %arg0 : vector<64x64xf16> to vector<64x64xf32> + %rhs_f32 = arith.extf %arg1 : vector<64x64xf16> to vector<64x64xf32> + %result = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %lhs_f32, %rhs_f32, %arg2 : vector<64x64xf32>, vector<64x64xf32> into vector<64x64xf32> + return %result : vector<64x64xf32> } module attributes {transform.with_named_sequence} { @@ -99,32 +95,30 @@ module attributes {transform.with_named_sequence} { // ----- -module @payload attributes { transform.target_tag = "payload" } { - // CHECK-LABEL: func.func @arith_to_outerproduct_scalable_i32 - // CHECK-SAME: %[[LHS:.*]]: vector<[4]xi32>, - // CHECK-SAME: %[[RHS:.*]]: vector<[4]xi32>) -> vector<[4]x[4]xi32> { - // CHECK: %[[RES:.*]] = vector.outerproduct %[[LHS]], %[[RHS]] : vector<[4]xi32>, vector<[4]xi32> - // CHECK: return %[[RES]] : vector<[4]x[4]xi32> - func.func @arith_to_outerproduct_scalable_i32(%lhs: vector<[4]xi32>, %rhs: vector<[4]xi32>) -> vector<[4]x[4]xi32> { - %lhsBcast = vector.broadcast %lhs : vector<[4]xi32> to vector<[4]x[4]xi32> - %lhsT = vector.transpose %lhsBcast, [1, 0] : vector<[4]x[4]xi32> to vector<[4]x[4]xi32> - %rhsBcast = vector.broadcast %rhs : vector<[4]xi32> to vector<[4]x[4]xi32> - %mul = arith.muli %lhsT, %rhsBcast : vector<[4]x[4]xi32> - return %mul: vector<[4]x[4]xi32> - } +// CHECK-LABEL: func.func @arith_to_outerproduct_scalable_i32 +// CHECK-SAME: %[[LHS:.*]]: vector<[4]xi32>, +// CHECK-SAME: %[[RHS:.*]]: vector<[4]xi32>) -> vector<[4]x[4]xi32> { +// CHECK: %[[RES:.*]] = vector.outerproduct %[[LHS]], %[[RHS]] : vector<[4]xi32>, vector<[4]xi32> +// CHECK: return %[[RES]] : vector<[4]x[4]xi32> +func.func @arith_to_outerproduct_scalable_i32(%lhs: vector<[4]xi32>, %rhs: vector<[4]xi32>) -> vector<[4]x[4]xi32> { + %lhsBcast = vector.broadcast %lhs : vector<[4]xi32> to vector<[4]x[4]xi32> + %lhsT = vector.transpose %lhsBcast, [1, 0] : vector<[4]x[4]xi32> to vector<[4]x[4]xi32> + %rhsBcast = vector.broadcast %rhs : vector<[4]xi32> to vector<[4]x[4]xi32> + %mul = arith.muli %lhsT, %rhsBcast : vector<[4]x[4]xi32> + return %mul: vector<[4]x[4]xi32> +} - // CHECK-LABEL: func.func @arith_to_outerproduct_trans_rhs_f32 - // CHECK-SAME: %[[LHS:.*]]: vector<16xf32>, - // CHECK-SAME: %[[RHS:.*]]: vector<8xf32>) -> vector<8x16xf32> { - // CHECK: %[[RES:.*]] = vector.outerproduct %[[RHS]], %[[LHS]] : vector<8xf32>, vector<16xf32> - // CHECK: return %[[RES]] : vector<8x16xf32> - func.func @arith_to_outerproduct_trans_rhs_f32(%lhs: vector<16xf32>, %rhs: vector<8xf32>) -> vector<8x16xf32> { - %rhsBcast = vector.broadcast %rhs : vector<8xf32> to vector<16x8xf32> - %rhsT = vector.transpose %rhsBcast, [1, 0] : vector<16x8xf32> to vector<8x16xf32> - %lhsBcast = vector.broadcast %lhs : vector<16xf32> to vector<8x16xf32> - %mul = arith.mulf %lhsBcast, %rhsT : vector<8x16xf32> - return %mul: vector<8x16xf32> - } +// CHECK-LABEL: func.func @arith_to_outerproduct_trans_rhs_f32 +// CHECK-SAME: %[[LHS:.*]]: vector<16xf32>, +// CHECK-SAME: %[[RHS:.*]]: vector<8xf32>) -> vector<8x16xf32> { +// CHECK: %[[RES:.*]] = vector.outerproduct %[[RHS]], %[[LHS]] : vector<8xf32>, vector<16xf32> +// CHECK: return %[[RES]] : vector<8x16xf32> +func.func @arith_to_outerproduct_trans_rhs_f32(%lhs: vector<16xf32>, %rhs: vector<8xf32>) -> vector<8x16xf32> { + %rhsBcast = vector.broadcast %rhs : vector<8xf32> to vector<16x8xf32> + %rhsT = vector.transpose %rhsBcast, [1, 0] : vector<16x8xf32> to vector<8x16xf32> + %lhsBcast = vector.broadcast %lhs : vector<16xf32> to vector<8x16xf32> + %mul = arith.mulf %lhsBcast, %rhsT : vector<8x16xf32> + return %mul: vector<8x16xf32> } module attributes {transform.with_named_sequence} { diff --git a/mlir/test/Examples/transform/ChH/full.mlir b/mlir/test/Examples/transform/ChH/full.mlir index 85dbf670233232..259475ebdbf49e 100644 --- a/mlir/test/Examples/transform/ChH/full.mlir +++ b/mlir/test/Examples/transform/ChH/full.mlir @@ -1,6 +1,8 @@ -// RUN: mlir-opt %s --transform-interpreter="debug-payload-root-tag=payload" \ -// RUN: --test-transform-dialect-erase-schedule |\ -// RUN: mlir-opt -pass-pipeline='builtin.module(builtin.module(math-uplift-to-fma,convert-bufferization-to-memref,test-lower-to-llvm))' - |\ +// RUN: mlir-opt %s --transform-interpreter \ +// RUN: --test-transform-dialect-erase-schedule \ +// RUN: --math-uplift-to-fma \ +// RUN: --convert-bufferization-to-memref \ +// RUN: --test-lower-to-llvm |\ // RUN: FileCheck %s // Fixed-size tensor types to be used in convolution. @@ -17,7 +19,6 @@ // tensors annotated with attributes from the `bufferization` dialect. These // attributes hint the bufferization pass to assume buffers can be directly // used for these tensors without reshaping. -module @payload attributes { transform.target_tag = "payload" } { func.func @conv( %input: !tinput {bufferization.writable = false, bufferization.access = "read", @@ -83,7 +84,7 @@ func.func @conv( return %relued : !toutput } -} + // Module containing the transformation script to be applied. The attribute // is required to correctly verify the use of named (macro-like) sequences. module attributes { transform.with_named_sequence } { From 2ccac07bf22d17029d4437b0a727dd55c8c86d56 Mon Sep 17 00:00:00 2001 From: Dmitry Polukhin <34227995+dmpolukhin@users.noreply.github.com> Date: Wed, 25 Sep 2024 08:31:49 +0100 Subject: [PATCH 33/65] [C++20][Modules] Fix crash when function and lambda inside loaded from different modules (#109167) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Because AST loading code is lazy and happens in unpredictable order, it is possible that a function and lambda inside the function can be loaded from different modules. As a result, the captured DeclRefExpr won’t match the corresponding VarDecl inside the function. This situation is reflected in the AST as follows: ``` FunctionDecl 0x555564f4aff0 line:33:35 imported in ./thrift_cpp2_base.h hidden tryTo 'Expected ()' inline |-also in ./folly-conv.h `-CompoundStmt 0x555564f7cfc8 |-DeclStmt 0x555564f7ced8 | `-VarDecl 0x555564f7cef8 col:7 imported in ./thrift_cpp2_base.h hidden referenced result 'Tgt' cinit | `-IntegerLiteral 0x555564f7d080 'int' 0 |-CallExpr 0x555564f7cea8 '' | |-UnresolvedLookupExpr 0x555564f7bea0 '' lvalue (no ADL) = 'then_' 0x555564f7bef0 | |-CXXTemporaryObjectExpr 0x555564f7bcb0 'Expected':'folly::Expected' 'void () noexcept' zeroing | `-LambdaExpr 0x555564f7bc88 '(lambda at Conv.h:39:48)' | |-CXXRecordDecl 0x555564f76b88 col:48 imported in ./folly-conv.h hidden implicit class definition | | |-also in ./thrift_cpp2_base.h | | `-DefinitionData lambda empty standard_layout trivially_copyable literal can_const_default_init | | |-DefaultConstructor defaulted_is_constexpr | | |-CopyConstructor simple trivial has_const_param needs_implicit implicit_has_const_param | | |-MoveConstructor exists simple trivial needs_implicit | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param | | |-MoveAssignment | | `-Destructor simple irrelevant trivial constexpr needs_implicit | `-CompoundStmt 0x555564f7d1a8 | `-ReturnStmt 0x555564f7d198 | `-DeclRefExpr 0x555564f7d0a0 'Tgt' lvalue Var 0x555564f7d0c8 'result' 'Tgt' refers_to_enclosing_variable_or_capture `-ReturnStmt 0x555564f7bc78 `-InitListExpr 0x555564f7bc38 'void' ``` This diff modifies the AST deserialization process to load lambdas within the canonical function declaration sooner, immediately following the function, ensuring that they are loaded from the same module. Re-land https://github.com/llvm/llvm-project/pull/104512 Added test case that caused crash due to multiple enclosed lambdas deserialization. Test Plan: check-clang --- .../include/clang/Serialization/ASTBitCodes.h | 6 ++ clang/include/clang/Serialization/ASTReader.h | 12 +++ clang/include/clang/Serialization/ASTWriter.h | 8 ++ clang/lib/Serialization/ASTReader.cpp | 11 +++ clang/lib/Serialization/ASTReaderDecl.cpp | 10 +++ clang/lib/Serialization/ASTWriter.cpp | 22 ++++++ clang/lib/Serialization/ASTWriterDecl.cpp | 5 ++ ...rash-instantiated-in-scope-cxx-modules.cpp | 76 +++++++++++++++++++ ...ash-instantiated-in-scope-cxx-modules2.cpp | 30 ++++++++ ...ash-instantiated-in-scope-cxx-modules3.cpp | 26 +++++++ 10 files changed, 206 insertions(+) create mode 100644 clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp create mode 100644 clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp create mode 100644 clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 4410df296d8efc..5be33ae0ed1b98 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -724,6 +724,12 @@ enum ASTRecordTypes { /// Record code for vtables to emit. VTABLES_TO_EMIT = 70, + + /// Record code for the FunctionDecl to lambdas mapping. These lambdas have to + /// be loaded right after the function they belong to. It is required to have + /// canonical declaration for the lambda class from the same module as + /// enclosing function. + FUNCTION_DECL_TO_LAMBDAS_MAP = 71, }; /// Record types used within a source manager block. diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 898f4392465fdf..c1843218a4b8b1 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -532,6 +532,18 @@ class ASTReader /// namespace as if it is not delayed. DelayedNamespaceOffsetMapTy DelayedNamespaceOffsetMap; + /// Mapping from FunctionDecl IDs to the corresponding lambda IDs. + /// + /// These lambdas have to be loaded right after the function they belong to. + /// It is required to have canonical declaration for lambda class from the + /// same module as enclosing function. This is required to correctly resolve + /// captured variables in the lambda. Without this, due to lazy + /// deserialization, canonical declarations for the function and lambdas can + /// be selected from different modules and DeclRefExprs may refer to the AST + /// nodes that don't exist in the function. + llvm::DenseMap> + FunctionToLambdasMap; + struct PendingUpdateRecord { Decl *D; GlobalDeclID ID; diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 10a50b711043a8..760866fd9de938 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -233,6 +233,14 @@ class ASTWriter : public ASTDeserializationListener, /// instead of comparing the result of `getDeclID()` or `GetDeclRef()`. llvm::SmallPtrSet PredefinedDecls; + /// Mapping from FunctionDecl to the list of lambda IDs inside the function. + /// + /// These lambdas have to be loaded right after the function they belong to. + /// In order to have canonical declaration for lambda class from the same + /// module as enclosing function during deserialization. + llvm::DenseMap> + FunctionToLambdasMap; + /// Offset of each declaration in the bitstream, indexed by /// the declaration's ID. std::vector DeclOffsets; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index ede3070787722d..a369ad0be47954 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3856,6 +3856,17 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, break; } + case FUNCTION_DECL_TO_LAMBDAS_MAP: + for (unsigned I = 0, N = Record.size(); I != N; /*in loop*/) { + GlobalDeclID ID = ReadDeclID(F, Record, I); + auto &Lambdas = FunctionToLambdasMap[ID]; + unsigned NN = Record[I++]; + Lambdas.reserve(NN); + for (unsigned II = 0; II < NN; II++) + Lambdas.push_back(ReadDeclID(F, Record, I)); + } + break; + case OBJC_CATEGORIES_MAP: if (F.LocalNumObjCCategoriesInMap != 0) return llvm::createStringError( diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 9272e23c7da3fc..7cead2728ca938 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -4351,6 +4351,16 @@ void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) { reader::ASTDeclContextNameLookupTrait(*this, *Update.Mod)); DC->setHasExternalVisibleStorage(true); } + + // Load any pending lambdas for the function. + if (auto *FD = dyn_cast(D); FD && FD->isCanonicalDecl()) { + if (auto IT = FunctionToLambdasMap.find(ID); + IT != FunctionToLambdasMap.end()) { + for (auto LID : IT->second) + GetDecl(LID); + FunctionToLambdasMap.erase(IT); + } + } } void ASTReader::loadPendingDeclChain(Decl *FirstLocal, uint64_t LocalOffset) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 4ee14b1e260159..f326e3c2e2ff7a 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -903,6 +903,7 @@ void ASTWriter::WriteBlockInfoBlock() { RECORD(PENDING_IMPLICIT_INSTANTIATIONS); RECORD(UPDATE_VISIBLE); RECORD(DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD); + RECORD(FUNCTION_DECL_TO_LAMBDAS_MAP); RECORD(DECL_UPDATE_OFFSETS); RECORD(DECL_UPDATES); RECORD(CUDA_SPECIAL_DECL_REFS); @@ -5707,6 +5708,27 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) { Stream.EmitRecord(DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD, DelayedNamespaceRecord); + if (!FunctionToLambdasMap.empty()) { + // TODO: on disk hash table for function to lambda mapping might be more + // efficent becuase it allows lazy deserialization. + RecordData FunctionToLambdasMapRecord; + for (const auto &Pair : FunctionToLambdasMap) { + FunctionToLambdasMapRecord.push_back( + GetDeclRef(Pair.first).getRawValue()); + FunctionToLambdasMapRecord.push_back(Pair.second.size()); + for (const auto &Lambda : Pair.second) + FunctionToLambdasMapRecord.push_back(Lambda.getRawValue()); + } + + auto Abv = std::make_shared(); + Abv->Add(llvm::BitCodeAbbrevOp(FUNCTION_DECL_TO_LAMBDAS_MAP)); + Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Array)); + Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6)); + unsigned FunctionToLambdaMapAbbrev = Stream.EmitAbbrev(std::move(Abv)); + Stream.EmitRecord(FUNCTION_DECL_TO_LAMBDAS_MAP, FunctionToLambdasMapRecord, + FunctionToLambdaMapAbbrev); + } + const TranslationUnitDecl *TU = Context.getTranslationUnitDecl(); // Create a lexical update block containing all of the declarations in the // translation unit that do not come from other AST files. diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 555f6325da646b..50c090b195d619 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -1521,6 +1521,11 @@ void ASTDeclWriter::VisitCXXRecordDecl(CXXRecordDecl *D) { } else { Record.push_back(0); } + // For lambdas inside canonical FunctionDecl remember the mapping. + if (auto FD = llvm::dyn_cast_or_null(D->getDeclContext()); + FD && FD->isCanonicalDecl()) { + Writer.FunctionToLambdasMap[FD].push_back(Writer.GetDeclRef(D)); + } } else { Record.push_back(CXXRecNotTemplate); } diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp new file mode 100644 index 00000000000000..80844a58ad825a --- /dev/null +++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp @@ -0,0 +1,76 @@ +// RUN: rm -fR %t +// RUN: split-file %s %t +// RUN: cd %t +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized folly-conv.h +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized thrift_cpp2_base.h +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized -fmodule-file=folly-conv.pcm -fmodule-file=thrift_cpp2_base.pcm logger_base.h + +//--- Conv.h +#pragma once + +template +_Up __declval(int); + +template +auto declval() noexcept -> decltype(__declval<_Tp>(0)); + +namespace folly { + +template +struct Expected { + template + auto thenOrThrow() -> decltype(declval()) { + return 1; + } +}; + +struct ExpectedHelper { + template + static constexpr Expected return_(T) { + return Expected(); + } + + template + static auto then_(This&&, Fn&&) + -> decltype(T::template return_((declval()(true), 0))) { + return Expected(); + } +}; + +template +inline Expected tryTo() { + Tgt result = 0; + // In build with asserts: + // clang/lib/Sema/SemaTemplateInstantiate.cpp: llvm::PointerUnion *clang::LocalInstantiationScope::findInstantiationOf(const Decl *): Assertion `isa(D) && "declaration not instantiated in this scope"' failed. + // In release build compilation error on the line below inside lambda: + // error: variable 'result' is uninitialized when used here [-Werror,-Wuninitialized] + ExpectedHelper::then_(Expected(), [&](bool) { return result; }); + return {}; +} + +} // namespace folly + +inline void bar() { + folly::tryTo(); +} +// expected-no-diagnostics + +//--- folly-conv.h +#pragma once +#include "Conv.h" +// expected-no-diagnostics + +//--- thrift_cpp2_base.h +#pragma once +#include "Conv.h" +// expected-no-diagnostics + +//--- logger_base.h +#pragma once +import "folly-conv.h"; +import "thrift_cpp2_base.h"; + +inline void foo() { + folly::tryTo(); +} +// expected-no-diagnostics diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp new file mode 100644 index 00000000000000..5b1a904e928a68 --- /dev/null +++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp @@ -0,0 +1,30 @@ +// RUN: rm -fR %t +// RUN: split-file %s %t +// RUN: cd %t +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header header.h +// RUN: %clang_cc1 -std=c++20 -fmodule-file=header.pcm main.cpp + +//--- header.h +template +void f(T) {} + +class A { + virtual ~A(); +}; + +inline A::~A() { + f([](){}); +} + +struct B { + void g() { + f([](){ + [](){}; + }); + } +}; +// expected-no-diagnostics + +//--- main.cpp +import "header.h"; +// expected-no-diagnostics diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp new file mode 100644 index 00000000000000..646ff9f745710b --- /dev/null +++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 %s -std=c++11 -emit-pch -o %t +// RUN: %clang_cc1 %s -std=c++11 -include-pch %t -fsyntax-only -verify + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// No crash or assertion failure on multiple nested lambdas deserialization. +template +void b() { + [] { + []{ + []{ + []{ + []{ + }(); + }(); + }(); + }(); + }(); +} + +void foo() { + b(); +} +#endif From 0259f92711599c45d229fb12f6f51915fffac6bd Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Wed, 25 Sep 2024 09:33:15 +0200 Subject: [PATCH 34/65] [mlir][memref] Add builder that infers `reinterpret_cast` result type (#109432) Add a convenience builder that infers the result type of `memref.reinterpret_cast`. Note: It is not possible to remove the result type from all builder overloads because this op currently also allows certain operand/attribute + result type combinations that do not match. The op verifier should probably be made stricter, but that's a larger change that requires additional `memref.cast` ops in some places that build `reinterpret_cast` ops. --- .../mlir/Dialect/MemRef/IR/MemRefOps.td | 4 ++++ .../IR/BufferDeallocationOpInterface.cpp | 6 ++++-- mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp | 18 ++++++++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td index 2ff9d612a5efa7..c50df6ccd9aa56 100644 --- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td +++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td @@ -1407,6 +1407,10 @@ def MemRef_ReinterpretCastOp "OpFoldResult":$offset, "ArrayRef":$sizes, "ArrayRef":$strides, CArg<"ArrayRef", "{}">:$attrs)>, + // Build a ReinterpretCastOp and infer the result type. + OpBuilder<(ins "Value":$source, "OpFoldResult":$offset, + "ArrayRef":$sizes, "ArrayRef":$strides, + CArg<"ArrayRef", "{}">:$attrs)>, // Build a ReinterpretCastOp with static entries. OpBuilder<(ins "MemRefType":$resultType, "Value":$source, "int64_t":$offset, "ArrayRef":$sizes, diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp index b197786c320548..51dfd84d9ac601 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp @@ -197,8 +197,10 @@ LogicalResult DeallocationState::getMemrefsAndConditionsToDeallocate( // that we can call extract_strided_metadata on it. if (auto unrankedMemRefTy = dyn_cast(memref.getType())) memref = builder.create( - loc, MemRefType::get({}, unrankedMemRefTy.getElementType()), memref, - 0, SmallVector{}, SmallVector{}); + loc, memref, + /*offset=*/builder.getIndexAttr(0), + /*sizes=*/ArrayRef{}, + /*strides=*/ArrayRef{}); // Use the `memref.extract_strided_metadata` operation to get the base // memref. This is needed because the same MemRef that was produced by the diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp index 9c021d3613f1c8..75b9729e63648c 100644 --- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp +++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp @@ -1832,6 +1832,24 @@ void ReinterpretCastOp::build(OpBuilder &b, OperationState &result, b.getDenseI64ArrayAttr(staticStrides)); } +void ReinterpretCastOp::build(OpBuilder &b, OperationState &result, + Value source, OpFoldResult offset, + ArrayRef sizes, + ArrayRef strides, + ArrayRef attrs) { + auto sourceType = cast(source.getType()); + SmallVector staticOffsets, staticSizes, staticStrides; + SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; + dispatchIndexOpFoldResults(offset, dynamicOffsets, staticOffsets); + dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes); + dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides); + auto stridedLayout = StridedLayoutAttr::get( + b.getContext(), staticOffsets.front(), staticStrides); + auto resultType = MemRefType::get(staticSizes, sourceType.getElementType(), + stridedLayout, sourceType.getMemorySpace()); + build(b, result, resultType, source, offset, sizes, strides, attrs); +} + void ReinterpretCastOp::build(OpBuilder &b, OperationState &result, MemRefType resultType, Value source, int64_t offset, ArrayRef sizes, From b7b945b09cddad128e42d82d759e74f0cd5b0ee3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 25 Sep 2024 11:35:16 +0400 Subject: [PATCH 35/65] LiveRangeCalc: Pass output stream to verify Restores output on this failure after 71ca9fcb8dc9ea0e1e3a4a47820edc78c398a85e --- llvm/lib/CodeGen/LiveRangeCalc.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/LiveRangeCalc.cpp b/llvm/lib/CodeGen/LiveRangeCalc.cpp index f7d9e5c44ac2e5..e325e77189a6fa 100644 --- a/llvm/lib/CodeGen/LiveRangeCalc.cpp +++ b/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -208,7 +208,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, #ifndef NDEBUG if (MBB->pred_empty()) { - MBB->getParent()->verify(); + MBB->getParent()->verify(nullptr, nullptr, &errs()); errs() << "Use of " << printReg(PhysReg, MRI->getTargetRegisterInfo()) << " does not have a corresponding definition on every path:\n"; const MachineInstr *MI = Indexes->getInstructionFromIndex(Use); @@ -223,7 +223,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, for (MCRegAliasIterator Alias(PhysReg, TRI, false); !IsLiveIn && Alias.isValid(); ++Alias) IsLiveIn = MBB->isLiveIn(*Alias); if (!IsLiveIn) { - MBB->getParent()->verify(); + MBB->getParent()->verify(nullptr, nullptr, &errs()); errs() << "The register " << printReg(PhysReg, TRI) << " needs to be live in to " << printMBBReference(*MBB) << ", but is missing from the live-in list.\n"; From 8e3cde04cbf13d2aa2251acdd8adbae8d3edd43d Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Wed, 25 Sep 2024 15:31:24 +0800 Subject: [PATCH 36/65] [LoongArch][test] Add float-point atomic load/store tests. NFC --- .../ir-instruction/load-store-atomic.ll | 337 ++++++++++++++++++ 1 file changed, 337 insertions(+) diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll index 1af2b38d799436..9ef74e4960ce7c 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll @@ -88,6 +88,50 @@ define ptr @load_acquire_ptr(ptr %ptr) { ret ptr %val } +define float @load_acquire_float(ptr %ptr) { +; LA32-LABEL: load_acquire_float: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: movgr2fr.w $fa0, $a0 +; LA32-NEXT: dbar 20 +; LA32-NEXT: ret +; +; LA64-LABEL: load_acquire_float: +; LA64: # %bb.0: +; LA64-NEXT: ld.w $a0, $a0, 0 +; LA64-NEXT: movgr2fr.w $fa0, $a0 +; LA64-NEXT: dbar 20 +; LA64-NEXT: ret + %val = load atomic float, ptr %ptr acquire, align 8 + ret float %val +} + +define double @load_acquire_double(ptr %ptr) { +; LA32-LABEL: load_acquire_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: ori $a1, $zero, 2 +; LA32-NEXT: bl %plt(__atomic_load_8) +; LA32-NEXT: st.w $a1, $sp, 4 +; LA32-NEXT: st.w $a0, $sp, 0 +; LA32-NEXT: fld.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: load_acquire_double: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: movgr2fr.d $fa0, $a0 +; LA64-NEXT: dbar 20 +; LA64-NEXT: ret + %val = load atomic double, ptr %ptr acquire, align 8 + ret double %val +} + define i8 @load_unordered_i8(ptr %ptr) { ; LA32-LABEL: load_unordered_i8: ; LA32: # %bb.0: @@ -165,6 +209,47 @@ define ptr @load_unordered_ptr(ptr %ptr) { ret ptr %val } +define float @load_unordered_float(ptr %ptr) { +; LA32-LABEL: load_unordered_float: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: movgr2fr.w $fa0, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: load_unordered_float: +; LA64: # %bb.0: +; LA64-NEXT: ld.w $a0, $a0, 0 +; LA64-NEXT: movgr2fr.w $fa0, $a0 +; LA64-NEXT: ret + %val = load atomic float, ptr %ptr unordered, align 8 + ret float %val +} + +define double @load_unordered_double(ptr %ptr) { +; LA32-LABEL: load_unordered_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: bl %plt(__atomic_load_8) +; LA32-NEXT: st.w $a1, $sp, 4 +; LA32-NEXT: st.w $a0, $sp, 0 +; LA32-NEXT: fld.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: load_unordered_double: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: movgr2fr.d $fa0, $a0 +; LA64-NEXT: ret + %val = load atomic double, ptr %ptr unordered, align 8 + ret double %val +} + define i8 @load_monotonic_i8(ptr %ptr) { ; LA32-LABEL: load_monotonic_i8: ; LA32: # %bb.0: @@ -242,6 +327,47 @@ define ptr @load_monotonic_ptr(ptr %ptr) { ret ptr %val } +define float @load_monotonic_float(ptr %ptr) { +; LA32-LABEL: load_monotonic_float: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: movgr2fr.w $fa0, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: load_monotonic_float: +; LA64: # %bb.0: +; LA64-NEXT: ld.w $a0, $a0, 0 +; LA64-NEXT: movgr2fr.w $fa0, $a0 +; LA64-NEXT: ret + %val = load atomic float, ptr %ptr monotonic, align 8 + ret float %val +} + +define double @load_monotonic_double(ptr %ptr) { +; LA32-LABEL: load_monotonic_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: bl %plt(__atomic_load_8) +; LA32-NEXT: st.w $a1, $sp, 4 +; LA32-NEXT: st.w $a0, $sp, 0 +; LA32-NEXT: fld.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: load_monotonic_double: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: movgr2fr.d $fa0, $a0 +; LA64-NEXT: ret + %val = load atomic double, ptr %ptr monotonic, align 8 + ret double %val +} + define i8 @load_seq_cst_i8(ptr %ptr) { ; LA32-LABEL: load_seq_cst_i8: ; LA32: # %bb.0: @@ -328,6 +454,50 @@ define ptr @load_seq_cst_ptr(ptr %ptr) { ret ptr %val } +define float @load_seq_cst_float(ptr %ptr) { +; LA32-LABEL: load_seq_cst_float: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: movgr2fr.w $fa0, $a0 +; LA32-NEXT: dbar 16 +; LA32-NEXT: ret +; +; LA64-LABEL: load_seq_cst_float: +; LA64: # %bb.0: +; LA64-NEXT: ld.w $a0, $a0, 0 +; LA64-NEXT: movgr2fr.w $fa0, $a0 +; LA64-NEXT: dbar 16 +; LA64-NEXT: ret + %val = load atomic float, ptr %ptr seq_cst, align 8 + ret float %val +} + +define double @load_seq_cst_double(ptr %ptr) { +; LA32-LABEL: load_seq_cst_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: ori $a1, $zero, 5 +; LA32-NEXT: bl %plt(__atomic_load_8) +; LA32-NEXT: st.w $a1, $sp, 4 +; LA32-NEXT: st.w $a0, $sp, 0 +; LA32-NEXT: fld.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: load_seq_cst_double: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: movgr2fr.d $fa0, $a0 +; LA64-NEXT: dbar 16 +; LA64-NEXT: ret + %val = load atomic double, ptr %ptr seq_cst, align 8 + ret double %val +} + define void @store_release_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_release_i8: ; LA32: # %bb.0: @@ -411,6 +581,48 @@ define void @store_release_ptr(ptr %ptr, ptr %v) { ret void } +define void @store_release_float(ptr %ptr, float %v) { +; LA32-LABEL: store_release_float: +; LA32: # %bb.0: +; LA32-NEXT: movfr2gr.s $a1, $fa0 +; LA32-NEXT: dbar 18 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_release_float: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.s $a1, $fa0 +; LA64-NEXT: amswap_db.w $zero, $a1, $a0 +; LA64-NEXT: ret + store atomic float %v, ptr %ptr release, align 8 + ret void +} + +define void @store_release_double(ptr %ptr, double %v) { +; LA32-LABEL: store_release_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: fst.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $a1, $sp, 0 +; LA32-NEXT: ld.w $a2, $sp, 4 +; LA32-NEXT: ori $a3, $zero, 3 +; LA32-NEXT: bl %plt(__atomic_store_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: store_release_double: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.d $a1, $fa0 +; LA64-NEXT: amswap_db.d $zero, $a1, $a0 +; LA64-NEXT: ret + store atomic double %v, ptr %ptr release, align 8 + ret void +} + define void @store_unordered_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_unordered_i8: ; LA32: # %bb.0: @@ -488,6 +700,47 @@ define void @store_unordered_ptr(ptr %ptr, ptr %v) { ret void } +define void @store_unordered_float(ptr %ptr, float %v) { +; LA32-LABEL: store_unordered_float: +; LA32: # %bb.0: +; LA32-NEXT: movfr2gr.s $a1, $fa0 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_unordered_float: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.s $a1, $fa0 +; LA64-NEXT: st.w $a1, $a0, 0 +; LA64-NEXT: ret + store atomic float %v, ptr %ptr unordered, align 8 + ret void +} + +define void @store_unordered_double(ptr %ptr, double %v) { +; LA32-LABEL: store_unordered_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: fst.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $a1, $sp, 0 +; LA32-NEXT: ld.w $a2, $sp, 4 +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_store_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: store_unordered_double: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.d $a1, $fa0 +; LA64-NEXT: st.d $a1, $a0, 0 +; LA64-NEXT: ret + store atomic double %v, ptr %ptr unordered, align 8 + ret void +} + define void @store_monotonic_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_monotonic_i8: ; LA32: # %bb.0: @@ -565,6 +818,47 @@ define void @store_monotonic_ptr(ptr %ptr, ptr %v) { ret void } +define void @store_monotonic_float(ptr %ptr, float %v) { +; LA32-LABEL: store_monotonic_float: +; LA32: # %bb.0: +; LA32-NEXT: movfr2gr.s $a1, $fa0 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_monotonic_float: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.s $a1, $fa0 +; LA64-NEXT: st.w $a1, $a0, 0 +; LA64-NEXT: ret + store atomic float %v, ptr %ptr monotonic, align 8 + ret void +} + +define void @store_monotonic_double(ptr %ptr, double %v) { +; LA32-LABEL: store_monotonic_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: fst.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $a1, $sp, 0 +; LA32-NEXT: ld.w $a2, $sp, 4 +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_store_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: store_monotonic_double: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.d $a1, $fa0 +; LA64-NEXT: st.d $a1, $a0, 0 +; LA64-NEXT: ret + store atomic double %v, ptr %ptr monotonic, align 8 + ret void +} + define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_seq_cst_i8: ; LA32: # %bb.0: @@ -653,3 +947,46 @@ define void @store_seq_cst_ptr(ptr %ptr, ptr %v) { store atomic ptr %v, ptr %ptr seq_cst, align 8 ret void } + +define void @store_seq_cst_float(ptr %ptr, float %v) { +; LA32-LABEL: store_seq_cst_float: +; LA32: # %bb.0: +; LA32-NEXT: movfr2gr.s $a1, $fa0 +; LA32-NEXT: dbar 16 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: dbar 16 +; LA32-NEXT: ret +; +; LA64-LABEL: store_seq_cst_float: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.s $a1, $fa0 +; LA64-NEXT: amswap_db.w $zero, $a1, $a0 +; LA64-NEXT: ret + store atomic float %v, ptr %ptr seq_cst, align 8 + ret void +} + +define void @store_seq_cst_double(ptr %ptr, double %v) { +; LA32-LABEL: store_seq_cst_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: fst.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $a1, $sp, 0 +; LA32-NEXT: ld.w $a2, $sp, 4 +; LA32-NEXT: ori $a3, $zero, 5 +; LA32-NEXT: bl %plt(__atomic_store_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: store_seq_cst_double: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.d $a1, $fa0 +; LA64-NEXT: amswap_db.d $zero, $a1, $a0 +; LA64-NEXT: ret + store atomic double %v, ptr %ptr seq_cst, align 8 + ret void +} From 4f90e75bdc156d2630da525eb74d00611753c706 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Wed, 25 Sep 2024 00:41:49 -0700 Subject: [PATCH 37/65] [AMDGPU] Do not count implicit VGPRs in SIInsertWaitcnts (#109049) When generating waitcounts before a use or def skip VGPRs. We never have a real implicit VGPR operands on memory instructions, it is only for super-reg liveness accounting. Some other instructions (MOVRELS as an example) may have real implicit VGPR uses though. This is less then ideal but most of the problems observed with spills. --- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 8 + .../GlobalISel/image-waterfall-loop-O0.ll | 1 - llvm/test/CodeGen/AMDGPU/collapse-endcf.ll | 23 +- llvm/test/CodeGen/AMDGPU/div_i128.ll | 96 ++++---- .../CodeGen/AMDGPU/indirect-addressing-si.ll | 57 +++-- llvm/test/CodeGen/AMDGPU/load-global-i16.ll | 11 +- llvm/test/CodeGen/AMDGPU/load-global-i32.ll | 1 - ...uf-legalize-operands-non-ptr-intrinsics.ll | 1 - .../CodeGen/AMDGPU/mubuf-legalize-operands.ll | 1 - llvm/test/CodeGen/AMDGPU/rem_i128.ll | 48 ++-- llvm/test/CodeGen/AMDGPU/spill-wait.mir | 211 +++++++++++++++--- llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir | 2 +- llvm/test/CodeGen/AMDGPU/waitcnt.mir | 2 +- 13 files changed, 332 insertions(+), 130 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index a5668272601384..80a7529002ac90 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1752,6 +1752,14 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, const bool IsVGPR = TRI->isVectorRegister(*MRI, Op.getReg()); for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { if (IsVGPR) { + // Implicit VGPR defs and uses are never a part of the memory + // instructions description and usually present to account for + // super-register liveness. + // TODO: Most of the other instructions also have implicit uses + // for the liveness accounting only. + if (Op.isImplicit() && MI.mayLoadOrStore()) + continue; + // RAW always needs an s_waitcnt. WAW needs an s_waitcnt unless the // previous write and this write are the same type of VMEM // instruction, in which case they are (in some architectures) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll index 10cbc56cc5fbea..de973481f82308 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll @@ -187,7 +187,6 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: v_mov_b32_e32 v3, s4 ; CHECK-NEXT: ; kill: killed $vgpr4 ; CHECK-NEXT: s_xor_saveexec_b32 s4, -1 -; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b32 exec_lo, s4 diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll index 51f9cf73488ee7..67a084068941a4 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -778,8 +778,8 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB3_7 ; GCN-O0-NEXT: ; %bb.3: ; %bb.inner.then -; GCN-O0-NEXT: s_waitcnt expcnt(1) ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(1) ; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s0, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s2, 0 @@ -824,8 +824,8 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB3_6 ; GCN-O0-NEXT: ; %bb.5: ; %bb.inner.then2 -; GCN-O0-NEXT: s_waitcnt expcnt(1) ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(1) ; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s0, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s2, 0 @@ -1242,10 +1242,13 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_branch .LBB5_7 ; GCN-O0-NEXT: .LBB5_6: ; %Flow @@ -1263,10 +1266,13 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_branch .LBB5_5 ; GCN-O0-NEXT: .LBB5_7: ; %bb10 @@ -1336,10 +1342,13 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 ; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_waitcnt vmcnt(1) +; GCN-O0-NEXT: s_waitcnt vmcnt(4) ; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(4) ; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(4) ; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(4) ; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_andn2_b64 exec, exec, s[4:5] ; GCN-O0-NEXT: s_cbranch_execnz .LBB5_1 @@ -1356,9 +1365,11 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 ; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_waitcnt expcnt(0) +; GCN-O0-NEXT: s_waitcnt expcnt(2) ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(1) ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll index 712cecff40617b..b541be9f5aa444 100644 --- a/llvm/test/CodeGen/AMDGPU/div_i128.ll +++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll @@ -570,21 +570,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB0_5 ; GFX9-O0-NEXT: .LBB0_3: ; %Flow2 @@ -599,9 +599,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -663,9 +663,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -1718,17 +1718,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB0_5 ; GFX9-G-O0-NEXT: .LBB0_3: ; %Flow2 @@ -1743,11 +1747,13 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB0_9 ; GFX9-G-O0-NEXT: .LBB0_4: ; %udiv-loop-exit @@ -1822,11 +1828,13 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 @@ -2787,21 +2795,21 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB1_5 ; GFX9-O0-NEXT: .LBB1_3: ; %Flow2 @@ -2816,9 +2824,9 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -2880,9 +2888,9 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -3846,17 +3854,21 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB1_5 ; GFX9-G-O0-NEXT: .LBB1_3: ; %Flow2 @@ -3871,11 +3883,13 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB1_9 ; GFX9-G-O0-NEXT: .LBB1_4: ; %udiv-loop-exit @@ -3950,11 +3964,13 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll index b4fe112438b4f7..60946956547a7c 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -4111,7 +4111,6 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr ; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 ; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[16:17] -; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill @@ -4137,7 +4136,6 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v0, 6 ; NOOPT-NEXT: v_readlane_b32 s1, v0, 7 -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v3, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload @@ -4146,12 +4144,19 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr ; NOOPT-NEXT: buffer_load_dword v6, off, s[20:23], 0 offset:20 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v7, off, s[20:23], 0 offset:24 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[20:23], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: buffer_load_dword v9, off, s[20:23], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v10, off, s[20:23], 0 offset:36 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v11, off, s[20:23], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) ; NOOPT-NEXT: buffer_load_dword v12, off, s[20:23], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_load_dword v13, off, s[20:23], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v14, off, s[20:23], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:56 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:60 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload @@ -4178,7 +4183,6 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr ; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:192 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:196 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:200 ; 4-byte Folded Spill -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill @@ -4597,7 +4601,6 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p ; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 ; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[16:17] -; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill @@ -4623,7 +4626,6 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v0, 6 ; NOOPT-NEXT: v_readlane_b32 s1, v0, 7 -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v3, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload @@ -4632,12 +4634,19 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p ; NOOPT-NEXT: buffer_load_dword v6, off, s[20:23], 0 offset:20 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v7, off, s[20:23], 0 offset:24 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[20:23], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: buffer_load_dword v9, off, s[20:23], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v10, off, s[20:23], 0 offset:36 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v11, off, s[20:23], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) ; NOOPT-NEXT: buffer_load_dword v12, off, s[20:23], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_load_dword v13, off, s[20:23], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v14, off, s[20:23], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:56 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:60 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload @@ -4664,7 +4673,6 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p ; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:192 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:196 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:200 ; 4-byte Folded Spill -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill @@ -5912,7 +5920,6 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v0, 7 ; NOOPT-NEXT: v_readlane_b32 s1, v0, 8 -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v1, off, s[28:31], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[28:31], 0 offset:4 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v3, off, s[28:31], 0 offset:8 ; 4-byte Folded Reload @@ -5921,12 +5928,19 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: buffer_load_dword v6, off, s[28:31], 0 offset:20 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v7, off, s[28:31], 0 offset:24 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[28:31], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: buffer_load_dword v9, off, s[28:31], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v10, off, s[28:31], 0 offset:36 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v11, off, s[28:31], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) ; NOOPT-NEXT: buffer_load_dword v12, off, s[28:31], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_load_dword v13, off, s[28:31], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v14, off, s[28:31], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v15, off, s[28:31], 0 offset:56 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:60 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:72 ; 4-byte Folded Reload @@ -5953,7 +5967,6 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:140 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:144 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:148 ; 4-byte Folded Spill -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:8 ; 4-byte Folded Spill @@ -6041,7 +6054,6 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v0, 11 ; NOOPT-NEXT: v_readlane_b32 s1, v0, 12 -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v1, off, s[28:31], 0 offset:152 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[28:31], 0 offset:156 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v3, off, s[28:31], 0 offset:160 ; 4-byte Folded Reload @@ -6050,12 +6062,19 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: buffer_load_dword v6, off, s[28:31], 0 offset:172 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v7, off, s[28:31], 0 offset:176 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[28:31], 0 offset:180 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: buffer_load_dword v9, off, s[28:31], 0 offset:184 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v10, off, s[28:31], 0 offset:188 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v11, off, s[28:31], 0 offset:192 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) ; NOOPT-NEXT: buffer_load_dword v12, off, s[28:31], 0 offset:196 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_load_dword v13, off, s[28:31], 0 offset:200 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v14, off, s[28:31], 0 offset:204 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v15, off, s[28:31], 0 offset:208 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:212 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:216 ; 4-byte Folded Reload @@ -6082,7 +6101,6 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:272 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:276 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:280 ; 4-byte Folded Spill -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:152 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:156 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:160 ; 4-byte Folded Spill @@ -9175,7 +9193,6 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v0, 6 ; NOOPT-NEXT: v_readlane_b32 s1, v0, 7 -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v1, off, s[24:27], 0 offset:12 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[24:27], 0 offset:16 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v3, off, s[24:27], 0 offset:20 ; 4-byte Folded Reload @@ -9184,12 +9201,19 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: buffer_load_dword v6, off, s[24:27], 0 offset:32 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v7, off, s[24:27], 0 offset:36 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[24:27], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: buffer_load_dword v9, off, s[24:27], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v10, off, s[24:27], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v11, off, s[24:27], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) ; NOOPT-NEXT: buffer_load_dword v12, off, s[24:27], 0 offset:56 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_load_dword v13, off, s[24:27], 0 offset:60 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v14, off, s[24:27], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v15, off, s[24:27], 0 offset:68 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[24:27], 0 offset:72 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[24:27], 0 offset:76 ; 4-byte Folded Reload @@ -9216,7 +9240,6 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: buffer_store_dword v14, off, s[24:27], 0 offset:136 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[24:27], 0 offset:140 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v16, off, s[24:27], 0 offset:144 ; 4-byte Folded Spill -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_store_dword v1, off, s[24:27], 0 offset:12 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[24:27], 0 offset:16 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v3, off, s[24:27], 0 offset:20 ; 4-byte Folded Spill @@ -9641,7 +9664,6 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1 ; NOOPT-NEXT: buffer_store_dword v16, off, s[16:19], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[12:13] -; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill @@ -9667,7 +9689,6 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v0, 9 ; NOOPT-NEXT: v_readlane_b32 s1, v0, 10 -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v1, off, s[16:19], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload @@ -9676,12 +9697,19 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: buffer_load_dword v6, off, s[16:19], 0 offset:20 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v7, off, s[16:19], 0 offset:24 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[16:19], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: buffer_load_dword v9, off, s[16:19], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v10, off, s[16:19], 0 offset:36 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v11, off, s[16:19], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) ; NOOPT-NEXT: buffer_load_dword v12, off, s[16:19], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_load_dword v13, off, s[16:19], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v14, off, s[16:19], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v15, off, s[16:19], 0 offset:56 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[16:19], 0 offset:60 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[16:19], 0 offset:144 ; 4-byte Folded Reload @@ -9708,7 +9736,6 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: buffer_store_dword v14, off, s[16:19], 0 offset:200 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[16:19], 0 offset:204 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v16, off, s[16:19], 0 offset:208 ; 4-byte Folded Spill -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll index fe5427048e8cf1..e0c2d00891250b 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll @@ -3569,7 +3569,6 @@ define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(ptr addrspace(1) %ou ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 @@ -3577,7 +3576,6 @@ define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(ptr addrspace(1) %ou ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 @@ -4382,7 +4380,6 @@ define amdgpu_kernel void @global_sextload_v64i16_to_v64i32(ptr addrspace(1) %ou ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 @@ -7350,12 +7347,15 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %ou ; GCN-NOHSA-SI-NEXT: buffer_store_dword v13, off, s[12:15], 0 offset:20 ; 4-byte Folded Spill ; GCN-NOHSA-SI-NEXT: buffer_store_dword v14, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill ; GCN-NOHSA-SI-NEXT: buffer_store_dword v15, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill -; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) +; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(3) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v12, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(2) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v13, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(1) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v14, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) +; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v15, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload +; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(2) ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, v39 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, v39 @@ -7378,7 +7378,6 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %ou ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i32.ll b/llvm/test/CodeGen/AMDGPU/load-global-i32.ll index 5ae2b91bdb3e7c..4d7f1a9663c3dc 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-i32.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i32.ll @@ -3098,7 +3098,6 @@ define amdgpu_kernel void @global_sextload_v32i32_to_v32i64(ptr addrspace(1) %ou ; SI-NOHSA-NEXT: buffer_load_dword v8, off, s[12:15], 0 ; 4-byte Folded Reload ; SI-NOHSA-NEXT: buffer_load_dword v9, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; SI-NOHSA-NEXT: buffer_load_dword v10, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload -; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) ; SI-NOHSA-NEXT: buffer_load_dword v11, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload ; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) ; SI-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208 diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll index 85d342bf303c08..c302233e748fda 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll @@ -244,7 +244,6 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 { ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; W64-O0-NEXT: ; kill: killed $vgpr1 ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; W64-O0-NEXT: s_nop 0 ; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll index 42ed4c1f2e63d4..dd6fd5aa384f6c 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll @@ -258,7 +258,6 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 { ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; W64-O0-NEXT: ; kill: killed $vgpr1 ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_nop 0 ; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/rem_i128.ll b/llvm/test/CodeGen/AMDGPU/rem_i128.ll index fd6e06afc67da9..19cc60963e9007 100644 --- a/llvm/test/CodeGen/AMDGPU/rem_i128.ll +++ b/llvm/test/CodeGen/AMDGPU/rem_i128.ll @@ -609,21 +609,21 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB0_5 ; GFX9-O0-NEXT: .LBB0_3: ; %Flow2 @@ -638,9 +638,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -702,9 +702,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -2007,21 +2007,21 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB1_5 ; GFX9-O0-NEXT: .LBB1_3: ; %Flow2 @@ -2036,9 +2036,9 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -2100,9 +2100,9 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 diff --git a/llvm/test/CodeGen/AMDGPU/spill-wait.mir b/llvm/test/CodeGen/AMDGPU/spill-wait.mir index 8e896252af89b4..6983a2742a41c0 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-wait.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-wait.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 -# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GFX12 %s # There shall be no S_WAITCNT between two stores. @@ -10,14 +11,27 @@ body: | bb.0: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79 - ; GCN-LABEL: name: spill_vgpr_tuple - ; GCN: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: S_WAITCNT 0 - ; GCN-NEXT: $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65 - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65 - ; GCN-NEXT: S_ENDPGM 0 + ; GFX9-LABEL: name: spill_vgpr_tuple + ; GFX9: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65 + ; GFX9-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: spill_vgpr_tuple + ; GFX12: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 + ; GFX12-NEXT: S_WAIT_EXPCNT 0 + ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 + ; GFX12-NEXT: S_WAIT_BVHCNT 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec + ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65 + ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65 + ; GFX12-NEXT: S_ENDPGM 0 $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65 BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65 @@ -33,14 +47,27 @@ body: | bb.0: liveins: $vgpr0, $sgpr10_sgpr11 - ; GCN-LABEL: name: load_vcc_wait - ; GCN: liveins: $vgpr0, $sgpr10_sgpr11 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: S_WAITCNT 0 - ; GCN-NEXT: $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: S_WAITCNT 49279 - ; GCN-NEXT: $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec - ; GCN-NEXT: S_ENDPGM 0 + ; GFX9-LABEL: name: load_vcc_wait + ; GFX9: liveins: $vgpr0, $sgpr10_sgpr11 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GFX9-NEXT: S_WAITCNT 49279 + ; GFX9-NEXT: $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: load_vcc_wait + ; GFX12: liveins: $vgpr0, $sgpr10_sgpr11 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 + ; GFX12-NEXT: S_WAIT_EXPCNT 0 + ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 + ; GFX12-NEXT: S_WAIT_BVHCNT 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec + ; GFX12-NEXT: S_ENDPGM 0 $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec S_ENDPGM 0 @@ -55,14 +82,27 @@ body: | bb.0: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1 - ; GCN-LABEL: name: load_flat_scr_lo_flat_load_wait - ; GCN: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: S_WAITCNT 0 - ; GCN-NEXT: $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: S_WAITCNT 49279 - ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM 0 + ; GFX9-LABEL: name: load_flat_scr_lo_flat_load_wait + ; GFX9: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GFX9-NEXT: S_WAITCNT 49279 + ; GFX9-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: load_flat_scr_lo_flat_load_wait + ; GFX12: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 + ; GFX12-NEXT: S_WAIT_EXPCNT 0 + ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 + ; GFX12-NEXT: S_WAIT_BVHCNT 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr + ; GFX12-NEXT: S_ENDPGM 0 $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 @@ -75,15 +115,120 @@ body: | bb.0: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32 - ; GCN-LABEL: name: load_flat_scr_lo_scratch_store_wait - ; GCN: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: S_WAITCNT 0 - ; GCN-NEXT: $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: S_WAITCNT 49279 - ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM 0 + ; GFX9-LABEL: name: load_flat_scr_lo_scratch_store_wait + ; GFX9: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GFX9-NEXT: S_WAITCNT 49279 + ; GFX9-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: load_flat_scr_lo_scratch_store_wait + ; GFX12: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 + ; GFX12-NEXT: S_WAIT_EXPCNT 0 + ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 + ; GFX12-NEXT: S_WAIT_BVHCNT 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr + ; GFX12-NEXT: S_ENDPGM 0 $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... + +# Check that implicit spill defs do not force wait to zero on the first store + +--- +name: spill_load_store + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32 + + ; GFX9-LABEL: name: spill_load_store + ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: S_WAITCNT 3955 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: S_WAITCNT 3955 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec + ; GFX9-NEXT: S_WAITCNT 3955 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec + ; GFX9-NEXT: S_WAITCNT 3955 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: spill_load_store + ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 + ; GFX12-NEXT: S_WAIT_EXPCNT 0 + ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 + ; GFX12-NEXT: S_WAIT_BVHCNT 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX12-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec + ; GFX12-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec + ; GFX12-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX12-NEXT: S_WAIT_LOADCNT 3 + ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX12-NEXT: S_WAIT_LOADCNT 2 + ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec + ; GFX12-NEXT: S_WAIT_LOADCNT 1 + ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec + ; GFX12-NEXT: S_WAIT_LOADCNT 0 + ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX12-NEXT: S_ENDPGM 0 + $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec + $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec + $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 + S_ENDPGM 0 +... + +# Make sure we have wait to mitigate WAW on gfx12 + +--- +name: scratch_load_waw +body: | + bb.0.entry: + liveins: $vgpr0, $sgpr0 + + ; GFX9-LABEL: name: scratch_load_waw + ; GFX9: liveins: $vgpr0, $sgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-NEXT: $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr + ; GFX9-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: scratch_load_waw + ; GFX12: liveins: $vgpr0, $sgpr0 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 + ; GFX12-NEXT: S_WAIT_EXPCNT 0 + ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 + ; GFX12-NEXT: S_WAIT_BVHCNT 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX12-NEXT: S_WAIT_LOADCNT 0 + ; GFX12-NEXT: $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr + ; GFX12-NEXT: S_ENDPGM 0 + $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir index d69cb448b95de5..7a807260d142dd 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir @@ -321,8 +321,8 @@ body: | ; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 { ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: } - ; GCN-NEXT: S_WAITCNT 112 ; GCN-NEXT: BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 { + ; GCN-NEXT: S_WAITCNT 112 ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: } BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 { diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt.mir index 4051be18dd49f9..8528de77533bf2 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt.mir @@ -301,8 +301,8 @@ body: | # CHECK: BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 { # CHECK-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr # CHECK-NEXT: } -# CHECK-NEXT: S_WAITCNT 112 # CHECK-NEXT: BUNDLE implicit $vgpr0, implicit $vgpr1_vgpr2 { +# CHECK-NEXT: S_WAITCNT 112 # CHECK-NEXT: FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr # CHECK-NEXT: } From cda0cb3939308ab8ac81e1898228a3b7744a2801 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Wed, 25 Sep 2024 08:56:37 +0100 Subject: [PATCH 38/65] [NFC][LoopVectorize] Remove unused argument from fixupIVUsers (#109789) The VectorHeader argument passed to fixupIVUsers is unused and can be removed. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 09e4d0fcd31f3c..5e4f33c55610f1 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -541,8 +541,8 @@ class InnerLoopVectorizer { /// Set up the values of the IVs correctly when exiting the vector loop. void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *VectorTripCount, Value *EndValue, - BasicBlock *MiddleBlock, BasicBlock *VectorHeader, - VPlan &Plan, VPTransformState &State); + BasicBlock *MiddleBlock, VPlan &Plan, + VPTransformState &State); /// Iteratively sink the scalarized operands of a predicated instruction into /// the block that was created for it. @@ -2742,8 +2742,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton( void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *VectorTripCount, Value *EndValue, - BasicBlock *MiddleBlock, - BasicBlock *VectorHeader, VPlan &Plan, + BasicBlock *MiddleBlock, VPlan &Plan, VPTransformState &State) { // There are two kinds of external IV usages - those that use the value // computed in the last iteration (the PHI) and those that use the penultimate @@ -2954,8 +2953,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, for (const auto &Entry : Legal->getInductionVars()) fixupIVUsers(Entry.first, Entry.second, getOrCreateVectorTripCount(VectorLoop->getLoopPreheader()), - IVEndValues[Entry.first], LoopMiddleBlock, - VectorLoop->getHeader(), Plan, State); + IVEndValues[Entry.first], LoopMiddleBlock, Plan, State); } // Fix live-out phis not already fixed earlier. From b8d1bae6484625332ab7596390b855e6dacfead7 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 25 Sep 2024 10:14:15 +0200 Subject: [PATCH 39/65] [CmpInstAnalysis] Return decomposed bit test as struct (NFC) (#109819) decomposeBitTestICmp() currently returns the result via two out parameters plus an in-place modification of Pred. This changes it to return an optional struct instead. The motivation here is twofold. First, I'd like to extend this code to handle cases where the comparison is against a value other than zero, which would mean yet another out parameter. Second, while doing that I was badly bitten by the in-place modification, so I'd like to get rid of it. --- llvm/include/llvm/Analysis/CmpInstAnalysis.h | 19 ++++-- llvm/lib/Analysis/CmpInstAnalysis.cpp | 67 ++++++++++--------- llvm/lib/Analysis/InstructionSimplify.cpp | 10 ++- .../InstCombine/InstCombineAndOrXor.cpp | 20 ++++-- .../InstCombine/InstCombineCompares.cpp | 9 ++- .../InstCombine/InstCombineSelect.cpp | 20 +++--- .../Transforms/Scalar/LoopIdiomRecognize.cpp | 15 +++-- 7 files changed, 91 insertions(+), 69 deletions(-) diff --git a/llvm/include/llvm/Analysis/CmpInstAnalysis.h b/llvm/include/llvm/Analysis/CmpInstAnalysis.h index 1d07a0c22887bb..406dacd930605e 100644 --- a/llvm/include/llvm/Analysis/CmpInstAnalysis.h +++ b/llvm/include/llvm/Analysis/CmpInstAnalysis.h @@ -14,6 +14,7 @@ #ifndef LLVM_ANALYSIS_CMPINSTANALYSIS_H #define LLVM_ANALYSIS_CMPINSTANALYSIS_H +#include "llvm/ADT/APInt.h" #include "llvm/IR/InstrTypes.h" namespace llvm { @@ -91,12 +92,18 @@ namespace llvm { Constant *getPredForFCmpCode(unsigned Code, Type *OpTy, CmpInst::Predicate &Pred); - /// Decompose an icmp into the form ((X & Mask) pred 0) if possible. The - /// returned predicate is either == or !=. Returns false if decomposition - /// fails. - bool decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate &Pred, - Value *&X, APInt &Mask, - bool LookThroughTrunc = true); + /// Represents the operation icmp (X & Mask) pred 0, where pred can only be + /// eq or ne. + struct DecomposedBitTest { + Value *X; + CmpInst::Predicate Pred; + APInt Mask; + }; + + /// Decompose an icmp into the form ((X & Mask) pred 0) if possible. + std::optional + decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred, + bool LookThroughTrunc = true); } // end namespace llvm diff --git a/llvm/lib/Analysis/CmpInstAnalysis.cpp b/llvm/lib/Analysis/CmpInstAnalysis.cpp index a1fa7857764d98..36d7aa510545af 100644 --- a/llvm/lib/Analysis/CmpInstAnalysis.cpp +++ b/llvm/lib/Analysis/CmpInstAnalysis.cpp @@ -73,81 +73,84 @@ Constant *llvm::getPredForFCmpCode(unsigned Code, Type *OpTy, return nullptr; } -bool llvm::decomposeBitTestICmp(Value *LHS, Value *RHS, - CmpInst::Predicate &Pred, - Value *&X, APInt &Mask, bool LookThruTrunc) { +std::optional +llvm::decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred, + bool LookThruTrunc) { using namespace PatternMatch; const APInt *C; if (!match(RHS, m_APIntAllowPoison(C))) - return false; + return std::nullopt; + DecomposedBitTest Result; switch (Pred) { default: - return false; + return std::nullopt; case ICmpInst::ICMP_SLT: // X < 0 is equivalent to (X & SignMask) != 0. if (!C->isZero()) - return false; - Mask = APInt::getSignMask(C->getBitWidth()); - Pred = ICmpInst::ICMP_NE; + return std::nullopt; + Result.Mask = APInt::getSignMask(C->getBitWidth()); + Result.Pred = ICmpInst::ICMP_NE; break; case ICmpInst::ICMP_SLE: // X <= -1 is equivalent to (X & SignMask) != 0. if (!C->isAllOnes()) - return false; - Mask = APInt::getSignMask(C->getBitWidth()); - Pred = ICmpInst::ICMP_NE; + return std::nullopt; + Result.Mask = APInt::getSignMask(C->getBitWidth()); + Result.Pred = ICmpInst::ICMP_NE; break; case ICmpInst::ICMP_SGT: // X > -1 is equivalent to (X & SignMask) == 0. if (!C->isAllOnes()) - return false; - Mask = APInt::getSignMask(C->getBitWidth()); - Pred = ICmpInst::ICMP_EQ; + return std::nullopt; + Result.Mask = APInt::getSignMask(C->getBitWidth()); + Result.Pred = ICmpInst::ICMP_EQ; break; case ICmpInst::ICMP_SGE: // X >= 0 is equivalent to (X & SignMask) == 0. if (!C->isZero()) - return false; - Mask = APInt::getSignMask(C->getBitWidth()); - Pred = ICmpInst::ICMP_EQ; + return std::nullopt; + Result.Mask = APInt::getSignMask(C->getBitWidth()); + Result.Pred = ICmpInst::ICMP_EQ; break; case ICmpInst::ICMP_ULT: // X isPowerOf2()) - return false; - Mask = -*C; - Pred = ICmpInst::ICMP_EQ; + return std::nullopt; + Result.Mask = -*C; + Result.Pred = ICmpInst::ICMP_EQ; break; case ICmpInst::ICMP_ULE: // X <=u 2^n-1 is equivalent to (X & ~(2^n-1)) == 0. if (!(*C + 1).isPowerOf2()) - return false; - Mask = ~*C; - Pred = ICmpInst::ICMP_EQ; + return std::nullopt; + Result.Mask = ~*C; + Result.Pred = ICmpInst::ICMP_EQ; break; case ICmpInst::ICMP_UGT: // X >u 2^n-1 is equivalent to (X & ~(2^n-1)) != 0. if (!(*C + 1).isPowerOf2()) - return false; - Mask = ~*C; - Pred = ICmpInst::ICMP_NE; + return std::nullopt; + Result.Mask = ~*C; + Result.Pred = ICmpInst::ICMP_NE; break; case ICmpInst::ICMP_UGE: // X >=u 2^n is equivalent to (X & ~(2^n-1)) != 0. if (!C->isPowerOf2()) - return false; - Mask = -*C; - Pred = ICmpInst::ICMP_NE; + return std::nullopt; + Result.Mask = -*C; + Result.Pred = ICmpInst::ICMP_NE; break; } + Value *X; if (LookThruTrunc && match(LHS, m_Trunc(m_Value(X)))) { - Mask = Mask.zext(X->getType()->getScalarSizeInBits()); + Result.X = X; + Result.Mask = Result.Mask.zext(X->getType()->getScalarSizeInBits()); } else { - X = LHS; + Result.X = LHS; } - return true; + return Result; } diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 32a9f1ab34fb3f..90f05d43a2b147 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4624,13 +4624,11 @@ static Value *simplifyCmpSelOfMaxMin(Value *CmpLHS, Value *CmpRHS, static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *CmpRHS, ICmpInst::Predicate Pred, Value *TrueVal, Value *FalseVal) { - Value *X; - APInt Mask; - if (!decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, X, Mask)) - return nullptr; + if (auto Res = decomposeBitTestICmp(CmpLHS, CmpRHS, Pred)) + return simplifySelectBitTest(TrueVal, FalseVal, Res->X, &Res->Mask, + Res->Pred == ICmpInst::ICMP_EQ); - return simplifySelectBitTest(TrueVal, FalseVal, X, &Mask, - Pred == ICmpInst::ICMP_EQ); + return nullptr; } /// Try to simplify a select instruction when its condition operand is an diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 80d3adedfc89f3..e8c0b006616543 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -181,11 +181,13 @@ static unsigned conjugateICmpMask(unsigned Mask) { // Adapts the external decomposeBitTestICmp for local use. static bool decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate &Pred, Value *&X, Value *&Y, Value *&Z) { - APInt Mask; - if (!llvm::decomposeBitTestICmp(LHS, RHS, Pred, X, Mask)) + auto Res = llvm::decomposeBitTestICmp(LHS, RHS, Pred); + if (!Res) return false; - Y = ConstantInt::get(X->getType(), Mask); + Pred = Res->Pred; + X = Res->X; + Y = ConstantInt::get(X->getType(), Res->Mask); Z = ConstantInt::get(X->getType(), 0); return true; } @@ -870,11 +872,15 @@ static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1, APInt &UnsetBitsMask) -> bool { CmpInst::Predicate Pred = ICmp->getPredicate(); // Can it be decomposed into icmp eq (X & Mask), 0 ? - if (llvm::decomposeBitTestICmp(ICmp->getOperand(0), ICmp->getOperand(1), - Pred, X, UnsetBitsMask, - /*LookThroughTrunc=*/false) && - Pred == ICmpInst::ICMP_EQ) + auto Res = + llvm::decomposeBitTestICmp(ICmp->getOperand(0), ICmp->getOperand(1), + Pred, /*LookThroughTrunc=*/false); + if (Res && Res->Pred == ICmpInst::ICMP_EQ) { + X = Res->X; + UnsetBitsMask = Res->Mask; return true; + } + // Is it icmp eq (X & Mask), 0 already? const APInt *Mask; if (match(ICmp, m_ICmp(Pred, m_And(m_Value(X), m_APInt(Mask)), m_Zero())) && diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 698abbb34c18c3..b1215bb4d83b0f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -5905,11 +5905,10 @@ Instruction *InstCombinerImpl::foldICmpWithTrunc(ICmpInst &ICmp) { // This matches patterns corresponding to tests of the signbit as well as: // (trunc X) u< C --> (X & -C) == 0 (are all masked-high-bits clear?) // (trunc X) u> C --> (X & ~C) != 0 (are any masked-high-bits set?) - APInt Mask; - if (decomposeBitTestICmp(Op0, Op1, Pred, X, Mask, true /* WithTrunc */)) { - Value *And = Builder.CreateAnd(X, Mask); - Constant *Zero = ConstantInt::getNullValue(X->getType()); - return new ICmpInst(Pred, And, Zero); + if (auto Res = decomposeBitTestICmp(Op0, Op1, Pred, /*WithTrunc=*/true)) { + Value *And = Builder.CreateAnd(Res->X, Res->Mask); + Constant *Zero = ConstantInt::getNullValue(Res->X->getType()); + return new ICmpInst(Res->Pred, And, Zero); } unsigned SrcBits = X->getType()->getScalarSizeInBits(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 7476db9ee38f45..3dbe95897d6356 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -145,12 +145,15 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp, return nullptr; AndMask = *AndRHS; - } else if (decomposeBitTestICmp(Cmp->getOperand(0), Cmp->getOperand(1), - Pred, V, AndMask)) { - assert(ICmpInst::isEquality(Pred) && "Not equality test?"); - if (!AndMask.isPowerOf2()) + } else if (auto Res = decomposeBitTestICmp(Cmp->getOperand(0), + Cmp->getOperand(1), Pred)) { + assert(ICmpInst::isEquality(Res->Pred) && "Not equality test?"); + if (!Res->Mask.isPowerOf2()) return nullptr; + V = Res->X; + AndMask = Res->Mask; + Pred = Res->Pred; CreateAnd = true; } else { return nullptr; @@ -747,12 +750,13 @@ static Value *foldSelectICmpAndBinOp(const ICmpInst *IC, Value *TrueVal, C1Log = C1->logBase2(); } else { - APInt C1; - if (!decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CmpLHS, C1) || - !C1.isPowerOf2()) + auto Res = decomposeBitTestICmp(CmpLHS, CmpRHS, Pred); + if (!Res || !Res->Mask.isPowerOf2()) return nullptr; - C1Log = C1.logBase2(); + CmpLHS = Res->X; + Pred = Res->Pred; + C1Log = Res->Mask.logBase2(); NeedAnd = true; } diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 578d087e470e1e..740e1e39b9ee77 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -2464,11 +2464,16 @@ static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, (BitPos = ConstantExpr::getExactLogBase2(cast(BitMask))); }; auto MatchDecomposableConstantBitMask = [&]() { - APInt Mask; - return llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CurrX, Mask) && - ICmpInst::isEquality(Pred) && Mask.isPowerOf2() && - (BitMask = ConstantInt::get(CurrX->getType(), Mask)) && - (BitPos = ConstantInt::get(CurrX->getType(), Mask.logBase2())); + auto Res = llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred); + if (Res && Res->Mask.isPowerOf2()) { + assert(ICmpInst::isEquality(Res->Pred)); + Pred = Res->Pred; + CurrX = Res->X; + BitMask = ConstantInt::get(CurrX->getType(), Res->Mask); + BitPos = ConstantInt::get(CurrX->getType(), Res->Mask.logBase2()); + return true; + } + return false; }; if (!MatchVariableBitMask() && !MatchConstantBitMask() && From c58e51ac9786a335f7ebfe8978605e2379cc2c2a Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Wed, 25 Sep 2024 16:21:38 +0800 Subject: [PATCH 40/65] [ConstantFPRange] Suppress unused result warnings. NFC. (#109921) Fixes warnings `error: ignoring return value of function declared with 'nodiscard' attribute [-Werror,-Wunused-result]`. --- llvm/unittests/IR/ConstantFPRangeTest.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/llvm/unittests/IR/ConstantFPRangeTest.cpp b/llvm/unittests/IR/ConstantFPRangeTest.cpp index dbb6c4545a1831..bf6ea95c00e22e 100644 --- a/llvm/unittests/IR/ConstantFPRangeTest.cpp +++ b/llvm/unittests/IR/ConstantFPRangeTest.cpp @@ -414,18 +414,21 @@ TEST_F(ConstantFPRangeTest, Print) { #ifdef GTEST_HAS_DEATH_TEST #ifndef NDEBUG TEST_F(ConstantFPRangeTest, NonCanonicalEmptySet) { - EXPECT_DEATH(ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(0.0)), + EXPECT_DEATH((void)(ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(0.0))), "Non-canonical form"); } TEST_F(ConstantFPRangeTest, MismatchedSemantics) { - EXPECT_DEATH(ConstantFPRange::getNonNaN(APFloat(0.0), APFloat(1.0f)), + EXPECT_DEATH((void)(ConstantFPRange::getNonNaN(APFloat(0.0), APFloat(1.0f))), "Should only use the same semantics"); - EXPECT_DEATH(One.contains(APFloat(1.0f)), + EXPECT_DEATH((void)(One.contains(APFloat(1.0f))), "Should only use the same semantics"); ConstantFPRange OneF32 = ConstantFPRange(APFloat(1.0f)); - EXPECT_DEATH(One.contains(OneF32), "Should only use the same semantics"); - EXPECT_DEATH(One.intersectWith(OneF32), "Should only use the same semantics"); - EXPECT_DEATH(One.unionWith(OneF32), "Should only use the same semantics"); + EXPECT_DEATH((void)(One.contains(OneF32)), + "Should only use the same semantics"); + EXPECT_DEATH((void)(One.intersectWith(OneF32)), + "Should only use the same semantics"); + EXPECT_DEATH((void)(One.unionWith(OneF32)), + "Should only use the same semantics"); } #endif #endif From 99cd4cb1231a7209725722e477c2d829a18fb252 Mon Sep 17 00:00:00 2001 From: "Miguel A. Arroyo" Date: Wed, 25 Sep 2024 01:29:40 -0700 Subject: [PATCH 41/65] [LLD][MINGW] Add `--undefined-glob` flag support (#109866) --- lld/MinGW/Driver.cpp | 2 ++ lld/MinGW/Options.td | 2 ++ lld/docs/ReleaseNotes.rst | 1 + lld/test/MinGW/driver.test | 3 +++ 4 files changed, 8 insertions(+) diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp index c7d7b9cfca386f..553698d4f537fc 100644 --- a/lld/MinGW/Driver.cpp +++ b/lld/MinGW/Driver.cpp @@ -514,6 +514,8 @@ bool link(ArrayRef argsArr, llvm::raw_ostream &stdoutOS, for (auto *a : args.filtered(OPT_require_defined)) add("-include:" + StringRef(a->getValue())); + for (auto *a : args.filtered(OPT_undefined_glob)) + add("-includeglob:" + StringRef(a->getValue())); for (auto *a : args.filtered(OPT_undefined)) add("-includeoptional:" + StringRef(a->getValue())); for (auto *a : args.filtered(OPT_delayload)) diff --git a/lld/MinGW/Options.td b/lld/MinGW/Options.td index 7bd5fb80749da2..ff7e21fa808f39 100644 --- a/lld/MinGW/Options.td +++ b/lld/MinGW/Options.td @@ -139,6 +139,8 @@ defm threads defm tsaware: B_disable<"tsaware", "Set the 'Terminal Server aware' flag", "Don't set the 'Terminal Server aware' flag">; defm undefined: Eq<"undefined", "Include symbol in the link, if available">; +defm undefined_glob: EEq<"undefined-glob", "Force undefined symbol during linking">, + MetaVarName<"">; defm whole_archive: B<"whole-archive", "Include all object files for following archives", "No longer include all object files for following archives">; diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 6e043773f00379..da93da9196af70 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -45,6 +45,7 @@ COFF Improvements MinGW Improvements ------------------ +* ``--undefined-glob`` is now supported by translating into the ``/includeglob`` flag. MachO Improvements ------------------ diff --git a/lld/test/MinGW/driver.test b/lld/test/MinGW/driver.test index 0dab66b613c774..2831d155fef128 100644 --- a/lld/test/MinGW/driver.test +++ b/lld/test/MinGW/driver.test @@ -249,6 +249,9 @@ REQUIRE-DEFINED: -include:_foo -include:_bar -include:_baz -include:_foo2 RUN: ld.lld -### foo.o -m i386pe -u _foo --undefined _bar -undefined=_baz --undefined=_foo2 -u_foo3 2>&1 | FileCheck -check-prefix=UNDEFINED %s UNDEFINED: -includeoptional:_foo -includeoptional:_bar -includeoptional:_baz -includeoptional:_foo2 -includeoptional:_foo3 +RUN: ld.lld -### foo.o -m i386pe --undefined-glob="_foo*" 2>&1 | FileCheck -check-prefix=UNDEFINED-GLOB %s +UNDEFINED-GLOB: -includeglob:_foo* + RUN: ld.lld -### -m i386pep foo.o -Llibpath 2>&1 | FileCheck -check-prefix LIBPATH %s LIBPATH: -libpath:libpath From ba3d174d3f9ec4fb98a837e4e5089094405e1d33 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 25 Sep 2024 01:30:03 -0700 Subject: [PATCH 42/65] [FuzzMutate] Avoid repeated hash lookups (NFC) (#109903) --- llvm/lib/FuzzMutate/IRMutator.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/llvm/lib/FuzzMutate/IRMutator.cpp b/llvm/lib/FuzzMutate/IRMutator.cpp index 72e0de59376076..e1fe6c8d89ab05 100644 --- a/llvm/lib/FuzzMutate/IRMutator.cpp +++ b/llvm/lib/FuzzMutate/IRMutator.cpp @@ -623,9 +623,11 @@ void ShuffleBlockStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) { auto getAliveChildren = [&AliveInstsLookup](Instruction *I) { SmallSetVector Children; for (Value *U : I->users()) { - Instruction *P = dyn_cast(U); - if (P && AliveInstsLookup.count(P)) - Children.insert(AliveInstsLookup[P]); + if (Instruction *P = dyn_cast(U)) { + auto It = AliveInstsLookup.find(P); + if (It != AliveInstsLookup.end()) + Children.insert(It->second); + } } return Children; }; From ec60030639000daa6d64a92e088d74e65fcfc8a1 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 25 Sep 2024 01:30:40 -0700 Subject: [PATCH 43/65] [DWARFLinker] Avoid repeated hash lookups (NFC) (#109904) --- llvm/include/llvm/DWARFLinker/IndexedValuesMap.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/DWARFLinker/IndexedValuesMap.h b/llvm/include/llvm/DWARFLinker/IndexedValuesMap.h index fadbeb168b5334..5e0779157473e8 100644 --- a/llvm/include/llvm/DWARFLinker/IndexedValuesMap.h +++ b/llvm/include/llvm/DWARFLinker/IndexedValuesMap.h @@ -21,11 +21,9 @@ namespace dwarf_linker { template class IndexedValuesMap { public: uint64_t getValueIndex(T Value) { - typename ValueToIndexMapTy::iterator It = ValueToIndexMap.find(Value); - if (It == ValueToIndexMap.end()) { - It = ValueToIndexMap.insert(std::make_pair(Value, Values.size())).first; + auto [It, Inserted] = ValueToIndexMap.try_emplace(Value, Values.size()); + if (Inserted) Values.push_back(Value); - } return It->second; } From c3fc763dc165df36e655bf687b96688c2e7184ec Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 25 Sep 2024 01:31:33 -0700 Subject: [PATCH 44/65] [MCA] Avoid repeated hash lookups (NFC) (#109905) --- llvm/lib/MCA/HardwareUnits/ResourceManager.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp index 82030207eee616..e45bd00f1a2929 100644 --- a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp +++ b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp @@ -322,12 +322,11 @@ uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const { uint64_t ResourceMask = llvm::bit_floor(ReadyMask); - auto it = AvailableUnits.find(ResourceMask); - if (it == AvailableUnits.end()) { + auto [it, Inserted] = AvailableUnits.try_emplace(ResourceMask); + if (Inserted) { unsigned Index = getResourceStateIndex(ResourceMask); unsigned NumUnits = llvm::popcount(Resources[Index]->getReadyMask()); - it = - AvailableUnits.insert(std::make_pair(ResourceMask, NumUnits)).first; + it->second = NumUnits; } if (!it->second) { From a36aca5e189e8fd50af32060a25167e52e807f90 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 25 Sep 2024 01:32:31 -0700 Subject: [PATCH 45/65] [sancov] Avoid repeated map lookups (NFC) (#109906) --- llvm/tools/sancov/sancov.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/llvm/tools/sancov/sancov.cpp b/llvm/tools/sancov/sancov.cpp index dd51226e21311f..80f9996ba705ba 100644 --- a/llvm/tools/sancov/sancov.cpp +++ b/llvm/tools/sancov/sancov.cpp @@ -323,11 +323,10 @@ static void operator<<(json::OStream &W, for (const auto &Loc : Point->Locs) { if (Loc.FileName != FileName || Loc.FunctionName != FunctionName) continue; - if (WrittenIds.find(Point->Id) != WrittenIds.end()) + if (!WrittenIds.insert(Point->Id).second) continue; // Output : ":". - WrittenIds.insert(Point->Id); W.attribute(Point->Id, (utostr(Loc.Line) + ":" + utostr(Loc.Column))); } @@ -418,9 +417,6 @@ SymbolizedCoverage::read(const std::string &InputFile) { auto LineStr = Loc.substr(0, ColonPos); auto ColStr = Loc.substr(ColonPos + 1, Loc.size()); - if (Points.find(PointId) == Points.end()) - Points.insert(std::make_pair(PointId, CoveragePoint(PointId))); - DILineInfo LineInfo; LineInfo.FileName = Filename; LineInfo.FunctionName = FunctionName; @@ -428,7 +424,8 @@ SymbolizedCoverage::read(const std::string &InputFile) { LineInfo.Line = std::strtoul(LineStr.c_str(), &End, 10); LineInfo.Column = std::strtoul(ColStr.c_str(), &End, 10); - CoveragePoint *CoveragePoint = &Points.find(PointId)->second; + CoveragePoint *CoveragePoint = + &Points.try_emplace(PointId, PointId).first->second; CoveragePoint->Locs.push_back(LineInfo); } } @@ -576,10 +573,8 @@ getCoveragePoints(const std::string &ObjectFile, FrameInfo.FileName = normalizeFilename(FrameInfo.FileName); if (Ig.isIgnorelisted(FrameInfo)) continue; - if (Infos.find(FrameInfo) == Infos.end()) { - Infos.insert(FrameInfo); + if (Infos.insert(FrameInfo).second) Point.Locs.push_back(FrameInfo); - } } Result.push_back(Point); From 850ee790cfcafa39c4f00441e049e2862a597ad8 Mon Sep 17 00:00:00 2001 From: Dmitry Chernenkov Date: Wed, 25 Sep 2024 08:32:44 +0000 Subject: [PATCH 46/65] [Bazel] Port fa824dc0dd960214865b03d8f56b18bb93e4a88b --- utils/bazel/llvm-project-overlay/llvm/BUILD.bazel | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index bbb0435837e415..62f1c2a50acf7c 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -956,6 +956,9 @@ cc_library( ) + [ "include/llvm-c/Comdat.h", "include/llvm-c/DebugInfo.h", + "include/llvm/Analysis/ValueTracking.h", + "include/llvm/Analysis/SimplifyQuery.h", + "include/llvm/Analysis/WithCache.h", ] + [":llvm_intrinsics_headers"], copts = llvm_copts, textual_hdrs = glob(["include/llvm/IR/*.def"]), From d853adee004dcee7d5a4984539c07f08394cf2b4 Mon Sep 17 00:00:00 2001 From: Dominik Montada Date: Wed, 25 Sep 2024 10:47:14 +0200 Subject: [PATCH 47/65] [MIR] Fix return value when computed properties conflict with given prop (#109923) This fixes a test failure when expensive checks are enabled. Use the correct return value when computing machine function properties resulted in an error (e.g. when conflicting with explicitly set values). Without this, the machine verifier would crash even in the presence of parsing errors which should have gently terminated execution. --- llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 4 +-- .../machine-function-isssa-conflict.mir | 14 ++++++++ .../machine-function-nophis-conflict.mir | 18 ++++++++++ .../machine-function-novregs-conflict.mir | 13 +++++++ ...ptionally-computed-properties-conflict.mir | 35 ------------------- 5 files changed, 47 insertions(+), 37 deletions(-) create mode 100644 llvm/test/CodeGen/MIR/Generic/machine-function-isssa-conflict.mir create mode 100644 llvm/test/CodeGen/MIR/Generic/machine-function-nophis-conflict.mir create mode 100644 llvm/test/CodeGen/MIR/Generic/machine-function-novregs-conflict.mir delete mode 100644 llvm/test/CodeGen/MIR/Generic/machine-function-optionally-computed-properties-conflict.mir diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 3a00b8ec4771dd..be07fbf478b1d8 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -625,10 +625,10 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, MRI.freezeReservedRegs(); if (computeFunctionProperties(MF, YamlMF)) - return false; + return true; if (initializeCallSiteInfo(PFS, YamlMF)) - return false; + return true; setupDebugValueTracking(MF, PFS, YamlMF); diff --git a/llvm/test/CodeGen/MIR/Generic/machine-function-isssa-conflict.mir b/llvm/test/CodeGen/MIR/Generic/machine-function-isssa-conflict.mir new file mode 100644 index 00000000000000..362d54db7033fe --- /dev/null +++ b/llvm/test/CodeGen/MIR/Generic/machine-function-isssa-conflict.mir @@ -0,0 +1,14 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s + +# Test that computed properties are not conflicting with explicitly set +# properties + +--- +# CHECK: error: {{.*}}: TestIsSSAOverrideConflict has explicit property IsSSA, but is not valid SSA +name: TestIsSSAOverrideConflict +isSSA: true +body: | + bb.0: + %0:_(s32) = G_IMPLICIT_DEF + %0:_(s32) = G_IMPLICIT_DEF +... diff --git a/llvm/test/CodeGen/MIR/Generic/machine-function-nophis-conflict.mir b/llvm/test/CodeGen/MIR/Generic/machine-function-nophis-conflict.mir new file mode 100644 index 00000000000000..c113ea59a90496 --- /dev/null +++ b/llvm/test/CodeGen/MIR/Generic/machine-function-nophis-conflict.mir @@ -0,0 +1,18 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s + +# Test that computed properties are not conflicting with explicitly set +# properties + +--- +# CHECK: error: {{.*}}: TestNoPhisOverrideConflict has explicit property NoPhi, but contains at least one PHI +name: TestNoPhisOverrideConflict +noPhis: true +tracksRegLiveness: true +body: | + bb.0: + %0:_(s32) = G_IMPLICIT_DEF + + bb.1: + %1:_(s32) = PHI %0, %bb.0, %1, %bb.1 + G_BR %bb.1 +... diff --git a/llvm/test/CodeGen/MIR/Generic/machine-function-novregs-conflict.mir b/llvm/test/CodeGen/MIR/Generic/machine-function-novregs-conflict.mir new file mode 100644 index 00000000000000..5f394a4bbbdb62 --- /dev/null +++ b/llvm/test/CodeGen/MIR/Generic/machine-function-novregs-conflict.mir @@ -0,0 +1,13 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s + +# Test that computed properties are not conflicting with explicitly set +# properties + +--- +# CHECK: error: {{.*}}: TestNoVRegsOverrideConflict has explicit property NoVRegs, but contains virtual registers +name: TestNoVRegsOverrideConflict +noVRegs: true +body: | + bb.0: + %0:_(s32) = G_IMPLICIT_DEF +... diff --git a/llvm/test/CodeGen/MIR/Generic/machine-function-optionally-computed-properties-conflict.mir b/llvm/test/CodeGen/MIR/Generic/machine-function-optionally-computed-properties-conflict.mir deleted file mode 100644 index d8d178d90ae0af..00000000000000 --- a/llvm/test/CodeGen/MIR/Generic/machine-function-optionally-computed-properties-conflict.mir +++ /dev/null @@ -1,35 +0,0 @@ -# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s - -# Test that computed properties are not conflicting with explicitly set -# properties - ---- -# CHECK: error: {{.*}}: TestNoPhisOverrideConflict has explicit property NoPhi, but contains at least one PHI -name: TestNoPhisOverrideConflict -noPhis: true -tracksRegLiveness: true -body: | - bb.0: - %0:_(s32) = G_IMPLICIT_DEF - - bb.1: - %1:_(s32) = PHI %0, %bb.0, %1, %bb.1 - G_BR %bb.1 -... ---- -# CHECK: error: {{.*}}: TestIsSSAOverrideConflict has explicit property IsSSA, but is not valid SSA -name: TestIsSSAOverrideConflict -isSSA: true -body: | - bb.0: - %0:_(s32) = G_IMPLICIT_DEF - %0:_(s32) = G_IMPLICIT_DEF -... ---- -# CHECK: error: {{.*}}: TestNoVRegsOverrideConflict has explicit property NoVRegs, but contains virtual registers -name: TestNoVRegsOverrideConflict -noVRegs: true -body: | - bb.0: - %0:_(s32) = G_IMPLICIT_DEF -... From 53907ed5081b6cfde6cbe147ab06a074a4f3e0ed Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Wed, 25 Sep 2024 09:50:55 +0100 Subject: [PATCH 48/65] [clang][codegen] Don't mark "int" TBAA on FP libcalls with indirect args (#108853) On some targets, an FP libcall with argument types such as long double will be lowered to pass arguments indirectly via pointers. When this is the case we should not mark the libcall with "int" TBAA as it may lead to incorrect optimizations. Currently, this can be seen for long doubles on x86_64-w64-mingw32. The `load x86_fp80` after the call is (incorrectly) marked with "int" TBAA (overwriting the previous metadata for "long double"). Nothing seems to break due to this currently as the metadata is being incorrectly placed on the load and not the call. But if the metadata is moved to the call (which this patch ensures), LLVM will optimize out the setup for the arguments. --- clang/lib/CodeGen/CGBuiltin.cpp | 27 +- clang/lib/CodeGen/CGExpr.cpp | 6 +- clang/lib/CodeGen/CodeGenFunction.h | 3 +- .../math-libcalls-tbaa-indirect-args.c | 250 ++++++++++++++++++ 4 files changed, 280 insertions(+), 6 deletions(-) create mode 100644 clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 7bbf22569e73ca..566252b2636801 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -687,12 +687,31 @@ static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { return CGF.Builder.CreateICmpSLT(V, Zero); } +/// Checks no arguments or results are passed indirectly in the ABI (i.e. via a +/// hidden pointer). This is used to check annotating FP libcalls (that could +/// set `errno`) with "int" TBAA metadata is safe. If any floating-point +/// arguments are passed indirectly, setup for the call could be incorrectly +/// optimized out. +static bool HasNoIndirectArgumentsOrResults(CGFunctionInfo const &FnInfo) { + auto IsIndirect = [&](ABIArgInfo const &info) { + return info.isIndirect() || info.isIndirectAliased() || info.isInAlloca(); + }; + return !IsIndirect(FnInfo.getReturnInfo()) && + llvm::none_of(FnInfo.arguments(), + [&](CGFunctionInfoArgInfo const &ArgInfo) { + return IsIndirect(ArgInfo.info); + }); +} + static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD)); + llvm::CallBase *callOrInvoke = nullptr; + CGFunctionInfo const *FnInfo = nullptr; RValue Call = - CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); + CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot(), + /*Chain=*/nullptr, &callOrInvoke, &FnInfo); if (unsigned BuiltinID = FD->getBuiltinID()) { // Check whether a FP math builtin function, such as BI__builtin_expf @@ -702,12 +721,12 @@ static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, // Restrict to target with errno, for example, MacOS doesn't set errno. // TODO: Support builtin function with complex type returned, eg: cacosh if (ConstWithoutErrnoAndExceptions && CGF.CGM.getLangOpts().MathErrno && - !CGF.Builder.getIsFPConstrained() && Call.isScalar()) { + !CGF.Builder.getIsFPConstrained() && Call.isScalar() && + HasNoIndirectArgumentsOrResults(*FnInfo)) { // Emit "int" TBAA metadata on FP math libcalls. clang::QualType IntTy = Context.IntTy; TBAAAccessInfo TBAAInfo = CGF.CGM.getTBAAAccessInfo(IntTy); - Instruction *Inst = cast(Call.getScalarVal()); - CGF.CGM.DecorateInstructionWithTBAA(Inst, TBAAInfo); + CGF.CGM.DecorateInstructionWithTBAA(callOrInvoke, TBAAInfo); } } return Call; diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 35b5daaf6d4b55..9166db4c74128c 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5932,7 +5932,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Value *Chain, - llvm::CallBase **CallOrInvoke) { + llvm::CallBase **CallOrInvoke, + CGFunctionInfo const **ResolvedFnInfo) { // Get the actual function type. The callee type will always be a pointer to // function type or a block pointer type. assert(CalleeType->isFunctionPointerType() && @@ -6111,6 +6112,9 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionCall( Args, FnType, /*ChainCall=*/Chain); + if (ResolvedFnInfo) + *ResolvedFnInfo = &FnInfo; + // C99 6.5.2.2p6: // If the expression that denotes the called function has a type // that does not include a prototype, [the default argument diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index d4e46f77868eca..8a1f6ff00ada77 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4397,7 +4397,8 @@ class CodeGenFunction : public CodeGenTypeCache { } RValue EmitCall(QualType FnType, const CGCallee &Callee, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Value *Chain = nullptr, - llvm::CallBase **CallOrInvoke = nullptr); + llvm::CallBase **CallOrInvoke = nullptr, + CGFunctionInfo const **ResolvedFnInfo = nullptr); // If a Call or Invoke instruction was emitted for this CallExpr, this method // writes the pointer to `CallOrInvoke` if it's not null. diff --git a/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c b/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c new file mode 100644 index 00000000000000..b94f9641decc8e --- /dev/null +++ b/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c @@ -0,0 +1,250 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --filter "(@powl|@cargl|@ilogbl|!|load|store)" --version 5 +// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple x86_64-pc-win64 -o - | FileCheck %s -check-prefixes=CHECK-WIN64 +// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple i686-unknown-unknown -o - | FileCheck %s -check-prefixes=CHECK-I686 +// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple powerpc-unknown-unknown -o - | FileCheck %s -check-prefixes=CHECK-PPC +// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple armv7-none-linux-gnueabi -o - | FileCheck %s -check-prefixes=CHECK-ARM +// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple armv7-none-linux-gnueabihf -o - | FileCheck %s -check-prefixes=CHECK-ARM-HF +// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple thumbv7k-apple-watchos2.0 -o - -target-abi aapcs16 | FileCheck %s -check-prefixes=CHECK-THUMB +// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple aarch64-unknown-unknown -o - | FileCheck %s -check-prefixes=CHECK-AARCH +// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple spir -o - | FileCheck %s -check-prefixes=CHECK-SPIR +// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple x86_64-w64-mingw32 -o - | FileCheck %s -check-prefixes=CHECK-MINGW32 + +// This file checks that if arguments/results are passed indirectly (i.e. via +// pointers), then the "int" TBAA metadata is not set on the FP libcall as this +// can lead to optimizations incorrectly optimizing out the setup for the call. + +long double powl(long double a, long double b); + +// CHECK-LABEL: define dso_local x86_fp80 @test_powl( +// CHECK-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[TBAA2:![0-9]+]] +// +// CHECK-WIN64-LABEL: define dso_local x86_fp80 @test_powl( +// CHECK-WIN64-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-WIN64: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[TBAA2:![0-9]+]] +// +// CHECK-I686-LABEL: define dso_local x86_fp80 @test_powl( +// CHECK-I686-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-I686: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[TBAA3:![0-9]+]] +// +// CHECK-PPC-LABEL: define dso_local ppc_fp128 @test_powl( +// CHECK-PPC-SAME: ppc_fp128 noundef [[A:%.*]], ppc_fp128 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-PPC: [[CALL:%.*]] = tail call ppc_fp128 @powl(ppc_fp128 noundef [[A]], ppc_fp128 noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[TBAA2:![0-9]+]] +// +// CHECK-ARM-LABEL: define dso_local double @test_powl( +// CHECK-ARM-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-ARM: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR2:[0-9]+]], !tbaa [[TBAA3:![0-9]+]] +// +// CHECK-ARM-HF-LABEL: define dso_local double @test_powl( +// CHECK-ARM-HF-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-ARM-HF: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR2:[0-9]+]], !tbaa [[TBAA3:![0-9]+]] +// +// CHECK-THUMB-LABEL: define double @test_powl( +// CHECK-THUMB-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-THUMB: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR2:[0-9]+]], !tbaa [[TBAA3:![0-9]+]] +// +// CHECK-AARCH-LABEL: define dso_local fp128 @test_powl( +// CHECK-AARCH-SAME: fp128 noundef [[A:%.*]], fp128 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-AARCH: [[CALL:%.*]] = tail call fp128 @powl(fp128 noundef [[A]], fp128 noundef [[B]]) #[[ATTR2:[0-9]+]], !tbaa [[TBAA2:![0-9]+]] +// +// CHECK-SPIR-LABEL: define dso_local spir_func double @test_powl( +// CHECK-SPIR-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-SPIR: [[CALL:%.*]] = tail call spir_func double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[TBAA2:![0-9]+]] +// +// CHECK-MINGW32-LABEL: define dso_local void @test_powl( +// CHECK-MINGW32-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret(x86_fp80) align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly [[TMP0:%.*]], ptr nocapture noundef readonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-MINGW32: [[A:%.*]] = load x86_fp80, ptr [[TMP0]], align 16, !tbaa [[TBAA3:![0-9]+]] +// CHECK-MINGW32: [[B:%.*]] = load x86_fp80, ptr [[TMP1]], align 16, !tbaa [[TBAA3]] +// CHECK-MINGW32: store x86_fp80 [[A]], ptr [[BYVAL_TEMP:%.*]], align 16, !tbaa [[TBAA3]] +// CHECK-MINGW32: store x86_fp80 [[B]], ptr [[BYVAL_TEMP1:%.*]], align 16, !tbaa [[TBAA3]] +// CHECK-MINGW32: call void @powl(ptr dead_on_unwind nonnull writable sret(x86_fp80) align 16 [[TMP:%.*]], ptr noundef nonnull [[BYVAL_TEMP]], ptr noundef nonnull [[BYVAL_TEMP1]]) #[[ATTR3:[0-9]+]] +// CHECK-MINGW32: [[TMP2:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[TBAA3]] +// CHECK-MINGW32: store x86_fp80 [[TMP2]], ptr [[AGG_RESULT]], align 16, !tbaa [[TBAA3]] +// +long double test_powl(long double a, long double b) { + return powl(a, b); +} + +// CHECK-LABEL: define dso_local { x86_fp80, x86_fp80 } @test_cargl( +// CHECK-SAME: ptr nocapture noundef readonly byval({ x86_fp80, x86_fp80 }) align 16 [[CLD:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK: [[CLD_REAL:%.*]] = load x86_fp80, ptr [[CLD]], align 16 +// CHECK: [[CLD_IMAG:%.*]] = load x86_fp80, ptr [[CLD_IMAGP:%.*]], align 16 +// CHECK: store x86_fp80 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 16 +// CHECK: store x86_fp80 [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 16 +// CHECK: [[CALL:%.*]] = tail call x86_fp80 @cargl(ptr noundef nonnull byval({ x86_fp80, x86_fp80 }) align 16 [[BYVAL_TEMP]]) #[[ATTR5]] +// +// CHECK-WIN64-LABEL: define dso_local { x86_fp80, x86_fp80 } @test_cargl( +// CHECK-WIN64-SAME: ptr nocapture noundef readonly byval({ x86_fp80, x86_fp80 }) align 16 [[CLD:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-WIN64: [[CLD_REAL:%.*]] = load x86_fp80, ptr [[CLD]], align 16 +// CHECK-WIN64: [[CLD_IMAG:%.*]] = load x86_fp80, ptr [[CLD_IMAGP:%.*]], align 16 +// CHECK-WIN64: store x86_fp80 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 16 +// CHECK-WIN64: store x86_fp80 [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 16 +// CHECK-WIN64: [[CALL:%.*]] = tail call x86_fp80 @cargl(ptr noundef nonnull byval({ x86_fp80, x86_fp80 }) align 16 [[BYVAL_TEMP]]) #[[ATTR5]] +// +// CHECK-I686-LABEL: define dso_local void @test_cargl( +// CHECK-I686-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ x86_fp80, x86_fp80 }) align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval({ x86_fp80, x86_fp80 }) align 4 [[CLD:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-I686: [[CLD_REAL:%.*]] = load x86_fp80, ptr [[CLD]], align 4 +// CHECK-I686: [[CLD_IMAG:%.*]] = load x86_fp80, ptr [[CLD_IMAGP:%.*]], align 4 +// CHECK-I686: store x86_fp80 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 4 +// CHECK-I686: store x86_fp80 [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 4 +// CHECK-I686: [[CALL:%.*]] = tail call x86_fp80 @cargl(ptr noundef nonnull byval({ x86_fp80, x86_fp80 }) align 4 [[BYVAL_TEMP]]) #[[ATTR5]] +// CHECK-I686: store x86_fp80 [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 4 +// CHECK-I686: store x86_fp80 [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 4 +// +// CHECK-PPC-LABEL: define dso_local void @test_cargl( +// CHECK-PPC-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ ppc_fp128, ppc_fp128 }) align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval({ ppc_fp128, ppc_fp128 }) align 16 [[CLD:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-PPC: [[CLD_REAL:%.*]] = load ppc_fp128, ptr [[CLD]], align 16 +// CHECK-PPC: [[CLD_IMAG:%.*]] = load ppc_fp128, ptr [[CLD_IMAGP:%.*]], align 16 +// CHECK-PPC: store ppc_fp128 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 16 +// CHECK-PPC: store ppc_fp128 [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 16 +// CHECK-PPC: [[CALL:%.*]] = tail call ppc_fp128 @cargl(ptr noundef nonnull byval({ ppc_fp128, ppc_fp128 }) align 16 [[BYVAL_TEMP]]) #[[ATTR3]] +// CHECK-PPC: store ppc_fp128 [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 16 +// CHECK-PPC: store ppc_fp128 [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 16 +// +// CHECK-ARM-LABEL: define dso_local void @test_cargl( +// CHECK-ARM-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ double, double }) align 8 [[AGG_RESULT:%.*]], [2 x i64] noundef [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-ARM: [[CALL:%.*]] = tail call double @cargl([2 x i64] noundef [[CLD_COERCE]]) #[[ATTR2]], !tbaa [[TBAA3]] +// CHECK-ARM: store double [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 8 +// CHECK-ARM: store double [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 8 +// +// CHECK-ARM-HF-LABEL: define dso_local { double, double } @test_cargl( +// CHECK-ARM-HF-SAME: { double, double } noundef [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-ARM-HF: [[CALL:%.*]] = tail call double @cargl({ double, double } noundef [[CLD_COERCE]]) #[[ATTR2]], !tbaa [[TBAA3]] +// +// CHECK-THUMB-LABEL: define { double, double } @test_cargl( +// CHECK-THUMB-SAME: [2 x double] noundef [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-THUMB: [[CALL:%.*]] = tail call double @cargl([2 x double] noundef [[CLD_COERCE]]) #[[ATTR2]], !tbaa [[TBAA3]] +// +// CHECK-AARCH-LABEL: define dso_local { fp128, fp128 } @test_cargl( +// CHECK-AARCH-SAME: [2 x fp128] noundef alignstack(16) [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-AARCH: [[CALL:%.*]] = tail call fp128 @cargl([2 x fp128] noundef alignstack(16) [[CLD_COERCE]]) #[[ATTR2]], !tbaa [[TBAA2]] +// +// CHECK-SPIR-LABEL: define dso_local spir_func void @test_cargl( +// CHECK-SPIR-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ double, double }) align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval({ double, double }) align 8 [[CLD:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-SPIR: [[CLD_REAL:%.*]] = load double, ptr [[CLD]], align 8 +// CHECK-SPIR: [[CLD_IMAG:%.*]] = load double, ptr [[CLD_IMAGP:%.*]], align 8 +// CHECK-SPIR: store double [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 8 +// CHECK-SPIR: store double [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 8 +// CHECK-SPIR: [[CALL:%.*]] = tail call spir_func double @cargl(ptr noundef nonnull byval({ double, double }) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// CHECK-SPIR: store double [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 8 +// CHECK-SPIR: store double [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 8 +// +// CHECK-MINGW32-LABEL: define dso_local void @test_cargl( +// CHECK-MINGW32-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ x86_fp80, x86_fp80 }) align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly [[CLD:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-MINGW32: [[CLD_REAL:%.*]] = load x86_fp80, ptr [[CLD]], align 16 +// CHECK-MINGW32: [[CLD_IMAG:%.*]] = load x86_fp80, ptr [[CLD_IMAGP:%.*]], align 16 +// CHECK-MINGW32: store x86_fp80 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 16 +// CHECK-MINGW32: store x86_fp80 [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 16 +// CHECK-MINGW32: call void @cargl(ptr dead_on_unwind nonnull writable sret(x86_fp80) align 16 [[TMP:%.*]], ptr noundef nonnull [[BYVAL_TEMP]]) #[[ATTR3]] +// CHECK-MINGW32: [[TMP0:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[TBAA3]] +// CHECK-MINGW32: [[CLD_REAL3:%.*]] = load x86_fp80, ptr [[CLD]], align 16 +// CHECK-MINGW32: [[CLD_IMAG5:%.*]] = load x86_fp80, ptr [[CLD_IMAGP]], align 16 +// CHECK-MINGW32: store x86_fp80 [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 16 +// CHECK-MINGW32: store x86_fp80 [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 16 +// +_Complex long double test_cargl(_Complex long double cld) { + long double v2 = __builtin_cargl(cld); + _Complex long double tmp = v2 * cld; + return tmp; +} + + +int ilogbl(long double a); + +// CHECK-LABEL: define dso_local i32 @test_ilogb( +// CHECK-SAME: x86_fp80 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[TBAA2]] +// +// CHECK-WIN64-LABEL: define dso_local i32 @test_ilogb( +// CHECK-WIN64-SAME: x86_fp80 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-WIN64: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[TBAA2]] +// +// CHECK-I686-LABEL: define dso_local i32 @test_ilogb( +// CHECK-I686-SAME: x86_fp80 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-I686: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[TBAA3]] +// +// CHECK-PPC-LABEL: define dso_local i32 @test_ilogb( +// CHECK-PPC-SAME: ppc_fp128 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-PPC: [[CALL:%.*]] = tail call i32 @ilogbl(ppc_fp128 noundef [[A]]) #[[ATTR3]], !tbaa [[TBAA2]] +// +// CHECK-ARM-LABEL: define dso_local i32 @test_ilogb( +// CHECK-ARM-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-ARM: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR2]], !tbaa [[TBAA3]] +// +// CHECK-ARM-HF-LABEL: define dso_local i32 @test_ilogb( +// CHECK-ARM-HF-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-ARM-HF: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR2]], !tbaa [[TBAA3]] +// +// CHECK-THUMB-LABEL: define i32 @test_ilogb( +// CHECK-THUMB-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-THUMB: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR2]], !tbaa [[TBAA3]] +// +// CHECK-AARCH-LABEL: define dso_local i32 @test_ilogb( +// CHECK-AARCH-SAME: fp128 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-AARCH: [[CALL:%.*]] = tail call i32 @ilogbl(fp128 noundef [[A]]) #[[ATTR2]], !tbaa [[TBAA2]] +// +// CHECK-SPIR-LABEL: define dso_local spir_func i32 @test_ilogb( +// CHECK-SPIR-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-SPIR: [[CALL:%.*]] = tail call spir_func i32 @ilogbl(double noundef [[A]]) #[[ATTR3]], !tbaa [[TBAA2]] +// +// CHECK-MINGW32-LABEL: define dso_local i32 @test_ilogb( +// CHECK-MINGW32-SAME: ptr nocapture noundef readonly [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-MINGW32: [[A:%.*]] = load x86_fp80, ptr [[TMP0]], align 16, !tbaa [[TBAA3]] +// CHECK-MINGW32: store x86_fp80 [[A]], ptr [[BYVAL_TEMP:%.*]], align 16, !tbaa [[TBAA3]] +// CHECK-MINGW32: [[CALL:%.*]] = call i32 @ilogbl(ptr noundef nonnull [[BYVAL_TEMP]]) #[[ATTR3]] +// +int test_ilogb(long double a) { + return ilogbl(a); +} +//. +// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-WIN64: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-WIN64: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK-WIN64: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-WIN64: [[META5]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-I686: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK-I686: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +// CHECK-I686: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-I686: [[META6]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-PPC: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-PPC: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK-PPC: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-PPC: [[META5]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-ARM: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK-ARM: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +// CHECK-ARM: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-ARM: [[META6]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-ARM-HF: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK-ARM-HF: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +// CHECK-ARM-HF: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-ARM-HF: [[META6]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-THUMB: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK-THUMB: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +// CHECK-THUMB: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-THUMB: [[META6]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-AARCH: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-AARCH: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK-AARCH: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-AARCH: [[META5]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-SPIR: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-SPIR: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK-SPIR: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-SPIR: [[META5]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-MINGW32: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK-MINGW32: [[META4]] = !{!"long double", [[META5:![0-9]+]], i64 0} +// CHECK-MINGW32: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-MINGW32: [[META6]] = !{!"Simple C/C++ TBAA"} +//. From 445d8b2d10b2bb9a5f50e3fe0671045acd309a04 Mon Sep 17 00:00:00 2001 From: CarolineConcatto Date: Wed, 25 Sep 2024 09:53:23 +0100 Subject: [PATCH 49/65] [Clang][LLVM][AArch64] Add intrinsic for LUTI4 SME2 instruction (#97755) This patch adds these intrinsics: // Variants are also available for: _s8 svuint8x4_t svluti4_zt_u8_x4(uint64_t zt0, svuint8x2_t zn) __arm_streaming __arm_in("zt0"); according to PR#324[1] [1]ARM-software/acle#324 --- clang/include/clang/Basic/arm_sme.td | 5 ++ .../acle_sme2_luti4_zt.c | 82 +++++++++++++++++++ .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 5 ++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 6 ++ .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 19 ++++- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 2 +- .../AArch64/sme2-intrinsics-write-zt.ll | 17 ++++ 7 files changed, 132 insertions(+), 4 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index ae6b55e98827ff..9c9f31f3884069 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -817,4 +817,9 @@ multiclass ZAReadzArray{ defm SVREADZ_VG2 : ZAReadzArray<"2">; defm SVREADZ_VG4 : ZAReadzArray<"4">; + +let SMETargetGuard = "sme2,sme-lutv2" in { + def SVLUTI4_ZT_X4 : SInst<"svluti4_zt_{d}_x4", "4i2.u", "cUc", MergeNone, "aarch64_sme_luti4_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>]>; +} + } // let SVETargetGuard = InvalidMode diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c new file mode 100644 index 00000000000000..47f36a8b000c07 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c @@ -0,0 +1,82 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -S -Werror -Wall -o /dev/null %s +// REQUIRES: aarch64-registered-target + +#include + +// CHECK-LABEL: define dso_local @test_luti4_zt_u8_x4( +// CHECK-SAME: [[OP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, [[TMP0]], [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48) +// CHECK-NEXT: ret [[TMP10]] +// +// CHECK-CXX-LABEL: define dso_local @_Z19test_luti4_zt_u8_x411svuint8x2_t( +// CHECK-CXX-SAME: [[OP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 0) +// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 16) +// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, [[TMP0]], [[TMP1]]) +// CHECK-CXX-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-CXX-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-CXX-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-CXX-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-CXX-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-CXX-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) +// CHECK-CXX-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-CXX-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48) +// CHECK-CXX-NEXT: ret [[TMP10]] +// +svuint8x4_t test_luti4_zt_u8_x4(svuint8x2_t op) __arm_streaming __arm_in("zt0") { + return svluti4_zt_u8_x4(0, op); +} + +// CHECK-LABEL: define dso_local @test_luti4_zt_s8_x4( +// CHECK-SAME: [[OP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, [[TMP0]], [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48) +// CHECK-NEXT: ret [[TMP10]] +// +// CHECK-CXX-LABEL: define dso_local @_Z19test_luti4_zt_s8_x411svuint8x2_t( +// CHECK-CXX-SAME: [[OP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 0) +// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 16) +// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, [[TMP0]], [[TMP1]]) +// CHECK-CXX-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-CXX-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-CXX-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-CXX-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-CXX-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-CXX-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) +// CHECK-CXX-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-CXX-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48) +// CHECK-CXX-NEXT: ret [[TMP10]] +// +svint8x4_t test_luti4_zt_s8_x4(svuint8x2_t op) __arm_streaming __arm_in("zt0") { + return svluti4_zt_s8_x4(0, op); +} diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp index 5de97649af5d3a..d9bb6daf974d5b 100644 --- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp @@ -350,3 +350,8 @@ void test_svdot_multi_za32_bad_lane(uint32_t slice_base, svuint16_t z_u16, svsudot_lane_za32_s8_vg1x2(slice_base, z_s8x2, z_u8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} svsudot_lane_za32_s8_vg1x4(slice_base, z_s8x4, z_u8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} } + +void test_luti4_zt_x4(svuint8x2_t op) __arm_streaming __arm_in("zt0") { + // Check Zt tile 0 + svluti4_zt_u8_x4(1, op); // expected-error {{argument value 1 is outside the valid range [0, 0]}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 8ffa2d0878e116..bf11cb0ad9e0f7 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3762,6 +3762,12 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], [ImmArg>, ImmArg>, IntrReadMem]>; + + def int_aarch64_sme_luti4_zt_x4 + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], + [ImmArg>, IntrNoMem, IntrHasSideEffects]>; + } // SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2 diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 69806c9c3fdbf7..ebd7f938289b5a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -401,7 +401,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { } void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc, - uint32_t MaxImm); + uint32_t MaxImm, bool IsMultiVector = false); template bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) { @@ -1977,15 +1977,23 @@ void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs, void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, - unsigned Opc, uint32_t MaxImm) { + unsigned Opc, uint32_t MaxImm, + bool IsMultiVector) { if (ConstantSDNode *Imm = dyn_cast(Node->getOperand(4))) if (Imm->getZExtValue() > MaxImm) return; SDValue ZtValue; + SmallVector Ops; if (!ImmToReg(Node->getOperand(2), ZtValue)) return; - SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)}; + Ops.push_back(ZtValue); + if (IsMultiVector) { + Ops.push_back(createZMulTuple({Node->getOperand(3), Node->getOperand(4)})); + } else { + Ops.push_back(Node->getOperand(3)); + Ops.push_back(Node->getOperand(4)); + } SDLoc DL(Node); EVT VT = Node->getValueType(0); @@ -5507,6 +5515,11 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { SelectMultiVectorLuti(Node, 2, Opc, 3); return; } + case Intrinsic::aarch64_sme_luti4_zt_x4: { + // Does not have immediate but it has 2ZPR input + SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z, 0, true); + return; + } } } break; case ISD::INTRINSIC_WO_CHAIN: { diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index ebe4121c944b1e..e2261694d658c5 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -940,7 +940,7 @@ defm FAMIN_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famin", 0b0010101>; let Predicates = [HasSME2, HasSME_LUTv2] in { defm MOVT : sme2_movt_zt_to_zt<"movt", 0b0011111>; -def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">; +def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">; } //[HasSME2, HasSME_LUTv2] let Predicates = [HasSME2p1, HasSME_LUTv2] in { diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll new file mode 100644 index 00000000000000..778f31194baf45 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -verify-machineinstrs -force-streaming < %s | FileCheck %s + +target triple = "aarch64-linux" + +define {, , , } @test_luti4_zt_i8( %v0, %v1) #0 { +; CHECK-LABEL: test_luti4_zt_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: luti4 { z0.b - z3.b }, zt0, { z0, z1 } +; CHECK-NEXT: ret + %res = call {, , , } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, %v0, %v1) + ret {, , , } %res +} + +attributes #0 = { "target-features"="+sme2,+sme-lutv2"} From 5a191e3cd904b5fee202569e54fe04f91f1a697f Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Wed, 25 Sep 2024 09:54:49 +0100 Subject: [PATCH 50/65] [RISCV][test] Correct +experimental-ztso attr to +ztso in test This doesn't chane the test output, but means it's now testing what it's supposed to. --- llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll index a9c8a4be7d2b47..b5e892c0ff6aca 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll @@ -3,13 +3,13 @@ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32IA %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32IA %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64IA %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64IA %s From f1bbabd6289a351430657a2eb3b57fffa8c6d248 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 25 Sep 2024 10:03:39 +0100 Subject: [PATCH 51/65] [ARM] Lower arm_neon_vbsl to ARMISD::VBSP and fold (vbsl x, y, y) to y (#109761) This helps clean up the patterns a little and will help share combines on both the intrinsic and VBSP. A combine is then added to fold away the VBSP if both the selected operands are the same. --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 9 +++++ llvm/lib/Target/ARM/ARMInstrNEON.td | 54 +++++++++++-------------- llvm/test/CodeGen/ARM/vbsl.ll | 5 +-- 3 files changed, 35 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index a03928b618df03..f891aece26848c 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -17653,6 +17653,11 @@ SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N, // No immediate versions of these to check for. break; + case Intrinsic::arm_neon_vbsl: { + SDLoc dl(N); + return DAG.getNode(ARMISD::VBSP, dl, N->getValueType(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3)); + } case Intrinsic::arm_mve_vqdmlah: case Intrinsic::arm_mve_vqdmlash: case Intrinsic::arm_mve_vqrdmlah: @@ -19072,6 +19077,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); break; } + case ARMISD::VBSP: + if (N->getOperand(1) == N->getOperand(2)) + return N->getOperand(1); + return SDValue(); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (N->getConstantOperandVal(1)) { diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index fcabc9076e4d30..48dcbdb137123a 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -5524,26 +5524,23 @@ def : Pat<(v16i8 (vnotq QPR:$src)), // with different register constraints; it just inserts copies. // That is why pseudo VBSP implemented. Is is expanded later into // VBIT/VBIF/VBSL taking into account register constraints to avoid copies. -def VBSPd - : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), - IIC_VBINiD, "", - [(set DPR:$Vd, - (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; +def VBSPd : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), + IIC_VBINiD, "", []>; let Predicates = [HasNEON] in { -def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1), - (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))), +def : Pat<(v8i8 (NEONvbsp (v8i8 DPR:$src1), + (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))), (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1), - (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))), +def : Pat<(v4i16 (NEONvbsp (v4i16 DPR:$src1), + (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))), (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1), - (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), +def : Pat<(v2i32 (NEONvbsp (v2i32 DPR:$src1), + (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1), - (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), +def : Pat<(v2f32 (NEONvbsp (v2f32 DPR:$src1), + (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1), - (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), +def : Pat<(v1i64 (NEONvbsp (v1i64 DPR:$src1), + (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; def : Pat<(v8i8 (or (and DPR:$Vn, DPR:$Vd), @@ -5560,26 +5557,23 @@ def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd), (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; } -def VBSPq - : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), - IIC_VBINiQ, "", - [(set QPR:$Vd, - (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; +def VBSPq : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), + IIC_VBINiQ, "", []>; let Predicates = [HasNEON] in { -def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1), - (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))), +def : Pat<(v16i8 (NEONvbsp (v16i8 QPR:$src1), + (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))), (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; -def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1), - (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))), +def : Pat<(v8i16 (NEONvbsp (v8i16 QPR:$src1), + (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))), (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; -def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1), - (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))), +def : Pat<(v4i32 (NEONvbsp (v4i32 QPR:$src1), + (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))), (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; -def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1), - (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))), +def : Pat<(v4f32 (NEONvbsp (v4f32 QPR:$src1), + (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))), (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; -def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1), - (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), +def : Pat<(v2i64 (NEONvbsp (v2i64 QPR:$src1), + (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; def : Pat<(v16i8 (or (and QPR:$Vn, QPR:$Vd), diff --git a/llvm/test/CodeGen/ARM/vbsl.ll b/llvm/test/CodeGen/ARM/vbsl.ll index d5aaf3e6f30bd3..0ef725fc91b547 100644 --- a/llvm/test/CodeGen/ARM/vbsl.ll +++ b/llvm/test/CodeGen/ARM/vbsl.ll @@ -264,8 +264,7 @@ define <2 x i64> @test_vbslq_u64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounw define <8 x i8> @same_param_all(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: same_param_all: ; CHECK: @ %bb.0: -; CHECK-NEXT: vorr d0, d1, d1 -; CHECK-NEXT: vbsl d0, d1, d1 +; CHECK-NEXT: vmov.f64 d0, d1 ; CHECK-NEXT: bx lr %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %b, <8 x i8> %b, <8 x i8> %b) ret <8 x i8> %vbsl.i @@ -274,7 +273,7 @@ define <8 x i8> @same_param_all(<8 x i8> %a, <8 x i8> %b) { define <8 x i8> @same_param_12(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: same_param_12: ; CHECK: @ %bb.0: -; CHECK-NEXT: vbsl d0, d1, d1 +; CHECK-NEXT: vmov.f64 d0, d1 ; CHECK-NEXT: bx lr %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %b) ret <8 x i8> %vbsl.i From bbb3679ad8e5a6d2ed6432a8171465eadf9f73f8 Mon Sep 17 00:00:00 2001 From: Vyacheslav Levytskyy Date: Wed, 25 Sep 2024 11:06:17 +0200 Subject: [PATCH 52/65] [SPIRV] Fix compilation error 'use of parameter from containing function' when building PR #106429 with gcc (#109924) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It appears that PR https://github.com/llvm/llvm-project/pull/106429 introduced an issue for builds with SPIRV Backend target when building with gcc, e.g.: ``` /llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.cpp:263:36: error: use of parameter from containing function 263 | llvm::SyncScope::ID SubGroup = Ctx.getOrInsertSyncScopeID("subgroup"); | ^~~ /llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.cpp:256:46: note: ‘llvm::LLVMContext& Ctx’ declared here 256 | SPIRV::Scope::Scope getMemScope(LLVMContext &Ctx, SyncScope::ID Id) { ``` This PR fixes this by removing struct and using static const variables instead. --- llvm/lib/Target/SPIRV/SPIRVUtils.cpp | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp index aec144f6f05861..a8016d42b0154f 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp @@ -254,26 +254,27 @@ SPIRV::MemorySemantics::MemorySemantics getMemSemantics(AtomicOrdering Ord) { } SPIRV::Scope::Scope getMemScope(LLVMContext &Ctx, SyncScope::ID Id) { - static const struct { - // Named by - // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_scope_id. - // We don't need aliases for Invocation and CrossDevice, as we already have - // them covered by "singlethread" and "" strings respectively (see - // implementation of LLVMContext::LLVMContext()). - llvm::SyncScope::ID SubGroup = Ctx.getOrInsertSyncScopeID("subgroup"); - llvm::SyncScope::ID WorkGroup = Ctx.getOrInsertSyncScopeID("workgroup"); - llvm::SyncScope::ID Device = Ctx.getOrInsertSyncScopeID("device"); - } SSIDs{}; + // Named by + // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_scope_id. + // We don't need aliases for Invocation and CrossDevice, as we already have + // them covered by "singlethread" and "" strings respectively (see + // implementation of LLVMContext::LLVMContext()). + static const llvm::SyncScope::ID SubGroup = + Ctx.getOrInsertSyncScopeID("subgroup"); + static const llvm::SyncScope::ID WorkGroup = + Ctx.getOrInsertSyncScopeID("workgroup"); + static const llvm::SyncScope::ID Device = + Ctx.getOrInsertSyncScopeID("device"); if (Id == llvm::SyncScope::SingleThread) return SPIRV::Scope::Invocation; else if (Id == llvm::SyncScope::System) return SPIRV::Scope::CrossDevice; - else if (Id == SSIDs.SubGroup) + else if (Id == SubGroup) return SPIRV::Scope::Subgroup; - else if (Id == SSIDs.WorkGroup) + else if (Id == WorkGroup) return SPIRV::Scope::Workgroup; - else if (Id == SSIDs.Device) + else if (Id == Device) return SPIRV::Scope::Device; return SPIRV::Scope::CrossDevice; } From 706821ba8ff9db829252581dd12d8c5ee2e7b3f0 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 25 Sep 2024 10:05:33 +0100 Subject: [PATCH 53/65] [lldb][test] Skip TestConcurrentVFork on all Linux And given that it is only for Linux - effectively skip it, but in a way where we don't forget that it's Linux only. See https://github.com/llvm/llvm-project/issues/85084. This test times out on occasion on Arm, AArch64 and X86 Linux, which I saw just today in a buildkite build. Causing a failure that is 1. confusing because the PR wasn't for LLDB and 2. annoying to find in the giant log file (which isn't the test's fault, but it adds to the overhead). It's probably important to have this test running somewhere but right now it's causing too much noise to do so. --- .../fork/concurrent_vfork/TestConcurrentVFork.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lldb/test/API/functionalities/fork/concurrent_vfork/TestConcurrentVFork.py b/lldb/test/API/functionalities/fork/concurrent_vfork/TestConcurrentVFork.py index dd9500c186b2c8..3b5efb834b1626 100644 --- a/lldb/test/API/functionalities/fork/concurrent_vfork/TestConcurrentVFork.py +++ b/lldb/test/API/functionalities/fork/concurrent_vfork/TestConcurrentVFork.py @@ -49,7 +49,7 @@ def follow_child_helper(self, use_fork, call_exec): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_parent_vfork_no_exec(self): """ Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-parent. @@ -59,7 +59,7 @@ def test_follow_parent_vfork_no_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_parent_fork_no_exec(self): """ Make sure that debugging concurrent fork() from multiple threads won't crash lldb during follow-parent. @@ -69,7 +69,7 @@ def test_follow_parent_fork_no_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_parent_vfork_call_exec(self): """ Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-parent. @@ -79,7 +79,7 @@ def test_follow_parent_vfork_call_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_parent_fork_call_exec(self): """ Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-parent. @@ -89,7 +89,7 @@ def test_follow_parent_fork_call_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_child_vfork_no_exec(self): """ Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-child. @@ -99,7 +99,7 @@ def test_follow_child_vfork_no_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_child_fork_no_exec(self): """ Make sure that debugging concurrent fork() from multiple threads won't crash lldb during follow-child. @@ -109,7 +109,7 @@ def test_follow_child_fork_no_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_child_vfork_call_exec(self): """ Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-child. @@ -119,7 +119,7 @@ def test_follow_child_vfork_call_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_child_fork_call_exec(self): """ Make sure that debugging concurrent fork() from multiple threads won't crash lldb during follow-child. From c93e29439b1ab8ef6873c385f152a06e3395cb59 Mon Sep 17 00:00:00 2001 From: dlav-sc Date: Wed, 25 Sep 2024 12:13:40 +0300 Subject: [PATCH 54/65] [lldb] fix vFile:open, vFile:unlink error codes (#106950) This patch makes gdb-server sends only GDB RSP supported error codes during vFile:open and vFile:unlink handling. --- .../GDBRemoteCommunicationServerCommon.cpp | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.cpp index f9d37490e16aec..324db3db7eb4c7 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.cpp @@ -496,6 +496,17 @@ GDBRemoteCommunicationServerCommon::Handle_qSpeedTest( return SendErrorResponse(7); } +static GDBErrno system_errno_to_gdb(int err) { + switch (err) { +#define HANDLE_ERRNO(name, value) \ + case name: \ + return GDB_##name; +#include "Plugins/Process/gdb-remote/GDBRemoteErrno.def" + default: + return GDB_EUNKNOWN; + } +} + GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerCommon::Handle_vFile_Open( StringExtractorGDBRemote &packet) { @@ -522,9 +533,7 @@ GDBRemoteCommunicationServerCommon::Handle_vFile_Open( } else { response.PutCString("-1"); std::error_code code = errorToErrorCode(file.takeError()); - if (code.category() == std::system_category()) { - response.Printf(",%x", code.value()); - } + response.Printf(",%x", system_errno_to_gdb(code.value())); } return SendPacketNoLock(response.GetString()); @@ -534,17 +543,6 @@ GDBRemoteCommunicationServerCommon::Handle_vFile_Open( return SendErrorResponse(18); } -static GDBErrno system_errno_to_gdb(int err) { - switch (err) { -#define HANDLE_ERRNO(name, value) \ - case name: \ - return GDB_##name; -#include "Plugins/Process/gdb-remote/GDBRemoteErrno.def" - default: - return GDB_EUNKNOWN; - } -} - GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerCommon::Handle_vFile_Close( StringExtractorGDBRemote &packet) { @@ -727,7 +725,8 @@ GDBRemoteCommunicationServerCommon::Handle_vFile_unlink( packet.GetHexByteString(path); Status error(llvm::sys::fs::remove(path)); StreamString response; - response.Printf("F%x,%x", error.GetError(), error.GetError()); + response.Printf("F%x,%x", error.GetError(), + system_errno_to_gdb(error.GetError())); return SendPacketNoLock(response.GetString()); } From 4d459136f50bb931987fd7dbcd29564ee9cc13ae Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 25 Sep 2024 10:16:58 +0100 Subject: [PATCH 55/65] [llvm][docs] Update the project and runtimes lists (#109788) And add a note to explain which variable to prefer if the project can go in both. --------- Co-authored-by: Nikita Popov --- llvm/docs/CMake.rst | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst index b5adb22d8f33b1..321bae48594cf9 100644 --- a/llvm/docs/CMake.rst +++ b/llvm/docs/CMake.rst @@ -577,7 +577,12 @@ enabled sub-projects. Nearly all of these variable names begin with The full list is: - ``clang;clang-tools-extra;cross-project-tests;libc;libclc;lld;lldb;openmp;polly;pstl`` + ``bolt;clang;clang-tools-extra;compiler-rt;cross-project-tests;libc;libclc;lld;lldb;mlir;openmp;polly;pstl`` + + .. note:: + Some projects listed here can also go in ``LLVM_ENABLE_RUNTIMES``. They + should only appear in one of the two lists. If a project is a valid possiblity + for both, prefer putting it in ``LLVM_ENABLE_RUNTIMES``. **LLVM_ENABLE_RTTI**:BOOL Build LLVM with run-time type information. Defaults to OFF. @@ -594,7 +599,7 @@ enabled sub-projects. Nearly all of these variable names begin with The full list is: - ``compiler-rt;libc;libcxx;libcxxabi;libunwind;openmp`` + ``libc;libunwind;libcxxabi;pstl;libcxx;compiler-rt;openmp;llvm-libgcc;offload`` To enable all of them, use: From b7ea2643cefe1ad7b1b0feeea3ff64e458eef0a3 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 25 Sep 2024 10:21:45 +0100 Subject: [PATCH 56/65] [llvm][docs] Fix RISCVUsage docs build error After changes in #109651. Warning, treated as error: /home/davspi01/work/open_source/llvm-project/llvm/docs/RISCVUsage.rst::Anonymous hyperlink mismatch: 1 references but 0 targets. In typical RST fashion, all that was missing was a space between the last word and the opening `<` of the link. --- llvm/docs/RISCVUsage.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index 7bea64792d472c..d22f642865bb3a 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -154,7 +154,7 @@ on support follow. ``Za64rs`` Supported (`See note <#riscv-profiles-extensions-note>`__) ``Zaamo`` Assembly Support ``Zabha`` Supported - ``Zacas`` Supported (`See note<#riscv-zacas-note>`__) + ``Zacas`` Supported (`See note <#riscv-zacas-note>`__) ``Zalrsc`` Assembly Support ``Zama16b`` Supported (`See note <#riscv-profiles-extensions-note>`__) ``Zawrs`` Assembly Support From 02f46d7fb8b2a2434b1597fb96f65d7f82f3aeac Mon Sep 17 00:00:00 2001 From: Caroline Concatto Date: Wed, 25 Sep 2024 09:25:28 +0000 Subject: [PATCH 57/65] Revert "[Clang][LLVM][AArch64] Add intrinsic for LUTI4 SME2 instruction (#97755)" Going to revert to Fix test in clang as it is failing This reverts commit 445d8b2d10b2bb9a5f50e3fe0671045acd309a04. --- clang/include/clang/Basic/arm_sme.td | 5 -- .../acle_sme2_luti4_zt.c | 82 ------------------- .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 5 -- llvm/include/llvm/IR/IntrinsicsAArch64.td | 6 -- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 19 +---- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 2 +- .../AArch64/sme2-intrinsics-write-zt.ll | 17 ---- 7 files changed, 4 insertions(+), 132 deletions(-) delete mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c delete mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 9c9f31f3884069..ae6b55e98827ff 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -817,9 +817,4 @@ multiclass ZAReadzArray{ defm SVREADZ_VG2 : ZAReadzArray<"2">; defm SVREADZ_VG4 : ZAReadzArray<"4">; - -let SMETargetGuard = "sme2,sme-lutv2" in { - def SVLUTI4_ZT_X4 : SInst<"svluti4_zt_{d}_x4", "4i2.u", "cUc", MergeNone, "aarch64_sme_luti4_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>]>; -} - } // let SVETargetGuard = InvalidMode diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c deleted file mode 100644 index 47f36a8b000c07..00000000000000 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c +++ /dev/null @@ -1,82 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX - -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -S -Werror -Wall -o /dev/null %s -// REQUIRES: aarch64-registered-target - -#include - -// CHECK-LABEL: define dso_local @test_luti4_zt_u8_x4( -// CHECK-SAME: [[OP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48) -// CHECK-NEXT: ret [[TMP10]] -// -// CHECK-CXX-LABEL: define dso_local @_Z19test_luti4_zt_u8_x411svuint8x2_t( -// CHECK-CXX-SAME: [[OP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 16) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, [[TMP0]], [[TMP1]]) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-CXX-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-CXX-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-CXX-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-CXX-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-CXX-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) -// CHECK-CXX-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-CXX-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48) -// CHECK-CXX-NEXT: ret [[TMP10]] -// -svuint8x4_t test_luti4_zt_u8_x4(svuint8x2_t op) __arm_streaming __arm_in("zt0") { - return svluti4_zt_u8_x4(0, op); -} - -// CHECK-LABEL: define dso_local @test_luti4_zt_s8_x4( -// CHECK-SAME: [[OP:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48) -// CHECK-NEXT: ret [[TMP10]] -// -// CHECK-CXX-LABEL: define dso_local @_Z19test_luti4_zt_s8_x411svuint8x2_t( -// CHECK-CXX-SAME: [[OP:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 16) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, [[TMP0]], [[TMP1]]) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-CXX-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-CXX-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-CXX-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-CXX-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-CXX-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) -// CHECK-CXX-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-CXX-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48) -// CHECK-CXX-NEXT: ret [[TMP10]] -// -svint8x4_t test_luti4_zt_s8_x4(svuint8x2_t op) __arm_streaming __arm_in("zt0") { - return svluti4_zt_s8_x4(0, op); -} diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp index d9bb6daf974d5b..5de97649af5d3a 100644 --- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp @@ -350,8 +350,3 @@ void test_svdot_multi_za32_bad_lane(uint32_t slice_base, svuint16_t z_u16, svsudot_lane_za32_s8_vg1x2(slice_base, z_s8x2, z_u8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} svsudot_lane_za32_s8_vg1x4(slice_base, z_s8x4, z_u8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} } - -void test_luti4_zt_x4(svuint8x2_t op) __arm_streaming __arm_in("zt0") { - // Check Zt tile 0 - svluti4_zt_u8_x4(1, op); // expected-error {{argument value 1 is outside the valid range [0, 0]}} -} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index bf11cb0ad9e0f7..8ffa2d0878e116 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3762,12 +3762,6 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], [ImmArg>, ImmArg>, IntrReadMem]>; - - def int_aarch64_sme_luti4_zt_x4 - : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [ImmArg>, IntrNoMem, IntrHasSideEffects]>; - } // SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2 diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index ebd7f938289b5a..69806c9c3fdbf7 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -401,7 +401,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { } void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc, - uint32_t MaxImm, bool IsMultiVector = false); + uint32_t MaxImm); template bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) { @@ -1977,23 +1977,15 @@ void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs, void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, - unsigned Opc, uint32_t MaxImm, - bool IsMultiVector) { + unsigned Opc, uint32_t MaxImm) { if (ConstantSDNode *Imm = dyn_cast(Node->getOperand(4))) if (Imm->getZExtValue() > MaxImm) return; SDValue ZtValue; - SmallVector Ops; if (!ImmToReg(Node->getOperand(2), ZtValue)) return; - Ops.push_back(ZtValue); - if (IsMultiVector) { - Ops.push_back(createZMulTuple({Node->getOperand(3), Node->getOperand(4)})); - } else { - Ops.push_back(Node->getOperand(3)); - Ops.push_back(Node->getOperand(4)); - } + SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)}; SDLoc DL(Node); EVT VT = Node->getValueType(0); @@ -5515,11 +5507,6 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { SelectMultiVectorLuti(Node, 2, Opc, 3); return; } - case Intrinsic::aarch64_sme_luti4_zt_x4: { - // Does not have immediate but it has 2ZPR input - SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z, 0, true); - return; - } } } break; case ISD::INTRINSIC_WO_CHAIN: { diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index e2261694d658c5..ebe4121c944b1e 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -940,7 +940,7 @@ defm FAMIN_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famin", 0b0010101>; let Predicates = [HasSME2, HasSME_LUTv2] in { defm MOVT : sme2_movt_zt_to_zt<"movt", 0b0011111>; -def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">; +def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">; } //[HasSME2, HasSME_LUTv2] let Predicates = [HasSME2p1, HasSME_LUTv2] in { diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll deleted file mode 100644 index 778f31194baf45..00000000000000 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll +++ /dev/null @@ -1,17 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -verify-machineinstrs -force-streaming < %s | FileCheck %s - -target triple = "aarch64-linux" - -define {, , , } @test_luti4_zt_i8( %v0, %v1) #0 { -; CHECK-LABEL: test_luti4_zt_i8: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: luti4 { z0.b - z3.b }, zt0, { z0, z1 } -; CHECK-NEXT: ret - %res = call {, , , } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, %v0, %v1) - ret {, , , } %res -} - -attributes #0 = { "target-features"="+sme2,+sme-lutv2"} From 5ee2deac0c3b85deaeb0031b4030db99d266abdc Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 25 Sep 2024 10:27:57 +0100 Subject: [PATCH 58/65] [lldb][AArch64][Linux] Add Floating Point Mode Register (#106695) Otherwise known as FEAT_FPMR. This register controls the behaviour of floating point operations. https://developer.arm.com/documentation/ddi0601/2024-06/AArch64-Registers/FPMR--Floating-point-Mode-Register As the current floating point register contexts are fixed size, this has been placed in a new set. Linux kernel patches have landed already, so you can cross check with those. To simplify testing we're not going to do any floating point operations, just read and write from the program and debugger to make sure each sees the other's values correctly. --- .../Python/lldbsuite/test/lldbtest.py | 3 + .../NativeRegisterContextLinux_arm64.cpp | 92 ++++++++++++++++++- .../Linux/NativeRegisterContextLinux_arm64.h | 12 +++ .../Utility/RegisterContextPOSIX_arm64.cpp | 4 + .../Utility/RegisterContextPOSIX_arm64.h | 1 + .../Utility/RegisterInfoPOSIX_arm64.cpp | 33 +++++++ .../Process/Utility/RegisterInfoPOSIX_arm64.h | 7 ++ lldb/test/API/linux/aarch64/fpmr/Makefile | 3 + .../aarch64/fpmr/TestAArch64LinuxFPMR.py | 58 ++++++++++++ lldb/test/API/linux/aarch64/fpmr/main.c | 41 +++++++++ 10 files changed, 252 insertions(+), 2 deletions(-) create mode 100644 lldb/test/API/linux/aarch64/fpmr/Makefile create mode 100644 lldb/test/API/linux/aarch64/fpmr/TestAArch64LinuxFPMR.py create mode 100644 lldb/test/API/linux/aarch64/fpmr/main.c diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index df5a110cb5b309..c6b7ce84109c09 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -1370,6 +1370,9 @@ def isAArch64PAuth(self): return True return self.isAArch64() and "paca" in self.getCPUInfo() + def isAArch64FPMR(self): + return self.isAArch64() and "fpmr" in self.getCPUInfo() + def isAArch64Windows(self): """Returns true if the architecture is AArch64 and platform windows.""" if self.getPlatform() == "windows": diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp index 1dd4fd41351333..6056f3001fed6e 100644 --- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp @@ -60,10 +60,16 @@ #define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* Tagged address control register */ #endif +#ifndef NT_ARM_FPMR +#define NT_ARM_FPMR 0x40e /* Floating point mode register */ +#endif + #define HWCAP_PACA (1 << 30) #define HWCAP2_MTE (1 << 18) +#define HWCAP2_FPMR (1UL << 48) + using namespace lldb; using namespace lldb_private; using namespace lldb_private::process_linux; @@ -139,8 +145,12 @@ NativeRegisterContextLinux::CreateHostNativeRegisterContextLinux( std::optional auxv_at_hwcap2 = process.GetAuxValue(AuxVector::AUXV_AT_HWCAP2); - if (auxv_at_hwcap2 && (*auxv_at_hwcap2 & HWCAP2_MTE)) - opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskMTE); + if (auxv_at_hwcap2) { + if (*auxv_at_hwcap2 & HWCAP2_MTE) + opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskMTE); + if (*auxv_at_hwcap2 & HWCAP2_FPMR) + opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskFPMR); + } opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskTLS); @@ -186,6 +196,7 @@ NativeRegisterContextLinux_arm64::NativeRegisterContextLinux_arm64( std::fill(m_zt_reg.begin(), m_zt_reg.end(), 0); m_mte_ctrl_reg = 0; + m_fpmr_reg = 0; // 16 is just a maximum value, query hardware for actual watchpoint count m_max_hwp_supported = 16; @@ -201,6 +212,7 @@ NativeRegisterContextLinux_arm64::NativeRegisterContextLinux_arm64( m_mte_ctrl_is_valid = false; m_tls_is_valid = false; m_zt_buffer_is_valid = false; + m_fpmr_is_valid = false; // SME adds the tpidr2 register m_tls_size = GetRegisterInfo().IsSSVEPresent() ? sizeof(m_tls_regs) @@ -413,6 +425,14 @@ NativeRegisterContextLinux_arm64::ReadRegister(const RegisterInfo *reg_info, assert(offset < GetSMEPseudoBufferSize()); src = (uint8_t *)GetSMEPseudoBuffer() + offset; } + } else if (IsFPMR(reg)) { + error = ReadFPMR(); + if (error.Fail()) + return error; + + offset = reg_info->byte_offset - GetRegisterInfo().GetFPMROffset(); + assert(offset < GetFPMRBufferSize()); + src = (uint8_t *)GetFPMRBuffer() + offset; } else return Status::FromErrorString( "failed - register wasn't recognized to be a GPR or an FPR, " @@ -626,6 +646,17 @@ Status NativeRegisterContextLinux_arm64::WriteRegister( } else return Status::FromErrorString( "Writing to SVG or SVCR is not supported."); + } else if (IsFPMR(reg)) { + error = ReadFPMR(); + if (error.Fail()) + return error; + + offset = reg_info->byte_offset - GetRegisterInfo().GetFPMROffset(); + assert(offset < GetFPMRBufferSize()); + dst = (uint8_t *)GetFPMRBuffer() + offset; + ::memcpy(dst, reg_value.GetBytes(), reg_info->byte_size); + + return WriteFPMR(); } return Status::FromErrorString("Failed to write register value"); @@ -640,6 +671,7 @@ enum RegisterSetType : uint32_t { TLS, SME, // ZA only, because SVCR and SVG are pseudo registers. SME2, // ZT only. + FPMR, }; static uint8_t *AddRegisterSetType(uint8_t *dst, @@ -720,6 +752,13 @@ NativeRegisterContextLinux_arm64::CacheAllRegisters(uint32_t &cached_size) { return error; } + if (GetRegisterInfo().IsFPMRPresent()) { + cached_size += sizeof(RegisterSetType) + GetFPMRBufferSize(); + error = ReadFPMR(); + if (error.Fail()) + return error; + } + // tpidr is always present but tpidr2 depends on SME. cached_size += sizeof(RegisterSetType) + GetTLSBufferSize(); error = ReadTLS(); @@ -823,6 +862,11 @@ Status NativeRegisterContextLinux_arm64::ReadAllRegisterValues( GetMTEControlSize()); } + if (GetRegisterInfo().IsFPMRPresent()) { + dst = AddSavedRegisters(dst, RegisterSetType::FPMR, GetFPMRBuffer(), + GetFPMRBufferSize()); + } + dst = AddSavedRegisters(dst, RegisterSetType::TLS, GetTLSBuffer(), GetTLSBufferSize()); @@ -971,6 +1015,11 @@ Status NativeRegisterContextLinux_arm64::WriteAllRegisterValues( GetZTBuffer(), &src, GetZTBufferSize(), m_zt_buffer_is_valid, std::bind(&NativeRegisterContextLinux_arm64::WriteZT, this)); break; + case RegisterSetType::FPMR: + error = RestoreRegisters( + GetFPMRBuffer(), &src, GetFPMRBufferSize(), m_fpmr_is_valid, + std::bind(&NativeRegisterContextLinux_arm64::WriteFPMR, this)); + break; } if (error.Fail()) @@ -1014,6 +1063,10 @@ bool NativeRegisterContextLinux_arm64::IsTLS(unsigned reg) const { return GetRegisterInfo().IsTLSReg(reg); } +bool NativeRegisterContextLinux_arm64::IsFPMR(unsigned reg) const { + return GetRegisterInfo().IsFPMRReg(reg); +} + llvm::Error NativeRegisterContextLinux_arm64::ReadHardwareDebugInfo() { if (!m_refresh_hwdebug_info) { return llvm::Error::success(); @@ -1161,6 +1214,7 @@ void NativeRegisterContextLinux_arm64::InvalidateAllRegisters() { m_mte_ctrl_is_valid = false; m_tls_is_valid = false; m_zt_buffer_is_valid = false; + m_fpmr_is_valid = false; // Update SVE and ZA registers in case there is change in configuration. ConfigureRegisterContext(); @@ -1440,6 +1494,40 @@ Status NativeRegisterContextLinux_arm64::WriteZT() { return WriteRegisterSet(&ioVec, GetZTBufferSize(), NT_ARM_ZT); } +Status NativeRegisterContextLinux_arm64::ReadFPMR() { + Status error; + + if (m_fpmr_is_valid) + return error; + + struct iovec ioVec; + ioVec.iov_base = GetFPMRBuffer(); + ioVec.iov_len = GetFPMRBufferSize(); + + error = ReadRegisterSet(&ioVec, GetFPMRBufferSize(), NT_ARM_FPMR); + + if (error.Success()) + m_fpmr_is_valid = true; + + return error; +} + +Status NativeRegisterContextLinux_arm64::WriteFPMR() { + Status error; + + error = ReadFPMR(); + if (error.Fail()) + return error; + + struct iovec ioVec; + ioVec.iov_base = GetFPMRBuffer(); + ioVec.iov_len = GetFPMRBufferSize(); + + m_fpmr_is_valid = false; + + return WriteRegisterSet(&ioVec, GetFPMRBufferSize(), NT_ARM_FPMR); +} + void NativeRegisterContextLinux_arm64::ConfigureRegisterContext() { // ConfigureRegisterContext gets called from InvalidateAllRegisters // on every stop and configures SVE vector length and whether we are in diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h index 6df7c3beefb824..16190b5492582b 100644 --- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h @@ -84,6 +84,7 @@ class NativeRegisterContextLinux_arm64 bool m_sve_buffer_is_valid; bool m_mte_ctrl_is_valid; bool m_zt_buffer_is_valid; + bool m_fpmr_is_valid; bool m_sve_header_is_valid; bool m_za_buffer_is_valid; @@ -133,6 +134,8 @@ class NativeRegisterContextLinux_arm64 // SME2's ZT is a 512 bit register. std::array m_zt_reg; + uint64_t m_fpmr_reg; + bool IsGPR(unsigned reg) const; bool IsFPR(unsigned reg) const; @@ -174,11 +177,16 @@ class NativeRegisterContextLinux_arm64 // SVCR is a pseudo register and we do not allow writes to it. Status ReadSMEControl(); + Status ReadFPMR(); + + Status WriteFPMR(); + bool IsSVE(unsigned reg) const; bool IsSME(unsigned reg) const; bool IsPAuth(unsigned reg) const; bool IsMTE(unsigned reg) const; bool IsTLS(unsigned reg) const; + bool IsFPMR(unsigned reg) const; uint64_t GetSVERegVG() { return m_sve_header.vl / 8; } @@ -202,6 +210,8 @@ class NativeRegisterContextLinux_arm64 void *GetSVEBuffer() { return m_sve_ptrace_payload.data(); } + void *GetFPMRBuffer() { return &m_fpmr_reg; } + size_t GetSVEHeaderSize() { return sizeof(m_sve_header); } size_t GetPACMaskSize() { return sizeof(m_pac_mask); } @@ -222,6 +232,8 @@ class NativeRegisterContextLinux_arm64 size_t GetZTBufferSize() { return m_zt_reg.size(); } + size_t GetFPMRBufferSize() { return sizeof(m_fpmr_reg); } + llvm::Error ReadHardwareDebugInfo() override; llvm::Error WriteHardwareDebugRegs(DREGType hwbType) override; diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp index 50e25568f2ae01..575e9c8c81cbf5 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp @@ -59,6 +59,10 @@ bool RegisterContextPOSIX_arm64::IsMTE(unsigned reg) const { return m_register_info_up->IsMTEReg(reg); } +bool RegisterContextPOSIX_arm64::IsFPMR(unsigned reg) const { + return m_register_info_up->IsFPMRReg(reg); +} + RegisterContextPOSIX_arm64::RegisterContextPOSIX_arm64( lldb_private::Thread &thread, std::unique_ptr register_info) diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h index b1226b25b4be10..35ad56c98a7aed 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h +++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h @@ -58,6 +58,7 @@ class RegisterContextPOSIX_arm64 : public lldb_private::RegisterContext { bool IsTLS(unsigned reg) const; bool IsSME(unsigned reg) const; bool IsMTE(unsigned reg) const; + bool IsFPMR(unsigned reg) const; bool IsSVEZ(unsigned reg) const { return m_register_info_up->IsSVEZReg(reg); } bool IsSVEP(unsigned reg) const { return m_register_info_up->IsSVEPReg(reg); } diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp index 9f5872e5de7e9f..f51a93e1b2dcbd 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp @@ -94,6 +94,9 @@ static lldb_private::RegisterInfo g_register_infos_sme2[] = { {"zt0", nullptr, 64, 0, lldb::eEncodingVector, lldb::eFormatVectorOfUInt8, KIND_ALL_INVALID, nullptr, nullptr, nullptr}}; +static lldb_private::RegisterInfo g_register_infos_fpmr[] = { + DEFINE_EXTENSION_REG(fpmr)}; + // Number of register sets provided by this context. enum { k_num_gpr_registers = gpr_w28 - gpr_x0 + 1, @@ -105,6 +108,7 @@ enum { // SME2's ZT0 will also be added to this set if present. So this number is // only for SME1 registers. k_num_sme_register = 3, + k_num_fpmr_register = 1, k_num_register_sets_default = 2, k_num_register_sets = 3 }; @@ -214,6 +218,9 @@ static const lldb_private::RegisterSet g_reg_set_mte_arm64 = { static const lldb_private::RegisterSet g_reg_set_sme_arm64 = { "Scalable Matrix Extension Registers", "sme", k_num_sme_register, nullptr}; +static const lldb_private::RegisterSet g_reg_set_fpmr_arm64 = { + "Floating Point Mode Register", "fpmr", k_num_fpmr_register, nullptr}; + RegisterInfoPOSIX_arm64::RegisterInfoPOSIX_arm64( const lldb_private::ArchSpec &target_arch, lldb_private::Flags opt_regsets) : lldb_private::RegisterInfoAndSetInterface(target_arch), @@ -263,6 +270,9 @@ RegisterInfoPOSIX_arm64::RegisterInfoPOSIX_arm64( if (m_opt_regsets.AnySet(eRegsetMaskSSVE)) AddRegSetSME(m_opt_regsets.AnySet(eRegsetMaskZT)); + if (m_opt_regsets.AllSet(eRegsetMaskFPMR)) + AddRegSetFPMR(); + m_register_info_count = m_dynamic_reg_infos.size(); m_register_info_p = m_dynamic_reg_infos.data(); m_register_set_p = m_dynamic_reg_sets.data(); @@ -409,6 +419,21 @@ void RegisterInfoPOSIX_arm64::AddRegSetSME(bool has_zt) { m_dynamic_reg_infos[GetRegNumSVEVG()].invalidate_regs = vg_invalidates; } +void RegisterInfoPOSIX_arm64::AddRegSetFPMR() { + uint32_t fpmr_regnum = m_dynamic_reg_infos.size(); + m_fpmr_regnum_collection.push_back(fpmr_regnum); + m_dynamic_reg_infos.push_back(g_register_infos_fpmr[0]); + m_dynamic_reg_infos[fpmr_regnum].byte_offset = + m_dynamic_reg_infos[fpmr_regnum - 1].byte_offset + + m_dynamic_reg_infos[fpmr_regnum - 1].byte_size; + m_dynamic_reg_infos[fpmr_regnum].kinds[lldb::eRegisterKindLLDB] = fpmr_regnum; + + m_per_regset_regnum_range[m_register_set_count] = + std::make_pair(fpmr_regnum, fpmr_regnum + 1); + m_dynamic_reg_sets.push_back(g_reg_set_fpmr_arm64); + m_dynamic_reg_sets.back().registers = m_fpmr_regnum_collection.data(); +} + uint32_t RegisterInfoPOSIX_arm64::ConfigureVectorLengthSVE(uint32_t sve_vq) { // sve_vq contains SVE Quad vector length in context of AArch64 SVE. // SVE register infos if enabled cannot be disabled by selecting sve_vq = 0. @@ -532,6 +557,10 @@ bool RegisterInfoPOSIX_arm64::IsSMEReg(unsigned reg) const { return llvm::is_contained(m_sme_regnum_collection, reg); } +bool RegisterInfoPOSIX_arm64::IsFPMRReg(unsigned reg) const { + return llvm::is_contained(m_fpmr_regnum_collection, reg); +} + uint32_t RegisterInfoPOSIX_arm64::GetRegNumSVEZ0() const { return sve_z0; } uint32_t RegisterInfoPOSIX_arm64::GetRegNumSVEFFR() const { return sve_ffr; } @@ -561,3 +590,7 @@ uint32_t RegisterInfoPOSIX_arm64::GetTLSOffset() const { uint32_t RegisterInfoPOSIX_arm64::GetSMEOffset() const { return m_register_info_p[m_sme_regnum_collection[0]].byte_offset; } + +uint32_t RegisterInfoPOSIX_arm64::GetFPMROffset() const { + return m_register_info_p[m_fpmr_regnum_collection[0]].byte_offset; +} \ No newline at end of file diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h index 3b8171042c7326..16a951ef0935f0 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h @@ -32,6 +32,7 @@ class RegisterInfoPOSIX_arm64 eRegsetMaskTLS = 16, eRegsetMaskZA = 32, eRegsetMaskZT = 64, + eRegsetMaskFPMR = 128, eRegsetMaskDynamic = ~1, }; @@ -110,6 +111,8 @@ class RegisterInfoPOSIX_arm64 void AddRegSetSME(bool has_zt); + void AddRegSetFPMR(); + uint32_t ConfigureVectorLengthSVE(uint32_t sve_vq); void ConfigureVectorLengthZA(uint32_t za_vq); @@ -128,6 +131,7 @@ class RegisterInfoPOSIX_arm64 bool IsPAuthPresent() const { return m_opt_regsets.AnySet(eRegsetMaskPAuth); } bool IsMTEPresent() const { return m_opt_regsets.AnySet(eRegsetMaskMTE); } bool IsTLSPresent() const { return m_opt_regsets.AnySet(eRegsetMaskTLS); } + bool IsFPMRPresent() const { return m_opt_regsets.AnySet(eRegsetMaskFPMR); } bool IsSVEReg(unsigned reg) const; bool IsSVEZReg(unsigned reg) const; @@ -139,6 +143,7 @@ class RegisterInfoPOSIX_arm64 bool IsSMEReg(unsigned reg) const; bool IsSMERegZA(unsigned reg) const; bool IsSMERegZT(unsigned reg) const; + bool IsFPMRReg(unsigned reg) const; uint32_t GetRegNumSVEZ0() const; uint32_t GetRegNumSVEFFR() const; @@ -150,6 +155,7 @@ class RegisterInfoPOSIX_arm64 uint32_t GetMTEOffset() const; uint32_t GetTLSOffset() const; uint32_t GetSMEOffset() const; + uint32_t GetFPMROffset() const; private: typedef std::map> @@ -181,6 +187,7 @@ class RegisterInfoPOSIX_arm64 std::vector m_mte_regnum_collection; std::vector m_tls_regnum_collection; std::vector m_sme_regnum_collection; + std::vector m_fpmr_regnum_collection; }; #endif diff --git a/lldb/test/API/linux/aarch64/fpmr/Makefile b/lldb/test/API/linux/aarch64/fpmr/Makefile new file mode 100644 index 00000000000000..10495940055b63 --- /dev/null +++ b/lldb/test/API/linux/aarch64/fpmr/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/linux/aarch64/fpmr/TestAArch64LinuxFPMR.py b/lldb/test/API/linux/aarch64/fpmr/TestAArch64LinuxFPMR.py new file mode 100644 index 00000000000000..5a3b8f501095e9 --- /dev/null +++ b/lldb/test/API/linux/aarch64/fpmr/TestAArch64LinuxFPMR.py @@ -0,0 +1,58 @@ +""" +Test lldb's ability to read and write the AArch64 FPMR register. +""" + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class AArch64LinuxFPMR(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + @skipUnlessArch("aarch64") + @skipUnlessPlatform(["linux"]) + def test_fpmr_register(self): + if not self.isAArch64FPMR(): + self.skipTest("FPMR must be present.") + + self.build() + self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) + + lldbutil.run_break_set_by_file_and_line( + self, + "main.c", + line_number("main.c", "// Set break point at this line."), + num_expected_locations=1, + ) + + self.runCmd("run", RUN_SUCCEEDED) + + if self.process().GetState() == lldb.eStateExited: + self.fail("Test program failed to run.") + + self.expect( + "thread list", + STOPPED_DUE_TO_BREAKPOINT, + substrs=["stopped", "stop reason = breakpoint"], + ) + + # This has been set by the program. + expected_fpmr = (0b101010 << 32) | 0b101 + self.expect( + "register read --all", + substrs=["Floating Point Mode Register", f"fpmr = {expected_fpmr:#018x}"], + ) + + # Write a value for the program to find. Same fields but with bit values + # inverted. + new_fpmr = (0b010101 << 32) | 0b010 + self.runCmd(f"register write fpmr {new_fpmr:#x}") + + # This value should be saved and restored after expressions. + self.runCmd("p expr_func()") + self.expect("register read fpmr", substrs=[f"fpmr = {new_fpmr:#018x}"]) + + # 0 means the program found the new value in the sysreg as expected. + self.expect("continue", substrs=["exited with status = 0"]) diff --git a/lldb/test/API/linux/aarch64/fpmr/main.c b/lldb/test/API/linux/aarch64/fpmr/main.c new file mode 100644 index 00000000000000..bdb7d8f40b64dd --- /dev/null +++ b/lldb/test/API/linux/aarch64/fpmr/main.c @@ -0,0 +1,41 @@ +#include +#include +#include + +#ifndef HWCAP2_FPMR +#define HWCAP2_FPMR (1UL << 48) +#endif + +uint64_t get_fpmr(void) { + uint64_t fpmr = 0; + __asm__ volatile("mrs %0, s3_3_c4_c4_2" : "=r"(fpmr)); + return fpmr; +} + +void set_fpmr(uint64_t value) { + __asm__ volatile("msr s3_3_c4_c4_2, %0" ::"r"(value)); +} + +// Set F8S1 (bits 0-2) and LSCALE2 (bits 37-32) (to prove we treat fpmr as 64 +// bit). +const uint64_t original_fpmr = (uint64_t)0b101010 << 32 | (uint64_t)0b101; + +void expr_func() { set_fpmr(original_fpmr); } + +int main(int argc, char *argv[]) { + if (!(getauxval(AT_HWCAP2) & HWCAP2_FPMR)) + return 1; + + // As FPMR controls a bunch of floating point options that are quite + // extensive, we're not going to run any floating point ops here. Instead just + // update the value from the debugger and check it from this program, and vice + // versa. + set_fpmr(original_fpmr); + + // Here the debugger checks it read back the value above, then writes in a new + // value. Note that the bits are flipped in the new value. + uint64_t new_fpmr = get_fpmr(); // Set break point at this line. + uint64_t expected_fpmr = ((uint64_t)0b010101 << 32) | (uint64_t)0b010; + + return new_fpmr == expected_fpmr ? 0 : 1; +} From 5a038230b0a61acb9ec6f6fdd380c7d3c8c3d673 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 25 Sep 2024 10:34:21 +0100 Subject: [PATCH 59/65] [llvm][cmake] Error when a runtime is in LLVM_ENABLE_PROJECTS and LLVM_ENABLE_RUNTIMES (#109791) The documentation tells you not to do this: https://llvm.org/docs/CMake.html#llvm-related-variables But until now we did not enforce it. ``` $ cmake ../llvm-project/llvm/ -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_PROJECTS="pstl" -DLLVM_ENABLE_RUNTIMES="libcxx;pstl" ``` ``` CMake Error at CMakeLists.txt:166 (message): Runtime project "pstl" found in LLVM_ENABLE_PROJECTS and LLVM_ENABLE_RUNTIMES. It must only appear in one of them and that one should almost always be LLVM_ENABLE_RUNTIMES. ``` --- llvm/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index c637febce1c1fe..330db65e85cabb 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -161,6 +161,12 @@ foreach(proj IN LISTS LLVM_ENABLE_RUNTIMES) endif() endforeach() +foreach(proj IN LISTS LLVM_ENABLE_RUNTIMES) + if ("${proj}" IN_LIST LLVM_ENABLE_PROJECTS) + message(FATAL_ERROR "Runtime project \"${proj}\" found in LLVM_ENABLE_PROJECTS and LLVM_ENABLE_RUNTIMES. It must only appear in one of them and that one should almost always be LLVM_ENABLE_RUNTIMES.") + endif() +endforeach() + # Set a shorthand option to enable the GPU build of the 'libc' project. option(LIBC_GPU_BUILD "Enable the 'libc' project targeting the GPU" OFF) if(LIBC_GPU_BUILD) From c71b212285bd3b4ba3758d4db042a869f520862e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 25 Sep 2024 11:52:35 +0400 Subject: [PATCH 60/65] ProfDataUtils: Avoid dyn_extract + assert (NFC) --- llvm/lib/IR/ProfDataUtils.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index 992ce34e000343..4d2fbdde3f9f08 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -216,8 +216,7 @@ bool extractProfTotalWeight(const MDNode *ProfileData, uint64_t &TotalVal) { if (ProfDataName->getString() == "branch_weights") { unsigned Offset = getBranchWeightOffset(ProfileData); for (unsigned Idx = Offset; Idx < ProfileData->getNumOperands(); ++Idx) { - auto *V = mdconst::dyn_extract(ProfileData->getOperand(Idx)); - assert(V && "Malformed branch_weight in MD_prof node"); + auto *V = mdconst::extract(ProfileData->getOperand(Idx)); TotalVal += V->getValue().getZExtValue(); } return true; From 0ef24aa549536e65fc3b23c4d21b6b76190d416e Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Wed, 25 Sep 2024 12:12:27 +0200 Subject: [PATCH 61/65] Fix for logic in combineExtract() (#108208) A (csmith) test case appeared where combineExtract() crashed when the input vector was a bitcast into a vector of i1:s. Fix this by adding a check with canTreatAsByteVector() before the call. --- .../Target/SystemZ/SystemZISelLowering.cpp | 5 +++-- .../SystemZ/DAGCombine_extract_vector_elt.ll | 20 +++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/SystemZ/DAGCombine_extract_vector_elt.ll diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 3dabc5ef540cfb..ba105c12bc4e97 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -7361,8 +7361,9 @@ SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT( if (auto *IndexN = dyn_cast(N->getOperand(1))) { SDValue Op0 = N->getOperand(0); EVT VecVT = Op0.getValueType(); - return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0, - IndexN->getZExtValue(), DCI, false); + if (canTreatAsByteVector(VecVT)) + return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0, + IndexN->getZExtValue(), DCI, false); } return SDValue(); } diff --git a/llvm/test/CodeGen/SystemZ/DAGCombine_extract_vector_elt.ll b/llvm/test/CodeGen/SystemZ/DAGCombine_extract_vector_elt.ll new file mode 100644 index 00000000000000..d568af47dbafd0 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/DAGCombine_extract_vector_elt.ll @@ -0,0 +1,20 @@ +; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z16 < %s | FileCheck %s +; +; Check that DAGCombiner doesn't crash in SystemZ combineExtract() +; when handling EXTRACT_VECTOR_ELT with a vector of i1:s. + +define i32 @fun(i32 %arg) { +; CHECK-LABEL: fun: +entry: + %cc = icmp eq i32 %arg, 0 + br label %loop + +loop: + %P = phi <128 x i1> [ zeroinitializer, %entry ], [ bitcast (<2 x i64> to <128 x i1>), %loop ] + br i1 %cc, label %exit, label %loop + +exit: + %E = extractelement <128 x i1> %P, i64 0 + %Res = zext i1 %E to i32 + ret i32 %Res +} From 0c31ea5a09d854d5891eac40629f6a17a66fdcf7 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Wed, 25 Sep 2024 11:19:05 +0100 Subject: [PATCH 62/65] [Clang][SME2] Use tuple result of SME builtins directly. (#109423) I missed a codepath during PR108008 so SME2/SVE2p1 builtins are converting their struct return type into a large vector, which is causing unnecessary casting via memory. --- clang/lib/CodeGen/CGBuiltin.cpp | 66 +- clang/lib/CodeGen/CodeGenFunction.h | 8 +- .../aarch64-sme2-intrinsics/acle_sme2_clamp.c | 528 +----- .../aarch64-sme2-intrinsics/acle_sme2_cvt.c | 194 +- .../aarch64-sme2-intrinsics/acle_sme2_cvtl.c | 18 +- .../acle_sme2_faminmax.c | 264 +-- .../aarch64-sme2-intrinsics/acle_sme2_frint.c | 176 +- .../acle_sme2_luti2_lane_zt_x2.c | 162 +- .../acle_sme2_luti2_lane_zt_x4.c | 234 +-- .../acle_sme2_luti4_lane_zt_x2.c | 162 +- .../acle_sme2_luti4_lane_zt_x4.c | 182 +- .../aarch64-sme2-intrinsics/acle_sme2_max.c | 1056 +---------- .../aarch64-sme2-intrinsics/acle_sme2_maxnm.c | 352 +--- .../aarch64-sme2-intrinsics/acle_sme2_min.c | 1056 +---------- .../aarch64-sme2-intrinsics/acle_sme2_minnm.c | 352 +--- .../aarch64-sme2-intrinsics/acle_sme2_read.c | 1586 ++--------------- .../acle_sme2_sqdmulh.c | 352 +--- .../acle_sme2_unpkx2.c | 108 +- .../acle_sme2_unpkx4.c | 156 +- .../acle_sme2_vector_add.c | 352 +--- .../acle_sme2_vector_rshl.c | 704 +------- .../acle_sme2_vector_selx2.c | 216 +-- .../acle_sme2_vector_selx4.c | 312 +--- .../acle_sme2_vector_uzpx2.c | 432 +---- .../acle_sme2_vector_uzpx4.c | 624 +------ .../acle_sme2_vector_zipx2.c | 432 +---- .../acle_sme2_vector_zipx4.c | 624 +------ .../acle_sme2p1_movaz.c | 1584 ++-------------- .../acle_sve2p1_pext.c | 144 +- .../acle_sve2p1_while_x2.c | 576 ++---- 30 files changed, 1340 insertions(+), 11672 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 566252b2636801..249aead33ad73d 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9880,6 +9880,22 @@ Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred, return C; } +Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple, + llvm::StructType *Ty) { + if (PredTuple->getType() == Ty) + return PredTuple; + + Value *Ret = llvm::PoisonValue::get(Ty); + for (unsigned I = 0; I < Ty->getNumElements(); ++I) { + Value *Pred = Builder.CreateExtractValue(PredTuple, I); + Pred = EmitSVEPredicateCast( + Pred, cast(Ty->getTypeAtIndex(I))); + Ret = Builder.CreateInsertValue(Ret, Pred, I); + } + + return Ret; +} + Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { @@ -10386,41 +10402,6 @@ Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags, return Tuple; } -Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) { - // Multi-vector results should be broken up into a single (wide) result - // vector. - auto *StructTy = dyn_cast(Call->getType()); - if (!StructTy) - return Call; - - auto *VTy = dyn_cast(StructTy->getTypeAtIndex(0U)); - if (!VTy) - return Call; - unsigned N = StructTy->getNumElements(); - - // We may need to emit a cast to a svbool_t - bool IsPredTy = VTy->getElementType()->isIntegerTy(1); - unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements(); - - ScalableVectorType *WideVTy = - ScalableVectorType::get(VTy->getElementType(), MinElts * N); - Value *Ret = llvm::PoisonValue::get(WideVTy); - for (unsigned I = 0; I < N; ++I) { - Value *SRet = Builder.CreateExtractValue(Call, I); - assert(SRet->getType() == VTy && "Unexpected type for result value"); - Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); - - if (IsPredTy) - SRet = EmitSVEPredicateCast( - SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16)); - - Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx); - } - Call = Ret; - - return Call; -} - void CodeGenFunction::GetAArch64SVEProcessedOperands( unsigned BuiltinID, const CallExpr *E, SmallVectorImpl &Ops, SVETypeFlags TypeFlags) { @@ -10551,12 +10532,16 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, getSVEOverloadTypes(TypeFlags, Ty, Ops)); Value *Call = Builder.CreateCall(F, Ops); + if (Call->getType() == Ty) + return Call; + // Predicate results must be converted to svbool_t. - if (auto PredTy = dyn_cast(Call->getType())) - if (PredTy->getScalarType()->isIntegerTy(1)) - Call = EmitSVEPredicateCast(Call, cast(Ty)); + if (auto PredTy = dyn_cast(Ty)) + return EmitSVEPredicateCast(Call, PredTy); + if (auto PredTupleTy = dyn_cast(Ty)) + return EmitSVEPredicateTupleCast(Call, PredTupleTy); - return FormSVEBuiltinResult(Call); + llvm_unreachable("unsupported element count!"); } switch (BuiltinID) { @@ -10888,9 +10873,8 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, TypeFlags.isOverloadNone() ? CGM.getIntrinsic(Builtin->LLVMIntrinsic) : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)}); - Value *Call = Builder.CreateCall(F, Ops); - return FormSVEBuiltinResult(Call); + return Builder.CreateCall(F, Ops); } Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 8a1f6ff00ada77..3e2abbd9bc1094 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4646,6 +4646,8 @@ class CodeGenFunction : public CodeGenTypeCache { unsigned BuiltinID); llvm::Value *EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy); + llvm::Value *EmitSVEPredicateTupleCast(llvm::Value *PredTuple, + llvm::StructType *Ty); llvm::Value *EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl &Ops, unsigned IntID); @@ -4670,12 +4672,6 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::Value *EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID); - /// FormSVEBuiltinResult - Returns the struct of scalable vectors as a wider - /// vector. It extracts the scalable vector from the struct and inserts into - /// the wider vector. This avoids the error when allocating space in llvm - /// for struct of scalable vectors if a function returns struct. - llvm::Value *FormSVEBuiltinResult(llvm::Value *Call); - llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitSMELd1St1(const SVETypeFlags &TypeFlags, diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c index 9c639984305d1b..1297185c4b50e0 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c @@ -24,27 +24,13 @@ // CHECK-LABEL: @test_svclamp_single_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svclamp_single_s8_x210svint8x2_tu10__SVInt8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svclamp_single_s8_x2(svint8x2_t op1, svint8_t op2, svint8_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s8_x2, , )(op1, op2, op3); @@ -52,27 +38,13 @@ svint8x2_t test_svclamp_single_s8_x2(svint8x2_t op1, svint8_t op2, svint8_t op3) // CHECK-LABEL: @test_svclamp_single_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s16_x211svint16x2_tu11__SVInt16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svclamp_single_s16_x2(svint16x2_t op1, svint16_t op2, svint16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s16_x2, , )(op1, op2, op3); @@ -80,27 +52,13 @@ svint16x2_t test_svclamp_single_s16_x2(svint16x2_t op1, svint16_t op2, svint16_t // CHECK-LABEL: @test_svclamp_single_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s32_x211svint32x2_tu11__SVInt32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svclamp_single_s32_x2(svint32x2_t op1, svint32_t op2, svint32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s32_x2, , )(op1, op2, op3); @@ -108,27 +66,13 @@ svint32x2_t test_svclamp_single_s32_x2(svint32x2_t op1, svint32_t op2, svint32_t // CHECK-LABEL: @test_svclamp_single_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s64_x211svint64x2_tu11__SVInt64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svclamp_single_s64_x2(svint64x2_t op1, svint64_t op2, svint64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s64_x2, , )(op1, op2, op3); @@ -139,35 +83,13 @@ svint64x2_t test_svclamp_single_s64_x2(svint64x2_t op1, svint64_t op2, svint64_t // CHECK-LABEL: @test_svclamp_single_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svclamp_single_s8_x410svint8x4_tu10__SVInt8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svclamp_single_s8_x4(svint8x4_t op1, svint8_t op2, svint8_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s8_x4, , )(op1, op2, op3); @@ -175,35 +97,13 @@ svint8x4_t test_svclamp_single_s8_x4(svint8x4_t op1, svint8_t op2, svint8_t op3) // CHECK-LABEL: @test_svclamp_single_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s16_x411svint16x4_tu11__SVInt16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svclamp_single_s16_x4(svint16x4_t op1, svint16_t op2, svint16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s16_x4, , )(op1, op2, op3); @@ -211,35 +111,13 @@ svint16x4_t test_svclamp_single_s16_x4(svint16x4_t op1, svint16_t op2, svint16_t // CHECK-LABEL: @test_svclamp_single_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s32_x411svint32x4_tu11__SVInt32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svclamp_single_s32_x4(svint32x4_t op1, svint32_t op2, svint32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s32_x4, , )(op1, op2, op3); @@ -247,35 +125,13 @@ svint32x4_t test_svclamp_single_s32_x4(svint32x4_t op1, svint32_t op2, svint32_t // CHECK-LABEL: @test_svclamp_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s64_x411svint64x4_tu11__SVInt64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svclamp_single_s64_x4(svint64x4_t op1, svint64_t op2, svint64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s64_x4, , )(op1, op2, op3); @@ -288,27 +144,13 @@ svint64x4_t test_svclamp_single_s64_x4(svint64x4_t op1, svint64_t op2, svint64_t // CHECK-LABEL: @test_svclamp_single_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svclamp_single_u8_x211svuint8x2_tu11__SVUint8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svclamp_single_u8_x2(svuint8x2_t op1, svuint8_t op2, svuint8_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u8_x2, , )(op1, op2, op3); @@ -316,27 +158,13 @@ svuint8x2_t test_svclamp_single_u8_x2(svuint8x2_t op1, svuint8_t op2, svuint8_t // CHECK-LABEL: @test_svclamp_single_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u16_x212svuint16x2_tu12__SVUint16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svclamp_single_u16_x2(svuint16x2_t op1, svuint16_t op2, svuint16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u16_x2, , )(op1, op2, op3); @@ -344,27 +172,13 @@ svuint16x2_t test_svclamp_single_u16_x2(svuint16x2_t op1, svuint16_t op2, svuint // CHECK-LABEL: @test_svclamp_single_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u32_x212svuint32x2_tu12__SVUint32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svclamp_single_u32_x2(svuint32x2_t op1, svuint32_t op2, svuint32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u32_x2, , )(op1, op2, op3); @@ -372,27 +186,13 @@ svuint32x2_t test_svclamp_single_u32_x2(svuint32x2_t op1, svuint32_t op2, svuint // CHECK-LABEL: @test_svclamp_single_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u64_x212svuint64x2_tu12__SVUint64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svclamp_single_u64_x2(svuint64x2_t op1, svuint64_t op2, svuint64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u64_x2, , )(op1, op2, op3); @@ -403,35 +203,13 @@ svuint64x2_t test_svclamp_single_u64_x2(svuint64x2_t op1, svuint64_t op2, svuint // CHECK-LABEL: @test_svclamp_single_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svclamp_single_u8_x411svuint8x4_tu11__SVUint8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svclamp_single_u8_x4(svuint8x4_t op1, svuint8_t op2, svuint8_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u8_x4, , )(op1, op2, op3); @@ -439,35 +217,13 @@ svuint8x4_t test_svclamp_single_u8_x4(svuint8x4_t op1, svuint8_t op2, svuint8_t // CHECK-LABEL: @test_svclamp_single_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u16_x412svuint16x4_tu12__SVUint16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svclamp_single_u16_x4(svuint16x4_t op1, svuint16_t op2, svuint16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u16_x4, , )(op1, op2, op3); @@ -475,35 +231,13 @@ svuint16x4_t test_svclamp_single_u16_x4(svuint16x4_t op1, svuint16_t op2, svuint // CHECK-LABEL: @test_svclamp_single_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u32_x412svuint32x4_tu12__SVUint32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svclamp_single_u32_x4(svuint32x4_t op1, svuint32_t op2, svuint32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u32_x4, , )(op1, op2, op3); @@ -511,35 +245,13 @@ svuint32x4_t test_svclamp_single_u32_x4(svuint32x4_t op1, svuint32_t op2, svuint // CHECK-LABEL: @test_svclamp_single_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u64_x412svuint64x4_tu12__SVUint64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svclamp_single_u64_x4(svuint64x4_t op1, svuint64_t op2, svuint64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u64_x4, , )(op1, op2, op3); @@ -552,27 +264,13 @@ svuint64x4_t test_svclamp_single_u64_x4(svuint64x4_t op1, svuint64_t op2, svuint // CHECK-LABEL: @test_svclamp_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f16_x213svfloat16x2_tu13__SVFloat16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svclamp_single_f16_x2(svfloat16x2_t op1, svfloat16_t op2, svfloat16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f16_x2, , )(op1, op2, op3); @@ -580,27 +278,13 @@ svfloat16x2_t test_svclamp_single_f16_x2(svfloat16x2_t op1, svfloat16_t op2, svf // CHECK-LABEL: @test_svclamp_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f32_x213svfloat32x2_tu13__SVFloat32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svclamp_single_f32_x2(svfloat32x2_t op1, svfloat32_t op2, svfloat32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f32_x2, , )(op1, op2, op3); @@ -609,27 +293,13 @@ svfloat32x2_t test_svclamp_single_f32_x2(svfloat32x2_t op1, svfloat32_t op2, svf // CHECK-LABEL: @test_svclamp_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f64_x213svfloat64x2_tu13__SVFloat64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svclamp_single_f64_x2(svfloat64x2_t op1, svfloat64_t op2, svfloat64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f64_x2, , )(op1, op2, op3); @@ -640,35 +310,13 @@ svfloat64x2_t test_svclamp_single_f64_x2(svfloat64x2_t op1, svfloat64_t op2, svf // CHECK-LABEL: @test_svclamp_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f16_x413svfloat16x4_tu13__SVFloat16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svclamp_single_f16_x4(svfloat16x4_t op1, svfloat16_t op2, svfloat16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f16_x4, , )(op1, op2, op3); @@ -676,35 +324,13 @@ svfloat16x4_t test_svclamp_single_f16_x4(svfloat16x4_t op1, svfloat16_t op2, svf // CHECK-LABEL: @test_svclamp_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f32_x413svfloat32x4_tu13__SVFloat32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svclamp_single_f32_x4(svfloat32x4_t op1, svfloat32_t op2, svfloat32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f32_x4, , )(op1, op2, op3); @@ -712,35 +338,13 @@ svfloat32x4_t test_svclamp_single_f32_x4(svfloat32x4_t op1, svfloat32_t op2, svf // CHECK-LABEL: @test_svclamp_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f64_x413svfloat64x4_tu13__SVFloat64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svclamp_single_f64_x4(svfloat64x4_t op1, svfloat64_t op2, svfloat64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f64_x4, , )(op1, op2, op3); @@ -748,27 +352,13 @@ svfloat64x4_t test_svclamp_single_f64_x4(svfloat64x4_t op1, svfloat64_t op2, svf // CHECK-LABEL: @test_svclamp_single_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svclamp_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svclamp_single_bf16_x2(svbfloat16x2_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_bf16_x2, , )(op1, op2, op3); @@ -776,35 +366,13 @@ svbfloat16x2_t test_svclamp_single_bf16_x2(svbfloat16x2_t op1, svbfloat16_t op2, // CHECK-LABEL: @test_svclamp_single_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svclamp_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svclamp_single_bf16_x4(svbfloat16x4_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_bf16_x4, , )(op1, op2, op3); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c index 2d61670fd60493..2851ea9ccd22c1 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c @@ -49,27 +49,13 @@ svbfloat16_t test_cvt_bf16_x2(svfloat32x2_t zn) __arm_streaming { // x2 // CHECK-LABEL: @test_svcvt_f32_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ucvtf.x2.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_f32_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ucvtf.x2.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svcvt_f32_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_f32,_u32_x2,,)(zn); @@ -77,27 +63,13 @@ svfloat32x2_t test_svcvt_f32_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_f32_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.scvtf.x2.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_f32_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.scvtf.x2.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svcvt_f32_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_f32,_s32_x2,,)(zn); @@ -105,27 +77,13 @@ svfloat32x2_t test_svcvt_f32_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_u32_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtzu.x2.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_u32_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtzu.x2.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svcvt_u32_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_u32,_f32_x2,,)(zn); @@ -133,27 +91,13 @@ svuint32x2_t test_svcvt_u32_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_s32_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_s32_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svcvt_s32_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_s32,_f32_x2,,)(zn); @@ -162,35 +106,13 @@ svint32x2_t test_svcvt_s32_f32_x2(svfloat32x2_t zn) __arm_streaming { // x4 // CHECK-LABEL: @test_svcvt_f32_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ucvtf.x4.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_f32_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ucvtf.x4.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svcvt_f32_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_f32,_u32_x4,,)(zn); @@ -198,35 +120,13 @@ svfloat32x4_t test_svcvt_f32_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_f32_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.scvtf.x4.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_f32_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.scvtf.x4.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svcvt_f32_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_f32,_s32_x4,,)(zn); @@ -234,35 +134,13 @@ svfloat32x4_t test_svcvt_f32_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_u32_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fcvtzu.x4.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_u32_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fcvtzu.x4.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svcvt_u32_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_u32,_f32_x4,,)(zn); @@ -270,35 +148,13 @@ svuint32x4_t test_svcvt_u32_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_s32_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fcvtzs.x4.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_s32_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fcvtzs.x4.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svcvt_s32_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_s32,_f32_x4,,)(zn); @@ -432,27 +288,13 @@ svuint16_t test_qcvt_u16_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_cvt_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z15test_cvt_f32_x2u13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // __attribute__((target("sme-f16f16"))) svfloat32x2_t test_cvt_f32_x2(svfloat16_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_f32,_f16_x2,,)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c index fc5c0376e925e0..5189ab4af8327a 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_cvtl_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtl.widen.x2.nxv4f32( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_cvtl_f32_x2u13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtl.widen.x2.nxv4f32( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_cvtl_f32_x2(svfloat16_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvtl_f32,_f16_x2,,)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c index a1540bba2a8a96..d4d423f982e84a 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svamax_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famax.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famax.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svamax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f16_x2)(zdn, zm); @@ -47,27 +33,13 @@ svfloat16x2_t test_svamax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famax.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famax.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svamax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f32_x2)(zdn, zm); @@ -75,27 +47,13 @@ svfloat32x2_t test_svamax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famax.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famax.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svamax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f64_x2)(zdn, zm); @@ -103,27 +61,13 @@ svfloat64x2_t test_svamax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svamin_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f16_x2)(zdn, zm); @@ -131,27 +75,13 @@ svfloat16x2_t test_svamin_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svamin_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f32_x2)(zdn, zm); @@ -159,27 +89,13 @@ svfloat32x2_t test_svamin_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svamin_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f64_x2)(zdn, zm); @@ -189,35 +105,13 @@ svfloat64x2_t test_svamin_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svamax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f16_x4)(zdn, zm); @@ -225,35 +119,13 @@ svfloat16x4_t test_svamax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svamax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f32_x4)(zdn, zm); @@ -261,35 +133,13 @@ svfloat32x4_t test_svamax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svamax_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f64_x4)(zdn, zm); @@ -297,35 +147,13 @@ svfloat64x4_t test_svamax_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svamin_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f16_x4)(zdn, zm); @@ -333,35 +161,13 @@ svfloat16x4_t test_svamin_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svamin_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f32_x4)(zdn, zm); @@ -369,35 +175,13 @@ svfloat32x4_t test_svamin_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svamin_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f64_x4)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c index abdb5a46d54532..8ab450587fc70d 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c @@ -21,27 +21,13 @@ // CHECK-LABEL: @test_svfrinta_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frinta.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrinta_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frinta.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svfrinta_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrinta,_f32_x2)(zn); @@ -49,35 +35,13 @@ svfloat32x2_t test_svfrinta_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrinta_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frinta.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrinta_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frinta.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svfrinta_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrinta,_f32_x4)(zn); @@ -87,27 +51,13 @@ svfloat32x4_t test_svfrinta_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintam_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintm.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svfrintam_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintm.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svfrintam_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintm,_f32_x2)(zn); @@ -115,35 +65,13 @@ svfloat32x2_t test_svfrintam_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintm_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintm.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrintm_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintm.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svfrintm_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintm,_f32_x4)(zn); @@ -153,27 +81,13 @@ svfloat32x4_t test_svfrintm_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintn_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintn.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrintn_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintn.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svfrintn_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintn,_f32_x2)(zn); @@ -181,35 +95,13 @@ svfloat32x2_t test_svfrintn_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintn_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintn.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrintn_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintn.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svfrintn_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintn,_f32_x4)(zn); @@ -219,27 +111,13 @@ svfloat32x4_t test_svfrintn_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintp_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintp.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrintp_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintp.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svfrintp_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintp,_f32_x2)(zn); @@ -247,35 +125,13 @@ svfloat32x2_t test_svfrintp_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintp_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintp.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrintp_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintp.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svfrintp_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintp,_f32_x4)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c index 6dd55663d7d348..3b17c6d9edb198 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c @@ -10,27 +10,13 @@ // CHECK-LABEL: @test_svluti2_lane_zt_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u8_x2(0, zn, 7); @@ -39,27 +25,13 @@ svuint8x2_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0" // CHECK-LABEL: @test_svluti2_lane_zt_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_s8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s8_x2(0, zn, 7); @@ -67,27 +39,13 @@ svint8x2_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") // CHECK-LABEL: @test_svluti2_lane_zt_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u16_x2(0, zn, 7); @@ -96,27 +54,13 @@ svuint16x2_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti2_lane_zt_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s16_x2(0, zn, 7); @@ -124,27 +68,13 @@ svint16x2_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti2_lane_zt_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8f16(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8f16(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f16_x2(0, zn, 7); @@ -152,27 +82,13 @@ svfloat16x2_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svluti2_lane_zt_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8bf16(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svluti2_lane_zt_bf16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8bf16(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_bf16_x2(0, zn, 7); @@ -180,27 +96,13 @@ svbfloat16x2_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in( // CHECK-LABEL: @test_svluti2_lane_zt_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u32_x2(0, zn, 7); @@ -208,27 +110,13 @@ svuint32x2_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti2_lane_zt_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s32_x2(0, zn, 7); @@ -236,27 +124,13 @@ svint32x2_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti2_lane_zt_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4f32(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4f32(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f32_x2(0, zn, 7); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c index 8650ec7f62dd83..38059019737f8a 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c @@ -10,35 +10,13 @@ // CHECK-LABEL: @test_svluti2_lane_zt_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u8_x4(0, zn, 3); @@ -47,35 +25,13 @@ svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0" // CHECK-LABEL: @test_svluti2_lane_zt_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_s8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s8_x4(0, zn, 3); @@ -83,35 +39,13 @@ svint8x4_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") // CHECK-LABEL: @test_svluti2_lane_zt_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u16_x4(0, zn, 3); @@ -119,35 +53,13 @@ svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti2_lane_zt_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s16_x4(0, zn, 3); @@ -155,35 +67,13 @@ svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti2_lane_zt_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8f16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8f16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f16_x4(0, zn, 3); @@ -191,35 +81,13 @@ svfloat16x4_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svluti2_lane_zt_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8bf16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svluti2_lane_zt_bf16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8bf16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_bf16_x4(0, zn, 3); @@ -227,35 +95,13 @@ svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in( // CHECK-LABEL: @test_svluti2_lane_zt_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u32_x4(0, zn, 3); @@ -263,35 +109,13 @@ svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti2_lane_zt_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s32_x4(0, zn, 3); @@ -299,35 +123,13 @@ svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti2_lane_zt_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4f32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4f32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f32_x4(0, zn, 3); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c index f4f11c9fc5b143..db615b3cd1c243 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c @@ -10,27 +10,13 @@ // CHECK-LABEL: @test_svluti4_lane_zt_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti4_lane_zt_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u8_x2(0, zn, 3); @@ -39,27 +25,13 @@ svuint8x2_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0" // CHECK-LABEL: @test_svluti4_lane_zt_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti4_lane_zt_s8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s8_x2(0, zn, 3); @@ -67,27 +39,13 @@ svint8x2_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") // CHECK-LABEL: @test_svluti4_lane_zt_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_u16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u16_x2(0, zn, 3); @@ -96,27 +54,13 @@ svuint16x2_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti4_lane_zt_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_s16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s16_x2(0, zn, 3); @@ -124,27 +68,13 @@ svint16x2_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti4_lane_zt_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8f16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_f16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8f16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_f16_x2(0, zn, 3); @@ -152,27 +82,13 @@ svfloat16x2_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svluti4_lane_zt_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8bf16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svluti4_lane_zt_bf16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8bf16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_bf16_x2(0, zn, 3); @@ -180,27 +96,13 @@ svbfloat16x2_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in( // CHECK-LABEL: @test_svluti4_lane_zt_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_u32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u32_x2(0, zn, 3); @@ -208,27 +110,13 @@ svuint32x2_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti4_lane_zt_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_s32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s32_x2(0, zn, 3); @@ -236,27 +124,13 @@ svint32x2_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti4_lane_zt_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4f32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_f32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4f32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_f32_x2(0, zn, 3); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c index 16a74213262357..c4c89358c16f8f 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c @@ -11,36 +11,14 @@ // CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_u16 // CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, [[ZN]], i32 1) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_u16u11__SVUint8_t // CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0:[0-9]+]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, [[ZN]], i32 1) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u16_x4(0, zn, 1); @@ -49,36 +27,14 @@ svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_f16 // CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8f16(i32 0, [[ZN]], i32 1) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_f16u11__SVUint8_t // CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8f16(i32 0, [[ZN]], i32 1) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_f16_x4(0, zn, 1); @@ -87,36 +43,14 @@ svfloat16x4_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z // CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_bf16 // CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8bf16(i32 0, [[ZN]], i32 1) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z25test_svluti4_lane_zt_bf16u11__SVUint8_t // CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8bf16(i32 0, [[ZN]], i32 1) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_bf16_x4(0, zn, 1); @@ -125,36 +59,14 @@ svbfloat16x4_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in( // CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_s16 // CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, [[ZN]], i32 1) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_s16u11__SVUint8_t // CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, [[ZN]], i32 1) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s16_x4(0, zn, 1); @@ -163,36 +75,14 @@ svint16x4_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_u32 // CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4i32(i32 0, [[ZN]], i32 1) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_u32u11__SVUint8_t // CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4i32(i32 0, [[ZN]], i32 1) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u32_x4(0, zn, 1); @@ -201,36 +91,14 @@ svuint32x4_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_s32 // CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4i32(i32 0, [[ZN]], i32 1) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_s32u11__SVUint8_t // CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4i32(i32 0, [[ZN]], i32 1) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s32_x4(0, zn, 1); @@ -239,36 +107,14 @@ svint32x4_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_f32 // CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4f32(i32 0, [[ZN]], i32 1) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_f32u11__SVUint8_t // CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4f32(i32 0, [[ZN]], i32 1) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_f32_x4(0, zn, 1); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c index efc68c0b42334c..5d57ffb9bdf8ce 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c @@ -18,27 +18,13 @@ // CHECK-LABEL: @test_svmax_single_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmax_single_s8_x210svint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svmax_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s8_x2)(zdn, zm); @@ -46,27 +32,13 @@ svint8x2_t test_svmax_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svmax_single_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s16_x211svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svmax_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s16_x2)(zdn, zm); @@ -74,27 +46,13 @@ svint16x2_t test_svmax_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s32_x211svint32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svmax_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s32_x2)(zdn, zm); @@ -102,27 +60,13 @@ svint32x2_t test_svmax_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s64_x211svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svmax_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s64_x2)(zdn, zm); @@ -130,27 +74,13 @@ svint64x2_t test_svmax_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmax_single_u8_x211svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svmax_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u8_x2)(zdn, zm); @@ -158,27 +88,13 @@ svuint8x2_t test_svmax_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streami // CHECK-LABEL: @test_svmax_single_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u16_x212svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svmax_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u16_x2)(zdn, zm); @@ -186,27 +102,13 @@ svuint16x2_t test_svmax_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u32_x212svuint32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svmax_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u32_x2)(zdn, zm); @@ -214,27 +116,13 @@ svuint32x2_t test_svmax_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u64_x212svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svmax_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u64_x2)(zdn, zm); @@ -242,27 +130,13 @@ svuint64x2_t test_svmax_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmax_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svmax_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_bf16_x2)(zdn, zm); @@ -270,27 +144,13 @@ svbfloat16x2_t test_svmax_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __ // CHECK-LABEL: @test_svmax_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f16_x213svfloat16x2_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svmax_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_f16_x2)(zdn, zm); @@ -298,27 +158,13 @@ svfloat16x2_t test_svmax_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_ // CHECK-LABEL: @test_svmax_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f32_x213svfloat32x2_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svmax_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_f32_x2)(zdn, zm); @@ -326,27 +172,13 @@ svfloat32x2_t test_svmax_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_ // CHECK-LABEL: @test_svmax_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f64_x213svfloat64x2_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svmax_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_f64_x2)(zdn, zm); @@ -356,35 +188,13 @@ svfloat64x2_t test_svmax_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_ // CHECK-LABEL: @test_svmax_single_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmax_single_s8_x410svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svmax_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s8_x4)(zdn, zm); @@ -392,35 +202,13 @@ svint8x4_t test_svmax_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svmax_single_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s16_x411svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svmax_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s16_x4)(zdn, zm); @@ -428,35 +216,13 @@ svint16x4_t test_svmax_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s32_x411svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svmax_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s32_x4)(zdn, zm); @@ -464,35 +230,13 @@ svint32x4_t test_svmax_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s64_x411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svmax_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s64_x4)(zdn, zm); @@ -500,35 +244,13 @@ svint64x4_t test_svmax_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmax_single_u8_x411svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svmax_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u8_x4)(zdn, zm); @@ -536,35 +258,13 @@ svuint8x4_t test_svmax_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streami // CHECK-LABEL: @test_svmax_single_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u16_x412svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svmax_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u16_x4)(zdn, zm); @@ -572,35 +272,13 @@ svuint16x4_t test_svmax_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u32_x412svuint32x4_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svmax_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u32_x4)(zdn, zm); @@ -608,35 +286,13 @@ svuint32x4_t test_svmax_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u64_x412svuint64x4_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svmax_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u64_x4)(zdn, zm); @@ -644,35 +300,13 @@ svuint64x4_t test_svmax_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmax_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svmax_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_bf16_x4)(zdn, zm); @@ -680,35 +314,13 @@ svbfloat16x4_t test_svmax_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __ // CHECK-LABEL: @test_svmax_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f16_x413svfloat16x4_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svmax_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_f16_x4)(zdn, zm); @@ -716,35 +328,13 @@ svfloat16x4_t test_svmax_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_ // CHECK-LABEL: @test_svmax_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f32_x413svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svmax_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_f32_x4)(zdn, zm); @@ -752,35 +342,13 @@ svfloat32x4_t test_svmax_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_ // CHECK-LABEL: @test_svmax_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f64_x413svfloat64x4_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svmax_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_f64_x4)(zdn, zm); @@ -790,27 +358,13 @@ svfloat64x4_t test_svmax_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_ // CHECK-LABEL: @test_svmax_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmax_s8_x210svint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svmax_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s8_x2)(zdn, zm); @@ -818,27 +372,13 @@ svint8x2_t test_svmax_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_s16_x211svint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svmax_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s16_x2)(zdn, zm); @@ -846,27 +386,13 @@ svint16x2_t test_svmax_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_s32_x211svint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svmax_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s32_x2)(zdn, zm); @@ -874,27 +400,13 @@ svint32x2_t test_svmax_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_s64_x211svint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svmax_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s64_x2)(zdn, zm); @@ -902,27 +414,13 @@ svint64x2_t test_svmax_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmax_u8_x211svuint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svmax_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u8_x2)(zdn, zm); @@ -930,27 +428,13 @@ svuint8x2_t test_svmax_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u16_x212svuint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svmax_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u16_x2)(zdn, zm); @@ -958,27 +442,13 @@ svuint16x2_t test_svmax_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u32_x212svuint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svmax_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u32_x2)(zdn, zm); @@ -986,27 +456,13 @@ svuint32x2_t test_svmax_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u64_x212svuint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svmax_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u64_x2)(zdn, zm); @@ -1014,27 +470,13 @@ svuint64x2_t test_svmax_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svmax_bf16_x214svbfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svmax_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_bf16_x2)(zdn, zm); @@ -1042,27 +484,13 @@ svbfloat16x2_t test_svmax_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_s // CHECK-LABEL: @test_svmax_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svmax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f16_x2)(zdn, zm); @@ -1070,27 +498,13 @@ svfloat16x2_t test_svmax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_strea // CHECK-LABEL: @test_svmax_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svmax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f32_x2)(zdn, zm); @@ -1098,27 +512,13 @@ svfloat32x2_t test_svmax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_strea // CHECK-LABEL: @test_svmax_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svmax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f64_x2)(zdn, zm); @@ -1128,35 +528,13 @@ svfloat64x2_t test_svmax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_strea // CHECK-LABEL: @test_svmax_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmax_s8_x410svint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svmax_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s8_x4)(zdn, zm); @@ -1164,35 +542,13 @@ svint8x4_t test_svmax_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_s16_x411svint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svmax_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s16_x4)(zdn, zm); @@ -1200,35 +556,13 @@ svint16x4_t test_svmax_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_s32_x411svint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svmax_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s32_x4)(zdn, zm); @@ -1236,35 +570,13 @@ svint32x4_t test_svmax_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_s64_x411svint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svmax_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s64_x4)(zdn, zm); @@ -1272,35 +584,13 @@ svint64x4_t test_svmax_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmax_u8_x411svuint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svmax_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u8_x4)(zdn, zm); @@ -1308,35 +598,13 @@ svuint8x4_t test_svmax_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u16_x412svuint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svmax_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u16_x4)(zdn, zm); @@ -1344,35 +612,13 @@ svuint16x4_t test_svmax_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u32_x412svuint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svmax_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u32_x4)(zdn, zm); @@ -1380,35 +626,13 @@ svuint32x4_t test_svmax_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u64_x412svuint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svmax_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u64_x4)(zdn, zm); @@ -1416,35 +640,13 @@ svuint64x4_t test_svmax_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svmax_bf16_x414svbfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svmax_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_bf16_x4)(zdn, zm); @@ -1452,35 +654,13 @@ svbfloat16x4_t test_svmax_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_s // CHECK-LABEL: @test_svmax_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svmax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f16_x4)(zdn, zm); @@ -1488,35 +668,13 @@ svfloat16x4_t test_svmax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_strea // CHECK-LABEL: @test_svmax_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svmax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f32_x4)(zdn, zm); @@ -1524,35 +682,13 @@ svfloat32x4_t test_svmax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_strea // CHECK-LABEL: @test_svmax_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svmax_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f64_x4)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c index 5d06895497cc7e..1d47abe8d487c2 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svmaxnm_single_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svmaxnm_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svmaxnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_bf16_x2)(zdn, zm); @@ -47,27 +33,13 @@ svbfloat16x2_t test_svmaxnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) // CHECK-LABEL: @test_svmaxnm_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f16_x213svfloat16x2_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svmaxnm_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f16_x2)(zdn, zm); @@ -75,27 +47,13 @@ svfloat16x2_t test_svmaxnm_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __ar // CHECK-LABEL: @test_svmaxnm_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f32_x213svfloat32x2_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svmaxnm_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f32_x2)(zdn, zm); @@ -103,27 +61,13 @@ svfloat32x2_t test_svmaxnm_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __ar // CHECK-LABEL: @test_svmaxnm_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f64_x213svfloat64x2_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svmaxnm_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f64_x2)(zdn, zm); @@ -133,35 +77,13 @@ svfloat64x2_t test_svmaxnm_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __ar // CHECK-LABEL: @test_svmaxnm_single_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svmaxnm_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svmaxnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_bf16_x4)(zdn, zm); @@ -169,35 +91,13 @@ svbfloat16x4_t test_svmaxnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) // CHECK-LABEL: @test_svmaxnm_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f16_x413svfloat16x4_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svmaxnm_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f16_x4)(zdn, zm); @@ -205,35 +105,13 @@ svfloat16x4_t test_svmaxnm_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __ar // CHECK-LABEL: @test_svmaxnm_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f32_x413svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svmaxnm_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f32_x4)(zdn, zm); @@ -241,35 +119,13 @@ svfloat32x4_t test_svmaxnm_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __ar // CHECK-LABEL: @test_svmaxnm_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f64_x413svfloat64x4_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svmaxnm_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f64_x4)(zdn, zm); @@ -279,27 +135,13 @@ svfloat64x4_t test_svmaxnm_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __ar // CHECK-LABEL: @test_svmaxnm_multi_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_multi_bf16_x214svbfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svmaxnm_multi_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_bf16_x2)(zdn, zm); @@ -307,27 +149,13 @@ svbfloat16x2_t test_svmaxnm_multi_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) // CHECK-LABEL: @test_svmaxnm_multi_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svmaxnm_multi_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f16_x2)(zdn, zm); @@ -335,27 +163,13 @@ svfloat16x2_t test_svmaxnm_multi_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __a // CHECK-LABEL: @test_svmaxnm_multi_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svmaxnm_multi_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f32_x2)(zdn, zm); @@ -363,27 +177,13 @@ svfloat32x2_t test_svmaxnm_multi_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __a // CHECK-LABEL: @test_svmaxnm_multi_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svmaxnm_multi_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f64_x2)(zdn, zm); @@ -393,35 +193,13 @@ svfloat64x2_t test_svmaxnm_multi_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __a // CHECK-LABEL: @test_svmaxnm_multi_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_multi_bf16_x414svbfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svmaxnm_multi_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_bf16_x4)(zdn, zm); @@ -429,35 +207,13 @@ svbfloat16x4_t test_svmaxnm_multi_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) // CHECK-LABEL: @test_svmaxnm_multi_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svmaxnm_multi_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f16_x4)(zdn, zm); @@ -465,35 +221,13 @@ svfloat16x4_t test_svmaxnm_multi_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __a // CHECK-LABEL: @test_svmaxnm_multi_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svmaxnm_multi_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f32_x4)(zdn, zm); @@ -501,35 +235,13 @@ svfloat32x4_t test_svmaxnm_multi_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __a // CHECK-LABEL: @test_svmaxnm_multi_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svmaxnm_multi_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f64_x4)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c index 2fa7feeee404e6..4e70a393116642 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c @@ -18,27 +18,13 @@ // CHECK-LABEL: @test_svmin_single_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmin_single_s8_x210svint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svmin_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s8_x2)(zdn, zm); @@ -46,27 +32,13 @@ svint8x2_t test_svmin_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svmin_single_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s16_x211svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svmin_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s16_x2)(zdn, zm); @@ -74,27 +46,13 @@ svint16x2_t test_svmin_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s32_x211svint32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svmin_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s32_x2)(zdn, zm); @@ -102,27 +60,13 @@ svint32x2_t test_svmin_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s64_x211svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svmin_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s64_x2)(zdn, zm); @@ -130,27 +74,13 @@ svint64x2_t test_svmin_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmin_single_u8_x211svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svmin_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u8_x2)(zdn, zm); @@ -158,27 +88,13 @@ svuint8x2_t test_svmin_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streami // CHECK-LABEL: @test_svmin_single_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u16_x212svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svmin_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u16_x2)(zdn, zm); @@ -186,27 +102,13 @@ svuint16x2_t test_svmin_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u32_x212svuint32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svmin_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u32_x2)(zdn, zm); @@ -214,27 +116,13 @@ svuint32x2_t test_svmin_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u64_x212svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svmin_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u64_x2)(zdn, zm); @@ -242,27 +130,13 @@ svuint64x2_t test_svmin_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmin_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svmin_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_bf16_x2)(zdn, zm); @@ -270,27 +144,13 @@ svbfloat16x2_t test_svmin_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __ // CHECK-LABEL: @test_svmin_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f16_x213svfloat16x2_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svmin_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f16_x2)(zdn, zm); @@ -298,27 +158,13 @@ svfloat16x2_t test_svmin_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f32_x213svfloat32x2_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svmin_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f32_x2)(zdn, zm); @@ -326,27 +172,13 @@ svfloat32x2_t test_svmin_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f64_x213svfloat64x2_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svmin_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f64_x2)(zdn, zm); @@ -356,35 +188,13 @@ svfloat64x2_t test_svmin_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmin_single_s8_x410svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svmin_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s8_x4)(zdn, zm); @@ -392,35 +202,13 @@ svint8x4_t test_svmin_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svmin_single_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s16_x411svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svmin_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s16_x4)(zdn, zm); @@ -428,35 +216,13 @@ svint16x4_t test_svmin_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s32_x411svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svmin_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s32_x4)(zdn, zm); @@ -464,35 +230,13 @@ svint32x4_t test_svmin_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s64_x411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svmin_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s64_x4)(zdn, zm); @@ -500,35 +244,13 @@ svint64x4_t test_svmin_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmin_single_u8_x411svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svmin_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u8_x4)(zdn, zm); @@ -536,35 +258,13 @@ svuint8x4_t test_svmin_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streami // CHECK-LABEL: @test_svmin_single_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u16_x412svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svmin_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u16_x4)(zdn, zm); @@ -572,35 +272,13 @@ svuint16x4_t test_svmin_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u32_x412svuint32x4_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svmin_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u32_x4)(zdn, zm); @@ -608,35 +286,13 @@ svuint32x4_t test_svmin_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u64_x412svuint64x4_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svmin_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u64_x4)(zdn, zm); @@ -644,35 +300,13 @@ svuint64x4_t test_svmin_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmin_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svmin_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_bf16_x4)(zdn, zm); @@ -680,35 +314,13 @@ svbfloat16x4_t test_svmin_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __ // CHECK-LABEL: @test_svmin_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f16_x413svfloat16x4_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svmin_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f16_x4)(zdn, zm); @@ -716,35 +328,13 @@ svfloat16x4_t test_svmin_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f32_x413svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svmin_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f32_x4)(zdn, zm); @@ -752,35 +342,13 @@ svfloat32x4_t test_svmin_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f64_x413svfloat64x4_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svmin_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f64_x4)(zdn, zm); @@ -790,27 +358,13 @@ svfloat64x4_t test_svmin_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_ // CHECK-LABEL: @test_svmin_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmin_s8_x210svint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svmin_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s8_x2)(zdn, zm); @@ -818,27 +372,13 @@ svint8x2_t test_svmin_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s16_x211svint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svmin_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s16_x2)(zdn, zm); @@ -846,27 +386,13 @@ svint16x2_t test_svmin_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s32_x211svint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svmin_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s32_x2)(zdn, zm); @@ -874,27 +400,13 @@ svint32x2_t test_svmin_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s64_x211svint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svmin_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s64_x2)(zdn, zm); @@ -902,27 +414,13 @@ svint64x2_t test_svmin_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmin_u8_x211svuint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svmin_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u8_x2)(zdn, zm); @@ -930,27 +428,13 @@ svuint8x2_t test_svmin_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u16_x212svuint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svmin_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u16_x2)(zdn, zm); @@ -958,27 +442,13 @@ svuint16x2_t test_svmin_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u32_x212svuint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svmin_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u32_x2)(zdn, zm); @@ -986,27 +456,13 @@ svuint32x2_t test_svmin_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u64_x212svuint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svmin_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u64_x2)(zdn, zm); @@ -1014,27 +470,13 @@ svuint64x2_t test_svmin_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svmin_bf16_x214svbfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svmin_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_bf16_x2)(zdn, zm); @@ -1042,27 +484,13 @@ svbfloat16x2_t test_svmin_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_s // CHECK-LABEL: @test_svmin_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svmin_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_f16_x2)(zdn, zm); @@ -1070,27 +498,13 @@ svfloat16x2_t test_svmin_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_strea // CHECK-LABEL: @test_svmin_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svmin_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_f32_x2)(zdn, zm); @@ -1098,27 +512,13 @@ svfloat32x2_t test_svmin_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_strea // CHECK-LABEL: @test_svmin_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svmin_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_f64_x2)(zdn, zm); @@ -1128,35 +528,13 @@ svfloat64x2_t test_svmin_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_strea // CHECK-LABEL: @test_svmin_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmin_s8_x410svint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svmin_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s8_x4)(zdn, zm); @@ -1164,35 +542,13 @@ svint8x4_t test_svmin_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s16_x411svint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svmin_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s16_x4)(zdn, zm); @@ -1200,35 +556,13 @@ svint16x4_t test_svmin_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s32_x411svint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svmin_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s32_x4)(zdn, zm); @@ -1236,35 +570,13 @@ svint32x4_t test_svmin_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s64_x411svint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svmin_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s64_x4)(zdn, zm); @@ -1272,35 +584,13 @@ svint64x4_t test_svmin_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmin_u8_x411svuint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svmin_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u8_x4)(zdn, zm); @@ -1308,35 +598,13 @@ svuint8x4_t test_svmin_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u16_x412svuint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svmin_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u16_x4)(zdn, zm); @@ -1344,35 +612,13 @@ svuint16x4_t test_svmin_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u32_x412svuint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svmin_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u32_x4)(zdn, zm); @@ -1380,35 +626,13 @@ svuint32x4_t test_svmin_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u64_x412svuint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svmin_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u64_x4)(zdn, zm); @@ -1416,35 +640,13 @@ svuint64x4_t test_svmin_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svmin_bf16_x414svbfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svmin_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_bf16_x4)(zdn, zm); @@ -1452,35 +654,13 @@ svbfloat16x4_t test_svmin_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_s // CHECK-LABEL: @test_svmin_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svmin_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_f16_x4)(zdn, zm); @@ -1488,35 +668,13 @@ svfloat16x4_t test_svmin_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_strea // CHECK-LABEL: @test_svmin_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svmin_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_f32_x4)(zdn, zm); @@ -1524,35 +682,13 @@ svfloat32x4_t test_svmin_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_strea // CHECK-LABEL: @test_svmin_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svmin_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_f64_x4)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c index 71b8914b816cad..838cb644e5e399 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svminnm_single_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svminnm_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svminnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_bf16_x2)(zdn, zm); @@ -47,27 +33,13 @@ svbfloat16x2_t test_svminnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) // CHECK-LABEL: @test_svminnm_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f16_x213svfloat16x2_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svminnm_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f16_x2)(zdn, zm); @@ -75,27 +47,13 @@ svfloat16x2_t test_svminnm_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __ar // CHECK-LABEL: @test_svminnm_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f32_x213svfloat32x2_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svminnm_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f32_x2)(zdn, zm); @@ -103,27 +61,13 @@ svfloat32x2_t test_svminnm_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __ar // CHECK-LABEL: @test_svminnm_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f64_x213svfloat64x2_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svminnm_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f64_x2)(zdn, zm); @@ -133,35 +77,13 @@ svfloat64x2_t test_svminnm_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __ar // CHECK-LABEL: @test_svminnm_single_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svminnm_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svminnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_bf16_x4)(zdn, zm); @@ -169,35 +91,13 @@ svbfloat16x4_t test_svminnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) // CHECK-LABEL: @test_svminnm_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f16_x413svfloat16x4_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svminnm_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f16_x4)(zdn, zm); @@ -205,35 +105,13 @@ svfloat16x4_t test_svminnm_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __ar // CHECK-LABEL: @test_svminnm_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f32_x413svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svminnm_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f32_x4)(zdn, zm); @@ -241,35 +119,13 @@ svfloat32x4_t test_svminnm_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __ar // CHECK-LABEL: @test_svminnm_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f64_x413svfloat64x4_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svminnm_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f64_x4)(zdn, zm); @@ -279,27 +135,13 @@ svfloat64x4_t test_svminnm_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __ar // CHECK-LABEL: @test_svminnm_multi_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_multi_bf16_x214svbfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svminnm_multi_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_bf16_x2)(zdn, zm); @@ -307,27 +149,13 @@ svbfloat16x2_t test_svminnm_multi_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) // CHECK-LABEL: @test_svminnm_multi_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svminnm_multi_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f16_x2)(zdn, zm); @@ -335,27 +163,13 @@ svfloat16x2_t test_svminnm_multi_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __a // CHECK-LABEL: @test_svminnm_multi_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svminnm_multi_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f32_x2)(zdn, zm); @@ -363,27 +177,13 @@ svfloat32x2_t test_svminnm_multi_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __a // CHECK-LABEL: @test_svminnm_multi_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svminnm_multi_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f64_x2)(zdn, zm); @@ -393,35 +193,13 @@ svfloat64x2_t test_svminnm_multi_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __a // CHECK-LABEL: @test_svminnm_multi_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_multi_bf16_x414svbfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svminnm_multi_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_bf16_x4)(zdn, zm); @@ -429,35 +207,13 @@ svbfloat16x4_t test_svminnm_multi_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) // CHECK-LABEL: @test_svminnm_multi_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svminnm_multi_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f16_x4)(zdn, zm); @@ -465,35 +221,13 @@ svfloat16x4_t test_svminnm_multi_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __a // CHECK-LABEL: @test_svminnm_multi_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svminnm_multi_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f32_x4)(zdn, zm); @@ -501,35 +235,13 @@ svfloat32x4_t test_svminnm_multi_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __a // CHECK-LABEL: @test_svminnm_multi_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svminnm_multi_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f64_x4)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c index da17c6b13d17c8..b8cd1e1653ea98 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c @@ -9,27 +9,13 @@ // CHECK-LABEL: @test_svread_ver_za8_u8_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_u8_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svread_ver_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_u8_vg2(0, base); @@ -37,27 +23,13 @@ svuint8x2_t test_svread_ver_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_ver_za8_s8_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_s8_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svread_ver_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_s8_vg2(0, base); @@ -65,27 +37,13 @@ svint8x2_t test_svread_ver_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svread_hor_za8_u8_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_u8_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svread_hor_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_u8_vg2(0, base); @@ -93,27 +51,13 @@ svuint8x2_t test_svread_hor_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_hor_za8_s8_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_s8_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_s8_vg2(0, base); @@ -121,35 +65,13 @@ svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svread_hor_za8_u8_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_u8_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_u8_vg4(0, base); @@ -157,35 +79,13 @@ svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_hor_za8_s8_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_s8_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_s8_vg4(0, base); @@ -193,35 +93,13 @@ svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svread_ver_za8_u8_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_u8_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svread_ver_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_u8_vg4(0, base); @@ -229,35 +107,13 @@ svuint8x4_t test_svread_ver_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_ver_za8_s8_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_s8_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svread_ver_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_s8_vg4(0, base); @@ -265,27 +121,13 @@ svint8x4_t test_svread_ver_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svread_hor_za16_u16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_u16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svread_hor_za16_u16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_u16_vg2(1, base); @@ -293,27 +135,13 @@ svuint16x2_t test_svread_hor_za16_u16_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za16_bf16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svread_hor_za16_bf16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svread_hor_za16_bf16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_bf16_vg2(1, base); @@ -321,27 +149,13 @@ svbfloat16x2_t test_svread_hor_za16_bf16_vg2(uint32_t base) __arm_streaming __ar // CHECK-LABEL: @test_svread_hor_za16_f16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_f16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svread_hor_za16_f16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_f16_vg2(1, base); @@ -349,27 +163,13 @@ svfloat16x2_t test_svread_hor_za16_f16_vg2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za16_s16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_s16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svread_hor_za16_s16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_s16_vg2(1, base); @@ -377,27 +177,13 @@ svint16x2_t test_svread_hor_za16_s16_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za16_u16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_u16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svread_ver_za16_u16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_u16_vg2(1, base); @@ -405,27 +191,13 @@ svuint16x2_t test_svread_ver_za16_u16_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za16_bf16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svread_ver_za16_bf16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svread_ver_za16_bf16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_bf16_vg2(1, base); @@ -433,27 +205,13 @@ svbfloat16x2_t test_svread_ver_za16_bf16_vg2(uint32_t base) __arm_streaming __ar // CHECK-LABEL: @test_svread_ver_za16_f16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_f16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svread_ver_za16_f16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_f16_vg2(1, base); @@ -461,27 +219,13 @@ svfloat16x2_t test_svread_ver_za16_f16_vg2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_ver_za16_s16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_s16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svread_ver_za16_s16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_s16_vg2(1, base); @@ -489,35 +233,13 @@ svint16x2_t test_svread_ver_za16_s16_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_hor_za16_u16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_u16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svread_hor_za16_u16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_u16_vg4(1, base); @@ -525,35 +247,13 @@ svuint16x4_t test_svread_hor_za16_u16_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za16_bf16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svread_hor_za16_bf16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svread_hor_za16_bf16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_bf16_vg4(1, base); @@ -561,35 +261,13 @@ svbfloat16x4_t test_svread_hor_za16_bf16_vg4(uint32_t base) __arm_streaming __ar // CHECK-LABEL: @test_svread_hor_za16_f16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_f16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svread_hor_za16_f16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_f16_vg4(1, base); @@ -597,35 +275,13 @@ svfloat16x4_t test_svread_hor_za16_f16_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za16_s16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_s16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svread_hor_za16_s16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_s16_vg4(1, base); @@ -633,35 +289,13 @@ svint16x4_t test_svread_hor_za16_s16_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za16_u16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_u16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svread_ver_za16_u16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_u16_vg4(1, base); @@ -669,35 +303,13 @@ svuint16x4_t test_svread_ver_za16_u16_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za16_bf16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svread_ver_za16_bf16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svread_ver_za16_bf16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_bf16_vg4(1, base); @@ -705,35 +317,13 @@ svbfloat16x4_t test_svread_ver_za16_bf16_vg4(uint32_t base) __arm_streaming __ar // CHECK-LABEL: @test_svread_ver_za16_f16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_f16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svread_ver_za16_f16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_f16_vg4(1, base); @@ -741,35 +331,13 @@ svfloat16x4_t test_svread_ver_za16_f16_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_ver_za16_s16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_s16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svread_ver_za16_s16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_s16_vg4(1, base); @@ -777,27 +345,13 @@ svint16x4_t test_svread_ver_za16_s16_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_hor_za32_u32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_u32_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svread_hor_za32_u32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_u32_vg2(3, base); @@ -805,27 +359,13 @@ svuint32x2_t test_svread_hor_za32_u32_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za32_f32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_f32_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svread_hor_za32_f32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_f32_vg2(3, base); @@ -833,27 +373,13 @@ svfloat32x2_t test_svread_hor_za32_f32_vg2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za32_s32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_s32_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svread_hor_za32_s32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_s32_vg2(3, base); @@ -861,27 +387,13 @@ svint32x2_t test_svread_hor_za32_s32_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za32_u32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_u32_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svread_ver_za32_u32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_u32_vg2(3, base); @@ -889,27 +401,13 @@ svuint32x2_t test_svread_ver_za32_u32_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za32_f32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_f32_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svread_ver_za32_f32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_f32_vg2(3, base); @@ -917,27 +415,13 @@ svfloat32x2_t test_svread_ver_za32_f32_vg2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_ver_za32_s32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_s32_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svread_ver_za32_s32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_s32_vg2(3, base); @@ -945,35 +429,13 @@ svint32x2_t test_svread_ver_za32_s32_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_hor_za32_u32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_u32_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svread_hor_za32_u32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_u32_vg4(3, base); @@ -981,35 +443,13 @@ svuint32x4_t test_svread_hor_za32_u32_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za32_f32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_f32_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svread_hor_za32_f32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_f32_vg4(3, base); @@ -1017,35 +457,13 @@ svfloat32x4_t test_svread_hor_za32_f32_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za32_s32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_s32_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svread_hor_za32_s32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_s32_vg4(3, base); @@ -1053,35 +471,13 @@ svint32x4_t test_svread_hor_za32_s32_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za32_u32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_u32_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svread_ver_za32_u32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_u32_vg4(3, base); @@ -1089,35 +485,13 @@ svuint32x4_t test_svread_ver_za32_u32_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za32_f32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_f32_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svread_ver_za32_f32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_f32_vg4(3, base); @@ -1125,35 +499,13 @@ svfloat32x4_t test_svread_ver_za32_f32_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_ver_za32_s32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_s32_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svread_ver_za32_s32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_s32_vg4(3, base); @@ -1161,27 +513,13 @@ svint32x4_t test_svread_ver_za32_s32_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_hor_za64_u64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_u64_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svread_hor_za64_u64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_u64_vg2(7, base); @@ -1189,27 +527,13 @@ svuint64x2_t test_svread_hor_za64_u64_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za64_f64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_f64_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svread_hor_za64_f64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_f64_vg2(7, base); @@ -1217,27 +541,13 @@ svfloat64x2_t test_svread_hor_za64_f64_vg2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za64_s64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_s64_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svread_hor_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_s64_vg2(7, base); @@ -1245,27 +555,13 @@ svint64x2_t test_svread_hor_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za64_u64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_u64_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svread_ver_za64_u64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_u64_vg2(7, base); @@ -1273,55 +569,28 @@ svuint64x2_t test_svread_ver_za64_u64_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za64_f64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_f64_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svread_ver_za64_f64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_f64_vg2(7, base); } +// // CHECK-LABEL: @test_svread_ver_za64_s64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_s64_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_s64_vg2(7, base); @@ -1329,35 +598,13 @@ svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_hor_za64_u64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_u64_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_u64_vg4(7, base); @@ -1365,35 +612,13 @@ svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za64_f64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_f64_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_f64_vg4(7, base); @@ -1401,35 +626,13 @@ svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za64_s64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_s64_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_s64_vg4(7, base); @@ -1437,35 +640,13 @@ svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za64_u64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_u64_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_u64_vg4(7, base); @@ -1473,35 +654,13 @@ svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za64_f64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_f64_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_f64_vg4(7, base); @@ -1509,35 +668,13 @@ svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_ver_za64_s64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_s64_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svread_ver_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_s64_vg4(7, base); @@ -1545,27 +682,13 @@ svint64x4_t test_svread_ver_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za8_s8_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_s8_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svread_za8_s8_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_s8_vg1x2(base); @@ -1573,27 +696,13 @@ svint8x2_t test_svread_za8_s8_vg1x2(uint32_t base) __arm_streaming __arm_in("za" // CHECK-LABEL: @test_svread_za8_u8_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_u8_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svread_za8_u8_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_u8_vg1x2(base); @@ -1601,56 +710,27 @@ svuint8x2_t test_svread_za8_u8_vg1x2(uint32_t base) __arm_streaming __arm_in("za // CHECK-LABEL: @test_svread_za16_s16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_s16_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svread_za16_s16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_s16_vg1x2(base); } -// // CHECK-LABEL: @test_svread_za16_u16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_u16_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svread_za16_u16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_u16_vg1x2(base); @@ -1658,27 +738,13 @@ svuint16x2_t test_svread_za16_u16_vg1x2(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za16_bf16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svread_za16_bf16_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svread_za16_bf16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_bf16_vg1x2(base); @@ -1686,27 +752,13 @@ svbfloat16x2_t test_svread_za16_bf16_vg1x2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_za16_f16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_f16_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svread_za16_f16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_f16_vg1x2(base); @@ -1714,27 +766,13 @@ svfloat16x2_t test_svread_za16_f16_vg1x2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za32_s32_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_s32_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svread_za32_s32_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_s32_vg1x2(base); @@ -1742,27 +780,13 @@ svint32x2_t test_svread_za32_s32_vg1x2(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_za32_u32_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_u32_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svread_za32_u32_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_u32_vg1x2(base); @@ -1770,27 +794,13 @@ svuint32x2_t test_svread_za32_u32_vg1x2(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za32_f32_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_f32_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svread_za32_f32_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_f32_vg1x2(base); @@ -1798,27 +808,13 @@ svfloat32x2_t test_svread_za32_f32_vg1x2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za64_u64_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_u64_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svread_za64_u64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_u64_vg1x2(base); @@ -1826,27 +822,13 @@ svuint64x2_t test_svread_za64_u64_vg1x2(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za64_f64_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_f64_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svread_za64_f64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_f64_vg1x2(base); @@ -1854,27 +836,13 @@ svfloat64x2_t test_svread_za64_f64_vg1x2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za64_s64_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_s64_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_s64_vg1x2(base); @@ -1882,35 +850,13 @@ svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_za8_s8_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_s8_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svread_za8_s8_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_s8_vg1x4(base); @@ -1918,35 +864,13 @@ svint8x4_t test_svread_za8_s8_vg1x4(uint32_t base) __arm_streaming __arm_in("za" // CHECK-LABEL: @test_svread_za8_u8_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_u8_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svread_za8_u8_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_u8_vg1x4(base); @@ -1954,35 +878,13 @@ svuint8x4_t test_svread_za8_u8_vg1x4(uint32_t base) __arm_streaming __arm_in("za // CHECK-LABEL: @test_svread_za16_s16_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_s16_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svread_za16_s16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_s16_vg1x4(base); @@ -1990,35 +892,13 @@ svint16x4_t test_svread_za16_s16_vg1x4(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_za16_u16_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_u16_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svread_za16_u16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_u16_vg1x4(base); @@ -2026,35 +906,13 @@ svuint16x4_t test_svread_za16_u16_vg1x4(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za16_bf16_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svread_za16_bf16_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svread_za16_bf16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_bf16_vg1x4(base); @@ -2062,35 +920,13 @@ svbfloat16x4_t test_svread_za16_bf16_vg1x4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_za16_f16_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_f16_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svread_za16_f16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_f16_vg1x4(base); @@ -2098,35 +934,13 @@ svfloat16x4_t test_svread_za16_f16_vg1x4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za32_s32_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_s32_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svread_za32_s32_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_s32_vg1x4(base); @@ -2134,35 +948,13 @@ svint32x4_t test_svread_za32_s32_vg1x4(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_za32_u32_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_u32_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svread_za32_u32_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_u32_vg1x4(base); @@ -2170,35 +962,13 @@ svuint32x4_t test_svread_za32_u32_vg1x4(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za32_f32_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_f32_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svread_za32_f32_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_f32_vg1x4(base); @@ -2206,35 +976,13 @@ svfloat32x4_t test_svread_za32_f32_vg1x4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za64_u64_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_u64_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_u64_vg1x4(base); @@ -2242,35 +990,13 @@ svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za64_f64_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_f64_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_f64_vg1x4(base); @@ -2278,35 +1004,13 @@ svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za64_s64_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_s64_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svread_za64_s64_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_s64_vg1x4(base); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c index 26804866a7563c..5ff801666df88a 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svqdmulh_single_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svqdmulh_single_s8_x210svint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svqdmulh_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s8_x2,,,)(zdn, zm); @@ -47,27 +33,13 @@ svint8x2_t test_svqdmulh_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streami // CHECK-LABEL: @test_svqdmulh_single_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s16_x211svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svqdmulh_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s16_x2,,,)(zdn, zm); @@ -75,27 +47,13 @@ svint16x2_t test_svqdmulh_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_str // CHECK-LABEL: @test_svqdmulh_single_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s32_x211svint32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svqdmulh_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s32_x2,,,)(zdn, zm); @@ -103,27 +61,13 @@ svint32x2_t test_svqdmulh_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_str // CHECK-LABEL: @test_svqdmulh_single_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s64_x211svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svqdmulh_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s64_x2,,,)(zdn, zm); @@ -133,35 +77,13 @@ svint64x2_t test_svqdmulh_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_str // CHECK-LABEL: @test_svqdmulh_single_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svqdmulh_single_s8_x410svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svqdmulh_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s8_x4,,,)(zdn, zm); @@ -169,35 +91,13 @@ svint8x4_t test_svqdmulh_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streami // CHECK-LABEL: @test_svqdmulh_single_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s16_x411svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svqdmulh_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s16_x4,,,)(zdn, zm); @@ -205,35 +105,13 @@ svint16x4_t test_svqdmulh_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_str // CHECK-LABEL: @test_svqdmulh_single_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s32_x411svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svqdmulh_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s32_x4,,,)(zdn, zm); @@ -241,35 +119,13 @@ svint32x4_t test_svqdmulh_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_str // CHECK-LABEL: @test_svqdmulh_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s64_x411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svqdmulh_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s64_x4,,,)(zdn, zm); @@ -279,27 +135,13 @@ svint64x4_t test_svqdmulh_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_str // CHECK-LABEL: @test_svqdmulh_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svqdmulh_s8_x210svint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svqdmulh_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s8_x2,,,)(zdn, zm); @@ -307,27 +149,13 @@ svint8x2_t test_svqdmulh_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svqdmulh_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s16_x211svint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svqdmulh_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s16_x2,,,)(zdn, zm); @@ -335,27 +163,13 @@ svint16x2_t test_svqdmulh_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streamin // CHECK-LABEL: @test_svqdmulh_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s32_x211svint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svqdmulh_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s32_x2,,,)(zdn, zm); @@ -363,27 +177,13 @@ svint32x2_t test_svqdmulh_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streamin // CHECK-LABEL: @test_svqdmulh_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s64_x211svint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svqdmulh_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s64_x2,,,)(zdn, zm); @@ -393,35 +193,13 @@ svint64x2_t test_svqdmulh_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streamin // CHECK-LABEL: @test_svqdmulh_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svqdmulh_s8_x410svint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svqdmulh_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s8_x4,,,)(zdn, zm); @@ -429,35 +207,13 @@ svint8x4_t test_svqdmulh_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svqdmulh_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s16_x411svint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svqdmulh_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s16_x4,,,)(zdn, zm); @@ -465,35 +221,13 @@ svint16x4_t test_svqdmulh_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streamin // CHECK-LABEL: @test_svqdmulh_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s32_x411svint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svqdmulh_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s32_x4,,,)(zdn, zm); @@ -501,35 +235,13 @@ svint32x4_t test_svqdmulh_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streamin // CHECK-LABEL: @test_svqdmulh_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s64_x411svint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svqdmulh_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s64_x4,,,)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c index fa66c4ff190142..d3b09f071c58f5 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svunpk_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv8i16( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s16_x2u10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv8i16( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svunpk_s16_x2(svint8_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s16,_s8_x2)(zn); @@ -47,27 +33,13 @@ svint16x2_t test_svunpk_s16_x2(svint8_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv8i16( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u16_x2u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv8i16( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svunpk_u16_x2(svuint8_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u16,_u8_x2)(zn); @@ -75,27 +47,13 @@ svuint16x2_t test_svunpk_u16_x2(svuint8_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv4i32( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s32_x2u11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv4i32( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svunpk_s32_x2(svint16_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s32,_s16_x2)(zn); @@ -103,27 +61,13 @@ svint32x2_t test_svunpk_s32_x2(svint16_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv4i32( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u32_x2u12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv4i32( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svunpk_u32_x2(svuint16_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u32,_u16_x2)(zn); @@ -131,27 +75,13 @@ svuint32x2_t test_svunpk_u32_x2(svuint16_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv2i64( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s64_x2u11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv2i64( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svunpk_s64_x2(svint32_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s64,_s32_x2)(zn); @@ -159,27 +89,13 @@ svint64x2_t test_svunpk_s64_x2(svint32_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv2i64( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u64_x2u12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv2i64( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svunpk_u64_x2(svuint32_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u64,_u32_x2)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c index 61718f0984ef31..45bc83eac7339e 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c @@ -19,35 +19,13 @@ // CHECK-LABEL: @test_svunpk_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s16_x410svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svunpk_s16_x4(svint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s16,_s8_x4)(zn); @@ -55,35 +33,13 @@ svint16x4_t test_svunpk_s16_x4(svint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u16_x411svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svunpk_u16_x4(svuint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u16,_u8_x4)(zn); @@ -91,35 +47,13 @@ svuint16x4_t test_svunpk_u16_x4(svuint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s32_x411svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svunpk_s32_x4(svint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s32,_s16_x4)(zn); @@ -127,35 +61,13 @@ svint32x4_t test_svunpk_s32_x4(svint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u32_x412svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svunpk_u32_x4(svuint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u32,_u16_x4)(zn); @@ -163,35 +75,13 @@ svuint32x4_t test_svunpk_u32_x4(svuint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s64_x411svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svunpk_s64_x4(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s64,_s32_x4)(zn); @@ -199,35 +89,13 @@ svint64x4_t test_svunpk_s64_x4(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u64_x412svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svunpk_u64_x4(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u64,_u32_x4)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c index c118a7192c6ca8..de983bcf793093 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c @@ -25,27 +25,13 @@ // CHECK-LABEL: @test_svadd_vector_single2_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svadd_vector_single2_s810svint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svadd_vector_single2_s8(svint8x2_t zn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s8_x2,,,)(zn, zm); @@ -53,27 +39,13 @@ svint8x2_t test_svadd_vector_single2_s8(svint8x2_t zn, svint8_t zm) __arm_stream // CHECK-LABEL: @test_svadd_vector_single2_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svadd_vector_single2_u811svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svadd_vector_single2_u8(svuint8x2_t zn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u8_x2,,,)(zn, zm); @@ -81,27 +53,13 @@ svuint8x2_t test_svadd_vector_single2_u8(svuint8x2_t zn, svuint8_t zm) __arm_str // CHECK-LABEL: @test_svadd_vector_single2_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s1611svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svadd_vector_single2_s16(svint16x2_t zn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s16_x2,,,)(zn, zm); @@ -109,27 +67,13 @@ svint16x2_t test_svadd_vector_single2_s16(svint16x2_t zn, svint16_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single2_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u1612svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svadd_vector_single2_u16(svuint16x2_t zn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u16_x2,,,)(zn, zm); @@ -137,27 +81,13 @@ svuint16x2_t test_svadd_vector_single2_u16(svuint16x2_t zn, svuint16_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single2_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s3211svint32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svadd_vector_single2_s32(svint32x2_t zn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s32_x2,,,)(zn, zm); @@ -165,27 +95,13 @@ svint32x2_t test_svadd_vector_single2_s32(svint32x2_t zn, svint32_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single2_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u3212svuint32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svadd_vector_single2_u32(svuint32x2_t zn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u32_x2,,,)(zn, zm); @@ -193,27 +109,13 @@ svuint32x2_t test_svadd_vector_single2_u32(svuint32x2_t zn, svuint32_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single2_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s6411svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svadd_vector_single2_s64(svint64x2_t zn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s64_x2,,,)(zn, zm); @@ -221,27 +123,13 @@ svint64x2_t test_svadd_vector_single2_s64(svint64x2_t zn, svint64_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single2_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u6412svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svadd_vector_single2_u64(svuint64x2_t zn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u64_x2,,,)(zn, zm); @@ -252,35 +140,13 @@ svuint64x2_t test_svadd_vector_single2_u64(svuint64x2_t zn, svuint64_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single4_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svadd_vector_single4_s810svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svadd_vector_single4_s8(svint8x4_t zn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s8_x4,,,)(zn, zm); @@ -288,35 +154,13 @@ svint8x4_t test_svadd_vector_single4_s8(svint8x4_t zn, svint8_t zm) __arm_stream // CHECK-LABEL: @test_svadd_vector_single4_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svadd_vector_single4_u811svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svadd_vector_single4_u8(svuint8x4_t zn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u8_x4,,,)(zn, zm); @@ -324,35 +168,13 @@ svuint8x4_t test_svadd_vector_single4_u8(svuint8x4_t zn, svuint8_t zm) __arm_str // CHECK-LABEL: @test_svadd_vector_single4_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s1611svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svadd_vector_single4_s16(svint16x4_t zn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s16_x4,,,)(zn, zm); @@ -360,35 +182,13 @@ svint16x4_t test_svadd_vector_single4_s16(svint16x4_t zn, svint16_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single4_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u1612svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svadd_vector_single4_u16(svuint16x4_t zn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u16_x4,,,)(zn, zm); @@ -396,35 +196,13 @@ svuint16x4_t test_svadd_vector_single4_u16(svuint16x4_t zn, svuint16_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single4_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s3211svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svadd_vector_single4_s32(svint32x4_t zn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s32_x4,,,)(zn, zm); @@ -432,35 +210,13 @@ svint32x4_t test_svadd_vector_single4_s32(svint32x4_t zn, svint32_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single4_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u3212svuint32x4_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svadd_vector_single4_u32(svuint32x4_t zn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u32_x4,,,)(zn, zm); @@ -468,35 +224,13 @@ svuint32x4_t test_svadd_vector_single4_u32(svuint32x4_t zn, svuint32_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single4_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s6411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svadd_vector_single4_s64(svint64x4_t zn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s64_x4,,,)(zn, zm); @@ -504,35 +238,13 @@ svint64x4_t test_svadd_vector_single4_s64(svint64x4_t zn, svint64_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single4_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u6412svuint64x4_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svadd_vector_single4_u64(svuint64x4_t zn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u64_x4,,,)(zn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_rshl.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_rshl.c index 87160444e3c0d1..af5a389c7f7362 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_rshl.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_rshl.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svrshl_single_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_single_s8_x210svint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svrshl_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s8_x2,,,)(zdn, zm); @@ -47,27 +33,13 @@ svint8x2_t test_svrshl_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svrshl_single_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s16_x211svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svrshl_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s16_x2,,,)(zdn, zm); @@ -75,27 +47,13 @@ svint16x2_t test_svrshl_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s32_x211svint32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svrshl_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s32_x2,,,)(zdn, zm); @@ -103,27 +61,13 @@ svint32x2_t test_svrshl_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s64_x211svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svrshl_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s64_x2,,,)(zdn, zm); @@ -131,27 +75,13 @@ svint64x2_t test_svrshl_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_single_u8_x211svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svrshl_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u8_x2,,,)(zdn, zm); @@ -159,27 +89,13 @@ svuint8x2_t test_svrshl_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_stream // CHECK-LABEL: @test_svrshl_single_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u16_x212svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svrshl_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u16_x2,,,)(zdn, zm); @@ -187,27 +103,13 @@ svuint16x2_t test_svrshl_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_st // CHECK-LABEL: @test_svrshl_single_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u32_x212svuint32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svrshl_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u32_x2,,,)(zdn, zm); @@ -215,27 +117,13 @@ svuint32x2_t test_svrshl_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_st // CHECK-LABEL: @test_svrshl_single_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u64_x212svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svrshl_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u64_x2,,,)(zdn, zm); @@ -245,35 +133,13 @@ svuint64x2_t test_svrshl_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_st // CHECK-LABEL: @test_svrshl_single_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_single_s8_x410svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svrshl_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s8_x4,,,)(zdn, zm); @@ -281,35 +147,13 @@ svint8x4_t test_svrshl_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svrshl_single_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s16_x411svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svrshl_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s16_x4,,,)(zdn, zm); @@ -317,35 +161,13 @@ svint16x4_t test_svrshl_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s32_x411svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svrshl_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s32_x4,,,)(zdn, zm); @@ -353,35 +175,13 @@ svint32x4_t test_svrshl_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s64_x411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svrshl_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s64_x4,,,)(zdn, zm); @@ -389,35 +189,13 @@ svint64x4_t test_svrshl_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_single_u8_x411svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svrshl_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u8_x4,,,)(zdn, zm); @@ -425,35 +203,13 @@ svuint8x4_t test_svrshl_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_stream // CHECK-LABEL: @test_svrshl_single_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u16_x412svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svrshl_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u16_x4,,,)(zdn, zm); @@ -461,35 +217,13 @@ svuint16x4_t test_svrshl_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_st // CHECK-LABEL: @test_svrshl_single_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u32_x412svuint32x4_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svrshl_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u32_x4,,,)(zdn, zm); @@ -497,35 +231,13 @@ svuint32x4_t test_svrshl_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_st // CHECK-LABEL: @test_svrshl_single_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u64_x412svuint64x4_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svrshl_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u64_x4,,,)(zdn, zm); @@ -535,27 +247,13 @@ svuint64x4_t test_svrshl_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_st // CHECK-LABEL: @test_svrshl_multi_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svrshl_multi_s8_x210svint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svrshl_multi_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s8_x2,,,)(zdn, zm); @@ -563,27 +261,13 @@ svint8x2_t test_svrshl_multi_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streamin // CHECK-LABEL: @test_svrshl_multi_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s16_x211svint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svrshl_multi_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s16_x2,,,)(zdn, zm); @@ -591,27 +275,13 @@ svint16x2_t test_svrshl_multi_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s32_x211svint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svrshl_multi_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s32_x2,,,)(zdn, zm); @@ -619,27 +289,13 @@ svint32x2_t test_svrshl_multi_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s64_x211svint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svrshl_multi_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s64_x2,,,)(zdn, zm); @@ -647,27 +303,13 @@ svint64x2_t test_svrshl_multi_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svrshl_multi_u8_x211svuint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svrshl_multi_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u8_x2,,,)(zdn, zm); @@ -675,27 +317,13 @@ svuint8x2_t test_svrshl_multi_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_multi_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u16_x212svuint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svrshl_multi_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u16_x2,,,)(zdn, zm); @@ -703,27 +331,13 @@ svuint16x2_t test_svrshl_multi_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u32_x212svuint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svrshl_multi_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u32_x2,,,)(zdn, zm); @@ -731,27 +345,13 @@ svuint32x2_t test_svrshl_multi_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u64_x212svuint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svrshl_multi_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u64_x2,,,)(zdn, zm); @@ -761,35 +361,13 @@ svuint64x2_t test_svrshl_multi_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svrshl_multi_s8_x410svint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svrshl_multi_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s8_x4,,,)(zdn, zm); @@ -797,35 +375,13 @@ svint8x4_t test_svrshl_multi_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streamin // CHECK-LABEL: @test_svrshl_multi_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s16_x411svint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svrshl_multi_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s16_x4,,,)(zdn, zm); @@ -833,35 +389,13 @@ svint16x4_t test_svrshl_multi_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s32_x411svint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svrshl_multi_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s32_x4,,,)(zdn, zm); @@ -869,35 +403,13 @@ svint32x4_t test_svrshl_multi_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s64_x411svint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svrshl_multi_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s64_x4,,,)(zdn, zm); @@ -905,35 +417,13 @@ svint64x4_t test_svrshl_multi_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svrshl_multi_u8_x411svuint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svrshl_multi_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u8_x4,,,)(zdn, zm); @@ -941,35 +431,13 @@ svuint8x4_t test_svrshl_multi_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_multi_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u16_x412svuint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svrshl_multi_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u16_x4,,,)(zdn, zm); @@ -977,35 +445,13 @@ svuint16x4_t test_svrshl_multi_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u32_x412svuint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svrshl_multi_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u32_x4,,,)(zdn, zm); @@ -1013,35 +459,13 @@ svuint32x4_t test_svrshl_multi_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u64_x412svuint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svrshl_multi_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u64_x4,,,)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx2.c index a95f89faf77834..4047b2fbd19652 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx2.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svsel_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svsel_s8_x2u11__SVCount_t10svint8x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svsel_s8_x2(svcount_t pn, svint8x2_t zn, svint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s8_x2)(pn, zn, zm); @@ -47,27 +33,13 @@ svint8x2_t test_svsel_s8_x2(svcount_t pn, svint8x2_t zn, svint8x2_t zm) __arm_st // CHECK-LABEL: @test_svsel_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svsel_u8_x2u11__SVCount_t11svuint8x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svsel_u8_x2(svcount_t pn, svuint8x2_t zn, svuint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u8_x2)(pn, zn, zm); @@ -77,27 +49,13 @@ svuint8x2_t test_svsel_u8_x2(svcount_t pn, svuint8x2_t zn, svuint8x2_t zm) __arm // CHECK-LABEL: @test_svsel_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s16_x2u11__SVCount_t11svint16x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svsel_s16_x2(svcount_t pn, svint16x2_t zn, svint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s16_x2)(pn, zn, zm); @@ -105,27 +63,13 @@ svint16x2_t test_svsel_s16_x2(svcount_t pn, svint16x2_t zn, svint16x2_t zm) __ar // CHECK-LABEL: @test_svsel_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u16_x2u11__SVCount_t12svuint16x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svsel_u16_x2(svcount_t pn, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u16_x2)(pn, zn, zm); @@ -133,27 +77,13 @@ svuint16x2_t test_svsel_u16_x2(svcount_t pn, svuint16x2_t zn, svuint16x2_t zm) _ // CHECK-LABEL: @test_svsel_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f16_x2u11__SVCount_t13svfloat16x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svsel_f16_x2(svcount_t pn, svfloat16x2_t zn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f16_x2)(pn, zn, zm); @@ -161,27 +91,13 @@ svfloat16x2_t test_svsel_f16_x2(svcount_t pn, svfloat16x2_t zn, svfloat16x2_t zm // CHECK-LABEL: @test_svsel_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svsel_bf16_x2u11__SVCount_t14svbfloat16x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svsel_bf16_x2(svcount_t pn, svbfloat16x2_t zn, svbfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_bf16_x2)(pn, zn, zm); @@ -191,27 +107,13 @@ svbfloat16x2_t test_svsel_bf16_x2(svcount_t pn, svbfloat16x2_t zn, svbfloat16x2_ // CHECK-LABEL: @test_svsel_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s32_x2u11__SVCount_t11svint32x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svsel_s32_x2(svcount_t pn, svint32x2_t zn, svint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s32_x2)(pn, zn, zm); @@ -219,27 +121,13 @@ svint32x2_t test_svsel_s32_x2(svcount_t pn, svint32x2_t zn, svint32x2_t zm) __ar // CHECK-LABEL: @test_svsel_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u32_x2u11__SVCount_t12svuint32x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svsel_u32_x2(svcount_t pn, svuint32x2_t zn, svuint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u32_x2)(pn, zn, zm); @@ -247,27 +135,13 @@ svuint32x2_t test_svsel_u32_x2(svcount_t pn, svuint32x2_t zn, svuint32x2_t zm) _ // CHECK-LABEL: @test_svsel_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f32_x2u11__SVCount_t13svfloat32x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svsel_f32_x2(svcount_t pn, svfloat32x2_t zn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f32_x2)(pn, zn, zm); @@ -277,27 +151,13 @@ svfloat32x2_t test_svsel_f32_x2(svcount_t pn, svfloat32x2_t zn, svfloat32x2_t zm // CHECK-LABEL: @test_svsel_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s64_x2u11__SVCount_t11svint64x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svsel_s64_x2(svcount_t pn, svint64x2_t zn, svint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s64_x2)(pn, zn, zm); @@ -305,27 +165,13 @@ svint64x2_t test_svsel_s64_x2(svcount_t pn, svint64x2_t zn, svint64x2_t zm) __ar // CHECK-LABEL: @test_svsel_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u64_x2u11__SVCount_t12svuint64x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svsel_u64_x2(svcount_t pn, svuint64x2_t zn, svuint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u64_x2)(pn, zn, zm); @@ -333,27 +179,13 @@ svuint64x2_t test_svsel_u64_x2(svcount_t pn, svuint64x2_t zn, svuint64x2_t zm) _ // CHECK-LABEL: @test_svsel_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f64_x2u11__SVCount_t13svfloat64x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svsel_f64_x2(svcount_t pn, svfloat64x2_t zn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f64_x2)(pn, zn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx4.c index 997b6acf962443..871d70943c9df4 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx4.c @@ -19,35 +19,13 @@ // CHECK-LABEL: @test_svsel_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svsel_s8_x4u11__SVCount_t10svint8x4_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svsel_s8_x4(svcount_t pn, svint8x4_t zn1, svint8x4_t zn2) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s8_x4)(pn, zn1, zn2); @@ -55,35 +33,13 @@ svint8x4_t test_svsel_s8_x4(svcount_t pn, svint8x4_t zn1, svint8x4_t zn2) __arm_ // CHECK-LABEL: @test_svsel_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svsel_u8_x4u11__SVCount_t11svuint8x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svsel_u8_x4(svcount_t pn, svuint8x4_t zn1, svuint8x4_t zn2, svuint8x4_t zn3, svuint8x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u8_x4)(pn, zn1, zn2); @@ -93,35 +49,13 @@ svuint8x4_t test_svsel_u8_x4(svcount_t pn, svuint8x4_t zn1, svuint8x4_t zn2, svu // CHECK-LABEL: @test_svsel_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s16_x4u11__SVCount_t11svint16x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svsel_s16_x4(svcount_t pn, svint16x4_t zn1, svint16x4_t zn2, svint16x4_t zn3, svint16x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s16_x4)(pn, zn1, zn2); @@ -129,35 +63,13 @@ svint16x4_t test_svsel_s16_x4(svcount_t pn, svint16x4_t zn1, svint16x4_t zn2, sv // CHECK-LABEL: @test_svsel_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u16_x4u11__SVCount_t12svuint16x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svsel_u16_x4(svcount_t pn, svuint16x4_t zn1, svuint16x4_t zn2, svuint16x4_t zn3, svuint16x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u16_x4)(pn, zn1, zn2); @@ -165,35 +77,13 @@ svuint16x4_t test_svsel_u16_x4(svcount_t pn, svuint16x4_t zn1, svuint16x4_t zn2, // CHECK-LABEL: @test_svsel_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f16_x4u11__SVCount_t13svfloat16x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svsel_f16_x4(svcount_t pn, svfloat16x4_t zn1, svfloat16x4_t zn2, svfloat16x4_t zn3, svfloat16x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f16_x4)(pn, zn1, zn2); @@ -201,35 +91,13 @@ svfloat16x4_t test_svsel_f16_x4(svcount_t pn, svfloat16x4_t zn1, svfloat16x4_t z // CHECK-LABEL: @test_svsel_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svsel_bf16_x4u11__SVCount_t14svbfloat16x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svsel_bf16_x4(svcount_t pn, svbfloat16x4_t zn1, svbfloat16x4_t zn2, svbfloat16x4_t zn3, svbfloat16x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_bf16_x4)(pn, zn1, zn2); @@ -239,35 +107,13 @@ svbfloat16x4_t test_svsel_bf16_x4(svcount_t pn, svbfloat16x4_t zn1, svbfloat16x4 // CHECK-LABEL: @test_svsel_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s32_x4u11__SVCount_t11svint32x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svsel_s32_x4(svcount_t pn, svint32x4_t zn1, svint32x4_t zn2, svint32x4_t zn3, svint32x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s32_x4)(pn, zn1, zn2); @@ -275,35 +121,13 @@ svint32x4_t test_svsel_s32_x4(svcount_t pn, svint32x4_t zn1, svint32x4_t zn2, sv // CHECK-LABEL: @test_svsel_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u32_x4u11__SVCount_t12svuint32x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svsel_u32_x4(svcount_t pn, svuint32x4_t zn1, svuint32x4_t zn2, svuint32x4_t zn3, svuint32x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u32_x4)(pn, zn1, zn2); @@ -311,35 +135,13 @@ svuint32x4_t test_svsel_u32_x4(svcount_t pn, svuint32x4_t zn1, svuint32x4_t zn2, // CHECK-LABEL: @test_svsel_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f32_x4u11__SVCount_t13svfloat32x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svsel_f32_x4(svcount_t pn, svfloat32x4_t zn1, svfloat32x4_t zn2, svfloat32x4_t zn3, svfloat32x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f32_x4)(pn, zn1, zn2); @@ -349,35 +151,13 @@ svfloat32x4_t test_svsel_f32_x4(svcount_t pn, svfloat32x4_t zn1, svfloat32x4_t z // CHECK-LABEL: @test_svsel_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s64_x4u11__SVCount_t11svint64x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svsel_s64_x4(svcount_t pn, svint64x4_t zn1, svint64x4_t zn2, svint64x4_t zn3, svint64x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s64_x4)(pn, zn1, zn2); @@ -385,35 +165,13 @@ svint64x4_t test_svsel_s64_x4(svcount_t pn, svint64x4_t zn1, svint64x4_t zn2, sv // CHECK-LABEL: @test_svsel_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u64_x4u11__SVCount_t12svuint64x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svsel_u64_x4(svcount_t pn, svuint64x4_t zn1, svuint64x4_t zn2, svuint64x4_t zn3, svuint64x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u64_x4)(pn, zn1, zn2); @@ -421,35 +179,13 @@ svuint64x4_t test_svsel_u64_x4(svcount_t pn, svuint64x4_t zn1, svuint64x4_t zn2, // CHECK-LABEL: @test_svsel_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f64_x4u11__SVCount_t13svfloat64x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svsel_f64_x4(svcount_t pn, svfloat64x4_t zn1, svfloat64x4_t zn2, svfloat64x4_t zn3, svfloat64x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f64_x4)(pn, zn1, zn2); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx2.c index de605bab67cc35..9a66ee5262082a 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx2.c @@ -20,27 +20,13 @@ // CHECK-LABEL: @test_svuzp_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svuzp_s8_x210svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svuzp_s8_x2(svint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s8_x2)(zn); @@ -48,27 +34,13 @@ svint8x2_t test_svuzp_s8_x2(svint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svuzp_u8_x211svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svuzp_u8_x2(svuint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u8_x2)(zn); @@ -78,27 +50,13 @@ svuint8x2_t test_svuzp_u8_x2(svuint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s16_x211svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svuzp_s16_x2(svint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s16_x2)(zn); @@ -106,27 +64,13 @@ svint16x2_t test_svuzp_s16_x2(svint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u16_x212svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svuzp_u16_x2(svuint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u16_x2)(zn); @@ -134,27 +78,13 @@ svuint16x2_t test_svuzp_u16_x2(svuint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f16_x213svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svuzp_f16_x2(svfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f16_x2)(zn); @@ -162,27 +92,13 @@ svfloat16x2_t test_svuzp_f16_x2(svfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzp_bf16_x214svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svuzp_bf16_x2(svbfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_bf16_x2)(zn); @@ -192,27 +108,13 @@ svbfloat16x2_t test_svuzp_bf16_x2(svbfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svuzp_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s32_x2)(zn); @@ -220,27 +122,13 @@ svint32x2_t test_svuzp_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svuzp_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u32_x2)(zn); @@ -248,27 +136,13 @@ svuint32x2_t test_svuzp_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svuzp_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f32_x2)(zn); @@ -278,27 +152,13 @@ svfloat32x2_t test_svuzp_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s64_x211svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svuzp_s64_x2(svint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s64_x2)(zn); @@ -306,27 +166,13 @@ svint64x2_t test_svuzp_s64_x2(svint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u64_x212svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svuzp_u64_x2(svuint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u64_x2)(zn); @@ -334,27 +180,13 @@ svuint64x2_t test_svuzp_u64_x2(svuint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svuzp_f64_x2(svfloat64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f64_x2)(zn); @@ -364,27 +196,13 @@ svfloat64x2_t test_svuzp_f64_x2(svfloat64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzpq_s8_x210svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svuzpq_s8_x2(svint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s8_x2)(zn); @@ -392,27 +210,13 @@ svint8x2_t test_svuzpq_s8_x2(svint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzpq_u8_x211svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svuzpq_u8_x2(svuint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u8_x2)(zn); @@ -420,27 +224,13 @@ svuint8x2_t test_svuzpq_u8_x2(svuint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s16_x211svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svuzpq_s16_x2(svint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s16_x2)(zn); @@ -448,27 +238,13 @@ svint16x2_t test_svuzpq_s16_x2(svint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u16_x212svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svuzpq_u16_x2(svuint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u16_x2)(zn); @@ -476,27 +252,13 @@ svuint16x2_t test_svuzpq_u16_x2(svuint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f16_x213svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svuzpq_f16_x2(svfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f16_x2)(zn); @@ -504,27 +266,13 @@ svfloat16x2_t test_svuzpq_f16_x2(svfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svuzpq_bf16_x214svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svuzpq_bf16_x2(svbfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_bf16_x2)(zn); @@ -532,27 +280,13 @@ svbfloat16x2_t test_svuzpq_bf16_x2(svbfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svuzpq_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s32_x2)(zn); @@ -560,27 +294,13 @@ svint32x2_t test_svuzpq_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svuzpq_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u32_x2)(zn); @@ -588,27 +308,13 @@ svuint32x2_t test_svuzpq_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svuzpq_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f32_x2)(zn); @@ -616,27 +322,13 @@ svfloat32x2_t test_svuzpq_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s64_x211svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svuzpq_s64_x2(svint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s64_x2)(zn); @@ -644,27 +336,13 @@ svint64x2_t test_svuzpq_s64_x2(svint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u64_x212svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svuzpq_u64_x2(svuint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u64_x2)(zn); @@ -672,27 +350,13 @@ svuint64x2_t test_svuzpq_u64_x2(svuint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svuzpq_f64_x2(svfloat64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f64_x2)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx4.c index aa210f59508b59..131928615edcd7 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx4.c @@ -20,35 +20,13 @@ // CHECK-LABEL: @test_svuzp_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svuzp_s8_x410svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svuzp_s8_x4(svint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s8_x4)(zn); @@ -56,35 +34,13 @@ svint8x4_t test_svuzp_s8_x4(svint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svuzp_u8_x411svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svuzp_u8_x4(svuint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u8_x4)(zn); @@ -94,35 +50,13 @@ svuint8x4_t test_svuzp_u8_x4(svuint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s16_x411svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svuzp_s16_x4(svint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s16_x4)(zn); @@ -130,35 +64,13 @@ svint16x4_t test_svuzp_s16_x4(svint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u16_x412svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svuzp_u16_x4(svuint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u16_x4)(zn); @@ -166,35 +78,13 @@ svuint16x4_t test_svuzp_u16_x4(svuint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f16_x413svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svuzp_f16_x4(svfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f16_x4)(zn); @@ -202,35 +92,13 @@ svfloat16x4_t test_svuzp_f16_x4(svfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzp_bf16_x414svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svuzp_bf16_x4(svbfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_bf16_x4)(zn); @@ -240,35 +108,13 @@ svbfloat16x4_t test_svuzp_bf16_x4(svbfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svuzp_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s32_x4)(zn); @@ -276,35 +122,13 @@ svint32x4_t test_svuzp_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svuzp_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u32_x4)(zn); @@ -312,35 +136,13 @@ svuint32x4_t test_svuzp_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svuzp_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f32_x4)(zn); @@ -350,35 +152,13 @@ svfloat32x4_t test_svuzp_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svuzp_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s64_x4)(zn); @@ -386,35 +166,13 @@ svint64x4_t test_svuzp_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svuzp_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u64_x4)(zn); @@ -422,35 +180,13 @@ svuint64x4_t test_svuzp_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f64_x413svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svuzp_f64_x4(svfloat64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f64_x4)(zn); @@ -460,35 +196,13 @@ svfloat64x4_t test_svuzp_f64_x4(svfloat64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzpq_s8_x410svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svuzpq_s8_x4(svint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s8_x4)(zn); @@ -496,35 +210,13 @@ svint8x4_t test_svuzpq_s8_x4(svint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzpq_u8_x411svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svuzpq_u8_x4(svuint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u8_x4)(zn); @@ -532,35 +224,13 @@ svuint8x4_t test_svuzpq_u8_x4(svuint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s16_x411svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svuzpq_s16_x4(svint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s16_x4)(zn); @@ -568,35 +238,13 @@ svint16x4_t test_svuzpq_s16_x4(svint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u16_x412svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svuzpq_u16_x4(svuint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u16_x4)(zn); @@ -604,35 +252,13 @@ svuint16x4_t test_svuzpq_u16_x4(svuint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f16_x413svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svuzpq_f16_x4(svfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f16_x4)(zn); @@ -640,35 +266,13 @@ svfloat16x4_t test_svuzpq_f16_x4(svfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svuzpq_bf16_x414svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svuzpq_bf16_x4(svbfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_bf16_x4)(zn); @@ -676,35 +280,13 @@ svbfloat16x4_t test_svuzpq_bf16_x4(svbfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svuzpq_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s32_x4)(zn); @@ -712,35 +294,13 @@ svint32x4_t test_svuzpq_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svuzpq_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u32_x4)(zn); @@ -748,35 +308,13 @@ svuint32x4_t test_svuzpq_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svuzpq_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f32_x4)(zn); @@ -784,35 +322,13 @@ svfloat32x4_t test_svuzpq_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svuzpq_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s64_x4)(zn); @@ -820,35 +336,13 @@ svint64x4_t test_svuzpq_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svuzpq_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u64_x4)(zn); @@ -856,35 +350,13 @@ svuint64x4_t test_svuzpq_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f64_x413svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svuzpq_f64_x4(svfloat64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f64_x4)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx2.c index a29c347e3197f3..787b7d0b3ea1a0 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx2.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svzip_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svzip_s8_x210svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svzip_s8_x2(svint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s8_x2)(zn); @@ -47,27 +33,13 @@ svint8x2_t test_svzip_s8_x2(svint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svzip_u8_x211svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svzip_u8_x2(svuint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u8_x2)(zn); @@ -77,27 +49,13 @@ svuint8x2_t test_svzip_u8_x2(svuint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s16_x211svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svzip_s16_x2(svint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s16_x2)(zn); @@ -105,27 +63,13 @@ svint16x2_t test_svzip_s16_x2(svint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u16_x212svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svzip_u16_x2(svuint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u16_x2)(zn); @@ -133,27 +77,13 @@ svuint16x2_t test_svzip_u16_x2(svuint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f16_x213svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svzip_f16_x2(svfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f16_x2)(zn); @@ -161,27 +91,13 @@ svfloat16x2_t test_svzip_f16_x2(svfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzip_bf16_x214svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svzip_bf16_x2(svbfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_bf16_x2)(zn); @@ -191,27 +107,13 @@ svbfloat16x2_t test_svzip_bf16_x2(svbfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svzip_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s32_x2)(zn); @@ -219,27 +121,13 @@ svint32x2_t test_svzip_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svzip_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u32_x2)(zn); @@ -247,27 +135,13 @@ svuint32x2_t test_svzip_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svzip_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f32_x2)(zn); @@ -277,27 +151,13 @@ svfloat32x2_t test_svzip_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s64_x211svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svzip_s64_x2(svint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s64_x2)(zn); @@ -305,27 +165,13 @@ svint64x2_t test_svzip_s64_x2(svint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u64_x212svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svzip_u64_x2(svuint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u64_x2)(zn); @@ -333,27 +179,13 @@ svuint64x2_t test_svzip_u64_x2(svuint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svzip_f64_x2(svfloat64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f64_x2)(zn); @@ -363,27 +195,13 @@ svfloat64x2_t test_svzip_f64_x2(svfloat64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzipq_s8_x210svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svzipq_s8_x2(svint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s8_x2)(zn); @@ -391,27 +209,13 @@ svint8x2_t test_svzipq_s8_x2(svint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzipq_u8_x211svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svzipq_u8_x2(svuint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u8_x2)(zn); @@ -419,27 +223,13 @@ svuint8x2_t test_svzipq_u8_x2(svuint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s16_x211svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svzipq_s16_x2(svint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s16_x2)(zn); @@ -447,27 +237,13 @@ svint16x2_t test_svzipq_s16_x2(svint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u16_x212svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svzipq_u16_x2(svuint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u16_x2)(zn); @@ -475,27 +251,13 @@ svuint16x2_t test_svzipq_u16_x2(svuint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f16_x213svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svzipq_f16_x2(svfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f16_x2)(zn); @@ -503,27 +265,13 @@ svfloat16x2_t test_svzipq_f16_x2(svfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svzipq_bf16_x214svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svzipq_bf16_x2(svbfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_bf16_x2)(zn); @@ -531,27 +279,13 @@ svbfloat16x2_t test_svzipq_bf16_x2(svbfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svzipq_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s32_x2)(zn); @@ -559,27 +293,13 @@ svint32x2_t test_svzipq_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svzipq_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u32_x2)(zn); @@ -587,27 +307,13 @@ svuint32x2_t test_svzipq_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svzipq_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f32_x2)(zn); @@ -615,27 +321,13 @@ svfloat32x2_t test_svzipq_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s64_x211svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svzipq_s64_x2(svint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s64_x2)(zn); @@ -643,27 +335,13 @@ svint64x2_t test_svzipq_s64_x2(svint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u64_x212svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svzipq_u64_x2(svuint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u64_x2)(zn); @@ -671,27 +349,13 @@ svuint64x2_t test_svzipq_u64_x2(svuint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svzipq_f64_x2(svfloat64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f64_x2)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c index be40ecb4bcaa35..9bea471bc98375 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c @@ -19,35 +19,13 @@ // CHECK-LABEL: @test_svzip_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svzip_s8_x410svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svzip_s8_x4(svint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s8_x4)(zn); @@ -55,35 +33,13 @@ svint8x4_t test_svzip_s8_x4(svint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svzip_u8_x411svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svzip_u8_x4(svuint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u8_x4)(zn); @@ -93,35 +49,13 @@ svuint8x4_t test_svzip_u8_x4(svuint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s16_x411svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svzip_s16_x4(svint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s16_x4)(zn); @@ -129,35 +63,13 @@ svint16x4_t test_svzip_s16_x4(svint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u16_x412svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svzip_u16_x4(svuint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u16_x4)(zn); @@ -165,35 +77,13 @@ svuint16x4_t test_svzip_u16_x4(svuint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f16_x413svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svzip_f16_x4(svfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f16_x4)(zn); @@ -201,35 +91,13 @@ svfloat16x4_t test_svzip_f16_x4(svfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzip_bf16_x414svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svzip_bf16_x4(svbfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_bf16_x4)(zn); @@ -239,35 +107,13 @@ svbfloat16x4_t test_svzip_bf16_x4(svbfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svzip_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s32_x4)(zn); @@ -275,35 +121,13 @@ svint32x4_t test_svzip_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svzip_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u32_x4)(zn); @@ -311,35 +135,13 @@ svuint32x4_t test_svzip_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svzip_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f32_x4)(zn); @@ -349,35 +151,13 @@ svfloat32x4_t test_svzip_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svzip_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s64_x4)(zn); @@ -385,35 +165,13 @@ svint64x4_t test_svzip_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svzip_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u64_x4)(zn); @@ -421,35 +179,13 @@ svuint64x4_t test_svzip_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f64_x413svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svzip_f64_x4(svfloat64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f64_x4)(zn); @@ -459,35 +195,13 @@ svfloat64x4_t test_svzip_f64_x4(svfloat64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzipq_s8_x410svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svzipq_s8_x4(svint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s8_x4)(zn); @@ -495,35 +209,13 @@ svint8x4_t test_svzipq_s8_x4(svint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzipq_u8_x411svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svzipq_u8_x4(svuint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u8_x4)(zn); @@ -531,35 +223,13 @@ svuint8x4_t test_svzipq_u8_x4(svuint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s16_x411svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svzipq_s16_x4(svint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s16_x4)(zn); @@ -567,35 +237,13 @@ svint16x4_t test_svzipq_s16_x4(svint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u16_x412svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svzipq_u16_x4(svuint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u16_x4)(zn); @@ -603,35 +251,13 @@ svuint16x4_t test_svzipq_u16_x4(svuint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f16_x413svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svzipq_f16_x4(svfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f16_x4)(zn); @@ -639,35 +265,13 @@ svfloat16x4_t test_svzipq_f16_x4(svfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svzipq_bf16_x414svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svzipq_bf16_x4(svbfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_bf16_x4)(zn); @@ -675,35 +279,13 @@ svbfloat16x4_t test_svzipq_bf16_x4(svbfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svzipq_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s32_x4)(zn); @@ -711,35 +293,13 @@ svint32x4_t test_svzipq_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svzipq_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u32_x4)(zn); @@ -747,35 +307,13 @@ svuint32x4_t test_svzipq_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svzipq_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f32_x4)(zn); @@ -783,35 +321,13 @@ svfloat32x4_t test_svzipq_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svzipq_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s64_x4)(zn); @@ -819,35 +335,13 @@ svint64x4_t test_svzipq_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svzipq_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u64_x4)(zn); @@ -855,35 +349,13 @@ svuint64x4_t test_svzipq_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f64_x413svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svzipq_f64_x4(svfloat64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f64_x4)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c index 77b02b4c4708fa..7fa2249827c4e0 100644 --- a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c +++ b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c @@ -11,28 +11,14 @@ // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za8_s8_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z26test_svreadz_hor_za8_s8_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svreadz_hor_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -42,28 +28,14 @@ svint8x2_t test_svreadz_hor_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za8_u8_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z26test_svreadz_hor_za8_u8_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svreadz_hor_za8_u8_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -73,28 +45,14 @@ svuint8x2_t test_svreadz_hor_za8_u8_x2(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za16_s16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za16_s16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svreadz_hor_za16_s16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -104,28 +62,14 @@ svint16x2_t test_svreadz_hor_za16_s16_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za16_u16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za16_u16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svreadz_hor_za16_u16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -135,28 +79,14 @@ svuint16x2_t test_svreadz_hor_za16_u16_x2(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za16_f16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8f16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za16_f16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8f16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svreadz_hor_za16_f16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -166,28 +96,14 @@ svfloat16x2_t test_svreadz_hor_za16_f16_x2(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za16_bf16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8bf16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z29test_svreadz_hor_za16_bf16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8bf16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svreadz_hor_za16_bf16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -198,28 +114,14 @@ svbfloat16x2_t test_svreadz_hor_za16_bf16_x2(uint32_t slice) __arm_streaming __a // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za32_s32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za32_s32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svreadz_hor_za32_s32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -229,28 +131,14 @@ svint32x2_t test_svreadz_hor_za32_s32_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za32_u32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 2, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za32_u32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 2, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svreadz_hor_za32_u32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -260,28 +148,14 @@ svuint32x2_t test_svreadz_hor_za32_u32_x2(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za32_f32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4f32(i32 3, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za32_f32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4f32(i32 3, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svreadz_hor_za32_f32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -291,28 +165,14 @@ svfloat32x2_t test_svreadz_hor_za32_f32_x2(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za64_s64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za64_s64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svreadz_hor_za64_s64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -322,28 +182,14 @@ svint64x2_t test_svreadz_hor_za64_s64_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za64_u64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32 4, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za64_u64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32 4, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svreadz_hor_za64_u64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -353,28 +199,14 @@ svuint64x2_t test_svreadz_hor_za64_u64_x2(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za64_f64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv2f64(i32 7, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za64_f64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv2f64(i32 7, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svreadz_hor_za64_f64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -389,28 +221,14 @@ svfloat64x2_t test_svreadz_hor_za64_f64_x2(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za8_s8_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z26test_svreadz_ver_za8_s8_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svreadz_ver_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -420,28 +238,14 @@ svint8x2_t test_svreadz_ver_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za8_u8_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z26test_svreadz_ver_za8_u8_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svreadz_ver_za8_u8_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -451,28 +255,14 @@ svuint8x2_t test_svreadz_ver_za8_u8_x2(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za16_s16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv8i16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za16_s16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv8i16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svreadz_ver_za16_s16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -482,28 +272,14 @@ svint16x2_t test_svreadz_ver_za16_s16_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za16_u16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv8i16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za16_u16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv8i16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svreadz_ver_za16_u16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -513,28 +289,14 @@ svuint16x2_t test_svreadz_ver_za16_u16_x2(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za16_f16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv8f16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za16_f16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv8f16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svreadz_ver_za16_f16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -544,28 +306,14 @@ svfloat16x2_t test_svreadz_ver_za16_f16_x2(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za16_bf16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv8bf16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z29test_svreadz_ver_za16_bf16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv8bf16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svreadz_ver_za16_bf16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -576,28 +324,14 @@ svbfloat16x2_t test_svreadz_ver_za16_bf16_x2(uint32_t slice) __arm_streaming __a // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za32_s32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv4i32(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za32_s32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv4i32(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svreadz_ver_za32_s32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -607,28 +341,14 @@ svint32x2_t test_svreadz_ver_za32_s32_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za32_u32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv4i32(i32 2, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za32_u32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv4i32(i32 2, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svreadz_ver_za32_u32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -638,28 +358,14 @@ svuint32x2_t test_svreadz_ver_za32_u32_x2(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za32_f32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv4f32(i32 3, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za32_f32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv4f32(i32 3, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svreadz_ver_za32_f32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -669,28 +375,14 @@ svfloat32x2_t test_svreadz_ver_za32_f32_x2(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za64_s64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv2i64(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za64_s64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv2i64(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svreadz_ver_za64_s64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -700,28 +392,14 @@ svint64x2_t test_svreadz_ver_za64_s64_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za64_u64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv2i64(i32 4, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za64_u64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv2i64(i32 4, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svreadz_ver_za64_u64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -731,28 +409,14 @@ svuint64x2_t test_svreadz_ver_za64_u64_x2(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za64_f64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv2f64(i32 7, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za64_f64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv2f64(i32 7, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svreadz_ver_za64_f64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -765,36 +429,14 @@ svfloat64x2_t test_svreadz_ver_za64_f64_x2(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za8_s8_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z26test_svreadz_hor_za8_s8_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svreadz_hor_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -804,36 +446,14 @@ svint8x4_t test_svreadz_hor_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za8_u8_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z26test_svreadz_hor_za8_u8_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svreadz_hor_za8_u8_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -843,36 +463,14 @@ svuint8x4_t test_svreadz_hor_za8_u8_x4(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za16_s16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8i16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za16_s16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8i16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svreadz_hor_za16_s16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -882,36 +480,14 @@ svint16x4_t test_svreadz_hor_za16_s16_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za16_u16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8i16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za16_u16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8i16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svreadz_hor_za16_u16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -921,36 +497,14 @@ svuint16x4_t test_svreadz_hor_za16_u16_x4(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za16_f16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8f16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za16_f16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8f16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svreadz_hor_za16_f16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -960,36 +514,14 @@ svfloat16x4_t test_svreadz_hor_za16_f16_x4(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za16_bf16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8bf16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z29test_svreadz_hor_za16_bf16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8bf16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svreadz_hor_za16_bf16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1000,36 +532,14 @@ svbfloat16x4_t test_svreadz_hor_za16_bf16_x4(uint32_t slice) __arm_streaming __a // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za32_s32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv4i32(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za32_s32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv4i32(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svreadz_hor_za32_s32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1039,36 +549,14 @@ svint32x4_t test_svreadz_hor_za32_s32_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za32_u32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv4i32(i32 2, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za32_u32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv4i32(i32 2, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svreadz_hor_za32_u32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1078,36 +566,14 @@ svuint32x4_t test_svreadz_hor_za32_u32_x4(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za32_f32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv4f32(i32 3, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za32_f32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv4f32(i32 3, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svreadz_hor_za32_f32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1117,36 +583,14 @@ svfloat32x4_t test_svreadz_hor_za32_f32_x4(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za64_s64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv2i64(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za64_s64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv2i64(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svreadz_hor_za64_s64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1156,36 +600,14 @@ svint64x4_t test_svreadz_hor_za64_s64_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za64_u64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv2i64(i32 4, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za64_u64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv2i64(i32 4, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svreadz_hor_za64_u64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1195,36 +617,14 @@ svuint64x4_t test_svreadz_hor_za64_u64_x4(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za64_f64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv2f64(i32 7, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za64_f64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv2f64(i32 7, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svreadz_hor_za64_f64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1236,36 +636,14 @@ svfloat64x4_t test_svreadz_hor_za64_f64_x4(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za8_s8_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z26test_svreadz_ver_za8_s8_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svreadz_ver_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1275,36 +653,14 @@ svint8x4_t test_svreadz_ver_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za8_u8_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z26test_svreadz_ver_za8_u8_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svreadz_ver_za8_u8_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1314,36 +670,14 @@ svuint8x4_t test_svreadz_ver_za8_u8_x4(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za16_s16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv8i16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za16_s16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv8i16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svreadz_ver_za16_s16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1353,36 +687,14 @@ svint16x4_t test_svreadz_ver_za16_s16_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za16_u16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv8i16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za16_u16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv8i16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svreadz_ver_za16_u16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1392,36 +704,14 @@ svuint16x4_t test_svreadz_ver_za16_u16_x4(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za16_f16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv8f16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za16_f16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv8f16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svreadz_ver_za16_f16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1431,36 +721,14 @@ svfloat16x4_t test_svreadz_ver_za16_f16_x4(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za16_bf16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv8bf16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z29test_svreadz_ver_za16_bf16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv8bf16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svreadz_ver_za16_bf16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1471,36 +739,14 @@ svbfloat16x4_t test_svreadz_ver_za16_bf16_x4(uint32_t slice) __arm_streaming __a // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za32_s32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4i32(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za32_s32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4i32(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svreadz_ver_za32_s32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1510,36 +756,14 @@ svint32x4_t test_svreadz_ver_za32_s32_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za32_u32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4i32(i32 2, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za32_u32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4i32(i32 2, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svreadz_ver_za32_u32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1549,36 +773,14 @@ svuint32x4_t test_svreadz_ver_za32_u32_x4(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za32_f32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4f32(i32 3, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za32_f32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4f32(i32 3, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svreadz_ver_za32_f32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1588,36 +790,14 @@ svfloat32x4_t test_svreadz_ver_za32_f32_x4(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za64_s64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2i64(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za64_s64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2i64(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svreadz_ver_za64_s64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1627,36 +807,14 @@ svint64x4_t test_svreadz_ver_za64_s64_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za64_u64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2i64(i32 4, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za64_u64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2i64(i32 4, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svreadz_ver_za64_u64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1666,36 +824,14 @@ svuint64x4_t test_svreadz_ver_za64_u64_x4(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za64_f64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2f64(i32 7, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za64_f64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2f64(i32 7, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svreadz_ver_za64_f64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2120,28 +1256,14 @@ svfloat64_t test_svreadz_hor_za128_f64(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , } @test_svreadz_za8_s8_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv16i8(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svreadz_za8_s8_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv16i8(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svreadz_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2151,28 +1273,14 @@ svint8x2_t test_svreadz_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inout("z // CHECK-LABEL: define dso_local { , } @test_svreadz_za8_u8_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv16i8(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svreadz_za8_u8_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv16i8(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svreadz_za8_u8_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2182,28 +1290,14 @@ svuint8x2_t test_svreadz_za8_u8_x2(uint32_t slice) __arm_streaming __arm_inout(" // CHECK-LABEL: define dso_local { , } @test_svreadz_za16_s16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8i16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za16_s16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8i16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svreadz_za16_s16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2213,28 +1307,14 @@ svint16x2_t test_svreadz_za16_s16_x2(uint32_t slice) __arm_streaming __arm_inout // CHECK-LABEL: define dso_local { , } @test_svreadz_za16_u16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8i16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za16_u16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8i16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svreadz_za16_u16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2244,28 +1324,14 @@ svuint16x2_t test_svreadz_za16_u16_x2(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , } @test_svreadz_za32_s32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4i32(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za32_s32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4i32(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svreadz_za32_s32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2275,28 +1341,14 @@ svint32x2_t test_svreadz_za32_s32_x2(uint32_t slice) __arm_streaming __arm_inout // CHECK-LABEL: define dso_local { , } @test_svreadz_za32_u32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4i32(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za32_u32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4i32(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svreadz_za32_u32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2306,28 +1358,14 @@ svuint32x2_t test_svreadz_za32_u32_x2(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , } @test_svreadz_za64_s64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2i64(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za64_s64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2i64(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svreadz_za64_s64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2337,28 +1375,14 @@ svint64x2_t test_svreadz_za64_s64_x2(uint32_t slice) __arm_streaming __arm_inout // CHECK-LABEL: define dso_local { , } @test_svreadz_za64_u64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2i64(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za64_u64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2i64(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svreadz_za64_u64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2368,28 +1392,14 @@ svuint64x2_t test_svreadz_za64_u64_x2(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , } @test_svreadz_za16_bf16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8bf16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z25test_svreadz_za16_bf16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8bf16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svreadz_za16_bf16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2399,28 +1409,14 @@ svbfloat16x2_t test_svreadz_za16_bf16_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_za16_f16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8f16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za16_f16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8f16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svreadz_za16_f16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2430,28 +1426,14 @@ svfloat16x2_t test_svreadz_za16_f16_x2(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , } @test_svreadz_za32_f32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4f32(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za32_f32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4f32(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svreadz_za32_f32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2461,28 +1443,14 @@ svfloat32x2_t test_svreadz_za32_f32_x2(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , } @test_svreadz_za64_f64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2f64(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za64_f64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2f64(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svreadz_za64_f64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2496,36 +1464,14 @@ svfloat64x2_t test_svreadz_za64_f64_x2(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za8_s8_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv16i8(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z22test_svreadz_za8_s8_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv16i8(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svreadz_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2535,36 +1481,14 @@ svint8x4_t test_svreadz_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inout("z // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za8_u8_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv16i8(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z22test_svreadz_za8_u8_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv16i8(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svreadz_za8_u8_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2574,36 +1498,14 @@ svuint8x4_t test_svreadz_za8_u8_x4(uint32_t slice) __arm_streaming __arm_inout(" // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za16_s16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8i16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za16_s16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8i16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svreadz_za16_s16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2613,36 +1515,14 @@ svint16x4_t test_svreadz_za16_s16_x4(uint32_t slice) __arm_streaming __arm_inout // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za16_u16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8i16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za16_u16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8i16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svreadz_za16_u16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2652,36 +1532,14 @@ svuint16x4_t test_svreadz_za16_u16_x4(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za32_s32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4i32(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za32_s32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4i32(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svreadz_za32_s32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2691,36 +1549,14 @@ svint32x4_t test_svreadz_za32_s32_x4(uint32_t slice) __arm_streaming __arm_inout // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za32_u32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4i32(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za32_u32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4i32(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svreadz_za32_u32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2730,36 +1566,14 @@ svuint32x4_t test_svreadz_za32_u32_x4(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za64_s64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2i64(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za64_s64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2i64(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svreadz_za64_s64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2769,36 +1583,14 @@ svint64x4_t test_svreadz_za64_s64_x4(uint32_t slice) __arm_streaming __arm_inout // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za64_u64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2i64(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za64_u64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2i64(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svreadz_za64_u64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2808,36 +1600,14 @@ svuint64x4_t test_svreadz_za64_u64_x4(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za16_bf16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8bf16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z25test_svreadz_za16_bf16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8bf16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svreadz_za16_bf16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2847,36 +1617,14 @@ svbfloat16x4_t test_svreadz_za16_bf16_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za16_f16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8f16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za16_f16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8f16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svreadz_za16_f16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2886,36 +1634,14 @@ svfloat16x4_t test_svreadz_za16_f16_x4(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za32_f32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4f32(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za32_f32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4f32(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svreadz_za32_f32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2925,36 +1651,14 @@ svfloat32x4_t test_svreadz_za32_f32_x4(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za64_f64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2f64(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za64_f64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2f64(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svreadz_za64_f64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c index 6cea34ee52ef6d..deb126236ad57f 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c @@ -141,27 +141,13 @@ svbool_t test_svpext_lane_c64_3(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c8_x2_0( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svpext_lane_c8_x2_0u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svpext_lane_c8_x2_0(svcount_t c) ATTR { return svpext_lane_c8_x2(c, 0); @@ -169,27 +155,13 @@ svboolx2_t test_svpext_lane_c8_x2_0(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c8_x2_1( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 1) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svpext_lane_c8_x2_1u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 1) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svpext_lane_c8_x2_1(svcount_t c) ATTR { return svpext_lane_c8_x2(c, 1); @@ -197,31 +169,25 @@ svboolx2_t test_svpext_lane_c8_x2_1(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c16_x2_0( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: @_Z25test_svpext_lane_c16_x2_0u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svpext_lane_c16_x2_0(svcount_t c) ATTR { return svpext_lane_c16_x2(c, 0); @@ -229,31 +195,25 @@ svboolx2_t test_svpext_lane_c16_x2_0(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c16_x2_1( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: @_Z25test_svpext_lane_c16_x2_1u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svpext_lane_c16_x2_1(svcount_t c) ATTR { return svpext_lane_c16_x2(c, 1); @@ -261,31 +221,25 @@ svboolx2_t test_svpext_lane_c16_x2_1(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c32_x2_0( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: @_Z25test_svpext_lane_c32_x2_0u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svpext_lane_c32_x2_0(svcount_t c) ATTR { return svpext_lane_c32_x2(c, 0); @@ -293,31 +247,25 @@ svboolx2_t test_svpext_lane_c32_x2_0(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c32_x2_1( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: @_Z25test_svpext_lane_c32_x2_1u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svpext_lane_c32_x2_1(svcount_t c) ATTR { return svpext_lane_c32_x2(c, 1); @@ -325,31 +273,25 @@ svboolx2_t test_svpext_lane_c32_x2_1(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c64_x2_0( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: @_Z25test_svpext_lane_c64_x2_0u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svpext_lane_c64_x2_0(svcount_t c) ATTR { return svpext_lane_c64_x2(c, 0); @@ -357,31 +299,25 @@ svboolx2_t test_svpext_lane_c64_x2_0(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c64_x2_1( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: @_Z25test_svpext_lane_c64_x2_1u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svpext_lane_c64_x2_1(svcount_t c) ATTR { return svpext_lane_c64_x2(c, 1); diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c index 3fcc1dc6c819a4..612f2d25d40d02 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c @@ -23,28 +23,14 @@ // CHECK-LABEL: define dso_local { , } @test_svwhilege_b8_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilege_b8_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0:[0-9]+]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilege_b8_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b8,_s64,_x2)(op1, op2); @@ -53,28 +39,14 @@ svboolx2_t test_svwhilege_b8_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilege_b8_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilege_b8_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilege_b8_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b8,_u64,_x2)(op1, op2); @@ -83,32 +55,26 @@ svboolx2_t test_svwhilege_b8_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilege_b16_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b16_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilege_b16_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b16,_s64,_x2)(op1, op2); @@ -117,32 +83,26 @@ svboolx2_t test_svwhilege_b16_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilege_b16_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b16_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilege_b16_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b16,_u64,_x2)(op1, op2); @@ -151,32 +111,26 @@ svboolx2_t test_svwhilege_b16_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilege_b32_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b32_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilege_b32_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b32,_s64,_x2)(op1, op2); @@ -185,32 +139,26 @@ svboolx2_t test_svwhilege_b32_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilege_b32_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b32_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilege_b32_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b32,_u64,_x2)(op1, op2); @@ -219,32 +167,26 @@ svboolx2_t test_svwhilege_b32_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilege_b64_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b64_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilege_b64_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b64,_s64,_x2)(op1, op2); @@ -253,32 +195,26 @@ svboolx2_t test_svwhilege_b64_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilege_b64_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b64_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilege_b64_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b64,_u64,_x2)(op1, op2); @@ -287,28 +223,14 @@ svboolx2_t test_svwhilege_b64_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b8_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilegt_b8_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilegt_b8_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b8,_s64,_x2)(op1, op2); @@ -317,28 +239,14 @@ svboolx2_t test_svwhilegt_b8_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b8_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilegt_b8_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilegt_b8_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b8,_u64,_x2)(op1, op2); @@ -347,32 +255,26 @@ svboolx2_t test_svwhilegt_b8_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b16_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b16_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b16_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b16,_s64,_x2)(op1, op2); @@ -381,32 +283,26 @@ svboolx2_t test_svwhilegt_b16_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b16_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b16_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b16_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b16,_u64,_x2)(op1, op2); @@ -415,32 +311,26 @@ svboolx2_t test_svwhilegt_b16_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b32_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b32_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b32_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b32,_s64,_x2)(op1, op2); @@ -449,32 +339,26 @@ svboolx2_t test_svwhilegt_b32_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b32_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b32_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b32_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b32,_u64,_x2)(op1, op2); @@ -483,32 +367,26 @@ svboolx2_t test_svwhilegt_b32_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b64_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b64_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b64_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b64,_s64,_x2)(op1, op2); @@ -517,32 +395,26 @@ svboolx2_t test_svwhilegt_b64_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b64_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b64_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b64_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b64,_u64,_x2)(op1, op2); @@ -551,28 +423,14 @@ svboolx2_t test_svwhilegt_b64_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b8_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilele_b8_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilele_b8_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b8,_s64,_x2)(op1, op2); @@ -581,28 +439,14 @@ svboolx2_t test_svwhilele_b8_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b8_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilele_b8_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilele_b8_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b8,_u64,_x2)(op1, op2); @@ -611,32 +455,26 @@ svboolx2_t test_svwhilele_b8_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b16_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b16_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b16_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b16,_s64,_x2)(op1, op2); @@ -645,32 +483,26 @@ svboolx2_t test_svwhilele_b16_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b16_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b16_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b16_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b16,_u64,_x2)(op1, op2); @@ -679,32 +511,26 @@ svboolx2_t test_svwhilele_b16_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b32_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b32_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b32_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b32,_s64,_x2)(op1, op2); @@ -713,32 +539,26 @@ svboolx2_t test_svwhilele_b32_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b32_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b32_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b32_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b32,_u64,_x2)(op1, op2); @@ -747,32 +567,26 @@ svboolx2_t test_svwhilele_b32_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b64_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b64_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b64_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b64,_s64,_x2)(op1, op2); @@ -781,32 +595,26 @@ svboolx2_t test_svwhilele_b64_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b64_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b64_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b64_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b64,_u64,_x2)(op1, op2); @@ -815,28 +623,14 @@ svboolx2_t test_svwhilele_b64_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b8_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilelt_b8_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilelt_b8_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b8,_s64,_x2)(op1, op2); @@ -845,28 +639,14 @@ svboolx2_t test_svwhilelt_b8_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b8_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilelt_b8_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilelt_b8_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b8,_u64,_x2)(op1, op2); @@ -875,32 +655,26 @@ svboolx2_t test_svwhilelt_b8_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b16_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b16_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b16_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b16,_s64,_x2)(op1, op2); @@ -909,32 +683,26 @@ svboolx2_t test_svwhilelt_b16_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b16_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b16_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b16_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b16,_u64,_x2)(op1, op2); @@ -943,32 +711,26 @@ svboolx2_t test_svwhilelt_b16_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b32_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b32_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b32_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b32,_s64,_x2)(op1, op2); @@ -977,32 +739,26 @@ svboolx2_t test_svwhilelt_b32_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b32_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b32_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b32_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b32,_u64,_x2)(op1, op2); @@ -1011,32 +767,26 @@ svboolx2_t test_svwhilelt_b32_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b64_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b64_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b64_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b64,_s64,_x2)(op1, op2); @@ -1045,32 +795,26 @@ svboolx2_t test_svwhilelt_b64_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b64_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b64_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b64_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b64,_u64,_x2)(op1, op2); From ce6c236c965dc1bb5fa2257e17ea253a015705cc Mon Sep 17 00:00:00 2001 From: Victor Campos Date: Wed, 25 Sep 2024 11:23:58 +0100 Subject: [PATCH 63/65] [ADT][NFC] Simplify SmallSet (#109412) - Remove dependence on `STLExtras.h`. - Remove unused header inclusions. - Make `count` use `contains` for deduplication. - Replace hand-written linear scans on Vector by `std::find`. --- clang/lib/Basic/TargetID.cpp | 1 + llvm/include/llvm/ADT/SmallSet.h | 37 +++++++------------------------- 2 files changed, 9 insertions(+), 29 deletions(-) diff --git a/clang/lib/Basic/TargetID.cpp b/clang/lib/Basic/TargetID.cpp index 3c06d9bad1dc0d..fa1bfec2aacb9c 100644 --- a/clang/lib/Basic/TargetID.cpp +++ b/clang/lib/Basic/TargetID.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/Basic/TargetID.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/TargetParser.h" diff --git a/llvm/include/llvm/ADT/SmallSet.h b/llvm/include/llvm/ADT/SmallSet.h index 630c98504261aa..8d7511bf0bc8d9 100644 --- a/llvm/include/llvm/ADT/SmallSet.h +++ b/llvm/include/llvm/ADT/SmallSet.h @@ -16,14 +16,10 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/iterator.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/type_traits.h" #include #include #include -#include #include namespace llvm { @@ -139,10 +135,6 @@ class SmallSet { SmallVector Vector; std::set Set; - using VIterator = typename SmallVector::const_iterator; - using SIterator = typename std::set::const_iterator; - using mutable_iterator = typename SmallVector::iterator; - // In small mode SmallPtrSet uses linear search for the elements, so it is // not a good idea to choose this value too high. You may consider using a // DenseSet<> instead if you expect many elements in the set. @@ -163,13 +155,7 @@ class SmallSet { } /// count - Return 1 if the element is in the set, 0 otherwise. - size_type count(const T &V) const { - if (isSmall()) { - // Since the collection is small, just do a linear search. - return vfind(V) == Vector.end() ? 0 : 1; - } - return Set.count(V); - } + size_type count(const T &V) const { return contains(V) ? 1 : 0; } /// insert - Insert an element into the set if it isn't already there. /// Returns a pair. The first value of it is an iterator to the inserted @@ -181,7 +167,7 @@ class SmallSet { return std::make_pair(const_iterator(I), Inserted); } - VIterator I = vfind(V); + auto I = std::find(Vector.begin(), Vector.end(), V); if (I != Vector.end()) // Don't reinsert if it already exists. return std::make_pair(const_iterator(I), false); if (Vector.size() < N) { @@ -206,11 +192,11 @@ class SmallSet { bool erase(const T &V) { if (!isSmall()) return Set.erase(V); - for (mutable_iterator I = Vector.begin(), E = Vector.end(); I != E; ++I) - if (*I == V) { - Vector.erase(I); - return true; - } + auto I = std::find(Vector.begin(), Vector.end(), V); + if (I != Vector.end()) { + Vector.erase(I); + return true; + } return false; } @@ -234,19 +220,12 @@ class SmallSet { /// Check if the SmallSet contains the given element. bool contains(const T &V) const { if (isSmall()) - return vfind(V) != Vector.end(); + return std::find(Vector.begin(), Vector.end(), V) != Vector.end(); return Set.find(V) != Set.end(); } private: bool isSmall() const { return Set.empty(); } - - VIterator vfind(const T &V) const { - for (VIterator I = Vector.begin(), E = Vector.end(); I != E; ++I) - if (*I == V) - return I; - return Vector.end(); - } }; /// If this set is of pointer values, transparently switch over to using From e4688b98cd2b86035a2b563a8db0819710d6275a Mon Sep 17 00:00:00 2001 From: Chengjun Date: Wed, 25 Sep 2024 03:41:13 -0700 Subject: [PATCH 64/65] [SimplifyCFG] Avoid increasing too many phi entries when removing empty blocks (#104887) Now in the simplifycfg and jumpthreading passes, we will remove the empty blocks (blocks only have phis and an unconditional branch). However, in some cases, this will increase size of the IR and slow down the compile of other passes dramatically. For example, we have the following CFG: 1. BB1 has 100 predecessors, and unconditionally branches to BB2 (does not have any other instructions). 2. BB2 has 100 phis. Then in this case, if we remove BB1, for every phi in BB2, we need to increase 99 entries (replace the incoming edge from BB1 with 100 edges from its predecessors). Then in total, we will increase 9900 phi entries, which can slow down the compile time for many other passes. Therefore, in this change, we add a check to see whether removing the empty blocks will increase lots of phi entries. Now, the threshold is 1000 (can be controlled by the command line option `max-phi-entries-increase-after-removing-empty-block`), which means that we will not remove an empty block if it will increase the total number of phi entries by 1000. This threshold is conservative and for most of the cases, we will not have such a large phi. So, this will only be triggered in some unusual IRs. --- llvm/lib/Transforms/Utils/Local.cpp | 35 +++- .../SimplifyCFG/avoid-complex-phi.ll | 164 ++++++++++++++++++ 2 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SimplifyCFG/avoid-complex-phi.ll diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 725b512fb86e76..7659fc69196151 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -112,6 +112,12 @@ static cl::opt PHICSENumPHISmallSize( "When the basic block contains not more than this number of PHI nodes, " "perform a (faster!) exhaustive search instead of set-driven one.")); +static cl::opt MaxPhiEntriesIncreaseAfterRemovingEmptyBlock( + "max-phi-entries-increase-after-removing-empty-block", cl::init(1000), + cl::Hidden, + cl::desc("Stop removing an empty block if removing it will introduce more " + "than this number of phi entries in its successor")); + // Max recursion depth for collectBitParts used when detecting bswap and // bitreverse idioms. static const unsigned BitPartRecursionMaxDepth = 48; @@ -1047,6 +1053,33 @@ CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ, return true; } +/// Check whether removing \p BB will make the phis in its \p Succ have too +/// many incoming entries. This function does not check whether \p BB is +/// foldable or not. +static bool introduceTooManyPhiEntries(BasicBlock *BB, BasicBlock *Succ) { + // If BB only has one predecessor, then removing it will not introduce more + // incoming edges for phis. + if (BB->hasNPredecessors(1)) + return false; + unsigned NumPreds = pred_size(BB); + unsigned NumChangedPhi = 0; + for (auto &Phi : Succ->phis()) { + // If the incoming value is a phi and the phi is defined in BB, + // then removing BB will not increase the total phi entries of the ir. + if (auto *IncomingPhi = dyn_cast(Phi.getIncomingValueForBlock(BB))) + if (IncomingPhi->getParent() == BB) + continue; + // Otherwise, we need to add entries to the phi + NumChangedPhi++; + } + // For every phi that needs to be changed, (NumPreds - 1) new entries will be + // added. If the total increase in phi entries exceeds + // MaxPhiEntriesIncreaseAfterRemovingEmptyBlock, it will be considered as + // introducing too many new phi entries. + return (NumPreds - 1) * NumChangedPhi > + MaxPhiEntriesIncreaseAfterRemovingEmptyBlock; +} + /// Replace a value flowing from a block to a phi with /// potentially multiple instances of that value flowing from the /// block's predecessors to the phi. @@ -1146,7 +1179,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, BBKillable || CanRedirectPredsOfEmptyBBToSucc(BB, Succ, BBPreds, SuccPreds, CommonPred); - if (!BBKillable && !BBPhisMergeable) + if ((!BBKillable && !BBPhisMergeable) || introduceTooManyPhiEntries(BB, Succ)) return false; // Check to see if merging these blocks/phis would cause conflicts for any of diff --git a/llvm/test/Transforms/SimplifyCFG/avoid-complex-phi.ll b/llvm/test/Transforms/SimplifyCFG/avoid-complex-phi.ll new file mode 100644 index 00000000000000..c24fae7aa67bbe --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/avoid-complex-phi.ll @@ -0,0 +1,164 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -max-phi-entries-increase-after-removing-empty-block=12 -passes=simplifycfg -S | FileCheck --check-prefixes=CHECK-12 %s +; RUN: opt < %s -max-phi-entries-increase-after-removing-empty-block=11 -passes=simplifycfg -S | FileCheck --check-prefixes=CHECK-11 %s +; RUN: opt < %s -max-phi-entries-increase-after-removing-empty-block=4 -passes=simplifycfg -S | FileCheck --check-prefixes=CHECK-4 %s +; +; This test has the following CFG: +; 1. entry has a switch to 4 blocks: B1 - B4 +; 2. For B1 and B2, it branches to B5 and B6 +; 3. For B3 and B4, it branches to B5 and B7 +; 4. In B5, %val is defined as phi taking values from B1 to B4 +; 5. B5, B6, B7 branch to block Merge unconditionally +; 6. Block Merge has 5 phis(%x1 - %x4 and %val_merge). +; +; If we remove B5, %x1 - %x4 will increase the number of phi entries by (4 - 1) * 4 = 12. For %val_merge, since the value taking from B5 +; is defined in B5, it will not increase the number of phi entries (it can be considered as move the entries from %val to +; %val_merge). Therefore, removing B5 will increase the number of phi entries by 12 (not (4 - 1) * 5 = 15). +; +; If we remove B6 / B7, it will increase the number of phi entries by (2 - 1) * 5 = 5. +; +; In the first test, max-phi-entries-increase-after-removing-empty-block is set to be 12, then B5 will be removed. +; In the second test, max-phi-entries-increase-after-removing-empty-block is set to be 11, then B5 should not be removed, +; but B6 and B7 can be removed. +; In the third test, max-phi-entries-increase-after-removing-empty-block is set to be 4, then no BB can be removed. +; +define void @foo(i32 %a, i32 %val1, i32 %val2, i32 %val3, i32 %val4) { +; CHECK-12-LABEL: define void @foo( +; CHECK-12-SAME: i32 [[A:%.*]], i32 [[VAL1:%.*]], i32 [[VAL2:%.*]], i32 [[VAL3:%.*]], i32 [[VAL4:%.*]]) { +; CHECK-12-NEXT: [[ENTRY:.*:]] +; CHECK-12-NEXT: switch i32 [[A]], label %[[B1:.*]] [ +; CHECK-12-NEXT: i32 4, label %[[B4:.*]] +; CHECK-12-NEXT: i32 2, label %[[B2:.*]] +; CHECK-12-NEXT: i32 3, label %[[B3:.*]] +; CHECK-12-NEXT: ] +; CHECK-12: [[B1]]: +; CHECK-12-NEXT: [[CMP1:%.*]] = icmp eq i32 [[VAL1]], 1 +; CHECK-12-NEXT: br i1 [[CMP1]], label %[[B6:.*]], label %[[MERGE:.*]] +; CHECK-12: [[B2]]: +; CHECK-12-NEXT: [[CMP2:%.*]] = icmp eq i32 [[VAL2]], 2 +; CHECK-12-NEXT: br i1 [[CMP2]], label %[[B6]], label %[[MERGE]] +; CHECK-12: [[B3]]: +; CHECK-12-NEXT: [[CMP3:%.*]] = icmp eq i32 [[VAL3]], 3 +; CHECK-12-NEXT: br i1 [[CMP3]], label %[[B7:.*]], label %[[MERGE]] +; CHECK-12: [[B4]]: +; CHECK-12-NEXT: [[CMP4:%.*]] = icmp eq i32 [[VAL4]], 4 +; CHECK-12-NEXT: br i1 [[CMP4]], label %[[B7]], label %[[MERGE]] +; CHECK-12: [[B6]]: +; CHECK-12-NEXT: br label %[[MERGE]] +; CHECK-12: [[B7]]: +; CHECK-12-NEXT: br label %[[MERGE]] +; CHECK-12: [[MERGE]]: +; CHECK-12-NEXT: [[X1:%.*]] = phi i16 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ 1, %[[B4]] ], [ 1, %[[B3]] ], [ 1, %[[B2]] ], [ 1, %[[B1]] ] +; CHECK-12-NEXT: [[X2:%.*]] = phi i16 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ], [ 2, %[[B2]] ], [ 2, %[[B1]] ] +; CHECK-12-NEXT: [[X3:%.*]] = phi i16 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ 3, %[[B4]] ], [ 3, %[[B3]] ], [ 3, %[[B2]] ], [ 3, %[[B1]] ] +; CHECK-12-NEXT: [[X4:%.*]] = phi i16 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ 4, %[[B4]] ], [ 4, %[[B3]] ], [ 4, %[[B2]] ], [ 4, %[[B1]] ] +; CHECK-12-NEXT: [[VAL_MERGE:%.*]] = phi i32 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ [[VAL1]], %[[B1]] ], [ [[VAL2]], %[[B2]] ], [ [[VAL3]], %[[B3]] ], [ [[VAL4]], %[[B4]] ] +; CHECK-12-NEXT: ret void +; +; CHECK-11-LABEL: define void @foo( +; CHECK-11-SAME: i32 [[A:%.*]], i32 [[VAL1:%.*]], i32 [[VAL2:%.*]], i32 [[VAL3:%.*]], i32 [[VAL4:%.*]]) { +; CHECK-11-NEXT: [[ENTRY:.*:]] +; CHECK-11-NEXT: switch i32 [[A]], label %[[B1:.*]] [ +; CHECK-11-NEXT: i32 4, label %[[B4:.*]] +; CHECK-11-NEXT: i32 2, label %[[B2:.*]] +; CHECK-11-NEXT: i32 3, label %[[B3:.*]] +; CHECK-11-NEXT: ] +; CHECK-11: [[B1]]: +; CHECK-11-NEXT: [[CMP1:%.*]] = icmp eq i32 [[VAL1]], 1 +; CHECK-11-NEXT: br i1 [[CMP1]], label %[[MERGE:.*]], label %[[B5:.*]] +; CHECK-11: [[B2]]: +; CHECK-11-NEXT: [[CMP2:%.*]] = icmp eq i32 [[VAL2]], 2 +; CHECK-11-NEXT: br i1 [[CMP2]], label %[[MERGE]], label %[[B5]] +; CHECK-11: [[B3]]: +; CHECK-11-NEXT: [[CMP3:%.*]] = icmp eq i32 [[VAL3]], 3 +; CHECK-11-NEXT: br i1 [[CMP3]], label %[[MERGE]], label %[[B5]] +; CHECK-11: [[B4]]: +; CHECK-11-NEXT: [[CMP4:%.*]] = icmp eq i32 [[VAL4]], 4 +; CHECK-11-NEXT: br i1 [[CMP4]], label %[[MERGE]], label %[[B5]] +; CHECK-11: [[B5]]: +; CHECK-11-NEXT: [[VAL:%.*]] = phi i32 [ [[VAL1]], %[[B1]] ], [ [[VAL2]], %[[B2]] ], [ [[VAL3]], %[[B3]] ], [ [[VAL4]], %[[B4]] ] +; CHECK-11-NEXT: br label %[[MERGE]] +; CHECK-11: [[MERGE]]: +; CHECK-11-NEXT: [[X1:%.*]] = phi i16 [ 1, %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ] +; CHECK-11-NEXT: [[X2:%.*]] = phi i16 [ 2, %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ] +; CHECK-11-NEXT: [[X3:%.*]] = phi i16 [ 3, %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ] +; CHECK-11-NEXT: [[X4:%.*]] = phi i16 [ 4, %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ] +; CHECK-11-NEXT: [[VAL_MERGE:%.*]] = phi i32 [ [[VAL]], %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ] +; CHECK-11-NEXT: ret void +; +; CHECK-4-LABEL: define void @foo( +; CHECK-4-SAME: i32 [[A:%.*]], i32 [[VAL1:%.*]], i32 [[VAL2:%.*]], i32 [[VAL3:%.*]], i32 [[VAL4:%.*]]) { +; CHECK-4-NEXT: [[ENTRY:.*:]] +; CHECK-4-NEXT: switch i32 [[A]], label %[[B1:.*]] [ +; CHECK-4-NEXT: i32 4, label %[[B4:.*]] +; CHECK-4-NEXT: i32 2, label %[[B2:.*]] +; CHECK-4-NEXT: i32 3, label %[[B3:.*]] +; CHECK-4-NEXT: ] +; CHECK-4: [[B1]]: +; CHECK-4-NEXT: [[CMP1:%.*]] = icmp eq i32 [[VAL1]], 1 +; CHECK-4-NEXT: br i1 [[CMP1]], label %[[B6:.*]], label %[[B5:.*]] +; CHECK-4: [[B2]]: +; CHECK-4-NEXT: [[CMP2:%.*]] = icmp eq i32 [[VAL2]], 2 +; CHECK-4-NEXT: br i1 [[CMP2]], label %[[B6]], label %[[B5]] +; CHECK-4: [[B3]]: +; CHECK-4-NEXT: [[CMP3:%.*]] = icmp eq i32 [[VAL3]], 3 +; CHECK-4-NEXT: br i1 [[CMP3]], label %[[B7:.*]], label %[[B5]] +; CHECK-4: [[B4]]: +; CHECK-4-NEXT: [[CMP4:%.*]] = icmp eq i32 [[VAL4]], 4 +; CHECK-4-NEXT: br i1 [[CMP4]], label %[[B7]], label %[[B5]] +; CHECK-4: [[B5]]: +; CHECK-4-NEXT: [[VAL:%.*]] = phi i32 [ [[VAL1]], %[[B1]] ], [ [[VAL2]], %[[B2]] ], [ [[VAL3]], %[[B3]] ], [ [[VAL4]], %[[B4]] ] +; CHECK-4-NEXT: br label %[[MERGE:.*]] +; CHECK-4: [[B6]]: +; CHECK-4-NEXT: br label %[[MERGE]] +; CHECK-4: [[B7]]: +; CHECK-4-NEXT: br label %[[MERGE]] +; CHECK-4: [[MERGE]]: +; CHECK-4-NEXT: [[X1:%.*]] = phi i16 [ 1, %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ] +; CHECK-4-NEXT: [[X2:%.*]] = phi i16 [ 2, %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ] +; CHECK-4-NEXT: [[X3:%.*]] = phi i16 [ 3, %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ] +; CHECK-4-NEXT: [[X4:%.*]] = phi i16 [ 4, %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ] +; CHECK-4-NEXT: [[VAL_MERGE:%.*]] = phi i32 [ [[VAL]], %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ] +; CHECK-4-NEXT: ret void +; +entry: + switch i32 %a, label %B1 [ + i32 4, label %B4 + i32 2, label %B2 + i32 3, label %B3 + ] + +B1: ; preds = %entry + %cmp1 = icmp eq i32 %val1, 1 + br i1 %cmp1, label %B6, label %B5 + +B2: ; preds = %entry + %cmp2 = icmp eq i32 %val2, 2 + br i1 %cmp2, label %B6, label %B5 + +B3: ; preds = %entry + %cmp3 = icmp eq i32 %val3, 3 + br i1 %cmp3, label %B7, label %B5 + +B4: ; preds = %entry + %cmp4 = icmp eq i32 %val4, 4 + br i1 %cmp4, label %B7, label %B5 + +B5: ; preds = %B4, %B3, %B2, %B1 + %val = phi i32 [ %val1, %B1 ], [ %val2, %B2 ], [ %val3, %B3 ], [ %val4, %B4 ] + br label %Merge + +B6: ; preds = %B2, %B1 + br label %Merge + +B7: ; preds = %B4, %B3 + br label %Merge + +Merge: ; preds = %B7, %B6, %B5 + %x1 = phi i16 [ 1, %B5 ], [ 0, %B6 ], [ 2, %B7 ] + %x2 = phi i16 [ 2, %B5 ], [ 0, %B6 ], [ 2, %B7 ] + %x3 = phi i16 [ 3, %B5 ], [ 0, %B6 ], [ 2, %B7 ] + %x4 = phi i16 [ 4, %B5 ], [ 0, %B6 ], [ 2, %B7 ] + %val_merge = phi i32 [ %val, %B5 ], [ 0, %B6 ], [ 2, %B7 ] + ret void +} From de70b959b152a071c3d788492a3a37470163af55 Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Wed, 25 Sep 2024 12:42:57 +0200 Subject: [PATCH 65/65] [AMDGPU] Fix typo in promoteUniformOpToI32 (#109942) --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 08f2ff4566b674..2464361d4eece3 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6797,8 +6797,7 @@ SDValue SITargetLowering::promoteUniformOpToI32(SDValue Op, LHS = DAG.getNode(ExtOp, DL, ExtTy, {LHS}); // Special case: for shifts, the RHS always needs a zext. - if (Op.getOpcode() == ISD::SRA || Op.getOpcode() == ISD::SRL || - Op.getOpcode() == ISD::SRA) + if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) RHS = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtTy, {RHS}); else RHS = DAG.getNode(ExtOp, DL, ExtTy, {RHS});