diff --git a/clang/cmake/caches/Release.cmake b/clang/cmake/caches/Release.cmake index 6d5f75ca0074ee..c93ff40ff3ee48 100644 --- a/clang/cmake/caches/Release.cmake +++ b/clang/cmake/caches/Release.cmake @@ -55,14 +55,22 @@ set(STAGE1_RUNTIMES "compiler-rt") if (LLVM_RELEASE_ENABLE_PGO) list(APPEND STAGE1_PROJECTS "lld") - set(CLANG_BOOTSTRAP_TARGETS + set(tmp_targets generate-profdata stage2-package stage2-clang + stage2 stage2-install stage2-check-all stage2-check-llvm - stage2-check-clang CACHE STRING "") + stage2-check-clang) + + foreach(X IN LISTS LLVM_RELEASE_FINAL_STAGE_TARGETS) + list(APPEND tmp_targets "stage2-${X}") + endforeach() + list(REMOVE_DUPLICATES tmp_targets) + + set(CLANG_BOOTSTRAP_TARGETS "${tmp_targets}" CACHE STRING "") # Configuration for stage2-instrumented set(BOOTSTRAP_CLANG_ENABLE_BOOTSTRAP ON CACHE STRING "") diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index c44ae18276716a..b56d1ef6320835 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -283,6 +283,8 @@ Improvements to Clang's diagnostics - The lifetimebound and GSL analysis in clang are coherent, allowing clang to detect more use-after-free bugs. (#GH100549). +- Clang now warns for u8 character literals used in C23 with ``-Wpre-c23-compat`` instead of ``-Wpre-c++17-compat``. + Improvements to Clang's time-trace ---------------------------------- @@ -360,6 +362,8 @@ Bug Fixes to C++ Support - Fix an issue with dependent source location expressions (#GH106428), (#GH81155), (#GH80210), (#GH85373) - Fixed a bug in the substitution of empty pack indexing types. (#GH105903) - Clang no longer tries to capture non-odr used default arguments of template parameters of generic lambdas (#GH107048) +- Fixed a bug where defaulted comparison operators would remove ``const`` from base classes. 
(#GH102588) + Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 12d7b8c0205ee9..fc14bb6aa21651 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -283,6 +283,9 @@ def warn_cxx98_compat_unicode_literal : Warning< def warn_cxx14_compat_u8_character_literal : Warning< "unicode literals are incompatible with C++ standards before C++17">, InGroup, DefaultIgnore; +def warn_c17_compat_u8_character_literal : Warning< + "unicode literals are incompatible with C standards before C23">, + InGroup, DefaultIgnore; def warn_cxx11_compat_user_defined_literal : Warning< "identifier after literal will be treated as a user-defined literal suffix " "in C++11">, InGroup, DefaultIgnore; diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index d3ccc7ef81c079..e0f1ea435d54e4 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -724,7 +724,7 @@ class SourceManager : public RefCountedBase { /// /// Negative FileIDs are indexes into this table. To get from ID to an index, /// use (-ID - 2). - llvm::PagedVector LoadedSLocEntryTable; + llvm::PagedVector LoadedSLocEntryTable; /// For each allocation in LoadedSLocEntryTable, we keep the first FileID. 
/// We assume exactly one allocation per AST file, and use that to determine diff --git a/clang/lib/ExtractAPI/DeclarationFragments.cpp b/clang/lib/ExtractAPI/DeclarationFragments.cpp index d77bb1d424f7cf..06ce5ed6a64756 100644 --- a/clang/lib/ExtractAPI/DeclarationFragments.cpp +++ b/clang/lib/ExtractAPI/DeclarationFragments.cpp @@ -276,6 +276,19 @@ DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForType( DeclarationFragments Fragments; + if (const MacroQualifiedType *MQT = dyn_cast(T)) { + Fragments.append( + getFragmentsForType(MQT->getUnderlyingType(), Context, After)); + return Fragments; + } + + if (const AttributedType *AT = dyn_cast(T)) { + // FIXME: Serialize Attributes correctly + Fragments.append( + getFragmentsForType(AT->getModifiedType(), Context, After)); + return Fragments; + } + // An ElaboratedType is a sugar for types that are referred to using an // elaborated keyword, e.g., `struct S`, `enum E`, or (in C++) via a // qualified name, e.g., `N::M::type`, or both. diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index ef1e1f4bd9aeb4..8647e9f2f27c3d 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2428,7 +2428,9 @@ bool Lexer::LexCharConstant(Token &Result, const char *CurPtr, ? diag::warn_cxx98_compat_unicode_literal : diag::warn_c99_compat_unicode_literal); else if (Kind == tok::utf8_char_constant) - Diag(BufferPtr, diag::warn_cxx14_compat_u8_character_literal); + Diag(BufferPtr, LangOpts.CPlusPlus + ? 
diag::warn_cxx14_compat_u8_character_literal + : diag::warn_c17_compat_u8_character_literal); } char C = getAndAdvanceChar(CurPtr, Result); diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 6187cd12019e37..c0f1a25104f4fa 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -8476,10 +8476,12 @@ class DefaultedComparisonSynthesizer if (Obj.first.isInvalid() || Obj.second.isInvalid()) return {ExprError(), ExprError()}; CXXCastPath Path = {Base}; - return {S.ImpCastExprToType(Obj.first.get(), Base->getType(), - CK_DerivedToBase, VK_LValue, &Path), - S.ImpCastExprToType(Obj.second.get(), Base->getType(), - CK_DerivedToBase, VK_LValue, &Path)}; + const auto CastToBase = [&](Expr *E) { + QualType ToType = S.Context.getQualifiedType( + Base->getType(), E->getType().getQualifiers()); + return S.ImpCastExprToType(E, ToType, CK_DerivedToBase, VK_LValue, &Path); + }; + return {CastToBase(Obj.first.get()), CastToBase(Obj.second.get())}; } ExprPair getField(FieldDecl *Field) { diff --git a/clang/test/ExtractAPI/attributed-typedef.m b/clang/test/ExtractAPI/attributed-typedef.m new file mode 100644 index 00000000000000..c948c873ab759c --- /dev/null +++ b/clang/test/ExtractAPI/attributed-typedef.m @@ -0,0 +1,24 @@ +// RUN: rm -rf %t +// RUN: %clang_cc1 -extract-api --pretty-sgf --emit-sgf-symbol-labels-for-testing \ +// RUN: -triple arm64-apple-macosx -x objective-c-header %s -o %t/output.symbols.json + +_Pragma("clang assume_nonnull begin") + +struct Foo { int a; }; +typedef struct Foo *Bar; +// RUN: FileCheck %s -input-file %t/output.symbols.json --check-prefix FUNC +void func(Bar b); +// FUNC-LABEL: "!testLabel": "c:@F@func", +// CHECK-NOT: Foo +// CHECK: "pathComponents" + +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix THING +#define SWIFT_NAME(_name) __attribute__((swift_name(#_name))) +extern Bar const thing SWIFT_NAME(swiftThing); +// THING-LABEL: "!testLabel": "c:@thing" +// THING-NOT: 
Foo +// THING: "pathComponents" + +_Pragma("clang assume_nonnull end") + +// expected-no-diagnostics diff --git a/clang/test/Sema/pre-c2x-compat.c b/clang/test/Sema/pre-c2x-compat.c index fad472f1f72d54..15bb9b58349fad 100644 --- a/clang/test/Sema/pre-c2x-compat.c +++ b/clang/test/Sema/pre-c2x-compat.c @@ -1,3 +1,4 @@ // RUN: %clang_cc1 %s -std=c2x -Wpre-c2x-compat -pedantic -fsyntax-only -verify int digit_seps = 123'456; // expected-warning {{digit separators are incompatible with C standards before C23}} +unsigned char u8_char = u8'x'; // expected-warning {{unicode literals are incompatible with C standards before C23}} diff --git a/clang/test/SemaCXX/cxx20-default-compare.cpp b/clang/test/SemaCXX/cxx20-default-compare.cpp index 7074ee885ac4a2..3e4673c31e4890 100644 --- a/clang/test/SemaCXX/cxx20-default-compare.cpp +++ b/clang/test/SemaCXX/cxx20-default-compare.cpp @@ -1,5 +1,7 @@ // RUN: %clang_cc1 %s -std=c++23 -verify -Wfloat-equal +#include "Inputs/std-compare.h" + struct Foo { float val; bool operator==(const Foo &) const; @@ -15,3 +17,51 @@ bool operator==(const Foo &, const Foo &) = default; // expected-warning {{comp // Declare the defaulted comparison function as a non-member function. Arguments are passed by value. 
bool operator==(Foo, Foo) = default; // expected-warning {{comparing floating point with == or != is unsafe}} expected-note {{in defaulted equality comparison operator for 'Foo' first required here}} + +namespace GH102588 { +struct A { + int i = 0; + constexpr operator int() const { return i; } + constexpr operator int&() { return ++i; } +}; + +struct B : A { + bool operator==(const B &) const = default; +}; + +constexpr bool f() { + B x; + return x == x; +} + +static_assert(f()); + +struct ConstOnly { + std::strong_ordering operator<=>(const ConstOnly&) const; + std::strong_ordering operator<=>(ConstOnly&) = delete; + friend bool operator==(const ConstOnly&, const ConstOnly&); + friend bool operator==(ConstOnly&, ConstOnly&) = delete; +}; + +struct MutOnly { + std::strong_ordering operator<=>(const MutOnly&) const = delete;; + std::strong_ordering operator<=>(MutOnly&); + friend bool operator==(const MutOnly&, const MutOnly&) = delete;; + friend bool operator==(MutOnly&, MutOnly&); +}; + +struct ConstCheck : ConstOnly { + friend std::strong_ordering operator<=>(const ConstCheck&, const ConstCheck&) = default; + std::strong_ordering operator<=>(ConstCheck const& __restrict) const __restrict = default; + friend bool operator==(const ConstCheck&, const ConstCheck&) = default; + bool operator==(this const ConstCheck&, const ConstCheck&) = default; +}; + +// FIXME: Non-reference explicit object parameter are rejected +struct MutCheck : MutOnly { + friend bool operator==(MutCheck, MutCheck) = default; + // std::strong_ordering operator<=>(this MutCheck, MutCheck) = default; + friend std::strong_ordering operator<=>(MutCheck, MutCheck) = default; + // bool operator==(this MutCheck, MutCheck) = default; +}; +} diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h index 3fcca6d82cb33a..52ecd0bb0ba118 100644 --- a/llvm/include/llvm/ADT/PagedVector.h +++ b/llvm/include/llvm/ADT/PagedVector.h @@ -84,7 +84,7 @@ template class PagedVector { 
assert(Index / PageSize < PageToDataPtrs.size()); T *&PagePtr = PageToDataPtrs[Index / PageSize]; // If the page was not yet allocated, allocate it. - if (!PagePtr) { + if (LLVM_UNLIKELY(!PagePtr)) { PagePtr = Allocator.getPointer()->template Allocate(PageSize); // We need to invoke the default constructor on all the elements of the // page. diff --git a/llvm/include/llvm/CodeGen/DebugHandlerBase.h b/llvm/include/llvm/CodeGen/DebugHandlerBase.h index 9a62ad396127db..f9b56496d22c33 100644 --- a/llvm/include/llvm/CodeGen/DebugHandlerBase.h +++ b/llvm/include/llvm/CodeGen/DebugHandlerBase.h @@ -74,7 +74,7 @@ class DebugHandlerBase { /// This location indicates end of function prologue and beginning of /// function body. - DebugLoc PrologEndLoc; + const MachineInstr *PrologEndLoc; /// This block includes epilogue instructions. const MachineBasicBlock *EpilogBeginBlock = nullptr; diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 49ce13dd8cbe39..65c5788ac5cc9f 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -2278,15 +2278,6 @@ class TargetInstrInfo : public MCInstrInfo { llvm_unreachable("unknown number of operands necessary"); } - /// Gets the opcode for the Pseudo Instruction used to initialize - /// the undef value. If no Instruction is available, this will - /// fail compilation. 
- virtual unsigned getUndefInitOpcode(unsigned RegClassID) const { - (void)RegClassID; - - llvm_unreachable("Unexpected register class."); - } - private: mutable std::unique_ptr Formatter; unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode; diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 41983082b2543b..44ca1424ab2aaf 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -56,6 +56,11 @@ HANDLE_TARGET_OPCODE(INSERT_SUBREG) /// IMPLICIT_DEF - This is the MachineInstr-level equivalent of undef. HANDLE_TARGET_OPCODE(IMPLICIT_DEF) +/// Explicit undef initialization used past IMPLICIT_DEF elimination in cases +/// where an undef operand must be allocated to a different register than an +/// early-clobber result operand. +HANDLE_TARGET_OPCODE(INIT_UNDEF) + /// SUBREG_TO_REG - Assert the value of bits in a super register. /// The result of this instruction is the value of the second operand inserted /// into the subregister specified by the third operand. 
All other bits are diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td index 88c0988fbe6e76..2c06fd0eaa3f94 100644 --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -1254,6 +1254,13 @@ def IMPLICIT_DEF : StandardPseudoInstruction { let isAsCheapAsAMove = true; let isMeta = true; } +def INIT_UNDEF : StandardPseudoInstruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins); + let AsmString = ""; + let hasSideEffects = false; + let Size = 0; +} def SUBREG_TO_REG : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins unknown:$implsrc, unknown:$subsrc, i32imm:$subidx); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 4307399c9b3a73..0dea1e0f018daa 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1879,6 +1879,10 @@ void AsmPrinter::emitFunctionBody() { // This instruction is only used to note jump table debug info, it's // purely meta information. break; + case TargetOpcode::INIT_UNDEF: + // This is only used to influence register allocation behavior, no + // actual initialization is needed. + break; default: emitInstruction(&MI); if (CanDoExtraAnalysis) { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 29c0d4fc5b14ab..d0b08a75454b57 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2268,9 +2268,9 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { // (The new location might be an explicit line 0, which we do emit.) 
if (DL.getLine() == 0 && LastAsmLine == 0) return; - if (DL == PrologEndLoc) { + if (MI == PrologEndLoc) { Flags |= DWARF2_FLAG_PROLOGUE_END | DWARF2_FLAG_IS_STMT; - PrologEndLoc = DebugLoc(); + PrologEndLoc = nullptr; } // If the line changed, we call that a new statement; unless we went to // line 0 and came back, in which case it is not a new statement. We also @@ -2288,10 +2288,11 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { PrevInstLoc = DL; } -static std::pair findPrologueEndLoc(const MachineFunction *MF) { +static std::pair +findPrologueEndLoc(const MachineFunction *MF) { // First known non-DBG_VALUE and non-frame setup location marks // the beginning of the function body. - DebugLoc LineZeroLoc; + const MachineInstr *LineZeroLoc = nullptr; const Function &F = MF->getFunction(); // Some instructions may be inserted into prologue after this function. Must @@ -2308,9 +2309,9 @@ static std::pair findPrologueEndLoc(const MachineFunction *MF) { // meaningful breakpoint. If none is found, return the first // location after the frame setup. if (MI.getDebugLoc().getLine()) - return std::make_pair(MI.getDebugLoc(), IsEmptyPrologue); + return std::make_pair(&MI, IsEmptyPrologue); - LineZeroLoc = MI.getDebugLoc(); + LineZeroLoc = &MI; } IsEmptyPrologue = false; } @@ -2341,10 +2342,10 @@ static void recordSourceLine(AsmPrinter &Asm, unsigned Line, unsigned Col, Discriminator, Fn); } -DebugLoc DwarfDebug::emitInitialLocDirective(const MachineFunction &MF, - unsigned CUID) { - std::pair PrologEnd = findPrologueEndLoc(&MF); - DebugLoc PrologEndLoc = PrologEnd.first; +const MachineInstr * +DwarfDebug::emitInitialLocDirective(const MachineFunction &MF, unsigned CUID) { + std::pair PrologEnd = findPrologueEndLoc(&MF); + const MachineInstr *PrologEndLoc = PrologEnd.first; bool IsEmptyPrologue = PrologEnd.second; // Get beginning of function. 
@@ -2355,16 +2356,15 @@ DebugLoc DwarfDebug::emitInitialLocDirective(const MachineFunction &MF, // Ensure the compile unit is created if the function is called before // beginFunction(). - (void)getOrCreateDwarfCompileUnit( - MF.getFunction().getSubprogram()->getUnit()); + DISubprogram *SP = MF.getFunction().getSubprogram(); + (void)getOrCreateDwarfCompileUnit(SP->getUnit()); // We'd like to list the prologue as "not statements" but GDB behaves // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. - const DISubprogram *SP = PrologEndLoc->getInlinedAtScope()->getSubprogram(); ::recordSourceLine(*Asm, SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT, CUID, getDwarfVersion(), getUnits()); return PrologEndLoc; } - return DebugLoc(); + return nullptr; } // Gather pre-function debug information. Assumes being called immediately diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 7873e8163d79c5..1f33f6bd527bd2 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -757,8 +757,10 @@ class DwarfDebug : public DebugHandlerBase { /// Emit all Dwarf sections that should come after the content. void endModule() override; - /// Emits inital debug location directive. - DebugLoc emitInitialLocDirective(const MachineFunction &MF, unsigned CUID); + /// Emits initial debug location directive. Returns instruction at which + /// the function prologue ends. + const MachineInstr *emitInitialLocDirective(const MachineFunction &MF, + unsigned CUID); /// Process beginning of an instruction. 
void beginInstruction(const MachineInstr *MI) override; diff --git a/llvm/lib/CodeGen/InitUndef.cpp b/llvm/lib/CodeGen/InitUndef.cpp index 7c1b90afd495e7..8d20f2668de6b9 100644 --- a/llvm/lib/CodeGen/InitUndef.cpp +++ b/llvm/lib/CodeGen/InitUndef.cpp @@ -177,8 +177,7 @@ bool InitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI, Register TmpInitSubReg = MRI->createVirtualRegister(SubRegClass); LLVM_DEBUG(dbgs() << "Register Class ID" << SubRegClass->getID() << "\n"); BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), - TII->get(TII->getUndefInitOpcode(SubRegClass->getID())), - TmpInitSubReg); + TII->get(TargetOpcode::INIT_UNDEF), TmpInitSubReg); Register NewReg = MRI->createVirtualRegister(TargetRegClass); BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(TargetOpcode::INSERT_SUBREG), NewReg) @@ -203,9 +202,9 @@ bool InitUndef::fixupIllOperand(MachineInstr *MI, MachineOperand &MO) { const TargetRegisterClass *TargetRegClass = TRI->getLargestSuperClass(MRI->getRegClass(MO.getReg())); LLVM_DEBUG(dbgs() << "Register Class ID" << TargetRegClass->getID() << "\n"); - unsigned Opcode = TII->getUndefInitOpcode(TargetRegClass->getID()); Register NewReg = MRI->createVirtualRegister(TargetRegClass); - BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(Opcode), NewReg); + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(TargetOpcode::INIT_UNDEF), NewReg); MO.setReg(NewReg); if (MO.isUndef()) MO.setIsUndef(false); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 5baad97a0c4ad3..0f5bd7bc83a76e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -22186,6 +22186,59 @@ performSignExtendSetCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, return SDValue(); } +// Convert zext(extract(shuffle a, b, [0,4,8,12])) -> and(uzp1(a, b), 255) +// This comes from interleaved vectorization. 
It is performed late to capture +// uitofp converts too. +static SDValue performZExtDeinterleaveShuffleCombine(SDNode *N, + SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + if ((VT != MVT::v4i32 && VT != MVT::v8i16) || + N->getOpcode() != ISD::ZERO_EXTEND || + N->getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR) + return SDValue(); + + unsigned ExtOffset = N->getOperand(0).getConstantOperandVal(1); + if (ExtOffset != 0 && ExtOffset != VT.getVectorNumElements()) + return SDValue(); + + EVT InVT = N->getOperand(0).getOperand(0).getValueType(); + auto *Shuffle = dyn_cast(N->getOperand(0).getOperand(0)); + if (!Shuffle || + InVT.getVectorNumElements() != VT.getVectorNumElements() * 2 || + InVT.getScalarSizeInBits() * 2 != VT.getScalarSizeInBits()) + return SDValue(); + + unsigned Idx; + bool IsDeInterleave = ShuffleVectorInst::isDeInterleaveMaskOfFactor( + Shuffle->getMask().slice(ExtOffset, VT.getVectorNumElements()), 4, Idx); + // An undef interleave shuffle can come up after other canonicalizations, + // where the shuffle has been converted to + // zext(extract(shuffle b, undef, [u,u,0,4])) + bool IsUndefDeInterleave = false; + if (!IsDeInterleave) + IsUndefDeInterleave = + Shuffle->getOperand(1).isUndef() && + ShuffleVectorInst::isDeInterleaveMaskOfFactor( + Shuffle->getMask().slice(ExtOffset + VT.getVectorNumElements() / 2, + VT.getVectorNumElements() / 2), + 4, Idx); + if ((!IsDeInterleave && !IsUndefDeInterleave) || Idx >= 4) + return SDValue(); + SDLoc DL(N); + SDValue BC1 = DAG.getNode(AArch64ISD::NVCAST, DL, VT, + Shuffle->getOperand(IsUndefDeInterleave ? 1 : 0)); + SDValue BC2 = DAG.getNode(AArch64ISD::NVCAST, DL, VT, + Shuffle->getOperand(IsUndefDeInterleave ? 0 : 1)); + SDValue UZP = DAG.getNode(Idx < 2 ? 
AArch64ISD::UZP1 : AArch64ISD::UZP2, DL, + VT, BC1, BC2); + if ((Idx & 1) == 1) + UZP = DAG.getNode(ISD::SRL, DL, VT, UZP, + DAG.getConstant(InVT.getScalarSizeInBits(), DL, VT)); + return DAG.getNode( + ISD::AND, DL, VT, UZP, + DAG.getConstant((1 << InVT.getScalarSizeInBits()) - 1, DL, VT)); +} + static SDValue performExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { @@ -22206,6 +22259,9 @@ static SDValue performExtendCombine(SDNode *N, return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD); } + if (SDValue R = performZExtDeinterleaveShuffleCombine(N, DAG)) + return R; + if (N->getValueType(0).isFixedLengthVector() && N->getOpcode() == ISD::SIGN_EXTEND && N->getOperand(0)->getOpcode() == ISD::SETCC) diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index b4e58cfd98a236..5c49a8116ae7fc 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -1675,6 +1675,10 @@ defm IMAGE_ATOMIC_PK_ADD_BF16 : MIMG_Atomic , "image_atomic_add_flt", 0, 1>; defm IMAGE_ATOMIC_MIN_FLT : MIMG_Atomic , "image_atomic_min_num_flt", 0, 1, "image_atomic_min_flt">; defm IMAGE_ATOMIC_MAX_FLT : MIMG_Atomic , "image_atomic_max_num_flt", 0, 1, "image_atomic_max_flt">; +let AssemblerPredicate = isGFX12Plus in { + def : AMDGPUMnemonicAlias<"image_atomic_fmin", "image_atomic_min_flt">; + def : AMDGPUMnemonicAlias<"image_atomic_fmax", "image_atomic_max_flt">; +} defm IMAGE_SAMPLE : MIMG_Sampler_WQM , AMDGPUSample>; let OtherPredicates = [HasImageInsts, HasExtendedImageInsts] in { diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 8eb5d91d3b8792..710182985a1e9e 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -2411,12 +2411,6 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) { case ARM::SEH_EpilogEnd: ATS.emitARMWinCFIEpilogEnd(); return; - - 
case ARM::PseudoARMInitUndefMQPR: - case ARM::PseudoARMInitUndefSPR: - case ARM::PseudoARMInitUndefDPR_VFP2: - case ARM::PseudoARMInitUndefGPR: - return; } MCInst TmpInst; diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 27290f7f76347c..aee9797585dbd2 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -546,19 +546,6 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { std::optional isAddImmediate(const MachineInstr &MI, Register Reg) const override; - - unsigned getUndefInitOpcode(unsigned RegClassID) const override { - if (RegClassID == ARM::MQPRRegClass.getID()) - return ARM::PseudoARMInitUndefMQPR; - if (RegClassID == ARM::SPRRegClass.getID()) - return ARM::PseudoARMInitUndefSPR; - if (RegClassID == ARM::DPR_VFP2RegClass.getID()) - return ARM::PseudoARMInitUndefDPR_VFP2; - if (RegClassID == ARM::GPRRegClass.getID()) - return ARM::PseudoARMInitUndefGPR; - - llvm_unreachable("Unexpected register class."); - } }; /// Get the operands corresponding to the given \p Pred value. 
By default, the diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 9096617a948557..aa663556deb760 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -21073,7 +21073,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(I.arg_size() - 1); Info.offset = 0; - Info.align.reset(); + Info.align = I.getParamAlign(I.arg_size() - 1).valueOrOne(); // volatile loads with NEON intrinsics not supported Info.flags = MachineMemOperand::MOLoad; return true; @@ -21120,7 +21120,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align.reset(); + Info.align = I.getParamAlign(0).valueOrOne(); // volatile stores with NEON intrinsics not supported Info.flags = MachineMemOperand::MOStore; return true; diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 0fc561382084e3..ed68c6ff20cde5 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -6549,15 +6549,3 @@ let isPseudo = 1 in { let isTerminator = 1 in def SEH_EpilogEnd : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>; } - - -//===----------------------------------------------------------------------===// -// Pseudo Instructions for use when early-clobber is defined and Greedy Register -// Allocation is used. This ensures the constraint is used properly. 
-//===----------------------------------------------------------------------===// -let isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in { - def PseudoARMInitUndefMQPR : PseudoInst<(outs MQPR:$vd), (ins), NoItinerary, []>; - def PseudoARMInitUndefSPR : PseudoInst<(outs SPR:$sd), (ins), NoItinerary, []>; - def PseudoARMInitUndefDPR_VFP2 : PseudoInst<(outs DPR_VFP2:$dd), (ins), NoItinerary, []>; - def PseudoARMInitUndefGPR : PseudoInst<(outs GPR:$rd), (ins), NoItinerary, []>; -} diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 912569a8fec118..9b5349241c341b 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -163,6 +163,22 @@ ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { break; } + case Intrinsic::arm_neon_vld1x2: + case Intrinsic::arm_neon_vld1x3: + case Intrinsic::arm_neon_vld1x4: + case Intrinsic::arm_neon_vst1x2: + case Intrinsic::arm_neon_vst1x3: + case Intrinsic::arm_neon_vst1x4: { + Align NewAlign = + getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II, + &IC.getAssumptionCache(), &IC.getDominatorTree()); + Align OldAlign = II.getParamAlign(0).valueOrOne(); + if (NewAlign > OldAlign) + II.addParamAttr(0, + Attribute::getWithAlignment(II.getContext(), NewAlign)); + break; + } + case Intrinsic::arm_mve_pred_i2v: { Value *Arg = II.getArgOperand(0); Value *ArgArg; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 7a0b35c1afce23..cf3ea3e4ea2131 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -129,6 +129,14 @@ enum { ElementsDependOnMaskShift = ElementsDependOnVLShift + 1, ElementsDependOnMaskMask = 1ULL << ElementsDependOnMaskShift, + + // Indicates the EEW of a vector instruction's destination operand. 
+ // 0 -> 1 + // 1 -> SEW + // 2 -> SEW * 2 + // 3 -> SEW * 4 + DestEEWShift = ElementsDependOnMaskShift + 1, + DestEEWMask = 3ULL << DestEEWShift, }; // Helper functions to read TSFlags. diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index 476dde2be39e57..24bca2da652d0e 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -303,11 +303,6 @@ void RISCVAsmPrinter::emitInstruction(const MachineInstr *MI) { case RISCV::KCFI_CHECK: LowerKCFI_CHECK(*MI); return; - case RISCV::PseudoRVVInitUndefM1: - case RISCV::PseudoRVVInitUndefM2: - case RISCV::PseudoRVVInitUndefM4: - case RISCV::PseudoRVVInitUndefM8: - return; case TargetOpcode::STACKMAP: return LowerSTACKMAP(*OutStreamer, SM, *MI); case TargetOpcode::PATCHPOINT: diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index a389320adc8763..fcea18f81b3901 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -167,6 +167,14 @@ def EltDepsNone : EltDeps; def EltDepsVL : EltDeps; def EltDepsVLMask : EltDeps; +class EEW val> { + bits<2> Value = val; +} +def EEW1 : EEW<0>; +def EEWSEWx1 : EEW<1>; +def EEWSEWx2 : EEW<2>; +def EEWSEWx4 : EEW<3>; + class RVInstCommon pattern, InstFormat format> : Instruction { let Namespace = "RISCV"; @@ -240,6 +248,10 @@ class RVInstCommonBaseInstr; } + +unsigned RISCV::getDestLog2EEW(const MCInstrDesc &Desc, unsigned Log2SEW) { + unsigned DestEEW = + (Desc.TSFlags & RISCVII::DestEEWMask) >> RISCVII::DestEEWShift; + // EEW = 1 + if (DestEEW == 0) + return 0; + // EEW = SEW * n + unsigned Scaled = Log2SEW + (DestEEW - 1); + assert(Scaled >= 3 && Scaled <= 6); + return Scaled; +} diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 8494110adffb94..457db9b9860d00 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ 
b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -293,21 +293,6 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { unsigned getTailDuplicateSize(CodeGenOptLevel OptLevel) const override; - unsigned getUndefInitOpcode(unsigned RegClassID) const override { - switch (RegClassID) { - case RISCV::VRRegClassID: - return RISCV::PseudoRVVInitUndefM1; - case RISCV::VRM2RegClassID: - return RISCV::PseudoRVVInitUndefM2; - case RISCV::VRM4RegClassID: - return RISCV::PseudoRVVInitUndefM4; - case RISCV::VRM8RegClassID: - return RISCV::PseudoRVVInitUndefM8; - default: - llvm_unreachable("Unexpected register class."); - } - } - protected: const RISCVSubtarget &STI; @@ -354,6 +339,10 @@ std::optional getVectorLowDemandedScalarBits(uint16_t Opcode, // Returns the MC opcode of RVV pseudo instruction. unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode); +// For a (non-pseudo) RVV instruction \p Desc and the given \p Log2SEW, returns +// the log2 EEW of the destination operand. +unsigned getDestLog2EEW(const MCInstrDesc &Desc, unsigned Log2SEW); + // Special immediate for AVL operand of V pseudo instructions to indicate VLMax. static constexpr int64_t VLMaxSentinel = -1LL; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 738bb5d9bd65bf..6f7d14d5503bd3 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -1104,7 +1104,7 @@ def : InstAlias<"vneg.v $vd, $vs", (VRSUB_VX VR:$vd, VR:$vs, X0, zero_reg)>; // The destination vector register group cannot overlap a source vector // register group of a different element width (including the mask register // if masked), otherwise an illegal instruction exception is raised. 
-let Constraints = "@earlyclobber $vd" in { +let Constraints = "@earlyclobber $vd", DestEEW = EEWSEWx2 in { let RVVConstraint = WidenV in { defm VWADDU_V : VALU_MV_V_X<"vwaddu", 0b110000, "v">; defm VWSUBU_V : VALU_MV_V_X<"vwsubu", 0b110010, "v">; @@ -1121,7 +1121,7 @@ defm VWSUBU_W : VALU_MV_V_X<"vwsubu", 0b110110, "w">; defm VWADD_W : VALU_MV_V_X<"vwadd", 0b110101, "w">; defm VWSUB_W : VALU_MV_V_X<"vwsub", 0b110111, "w">; } // RVVConstraint = WidenW -} // Constraints = "@earlyclobber $vd" +} // Constraints = "@earlyclobber $vd", DestEEW = EEWSEWx2 def : InstAlias<"vwcvt.x.x.v $vd, $vs$vm", (VWADD_VX VR:$vd, VR:$vs, X0, VMaskOp:$vm)>; @@ -1147,10 +1147,11 @@ defm VMADC_V : VALUm_IV_V_X_I<"vmadc", 0b010001>; defm VMADC_V : VALUNoVm_IV_V_X_I<"vmadc", 0b010001>; } // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint defm VSBC_V : VALUm_IV_V_X<"vsbc", 0b010010>; -let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in { +let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, + DestEEW = EEW1 in { defm VMSBC_V : VALUm_IV_V_X<"vmsbc", 0b010011>; defm VMSBC_V : VALUNoVm_IV_V_X<"vmsbc", 0b010011>; -} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint +} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, DestEEW = EEW1 // Vector Bitwise Logical Instructions defm VAND_V : VALU_IV_V_X_I<"vand", 0b001001>; @@ -1183,7 +1184,7 @@ def : InstAlias<"vncvt.x.x.w $vd, $vs", (VNSRL_WX VR:$vd, VR:$vs, X0, zero_reg)>; // Vector Integer Comparison Instructions -let RVVConstraint = NoConstraint in { +let RVVConstraint = NoConstraint, DestEEW = EEW1 in { defm VMSEQ_V : VCMP_IV_V_X_I<"vmseq", 0b011000>; defm VMSNE_V : VCMP_IV_V_X_I<"vmsne", 0b011001>; defm VMSLTU_V : VCMP_IV_V_X<"vmsltu", 0b011010>; @@ -1192,7 +1193,7 @@ defm VMSLEU_V : VCMP_IV_V_X_I<"vmsleu", 0b011100>; defm VMSLE_V : VCMP_IV_V_X_I<"vmsle", 0b011101>; defm VMSGTU_V : VCMP_IV_X_I<"vmsgtu", 0b011110>; defm VMSGT_V : VCMP_IV_X_I<"vmsgt", 0b011111>; 
-} // RVVConstraint = NoConstraint +} // RVVConstraint = NoConstraint, DestEEW = EEW1 def : InstAlias<"vmsgtu.vv $vd, $va, $vb$vm", (VMSLTU_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>; @@ -1204,7 +1205,7 @@ def : InstAlias<"vmsge.vv $vd, $va, $vb$vm", (VMSLE_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>; let isCodeGenOnly = 0, isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 0, - mayStore = 0 in { + mayStore = 0, DestEEW = EEW1 in { // For unsigned comparisons we need to special case 0 immediate to maintain // the always true/false semantics we would invert if we just decremented the // immediate like we do for signed. To match the GNU assembler we will use @@ -1227,7 +1228,7 @@ def PseudoVMSLT_VI : Pseudo<(outs VR:$vd), } let isCodeGenOnly = 0, isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 0, - mayStore = 0 in { + mayStore = 0, DestEEW = EEW1 in { def PseudoVMSGEU_VX : Pseudo<(outs VR:$vd), (ins VR:$vs2, GPR:$rs1), [], "vmsgeu.vx", "$vd, $vs2, $rs1">; @@ -1267,11 +1268,12 @@ defm VREMU_V : VDIV_MV_V_X<"vremu", 0b100010>; defm VREM_V : VDIV_MV_V_X<"vrem", 0b100011>; // Vector Widening Integer Multiply Instructions -let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in { +let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, + DestEEW = EEWSEWx2 in { defm VWMUL_V : VWMUL_MV_V_X<"vwmul", 0b111011>; defm VWMULU_V : VWMUL_MV_V_X<"vwmulu", 0b111000>; defm VWMULSU_V : VWMUL_MV_V_X<"vwmulsu", 0b111010>; -} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV +} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, DestEEW = EEWSEWx2 // Vector Single-Width Integer Multiply-Add Instructions defm VMACC_V : VMAC_MV_V_X<"vmacc", 0b101101>; @@ -1280,10 +1282,12 @@ defm VMADD_V : VMAC_MV_V_X<"vmadd", 0b101001>; defm VNMSUB_V : VMAC_MV_V_X<"vnmsub", 0b101011>; // Vector Widening Integer Multiply-Add Instructions +let DestEEW = EEWSEWx2 in { defm VWMACCU_V : VWMAC_MV_V_X<"vwmaccu", 0b111100>; defm VWMACC_V : VWMAC_MV_V_X<"vwmacc", 
0b111101>; defm VWMACCSU_V : VWMAC_MV_V_X<"vwmaccsu", 0b111111>; defm VWMACCUS_V : VWMAC_MV_X<"vwmaccus", 0b111110>; +} // DestEEW = EEWSEWx2 // Vector Integer Merge Instructions defm VMERGE_V : VMRG_IV_V_X_I<"vmerge", 0b010111>; @@ -1342,7 +1346,8 @@ defm VFRSUB_V : VALU_FV_F<"vfrsub", 0b100111>; // Vector Widening Floating-Point Add/Subtract Instructions let Constraints = "@earlyclobber $vd", Uses = [FRM], - mayRaiseFPException = true in { + mayRaiseFPException = true, + DestEEW = EEWSEWx2 in { let RVVConstraint = WidenV in { defm VFWADD_V : VWALU_FV_V_F<"vfwadd", 0b110000, "v">; defm VFWSUB_V : VWALU_FV_V_F<"vfwsub", 0b110010, "v">; @@ -1355,7 +1360,7 @@ let RVVConstraint = WidenW in { defm VFWADD_W : VWALU_FV_V_F<"vfwadd", 0b110100, "w">; defm VFWSUB_W : VWALU_FV_V_F<"vfwsub", 0b110110, "w">; } // RVVConstraint = WidenW -} // Constraints = "@earlyclobber $vd", Uses = [FRM], mayRaiseFPException = true +} // Constraints = "@earlyclobber $vd", Uses = [FRM], mayRaiseFPException = true, DestEEW = EEWSEWx2 // Vector Single-Width Floating-Point Multiply/Divide Instructions let Uses = [FRM], mayRaiseFPException = true in { @@ -1366,9 +1371,9 @@ defm VFRDIV_V : VDIV_FV_F<"vfrdiv", 0b100001>; // Vector Widening Floating-Point Multiply let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, - Uses = [FRM], mayRaiseFPException = true in { + Uses = [FRM], mayRaiseFPException = true, DestEEW = EEWSEWx2 in { defm VFWMUL_V : VWMUL_FV_V_F<"vfwmul", 0b111000>; -} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true +} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true, DestEEW = EEWSEWx2 // Vector Single-Width Floating-Point Fused Multiply-Add Instructions let Uses = [FRM], mayRaiseFPException = true in { @@ -1383,12 +1388,12 @@ defm VFNMSUB_V : VMAC_FV_V_F<"vfnmsub", 0b101011>; } // Vector Widening Floating-Point Fused Multiply-Add Instructions -let Uses = [FRM], 
mayRaiseFPException = true in { +let Uses = [FRM], mayRaiseFPException = true, DestEEW = EEWSEWx2 in { defm VFWMACC_V : VWMAC_FV_V_F<"vfwmacc", 0b111100>; defm VFWNMACC_V : VWMAC_FV_V_F<"vfwnmacc", 0b111101>; defm VFWMSAC_V : VWMAC_FV_V_F<"vfwmsac", 0b111110>; defm VFWNMSAC_V : VWMAC_FV_V_F<"vfwnmsac", 0b111111>; -} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true +} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true, DestEEW = EEWSEWx2 // Vector Floating-Point Square-Root Instruction let Uses = [FRM], mayRaiseFPException = true in { @@ -1420,14 +1425,14 @@ def : InstAlias<"vfabs.v $vd, $vs", (VFSGNJX_VV VR:$vd, VR:$vs, VR:$vs, zero_reg)>; // Vector Floating-Point Compare Instructions -let RVVConstraint = NoConstraint, mayRaiseFPException = true in { +let RVVConstraint = NoConstraint, mayRaiseFPException = true, DestEEW = EEW1 in { defm VMFEQ_V : VCMP_FV_V_F<"vmfeq", 0b011000>; defm VMFNE_V : VCMP_FV_V_F<"vmfne", 0b011100>; defm VMFLT_V : VCMP_FV_V_F<"vmflt", 0b011011>; defm VMFLE_V : VCMP_FV_V_F<"vmfle", 0b011001>; defm VMFGT_V : VCMP_FV_F<"vmfgt", 0b011101>; defm VMFGE_V : VCMP_FV_F<"vmfge", 0b011111>; -} // RVVConstraint = NoConstraint, mayRaiseFPException = true +} // RVVConstraint = NoConstraint, mayRaiseFPException = true, DestEEW = EEW1 def : InstAlias<"vmfgt.vv $vd, $va, $vb$vm", (VMFLT_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>; @@ -1471,7 +1476,7 @@ defm VFCVT_F_X_V : VCVTF_IV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>; // Widening Floating-Point/Integer Type-Convert Instructions let Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt, - mayRaiseFPException = true in { + mayRaiseFPException = true, DestEEW = EEWSEWx2 in { let Uses = [FRM] in { defm VFWCVT_XU_F_V : VWCVTI_FV_VS2<"vfwcvt.xu.f.v", 0b010010, 0b01000>; defm VFWCVT_X_F_V : VWCVTI_FV_VS2<"vfwcvt.x.f.v", 0b010010, 0b01001>; @@ -1481,7 +1486,7 @@ defm VFWCVT_RTZ_X_F_V : 
VWCVTI_FV_VS2<"vfwcvt.rtz.x.f.v", 0b010010, 0b01111>; defm VFWCVT_F_XU_V : VWCVTF_IV_VS2<"vfwcvt.f.xu.v", 0b010010, 0b01010>; defm VFWCVT_F_X_V : VWCVTF_IV_VS2<"vfwcvt.f.x.v", 0b010010, 0b01011>; defm VFWCVT_F_F_V : VWCVTF_FV_VS2<"vfwcvt.f.f.v", 0b010010, 0b01100>; -} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt +} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt, DestEEW = EEWSEWx2 // Narrowing Floating-Point/Integer Type-Convert Instructions let Constraints = "@earlyclobber $vd", mayRaiseFPException = true in { @@ -1515,14 +1520,14 @@ defm VREDXOR : VRED_MV_V<"vredxor", 0b000011>; } // RVVConstraint = NoConstraint, ElementsDependOn = EltDepsVLMask // Vector Widening Integer Reduction Instructions -let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, ElementsDependOn = EltDepsVLMask in { +let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, ElementsDependOn = EltDepsVLMask, DestEEW = EEWSEWx2 in { // Set earlyclobber for following instructions for second and mask operands. // This has the downside that the earlyclobber constraint is too coarse and // will impose unnecessary restrictions by not allowing the destination to // overlap with the first (wide) operand. 
defm VWREDSUMU : VWRED_IV_V<"vwredsumu", 0b110000>; defm VWREDSUM : VWRED_IV_V<"vwredsum", 0b110001>; -} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, ElementsDependOn = EltDepsVLMask +} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, ElementsDependOn = EltDepsVLMask, DestEEW = EEWSEWx2 } // Predicates = [HasVInstructions] @@ -1543,7 +1548,7 @@ def : InstAlias<"vfredsum.vs $vd, $vs2, $vs1$vm", (VFREDUSUM_VS VR:$vd, VR:$vs2, VR:$vs1, VMaskOp:$vm), 0>; // Vector Widening Floating-Point Reduction Instructions -let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, ElementsDependOn = EltDepsVLMask in { +let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, ElementsDependOn = EltDepsVLMask, DestEEW = EEWSEWx2 in { // Set earlyclobber for following instructions for second and mask operands. // This has the downside that the earlyclobber constraint is too coarse and // will impose unnecessary restrictions by not allowing the destination to @@ -1552,7 +1557,7 @@ let Uses = [FRM], mayRaiseFPException = true in { defm VFWREDOSUM : VWREDO_FV_V<"vfwredosum", 0b110011>; defm VFWREDUSUM : VWRED_FV_V<"vfwredusum", 0b110001>; } -} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, ElementsDependOn = EltDepsVLMask +} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, ElementsDependOn = EltDepsVLMask, DestEEW = EEWSEWx2 def : InstAlias<"vfwredsum.vs $vd, $vs2, $vs1$vm", (VFWREDUSUM_VS VR:$vd, VR:$vs2, VR:$vs1, VMaskOp:$vm), 0>; @@ -1560,7 +1565,7 @@ def : InstAlias<"vfwredsum.vs $vd, $vs2, $vs1$vm", let Predicates = [HasVInstructions] in { // Vector Mask-Register Logical Instructions -let RVVConstraint = NoConstraint in { +let RVVConstraint = NoConstraint, DestEEW = EEW1 in { defm VMAND_M : VMALU_MV_Mask<"vmand", 0b011001, "m">; defm VMNAND_M : VMALU_MV_Mask<"vmnand", 0b011101, "m">; defm VMANDN_M : VMALU_MV_Mask<"vmandn", 0b011000, "m">; @@ -1607,12 +1612,14 @@ def : 
InstAlias<"vpopc.m $vd, $vs2$vm", let Constraints = "@earlyclobber $vd", RVVConstraint = Iota, ElementsDependOn = EltDepsVLMask in { +let DestEEW = EEW1 in { // vmsbf.m set-before-first mask bit defm VMSBF_M : VMSFS_MV_V<"vmsbf.m", 0b010100, 0b00001>; // vmsif.m set-including-first mask bit defm VMSIF_M : VMSFS_MV_V<"vmsif.m", 0b010100, 0b00011>; // vmsof.m set-only-first mask bit defm VMSOF_M : VMSFS_MV_V<"vmsof.m", 0b010100, 0b00010>; +} // DestEEW = EEW1 // Vector Iota Instruction defm VIOTA_M : VIOTA_MV_V<"viota.m", 0b010100, 0b10000>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 1b4303fbbcf809..c91c9c3614a34c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -6116,15 +6116,6 @@ foreach lmul = MxList in { } } -/// Empty pseudo for RISCVInitUndefPass -let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 0, - isCodeGenOnly = 1 in { - def PseudoRVVInitUndefM1 : Pseudo<(outs VR:$vd), (ins), [], "">; - def PseudoRVVInitUndefM2 : Pseudo<(outs VRM2:$vd), (ins), [], "">; - def PseudoRVVInitUndefM4 : Pseudo<(outs VRM4:$vd), (ins), [], "">; - def PseudoRVVInitUndefM8 : Pseudo<(outs VRM8:$vd), (ins), [], "">; -} - //===----------------------------------------------------------------------===// // 6. 
Configuration-Setting Instructions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td index 3c1fb38349d5ca..851e817c501250 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td @@ -201,21 +201,24 @@ let Predicates = [HasVendorXSfvcp], mayLoad = 0, mayStore = 0, defm FVW : CustomSiFiveVCIX<"fvw", VCIX_XVW, VR, VR, FPR32>, Sched<[]>; } -let Predicates = [HasVendorXSfvqmaccdod], DecoderNamespace = "XSfvqmaccdod" in { +let Predicates = [HasVendorXSfvqmaccdod], DecoderNamespace = "XSfvqmaccdod", + DestEEW = EEWSEWx4 in { def VQMACCU_2x8x2 : CustomSiFiveVMACC<0b101100, OPMVV, "sf.vqmaccu.2x8x2">; def VQMACC_2x8x2 : CustomSiFiveVMACC<0b101101, OPMVV, "sf.vqmacc.2x8x2">; def VQMACCUS_2x8x2 : CustomSiFiveVMACC<0b101110, OPMVV, "sf.vqmaccus.2x8x2">; def VQMACCSU_2x8x2 : CustomSiFiveVMACC<0b101111, OPMVV, "sf.vqmaccsu.2x8x2">; } -let Predicates = [HasVendorXSfvqmaccqoq], DecoderNamespace = "XSfvqmaccqoq" in { +let Predicates = [HasVendorXSfvqmaccqoq], DecoderNamespace = "XSfvqmaccqoq", + DestEEW = EEWSEWx4 in { def VQMACCU_4x8x4 : CustomSiFiveVMACC<0b111100, OPMVV, "sf.vqmaccu.4x8x4">; def VQMACC_4x8x4 : CustomSiFiveVMACC<0b111101, OPMVV, "sf.vqmacc.4x8x4">; def VQMACCUS_4x8x4 : CustomSiFiveVMACC<0b111110, OPMVV, "sf.vqmaccus.4x8x4">; def VQMACCSU_4x8x4 : CustomSiFiveVMACC<0b111111, OPMVV, "sf.vqmaccsu.4x8x4">; } -let Predicates = [HasVendorXSfvfwmaccqqq], DecoderNamespace = "XSfvfwmaccqqq" in { +let Predicates = [HasVendorXSfvfwmaccqqq], DecoderNamespace = "XSfvfwmaccqqq", + DestEEW = EEWSEWx2 in { def VFWMACC_4x4x4 : CustomSiFiveVMACC<0b111100, OPFVV, "sf.vfwmacc.4x4x4">; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td index 1b1f3b9b16e44f..a79f757753325c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td +++ 
b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td @@ -19,7 +19,7 @@ let Predicates = [HasStdExtZvfbfmin], Constraints = "@earlyclobber $vd", mayRaiseFPException = true in { -let RVVConstraint = WidenCvt in +let RVVConstraint = WidenCvt, DestEEW = EEWSEWx2 in defm VFWCVTBF16_F_F_V : VWCVTF_FV_VS2<"vfwcvtbf16.f.f.v", 0b010010, 0b01101>; let Uses = [FRM] in defm VFNCVTBF16_F_F_W : VNCVTF_FV_VS2<"vfncvtbf16.f.f.w", 0b010010, 0b11101>; @@ -27,6 +27,7 @@ defm VFNCVTBF16_F_F_W : VNCVTF_FV_VS2<"vfncvtbf16.f.f.w", 0b010010, 0b11101>; let Predicates = [HasStdExtZvfbfwma], Constraints = "@earlyclobber $vd_wb, $vd = $vd_wb", - RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true in { + RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true, + DestEEW = EEWSEWx2 in { defm VFWMACCBF16_V : VWMAC_FV_V_F<"vfwmaccbf16", 0b111011>; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td index e19a11805c9c08..7ec13e4eaafa7d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ -123,7 +123,8 @@ let Predicates = [HasStdExtZvbb] in { def VCLZ_V : VALUVs2<0b010010, 0b01100, OPMVV, "vclz.v">; def VCPOP_V : VALUVs2<0b010010, 0b01110, OPMVV, "vcpop.v">; def VCTZ_V : VALUVs2<0b010010, 0b01101, OPMVV, "vctz.v">; - let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in + let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, + DestEEW = EEWSEWx2 in defm VWSLL_V : VSHT_IV_V_X_I<"vwsll", 0b110101>; } // Predicates = [HasStdExtZvbb] diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp index 026e0a365b8dcb..a612a03106f024 100644 --- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp @@ -69,6 +69,7 @@ class RISCVVectorPeephole : public MachineFunctionPass { bool foldUndefPassthruVMV_V_V(MachineInstr &MI); bool foldVMV_V_V(MachineInstr &MI); + bool 
hasSameEEW(const MachineInstr &User, const MachineInstr &Src) const; bool isAllOnesMask(const MachineInstr *MaskDef) const; std::optional getConstant(const MachineOperand &VL) const; bool ensureDominates(const MachineOperand &Use, MachineInstr &Src) const; @@ -98,10 +99,17 @@ static bool isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS) { return LHS.getImm() <= RHS.getImm(); } -static unsigned getSEWLMULRatio(const MachineInstr &MI) { - RISCVII::VLMUL LMUL = RISCVII::getLMul(MI.getDesc().TSFlags); - unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm(); - return RISCVVType::getSEWLMULRatio(1 << Log2SEW, LMUL); +/// Given \p User that has an input operand with EEW=SEW, which uses the dest +/// operand of \p Src with an unknown EEW, return true if their EEWs match. +bool RISCVVectorPeephole::hasSameEEW(const MachineInstr &User, + const MachineInstr &Src) const { + unsigned UserLog2SEW = + User.getOperand(RISCVII::getSEWOpNum(User.getDesc())).getImm(); + unsigned SrcLog2SEW = + Src.getOperand(RISCVII::getSEWOpNum(Src.getDesc())).getImm(); + unsigned SrcLog2EEW = RISCV::getDestLog2EEW( + TII->get(RISCV::getRVVMCOpcode(Src.getOpcode())), SrcLog2SEW); + return SrcLog2EEW == UserLog2SEW; } // Attempt to reduce the VL of an instruction whose sole use is feeding a @@ -154,8 +162,8 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const { !RISCVII::hasSEWOp(Src->getDesc().TSFlags)) return false; - // Src needs to have the same VLMAX as MI - if (getSEWLMULRatio(MI) != getSEWLMULRatio(*Src)) + // Src's dest needs to have the same EEW as MI's input. 
+ if (!hasSameEEW(MI, *Src)) return false; bool ElementsDependOnVL = RISCVII::elementsDependOnVL( @@ -486,8 +494,7 @@ bool RISCVVectorPeephole::foldUndefPassthruVMV_V_V(MachineInstr &MI) { if (Src && !Src->hasUnmodeledSideEffects() && MRI->hasOneUse(MI.getOperand(2).getReg()) && RISCVII::hasVLOp(Src->getDesc().TSFlags) && - RISCVII::hasVecPolicyOp(Src->getDesc().TSFlags) && - getSEWLMULRatio(MI) == getSEWLMULRatio(*Src)) { + RISCVII::hasVecPolicyOp(Src->getDesc().TSFlags) && hasSameEEW(MI, *Src)) { const MachineOperand &MIVL = MI.getOperand(3); const MachineOperand &SrcVL = Src->getOperand(RISCVII::getVLOpNum(Src->getDesc())); @@ -532,8 +539,8 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) { !RISCVII::hasVecPolicyOp(Src->getDesc().TSFlags)) return false; - // Src needs to have the same VLMAX as MI - if (getSEWLMULRatio(MI) != getSEWLMULRatio(*Src)) + // Src's dest needs to have the same EEW as MI's input. + if (!hasSameEEW(MI, *Src)) return false; // Src needs to have the same passthru as VMV_V_V diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 14b40e8c2299e0..bd406579f1a918 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -769,8 +769,7 @@ MemorySanitizer::getOrInsertMsanMetadataFunction(Module &M, StringRef Name, ArgsTy... Args) { if (TargetTriple.getArch() == Triple::systemz) { // SystemZ ABI: shadow/origin pair is returned via a hidden parameter. 
- return M.getOrInsertFunction(Name, Type::getVoidTy(*C), - PointerType::get(MsanMetadata, 0), + return M.getOrInsertFunction(Name, Type::getVoidTy(*C), PtrTy, std::forward(Args)...); } @@ -804,29 +803,26 @@ void MemorySanitizer::createKernelApi(Module &M, const TargetLibraryInfo &TLI) { ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), /* va_arg_origin */ IRB.getInt64Ty(), ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy, OriginTy); - MsanGetContextStateFn = M.getOrInsertFunction( - "__msan_get_context_state", PointerType::get(MsanContextStateTy, 0)); + MsanGetContextStateFn = + M.getOrInsertFunction("__msan_get_context_state", PtrTy); - MsanMetadata = StructType::get(PointerType::get(IRB.getInt8Ty(), 0), - PointerType::get(IRB.getInt32Ty(), 0)); + MsanMetadata = StructType::get(PtrTy, PtrTy); for (int ind = 0, size = 1; ind < 4; ind++, size <<= 1) { std::string name_load = "__msan_metadata_ptr_for_load_" + std::to_string(size); std::string name_store = "__msan_metadata_ptr_for_store_" + std::to_string(size); - MsanMetadataPtrForLoad_1_8[ind] = getOrInsertMsanMetadataFunction( - M, name_load, PointerType::get(IRB.getInt8Ty(), 0)); - MsanMetadataPtrForStore_1_8[ind] = getOrInsertMsanMetadataFunction( - M, name_store, PointerType::get(IRB.getInt8Ty(), 0)); + MsanMetadataPtrForLoad_1_8[ind] = + getOrInsertMsanMetadataFunction(M, name_load, PtrTy); + MsanMetadataPtrForStore_1_8[ind] = + getOrInsertMsanMetadataFunction(M, name_store, PtrTy); } MsanMetadataPtrForLoadN = getOrInsertMsanMetadataFunction( - M, "__msan_metadata_ptr_for_load_n", PointerType::get(IRB.getInt8Ty(), 0), - IRB.getInt64Ty()); + M, "__msan_metadata_ptr_for_load_n", PtrTy, IRB.getInt64Ty()); MsanMetadataPtrForStoreN = getOrInsertMsanMetadataFunction( - M, "__msan_metadata_ptr_for_store_n", - PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty()); + M, "__msan_metadata_ptr_for_store_n", PtrTy, IRB.getInt64Ty()); // Functions for poisoning and unpoisoning memory. 
MsanPoisonAllocaFn = M.getOrInsertFunction( @@ -937,9 +933,8 @@ void MemorySanitizer::initializeCallbacks(Module &M, const TargetLibraryInfo &TL TLI.getAttrList(C, {1}, /*Signed=*/true), PtrTy, PtrTy, IRB.getInt32Ty(), IntptrTy); - MsanInstrumentAsmStoreFn = - M.getOrInsertFunction("__msan_instrument_asm_store", IRB.getVoidTy(), - PointerType::get(IRB.getInt8Ty(), 0), IntptrTy); + MsanInstrumentAsmStoreFn = M.getOrInsertFunction( + "__msan_instrument_asm_store", IRB.getVoidTy(), PtrTy, IntptrTy); if (CompileKernel) { createKernelApi(M, TLI); @@ -1264,8 +1259,7 @@ struct MemorySanitizerVisitor : public InstVisitor { Align CurrentAlignment = Alignment; if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) { Value *IntptrOrigin = originToIntptr(IRB, Origin); - Value *IntptrOriginPtr = - IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0)); + Value *IntptrOriginPtr = IRB.CreatePointerCast(OriginPtr, MS.PtrTy); for (unsigned i = 0; i < Size / IntptrSize; ++i) { Value *Ptr = i ? 
IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i) : IntptrOriginPtr; @@ -1691,7 +1685,7 @@ struct MemorySanitizerVisitor : public InstVisitor { VectTy->getElementCount()); } assert(IntPtrTy == MS.IntptrTy); - return PointerType::get(*MS.C, 0); + return MS.PtrTy; } Constant *constToIntPtr(Type *IntPtrTy, uint64_t C) const { @@ -1787,8 +1781,7 @@ struct MemorySanitizerVisitor : public InstVisitor { TypeSize Size = DL.getTypeStoreSize(ShadowTy); FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size); - Value *AddrCast = - IRB.CreatePointerCast(Addr, PointerType::get(IRB.getInt8Ty(), 0)); + Value *AddrCast = IRB.CreatePointerCast(Addr, MS.PtrTy); if (Getter) { ShadowOriginPtrs = createMetadataCall(IRB, Getter, AddrCast); } else { @@ -1799,7 +1792,7 @@ struct MemorySanitizerVisitor : public InstVisitor { AddrCast, SizeVal); } Value *ShadowPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 0); - ShadowPtr = IRB.CreatePointerCast(ShadowPtr, PointerType::get(ShadowTy, 0)); + ShadowPtr = IRB.CreatePointerCast(ShadowPtr, MS.PtrTy); Value *OriginPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 1); return std::make_pair(ShadowPtr, OriginPtr); @@ -5009,21 +5002,19 @@ struct VarArgHelperBase : public VarArgHelper { } /// Compute the shadow address for a given va_arg. - Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, - unsigned ArgOffset) { + Value *getShadowPtrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) { Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy); Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); - return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0), - "_msarg_va_s"); + return IRB.CreateIntToPtr(Base, MS.PtrTy, "_msarg_va_s"); } /// Compute the shadow address for a given va_arg. 
- Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, - unsigned ArgOffset, unsigned ArgSize) { + Value *getShadowPtrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset, + unsigned ArgSize) { // Make sure we don't overflow __msan_va_arg_tls. if (ArgOffset + ArgSize > kParamTLSSize) return nullptr; - return getShadowPtrForVAArgument(Ty, IRB, ArgOffset); + return getShadowPtrForVAArgument(IRB, ArgOffset); } /// Compute the origin address for a given va_arg. @@ -5033,8 +5024,7 @@ struct VarArgHelperBase : public VarArgHelper { // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never // overflow. Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); - return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0), - "_msarg_va_o"); + return IRB.CreateIntToPtr(Base, MS.PtrTy, "_msarg_va_o"); } void CleanUnusedTLS(IRBuilder<> &IRB, Value *ShadowBase, @@ -5147,8 +5137,7 @@ struct VarArgAMD64Helper : public VarArgHelperBase { uint64_t ArgSize = DL.getTypeAllocSize(RealTy); uint64_t AlignedSize = alignTo(ArgSize, 8); unsigned BaseOffset = OverflowOffset; - Value *ShadowBase = - getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset); + Value *ShadowBase = getShadowPtrForVAArgument(IRB, OverflowOffset); Value *OriginBase = nullptr; if (MS.TrackOrigins) OriginBase = getOriginPtrForVAArgument(IRB, OverflowOffset); @@ -5177,14 +5166,14 @@ struct VarArgAMD64Helper : public VarArgHelperBase { Value *ShadowBase, *OriginBase = nullptr; switch (AK) { case AK_GeneralPurpose: - ShadowBase = getShadowPtrForVAArgument(A->getType(), IRB, GpOffset); + ShadowBase = getShadowPtrForVAArgument(IRB, GpOffset); if (MS.TrackOrigins) OriginBase = getOriginPtrForVAArgument(IRB, GpOffset); GpOffset += 8; assert(GpOffset <= kParamTLSSize); break; case AK_FloatingPoint: - ShadowBase = getShadowPtrForVAArgument(A->getType(), IRB, FpOffset); + ShadowBase = getShadowPtrForVAArgument(IRB, FpOffset); if (MS.TrackOrigins) OriginBase = getOriginPtrForVAArgument(IRB, 
FpOffset); FpOffset += 16; @@ -5196,8 +5185,7 @@ struct VarArgAMD64Helper : public VarArgHelperBase { uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); uint64_t AlignedSize = alignTo(ArgSize, 8); unsigned BaseOffset = OverflowOffset; - ShadowBase = - getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset); + ShadowBase = getShadowPtrForVAArgument(IRB, OverflowOffset); if (MS.TrackOrigins) { OriginBase = getOriginPtrForVAArgument(IRB, OverflowOffset); } @@ -5263,13 +5251,11 @@ struct VarArgAMD64Helper : public VarArgHelperBase { NextNodeIRBuilder IRB(OrigInst); Value *VAListTag = OrigInst->getArgOperand(0); - Type *RegSaveAreaPtrTy = PointerType::getUnqual(*MS.C); // i64* Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr( IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), ConstantInt::get(MS.IntptrTy, 16)), - PointerType::get(RegSaveAreaPtrTy, 0)); - Value *RegSaveAreaPtr = - IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr); + MS.PtrTy); + Value *RegSaveAreaPtr = IRB.CreateLoad(MS.PtrTy, RegSaveAreaPtrPtr); Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; const Align Alignment = Align(16); std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = @@ -5280,13 +5266,12 @@ struct VarArgAMD64Helper : public VarArgHelperBase { if (MS.TrackOrigins) IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy, Alignment, AMD64FpEndOffset); - Type *OverflowArgAreaPtrTy = PointerType::getUnqual(*MS.C); // i64* Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr( IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), ConstantInt::get(MS.IntptrTy, 8)), - PointerType::get(OverflowArgAreaPtrTy, 0)); + MS.PtrTy); Value *OverflowArgAreaPtr = - IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr); + IRB.CreateLoad(MS.PtrTy, OverflowArgAreaPtrPtr); Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr; std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) = MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(), @@ -5329,7 +5314,7 
@@ struct VarArgMIPS64Helper : public VarArgHelperBase { if (ArgSize < 8) VAArgOffset += (8 - ArgSize); } - Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize); + Base = getShadowPtrForVAArgument(IRB, VAArgOffset, ArgSize); VAArgOffset += ArgSize; VAArgOffset = alignTo(VAArgOffset, 8); if (!Base) @@ -5371,12 +5356,9 @@ struct VarArgMIPS64Helper : public VarArgHelperBase { for (CallInst *OrigInst : VAStartInstrumentationList) { NextNodeIRBuilder IRB(OrigInst); Value *VAListTag = OrigInst->getArgOperand(0); - Type *RegSaveAreaPtrTy = PointerType::getUnqual(*MS.C); // i64* - Value *RegSaveAreaPtrPtr = - IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), - PointerType::get(RegSaveAreaPtrTy, 0)); - Value *RegSaveAreaPtr = - IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr); + Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr( + IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), MS.PtrTy); + Value *RegSaveAreaPtr = IRB.CreateLoad(MS.PtrTy, RegSaveAreaPtrPtr); Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; const Align Alignment = Align(8); std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = @@ -5460,11 +5442,11 @@ struct VarArgAArch64Helper : public VarArgHelperBase { Value *Base; switch (AK) { case AK_GeneralPurpose: - Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset); + Base = getShadowPtrForVAArgument(IRB, GrOffset); GrOffset += 8 * RegNum; break; case AK_FloatingPoint: - Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset); + Base = getShadowPtrForVAArgument(IRB, VrOffset); VrOffset += 16 * RegNum; break; case AK_Memory: @@ -5475,7 +5457,7 @@ struct VarArgAArch64Helper : public VarArgHelperBase { uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); uint64_t AlignedSize = alignTo(ArgSize, 8); unsigned BaseOffset = OverflowOffset; - Base = getShadowPtrForVAArgument(A->getType(), IRB, BaseOffset); + Base = getShadowPtrForVAArgument(IRB, BaseOffset); OverflowOffset += AlignedSize; if (OverflowOffset > kParamTLSSize) { 
// We have no space to copy shadow there. @@ -5500,7 +5482,7 @@ struct VarArgAArch64Helper : public VarArgHelperBase { Value *SaveAreaPtrPtr = IRB.CreateIntToPtr( IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), ConstantInt::get(MS.IntptrTy, offset)), - PointerType::get(*MS.C, 0)); + MS.PtrTy); return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr); } @@ -5509,7 +5491,7 @@ struct VarArgAArch64Helper : public VarArgHelperBase { Value *SaveAreaPtr = IRB.CreateIntToPtr( IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), ConstantInt::get(MS.IntptrTy, offset)), - PointerType::get(*MS.C, 0)); + MS.PtrTy); Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr); return IRB.CreateSExt(SaveArea32, MS.IntptrTy); } @@ -5670,8 +5652,8 @@ struct VarArgPowerPC64Helper : public VarArgHelperBase { ArgAlign = Align(8); VAArgOffset = alignTo(VAArgOffset, ArgAlign); if (!IsFixed) { - Value *Base = getShadowPtrForVAArgument( - RealTy, IRB, VAArgOffset - VAArgBase, ArgSize); + Value *Base = + getShadowPtrForVAArgument(IRB, VAArgOffset - VAArgBase, ArgSize); if (Base) { Value *AShadowPtr, *AOriginPtr; std::tie(AShadowPtr, AOriginPtr) = @@ -5707,8 +5689,8 @@ struct VarArgPowerPC64Helper : public VarArgHelperBase { VAArgOffset += (8 - ArgSize); } if (!IsFixed) { - Base = getShadowPtrForVAArgument(A->getType(), IRB, - VAArgOffset - VAArgBase, ArgSize); + Base = + getShadowPtrForVAArgument(IRB, VAArgOffset - VAArgBase, ArgSize); if (Base) IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); } @@ -5755,12 +5737,9 @@ struct VarArgPowerPC64Helper : public VarArgHelperBase { for (CallInst *OrigInst : VAStartInstrumentationList) { NextNodeIRBuilder IRB(OrigInst); Value *VAListTag = OrigInst->getArgOperand(0); - Type *RegSaveAreaPtrTy = PointerType::getUnqual(*MS.C); // i64* - Value *RegSaveAreaPtrPtr = - IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), - PointerType::get(RegSaveAreaPtrTy, 0)); - Value *RegSaveAreaPtr = - 
IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr); + Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr( + IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), MS.PtrTy); + Value *RegSaveAreaPtr = IRB.CreateLoad(MS.PtrTy, RegSaveAreaPtrPtr); Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; const Align Alignment = Align(8); std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = @@ -5855,7 +5834,7 @@ struct VarArgSystemZHelper : public VarArgHelperBase { Type *T = A->getType(); ArgKind AK = classifyArgument(T); if (AK == ArgKind::Indirect) { - T = PointerType::get(T, 0); + T = MS.PtrTy; AK = ArgKind::GeneralPurpose; } if (AK == ArgKind::GeneralPurpose && GpOffset >= SystemZGpEndOffset) @@ -5948,8 +5927,7 @@ struct VarArgSystemZHelper : public VarArgHelperBase { if (SE != ShadowExtension::None) Shadow = MSV.CreateShadowCast(IRB, Shadow, IRB.getInt64Ty(), /*Signed*/ SE == ShadowExtension::Sign); - ShadowBase = IRB.CreateIntToPtr( - ShadowBase, PointerType::get(Shadow->getType(), 0), "_msarg_va_s"); + ShadowBase = IRB.CreateIntToPtr(ShadowBase, MS.PtrTy, "_msarg_va_s"); IRB.CreateStore(Shadow, ShadowBase); if (MS.TrackOrigins) { Value *Origin = MSV.getOrigin(A); @@ -5964,13 +5942,12 @@ struct VarArgSystemZHelper : public VarArgHelperBase { } void copyRegSaveArea(IRBuilder<> &IRB, Value *VAListTag) { - Type *RegSaveAreaPtrTy = PointerType::getUnqual(*MS.C); // i64* Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr( IRB.CreateAdd( IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), ConstantInt::get(MS.IntptrTy, SystemZRegSaveAreaPtrOffset)), - PointerType::get(RegSaveAreaPtrTy, 0)); - Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr); + MS.PtrTy); + Value *RegSaveAreaPtr = IRB.CreateLoad(MS.PtrTy, RegSaveAreaPtrPtr); Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; const Align Alignment = Align(8); std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = @@ -5991,14 +5968,12 @@ struct VarArgSystemZHelper : public VarArgHelperBase { // FIXME: This implementation 
limits OverflowOffset to kParamTLSSize, so we // don't know real overflow size and can't clear shadow beyond kParamTLSSize. void copyOverflowArea(IRBuilder<> &IRB, Value *VAListTag) { - Type *OverflowArgAreaPtrTy = PointerType::getUnqual(*MS.C); // i64* Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr( IRB.CreateAdd( IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), ConstantInt::get(MS.IntptrTy, SystemZOverflowArgAreaPtrOffset)), - PointerType::get(OverflowArgAreaPtrTy, 0)); - Value *OverflowArgAreaPtr = - IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr); + MS.PtrTy); + Value *OverflowArgAreaPtr = IRB.CreateLoad(MS.PtrTy, OverflowArgAreaPtrPtr); Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr; const Align Alignment = Align(8); std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) = diff --git a/llvm/test/CodeGen/AArch64/zext-shuffle.ll b/llvm/test/CodeGen/AArch64/zext-shuffle.ll index 4ef8daf141715b..6415fba29ff79b 100644 --- a/llvm/test/CodeGen/AArch64/zext-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/zext-shuffle.ll @@ -76,12 +76,9 @@ define <2 x i64> @v2i64_37(<4 x i32> %a, <4 x i32> %b) { define <4 x i64> @v2i64_i16_04812(<16 x i16> %a) { ; CHECK-LABEL: v2i64_i16_04812: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI6_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ushll2 v1.2d, v0.4s, #0 ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: ret @@ -93,12 +90,8 @@ define <4 x i64> @v2i64_i16_04812(<16 x i16> %a) { define <4 x i64> @v2i64_i16_15913(<16 x i16> %a) { ; CHECK-LABEL: v2i64_i16_15913: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI7_0 -; CHECK-NEXT: // kill: def $q1 
killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ushr v0.4s, v0.4s, #16 ; CHECK-NEXT: ushll2 v1.2d, v0.4s, #0 ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: ret @@ -110,12 +103,9 @@ define <4 x i64> @v2i64_i16_15913(<16 x i16> %a) { define <4 x i64> @v2i64_i16_261014(<16 x i16> %a) { ; CHECK-LABEL: v2i64_i16_261014: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI8_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ushll2 v1.2d, v0.4s, #0 ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: ret @@ -127,12 +117,8 @@ define <4 x i64> @v2i64_i16_261014(<16 x i16> %a) { define <4 x i64> @v2i64_i16_371115(<16 x i16> %a) { ; CHECK-LABEL: v2i64_i16_371115: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI9_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ushr v0.4s, v0.4s, #16 ; CHECK-NEXT: ushll2 v1.2d, v0.4s, #0 ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: ret @@ -167,12 +153,9 @@ define <4 x i32> @v4i32_1357(<8 x i16> %a, <8 x i16> %b) { define <4 x i32> @v4i32_04812(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: v4i32_04812: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI12_0 -; 
CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> %d = zext <4 x i16> %c to <4 x i32> @@ -182,12 +165,8 @@ define <4 x i32> @v4i32_04812(<8 x i16> %a, <8 x i16> %b) { define <4 x i32> @v4i32_15913(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: v4i32_15913: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI13_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ushr v0.4s, v0.4s, #16 ; CHECK-NEXT: ret %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> %d = zext <4 x i16> %c to <4 x i32> @@ -197,12 +176,9 @@ define <4 x i32> @v4i32_15913(<8 x i16> %a, <8 x i16> %b) { define <4 x i32> @v4i32_261014(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: v4i32_261014: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI14_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> %d = zext <4 x i16> %c to <4 x i32> @@ -212,12 +188,8 @@ define <4 x i32> @v4i32_261014(<8 x i16> %a, <8 x 
i16> %b) { define <4 x i32> @v4i32_371115(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: v4i32_371115: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI15_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ushr v0.4s, v0.4s, #16 ; CHECK-NEXT: ret %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> %d = zext <4 x i16> %c to <4 x i32> @@ -249,12 +221,8 @@ define <8 x i16> @v8i16_1357(<16 x i8> %a, <16 x i8> %b) { define <8 x i16> @v8i16_04812(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: v8i16_04812: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI18_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: bic v0.8h, #255, lsl #8 ; CHECK-NEXT: ret %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> %d = zext <8 x i8> %c to <8 x i16> @@ -264,12 +232,8 @@ define <8 x i16> @v8i16_04812(<16 x i8> %a, <16 x i8> %b) { define <8 x i16> @v8i16_15913(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: v8i16_15913: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI19_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI19_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ushr v0.8h, v0.8h, #8 ; CHECK-NEXT: ret %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> %d = zext <8 x i8> %c to <8 x i16> @@ -279,12 +243,8 
@@ define <8 x i16> @v8i16_15913(<16 x i8> %a, <16 x i8> %b) { define <8 x i16> @v8i16_261014(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: v8i16_261014: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI20_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: uzp2 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: bic v0.8h, #255, lsl #8 ; CHECK-NEXT: ret %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> %d = zext <8 x i8> %c to <8 x i16> @@ -294,12 +254,8 @@ define <8 x i16> @v8i16_261014(<16 x i8> %a, <16 x i8> %b) { define <8 x i16> @v8i16_371115(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: v8i16_371115: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI21_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: uzp2 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ushr v0.8h, v0.8h, #8 ; CHECK-NEXT: ret %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> %d = zext <8 x i8> %c to <8 x i16> @@ -307,45 +263,104 @@ define <8 x i16> @v8i16_371115(<16 x i8> %a, <16 x i8> %b) { } +define <8 x i32> @v8i32_0246(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: v8i32_0246: +; CHECK: // %bb.0: +; CHECK-NEXT: bic v0.8h, #255, lsl #8 +; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> + %d = zext <8 x i8> %c to <8 x i32> + ret <8 x i32> %d +} + +define <8 x i32> @v8i32_1357(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: v8i32_1357: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp2 v0.16b, v0.16b, v0.16b +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; 
CHECK-NEXT: ushll2 v1.4s, v0.8h, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> + %d = zext <8 x i8> %c to <8 x i32> + ret <8 x i32> %d +} + +define <8 x i32> @v8i32_04812(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: v8i32_04812: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: bic v0.8h, #255, lsl #8 +; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> + %d = zext <8 x i8> %c to <8 x i32> + ret <8 x i32> %d +} + +define <8 x i32> @v8i32_15913(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: v8i32_15913: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ushr v0.8h, v0.8h, #8 +; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> + %d = zext <8 x i8> %c to <8 x i32> + ret <8 x i32> %d +} + +define <8 x i32> @v8i32_261014(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: v8i32_261014: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp2 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: bic v0.8h, #255, lsl #8 +; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> + %d = zext <8 x i8> %c to <8 x i32> + ret <8 x i32> %d +} + +define <8 x i32> @v8i32_371115(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: v8i32_371115: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp2 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ushr v0.8h, v0.8h, #8 +; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> + %d = zext <8 x i8> %c to <8 x i32> + ret <8 x i32> %d +} + + define <8 x i64> @zext_add(<32 x i16> %l) { ; CHECK-LABEL: zext_add: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI22_0 -; CHECK-NEXT: adrp x9, .LCPI22_3 -; 
CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI22_0] -; CHECK-NEXT: adrp x8, .LCPI22_1 -; CHECK-NEXT: ldr q7, [x9, :lo12:.LCPI22_3] -; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI22_1] -; CHECK-NEXT: adrp x8, .LCPI22_2 -; CHECK-NEXT: adrp x9, .LCPI22_7 -; CHECK-NEXT: ldr q6, [x8, :lo12:.LCPI22_2] -; CHECK-NEXT: adrp x8, .LCPI22_4 -; CHECK-NEXT: ldr q18, [x9, :lo12:.LCPI22_7] -; CHECK-NEXT: ldr q16, [x8, :lo12:.LCPI22_4] -; CHECK-NEXT: adrp x8, .LCPI22_5 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl v5.16b, { v0.16b, v1.16b }, v5.16b -; CHECK-NEXT: ldr q17, [x8, :lo12:.LCPI22_5] -; CHECK-NEXT: adrp x8, .LCPI22_6 -; CHECK-NEXT: tbl v7.16b, { v0.16b, v1.16b }, v7.16b -; CHECK-NEXT: ldr q19, [x8, :lo12:.LCPI22_6] -; CHECK-NEXT: tbl v17.16b, { v0.16b, v1.16b }, v17.16b -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v18.16b -; CHECK-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v4.16b -; CHECK-NEXT: tbl v4.16b, { v2.16b, v3.16b }, v6.16b -; CHECK-NEXT: tbl v6.16b, { v2.16b, v3.16b }, v16.16b -; CHECK-NEXT: tbl v2.16b, { v2.16b, v3.16b }, v19.16b -; CHECK-NEXT: uaddl v5.4s, v5.4h, v7.4h -; CHECK-NEXT: uaddl v7.4s, v17.4h, v0.4h -; CHECK-NEXT: uaddl2 v4.4s, v1.8h, v4.8h -; CHECK-NEXT: uaddl2 v2.4s, v6.8h, v2.8h -; CHECK-NEXT: uaddl v0.2d, v5.2s, v7.2s -; CHECK-NEXT: uaddl2 v1.2d, v5.4s, v7.4s -; CHECK-NEXT: uaddl2 v3.2d, v4.4s, v2.4s -; CHECK-NEXT: uaddl v2.2d, v4.2s, v2.2s +; CHECK-NEXT: movi v4.2d, #0x00ffff0000ffff +; CHECK-NEXT: uzp1 v5.4s, v0.4s, v1.4s +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: uzp1 v1.4s, v2.4s, v3.4s +; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s +; CHECK-NEXT: and v3.16b, v5.16b, v4.16b +; CHECK-NEXT: and v6.16b, v0.16b, v4.16b +; CHECK-NEXT: and v7.16b, v1.16b, v4.16b +; CHECK-NEXT: and v4.16b, v2.16b, v4.16b +; CHECK-NEXT: usra 
v3.4s, v5.4s, #16 +; CHECK-NEXT: usra v6.4s, v0.4s, #16 +; CHECK-NEXT: usra v7.4s, v1.4s, #16 +; CHECK-NEXT: usra v4.4s, v2.4s, #16 +; CHECK-NEXT: uaddl v0.2d, v3.2s, v6.2s +; CHECK-NEXT: uaddl2 v1.2d, v3.4s, v6.4s +; CHECK-NEXT: uaddl2 v3.2d, v7.4s, v4.4s +; CHECK-NEXT: uaddl v2.2d, v7.2s, v4.2s ; CHECK-NEXT: ret %s1 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> %z1 = zext <8 x i16> %s1 to <8 x i64> @@ -392,86 +407,77 @@ define <8 x i64> @zext_load_add(ptr %p) { define <8 x double> @uitofp_fadd(<32 x i16> %l) { ; CHECK-LABEL: uitofp_fadd: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI24_0 -; CHECK-NEXT: adrp x9, .LCPI24_1 -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: adrp x10, .LCPI24_6 -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI24_0] -; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI24_1] -; CHECK-NEXT: adrp x8, .LCPI24_2 -; CHECK-NEXT: adrp x9, .LCPI24_3 -; CHECK-NEXT: ldr q6, [x8, :lo12:.LCPI24_2] -; CHECK-NEXT: adrp x8, .LCPI24_4 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl v4.16b, { v0.16b, v1.16b }, v4.16b -; CHECK-NEXT: tbl v5.16b, { v2.16b, v3.16b }, v5.16b -; CHECK-NEXT: ldr q7, [x9, :lo12:.LCPI24_3] -; CHECK-NEXT: adrp x9, .LCPI24_5 -; CHECK-NEXT: ldr q16, [x8, :lo12:.LCPI24_4] -; CHECK-NEXT: adrp x8, .LCPI24_7 -; CHECK-NEXT: ldr q17, [x9, :lo12:.LCPI24_5] -; CHECK-NEXT: ldr q18, [x10, :lo12:.LCPI24_6] -; CHECK-NEXT: ldr q19, [x8, :lo12:.LCPI24_7] -; CHECK-NEXT: tbl v6.16b, { v0.16b, v1.16b }, v6.16b -; CHECK-NEXT: tbl v7.16b, { v2.16b, v3.16b }, v7.16b -; CHECK-NEXT: tbl v16.16b, { v0.16b, v1.16b }, v16.16b -; CHECK-NEXT: tbl v17.16b, { v2.16b, v3.16b }, v17.16b -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v18.16b -; CHECK-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v19.16b -; CHECK-NEXT: ushll2 v5.4s, v5.8h, #0 -; CHECK-NEXT: ushll v4.4s, v4.4h, 
#0 -; CHECK-NEXT: ushll2 v7.4s, v7.8h, #0 -; CHECK-NEXT: ushll v6.4s, v6.4h, #0 -; CHECK-NEXT: ushll v16.4s, v16.4h, #0 -; CHECK-NEXT: ushll2 v20.2d, v5.4s, #0 -; CHECK-NEXT: ushll2 v21.2d, v4.4s, #0 -; CHECK-NEXT: ushll2 v17.4s, v17.8h, #0 -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0 -; CHECK-NEXT: ushll v2.2d, v5.2s, #0 -; CHECK-NEXT: ushll v3.2d, v4.2s, #0 -; CHECK-NEXT: ushll2 v4.2d, v7.4s, #0 -; CHECK-NEXT: ushll2 v5.2d, v6.4s, #0 -; CHECK-NEXT: ushll v7.2d, v7.2s, #0 -; CHECK-NEXT: ucvtf v18.2d, v20.2d -; CHECK-NEXT: ucvtf v19.2d, v21.2d +; CHECK-NEXT: uzp1 v5.4s, v0.4s, v3.4s +; CHECK-NEXT: uzp1 v6.4s, v0.4s, v1.4s +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: movi d4, #0x00ffff0000ffff +; CHECK-NEXT: uzp1 v7.4s, v2.4s, v3.4s +; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s +; CHECK-NEXT: ext v16.16b, v6.16b, v6.16b, #8 +; CHECK-NEXT: ext v5.16b, v5.16b, v5.16b, #8 +; CHECK-NEXT: uzp2 v1.4s, v0.4s, v3.4s +; CHECK-NEXT: and v17.8b, v6.8b, v4.8b +; CHECK-NEXT: and v18.8b, v7.8b, v4.8b +; CHECK-NEXT: ushr v6.2s, v6.2s, #16 +; CHECK-NEXT: ushr v7.2s, v7.2s, #16 +; CHECK-NEXT: and v21.8b, v0.8b, v4.8b +; CHECK-NEXT: and v22.8b, v2.8b, v4.8b +; CHECK-NEXT: ushr v2.2s, v2.2s, #16 +; CHECK-NEXT: and v19.8b, v16.8b, v4.8b +; CHECK-NEXT: and v20.8b, v5.8b, v4.8b +; CHECK-NEXT: ushll v3.2d, v17.2s, #0 +; CHECK-NEXT: ushll v17.2d, v18.2s, #0 +; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: ushr v16.2s, v16.2s, #16 +; CHECK-NEXT: ushr v5.2s, v5.2s, #16 ; CHECK-NEXT: ushll v6.2d, v6.2s, #0 -; CHECK-NEXT: ushll2 v20.2d, v17.4s, #0 -; CHECK-NEXT: ushll2 v21.2d, v16.4s, #0 -; CHECK-NEXT: ushll v17.2d, v17.2s, #0 +; CHECK-NEXT: ushll v7.2d, v7.2s, #0 +; CHECK-NEXT: ushll v18.2d, v19.2s, #0 +; CHECK-NEXT: ushll v19.2d, v20.2s, #0 +; CHECK-NEXT: ext v20.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ushr v0.2s, v0.2s, #16 ; CHECK-NEXT: ushll v16.2d, v16.2s, #0 -; CHECK-NEXT: ushll v22.2d, v0.2s, #0 -; CHECK-NEXT: ushll2 v23.2d, v1.4s, #0 -; 
CHECK-NEXT: ushll2 v0.2d, v0.4s, #0 -; CHECK-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-NEXT: ucvtf v2.2d, v2.2d +; CHECK-NEXT: ushll v21.2d, v21.2s, #0 +; CHECK-NEXT: ushll v5.2d, v5.2s, #0 +; CHECK-NEXT: ushll v22.2d, v22.2s, #0 +; CHECK-NEXT: ushll v2.2d, v2.2s, #0 ; CHECK-NEXT: ucvtf v3.2d, v3.2d -; CHECK-NEXT: ucvtf v4.2d, v4.2d -; CHECK-NEXT: ucvtf v5.2d, v5.2d -; CHECK-NEXT: ucvtf v7.2d, v7.2d -; CHECK-NEXT: ucvtf v6.2d, v6.2d -; CHECK-NEXT: ucvtf v20.2d, v20.2d -; CHECK-NEXT: ucvtf v21.2d, v21.2d ; CHECK-NEXT: ucvtf v17.2d, v17.2d +; CHECK-NEXT: ucvtf v6.2d, v6.2d +; CHECK-NEXT: and v23.8b, v20.8b, v4.8b +; CHECK-NEXT: and v4.8b, v1.8b, v4.8b +; CHECK-NEXT: ushr v20.2s, v20.2s, #16 +; CHECK-NEXT: ushr v1.2s, v1.2s, #16 +; CHECK-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-NEXT: ucvtf v7.2d, v7.2d +; CHECK-NEXT: ucvtf v18.2d, v18.2d +; CHECK-NEXT: ucvtf v19.2d, v19.2d ; CHECK-NEXT: ucvtf v16.2d, v16.2d +; CHECK-NEXT: ushll v23.2d, v23.2s, #0 +; CHECK-NEXT: ushll v4.2d, v4.2s, #0 +; CHECK-NEXT: ushll v20.2d, v20.2s, #0 +; CHECK-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-NEXT: ucvtf v5.2d, v5.2d +; CHECK-NEXT: ucvtf v21.2d, v21.2d ; CHECK-NEXT: ucvtf v22.2d, v22.2d -; CHECK-NEXT: ucvtf v23.2d, v23.2d ; CHECK-NEXT: ucvtf v0.2d, v0.2d +; CHECK-NEXT: ucvtf v2.2d, v2.2d +; CHECK-NEXT: ucvtf v23.2d, v23.2d +; CHECK-NEXT: ucvtf v4.2d, v4.2d +; CHECK-NEXT: ucvtf v20.2d, v20.2d ; CHECK-NEXT: ucvtf v1.2d, v1.2d -; CHECK-NEXT: fadd v4.2d, v18.2d, v4.2d -; CHECK-NEXT: fadd v3.2d, v3.2d, v6.2d -; CHECK-NEXT: fadd v2.2d, v2.2d, v7.2d +; CHECK-NEXT: fadd v16.2d, v18.2d, v16.2d +; CHECK-NEXT: fadd v7.2d, v17.2d, v7.2d ; CHECK-NEXT: fadd v5.2d, v19.2d, v5.2d -; CHECK-NEXT: fadd v6.2d, v16.2d, v22.2d -; CHECK-NEXT: fadd v16.2d, v20.2d, v23.2d -; CHECK-NEXT: fadd v7.2d, v17.2d, v1.2d -; CHECK-NEXT: fadd v1.2d, v21.2d, v0.2d -; CHECK-NEXT: fadd v0.2d, v3.2d, v6.2d -; CHECK-NEXT: fadd v3.2d, v4.2d, v16.2d -; CHECK-NEXT: fadd v1.2d, v5.2d, v1.2d -; CHECK-NEXT: fadd v2.2d, v2.2d, v7.2d +; 
CHECK-NEXT: fadd v3.2d, v3.2d, v6.2d +; CHECK-NEXT: fadd v0.2d, v21.2d, v0.2d +; CHECK-NEXT: fadd v2.2d, v22.2d, v2.2d +; CHECK-NEXT: fadd v4.2d, v4.2d, v1.2d +; CHECK-NEXT: fadd v1.2d, v23.2d, v20.2d +; CHECK-NEXT: fadd v0.2d, v3.2d, v0.2d +; CHECK-NEXT: fadd v2.2d, v7.2d, v2.2d +; CHECK-NEXT: fadd v1.2d, v16.2d, v1.2d +; CHECK-NEXT: fadd v3.2d, v5.2d, v4.2d ; CHECK-NEXT: ret %s1 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> %z1 = uitofp <8 x i16> %s1 to <8 x double> diff --git a/llvm/test/CodeGen/ARM/arm-vld1.ll b/llvm/test/CodeGen/ARM/arm-vld1.ll index 78b0b92013c397..ec2793589759ea 100644 --- a/llvm/test/CodeGen/ARM/arm-vld1.ll +++ b/llvm/test/CodeGen/ARM/arm-vld1.ll @@ -68,7 +68,7 @@ declare %struct.uint8x16x4_t @llvm.arm.neon.vld1x4.v16i8.p0(ptr) nounwind readon define %struct.uint16x4x2_t @test_vld1_u16_x2(ptr %a) nounwind { ; CHECK-LABEL: test_vld1_u16_x2: -; CHECK: vld1.16 {d16, d17}, [r0:64] +; CHECK: vld1.16 {d16, d17}, [r0] ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: vmov r2, r3, d17 ; CHECK-NEXT: bx lr @@ -76,9 +76,39 @@ define %struct.uint16x4x2_t @test_vld1_u16_x2(ptr %a) nounwind { ret %struct.uint16x4x2_t %tmp } +define %struct.uint16x4x2_t @test_vld1_u16_x2_align8(ptr %a) nounwind { +; CHECK-LABEL: test_vld1_u16_x2_align8: +; CHECK: vld1.16 {d16, d17}, [r0:64] +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr + %tmp = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 8 %a) + ret %struct.uint16x4x2_t %tmp +} + +define %struct.uint16x4x2_t @test_vld1_u16_x2_align16(ptr %a) nounwind { +; CHECK-LABEL: test_vld1_u16_x2_align16: +; CHECK: vld1.16 {d16, d17}, [r0:128] +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr + %tmp = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 16 %a) + ret %struct.uint16x4x2_t %tmp +} + +define %struct.uint16x4x2_t @test_vld1_u16_x2_align32(ptr %a) nounwind { +; CHECK-LABEL: 
test_vld1_u16_x2_align32: +; CHECK: vld1.16 {d16, d17}, [r0:128] +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr + %tmp = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 32 %a) + ret %struct.uint16x4x2_t %tmp +} + define %struct.uint16x4x3_t @test_vld1_u16_x3(ptr %a) nounwind { ; CHECK-LABEL: test_vld1_u16_x3: -; CHECK: vld1.16 {d16, d17, d18}, [r1:64] +; CHECK: vld1.16 {d16, d17, d18}, [r1] ; CHECK-NEXT: vst1.16 {d16}, [r0:64]! ; CHECK-NEXT: vst1.16 {d17}, [r0:64]! ; CHECK-NEXT: vstr d18, [r0] @@ -89,7 +119,7 @@ define %struct.uint16x4x3_t @test_vld1_u16_x3(ptr %a) nounwind { define %struct.uint16x4x4_t @test_vld1_u16_x4(ptr %a) nounwind { ; CHECK-LABEL: test_vld1_u16_x4: -; CHECK: vld1.16 {d16, d17, d18, d19}, [r1:256] +; CHECK: vld1.16 {d16, d17, d18, d19}, [r1] ; CHECK-NEXT: vst1.16 {d16}, [r0:64]! ; CHECK-NEXT: vst1.16 {d17}, [r0:64]! ; CHECK-NEXT: vst1.16 {d18}, [r0:64]! @@ -101,7 +131,7 @@ define %struct.uint16x4x4_t @test_vld1_u16_x4(ptr %a) nounwind { define %struct.uint32x2x2_t @test_vld1_u32_x2(ptr %a) nounwind { ; CHECK-LABEL: test_vld1_u32_x2: -; CHECK: vld1.32 {d16, d17}, [r0:64] +; CHECK: vld1.32 {d16, d17}, [r0] ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: vmov r2, r3, d17 ; CHECK-NEXT: bx lr @@ -111,7 +141,7 @@ define %struct.uint32x2x2_t @test_vld1_u32_x2(ptr %a) nounwind { define %struct.uint32x2x3_t @test_vld1_u32_x3(ptr %a) nounwind { ; CHECK-LABEL: test_vld1_u32_x3: -; CHECK: vld1.32 {d16, d17, d18}, [r1:64] +; CHECK: vld1.32 {d16, d17, d18}, [r1] ; CHECK-NEXT: vst1.32 {d16}, [r0:64]! ; CHECK-NEXT: vst1.32 {d17}, [r0:64]! ; CHECK-NEXT: vstr d18, [r0] @@ -122,7 +152,7 @@ define %struct.uint32x2x3_t @test_vld1_u32_x3(ptr %a) nounwind { define %struct.uint32x2x4_t @test_vld1_u32_x4(ptr %a) nounwind { ; CHECK-LABEL: test_vld1_u32_x4: -; CHECK: vld1.32 {d16, d17, d18, d19}, [r1:256] +; CHECK: vld1.32 {d16, d17, d18, d19}, [r1] ; CHECK-NEXT: vst1.32 {d16}, [r0:64]! 
; CHECK-NEXT: vst1.32 {d17}, [r0:64]! ; CHECK-NEXT: vst1.32 {d18}, [r0:64]! @@ -134,7 +164,7 @@ define %struct.uint32x2x4_t @test_vld1_u32_x4(ptr %a) nounwind { define %struct.uint64x1x2_t @test_vld1_u64_x2(ptr %a) nounwind { ; CHECK-LABEL: test_vld1_u64_x2: -; CHECK: vld1.64 {d16, d17}, [r0:64] +; CHECK: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: vmov r2, r3, d17 ; CHECK-NEXT: bx lr @@ -144,7 +174,7 @@ define %struct.uint64x1x2_t @test_vld1_u64_x2(ptr %a) nounwind { define %struct.uint64x1x3_t @test_vld1_u64_x3(ptr %a) nounwind { ; CHECK-LABEL: test_vld1_u64_x3: -; CHECK: vld1.64 {d16, d17, d18}, [r1:64] +; CHECK: vld1.64 {d16, d17, d18}, [r1] ; CHECK-NEXT: vst1.64 {d16}, [r0:64]! ; CHECK-NEXT: vst1.64 {d17}, [r0:64]! ; CHECK-NEXT: vstr d18, [r0] @@ -155,7 +185,7 @@ define %struct.uint64x1x3_t @test_vld1_u64_x3(ptr %a) nounwind { define %struct.uint64x1x4_t @test_vld1_u64_x4(ptr %a) nounwind { ; CHECK-LABEL: test_vld1_u64_x4: -; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256] +; CHECK: vld1.64 {d16, d17, d18, d19}, [r1] ; CHECK-NEXT: vst1.64 {d16}, [r0:64]! ; CHECK-NEXT: vst1.64 {d17}, [r0:64]! ; CHECK-NEXT: vst1.64 {d18}, [r0:64]! @@ -167,7 +197,7 @@ define %struct.uint64x1x4_t @test_vld1_u64_x4(ptr %a) nounwind { define %struct.uint8x8x2_t @test_vld1_u8_x2(ptr %a) nounwind { ; CHECK-LABEL: test_vld1_u8_x2: -; CHECK: vld1.8 {d16, d17}, [r0:64] +; CHECK: vld1.8 {d16, d17}, [r0] ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: vmov r2, r3, d17 ; CHECK-NEXT: bx lr @@ -177,7 +207,7 @@ define %struct.uint8x8x2_t @test_vld1_u8_x2(ptr %a) nounwind { define %struct.uint8x8x3_t @test_vld1_u8_x3(ptr %a) nounwind { ; CHECK-LABEL: test_vld1_u8_x3: -; CHECK: vld1.8 {d16, d17, d18}, [r1:64] +; CHECK: vld1.8 {d16, d17, d18}, [r1] ; CHECK-NEXT: vst1.8 {d16}, [r0:64]! ; CHECK-NEXT: vst1.8 {d17}, [r0:64]! 
; CHECK-NEXT: vstr d18, [r0] @@ -188,7 +218,7 @@ define %struct.uint8x8x3_t @test_vld1_u8_x3(ptr %a) nounwind { define %struct.uint8x8x4_t @test_vld1_u8_x4(ptr %a) nounwind { ; CHECK-LABEL: test_vld1_u8_x4: -; CHECK: vld1.8 {d16, d17, d18, d19}, [r1:256] +; CHECK: vld1.8 {d16, d17, d18, d19}, [r1] ; CHECK-NEXT: vst1.8 {d16}, [r0:64]! ; CHECK-NEXT: vst1.8 {d17}, [r0:64]! ; CHECK-NEXT: vst1.8 {d18}, [r0:64]! @@ -200,7 +230,7 @@ define %struct.uint8x8x4_t @test_vld1_u8_x4(ptr %a) nounwind { define %struct.uint16x8x2_t @test_vld1q_u16_x2(ptr %a) nounwind { ; CHECK-LABEL: test_vld1q_u16_x2: -; CHECK: vld1.16 {d16, d17, d18, d19}, [r1:256] +; CHECK: vld1.16 {d16, d17, d18, d19}, [r1] ; CHECK-NEXT: vst1.16 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.64 {d18, d19}, [r0] ; CHECK-NEXT: bx lr @@ -210,8 +240,8 @@ define %struct.uint16x8x2_t @test_vld1q_u16_x2(ptr %a) nounwind { define %struct.uint16x8x3_t @test_vld1q_u16_x3(ptr %a) nounwind { ; CHECK-LABEL: test_vld1q_u16_x3: -; CHECK: vld1.16 {d16, d17, d18}, [r1:64]! -; CHECK-NEXT: vld1.16 {d19, d20, d21}, [r1:64] +; CHECK: vld1.16 {d16, d17, d18}, [r1]! +; CHECK-NEXT: vld1.16 {d19, d20, d21}, [r1] ; CHECK-NEXT: vst1.16 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.16 {d18, d19}, [r0]! ; CHECK-NEXT: vst1.64 {d20, d21}, [r0] @@ -222,8 +252,8 @@ define %struct.uint16x8x3_t @test_vld1q_u16_x3(ptr %a) nounwind { define %struct.uint16x8x4_t @test_vld1q_u16_x4(ptr %a) nounwind { ; CHECK-LABEL: test_vld1q_u16_x4: -; CHECK: vld1.16 {d16, d17, d18, d19}, [r1:256]! -; CHECK-NEXT: vld1.16 {d20, d21, d22, d23}, [r1:256] +; CHECK: vld1.16 {d16, d17, d18, d19}, [r1]! +; CHECK-NEXT: vld1.16 {d20, d21, d22, d23}, [r1] ; CHECK-NEXT: vst1.16 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.16 {d18, d19}, [r0]! ; CHECK-NEXT: vst1.16 {d20, d21}, [r0]! 
@@ -235,7 +265,7 @@ define %struct.uint16x8x4_t @test_vld1q_u16_x4(ptr %a) nounwind { define %struct.uint32x4x2_t @test_vld1q_u32_x2(ptr %a) nounwind { ; CHECK-LABEL: test_vld1q_u32_x2: -; CHECK: vld1.32 {d16, d17, d18, d19}, [r1:256] +; CHECK: vld1.32 {d16, d17, d18, d19}, [r1] ; CHECK-NEXT: vst1.32 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.64 {d18, d19}, [r0] ; CHECK-NEXT: bx lr @@ -245,8 +275,8 @@ define %struct.uint32x4x2_t @test_vld1q_u32_x2(ptr %a) nounwind { define %struct.uint32x4x3_t @test_vld1q_u32_x3(ptr %a) nounwind { ; CHECK-LABEL: test_vld1q_u32_x3: -; CHECK: vld1.32 {d16, d17, d18}, [r1:64]! -; CHECK-NEXT: vld1.32 {d19, d20, d21}, [r1:64] +; CHECK: vld1.32 {d16, d17, d18}, [r1]! +; CHECK-NEXT: vld1.32 {d19, d20, d21}, [r1] ; CHECK-NEXT: vst1.32 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.32 {d18, d19}, [r0]! ; CHECK-NEXT: vst1.64 {d20, d21}, [r0] @@ -257,8 +287,8 @@ define %struct.uint32x4x3_t @test_vld1q_u32_x3(ptr %a) nounwind { define %struct.uint32x4x4_t @test_vld1q_u32_x4(ptr %a) nounwind { ; CHECK-LABEL: test_vld1q_u32_x4: -; CHECK: vld1.32 {d16, d17, d18, d19}, [r1:256]! -; CHECK-NEXT: vld1.32 {d20, d21, d22, d23}, [r1:256] +; CHECK: vld1.32 {d16, d17, d18, d19}, [r1]! +; CHECK-NEXT: vld1.32 {d20, d21, d22, d23}, [r1] ; CHECK-NEXT: vst1.32 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.32 {d18, d19}, [r0]! ; CHECK-NEXT: vst1.32 {d20, d21}, [r0]! @@ -270,7 +300,7 @@ define %struct.uint32x4x4_t @test_vld1q_u32_x4(ptr %a) nounwind { define %struct.uint64x2x2_t @test_vld1q_u64_x2(ptr %a) nounwind { ; CHECK-LABEL: test_vld1q_u64_x2: -; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256] +; CHECK: vld1.64 {d16, d17, d18, d19}, [r1] ; CHECK-NEXT: vst1.64 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.64 {d18, d19}, [r0] ; CHECK-NEXT: bx lr @@ -280,8 +310,8 @@ define %struct.uint64x2x2_t @test_vld1q_u64_x2(ptr %a) nounwind { define %struct.uint64x2x3_t @test_vld1q_u64_x3(ptr %a) nounwind { ; CHECK-LABEL: test_vld1q_u64_x3: -; CHECK: vld1.64 {d16, d17, d18}, [r1:64]! 
-; CHECK-NEXT: vld1.64 {d19, d20, d21}, [r1:64] +; CHECK: vld1.64 {d16, d17, d18}, [r1]! +; CHECK-NEXT: vld1.64 {d19, d20, d21}, [r1] ; CHECK-NEXT: vst1.64 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.64 {d18, d19}, [r0]! ; CHECK-NEXT: vst1.64 {d20, d21}, [r0] @@ -292,8 +322,8 @@ define %struct.uint64x2x3_t @test_vld1q_u64_x3(ptr %a) nounwind { define %struct.uint64x2x4_t @test_vld1q_u64_x4(ptr %a) nounwind { ; CHECK-LABEL: test_vld1q_u64_x4: -; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]! -; CHECK-NEXT: vld1.64 {d20, d21, d22, d23}, [r1:256] +; CHECK: vld1.64 {d16, d17, d18, d19}, [r1]! +; CHECK-NEXT: vld1.64 {d20, d21, d22, d23}, [r1] ; CHECK-NEXT: vst1.64 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.64 {d18, d19}, [r0]! ; CHECK-NEXT: vst1.64 {d20, d21}, [r0]! @@ -305,7 +335,7 @@ define %struct.uint64x2x4_t @test_vld1q_u64_x4(ptr %a) nounwind { define %struct.uint8x16x2_t @test_vld1q_u8_x2(ptr %a) nounwind { ; CHECK-LABEL: test_vld1q_u8_x2: -; CHECK: vld1.8 {d16, d17, d18, d19}, [r1:256] +; CHECK: vld1.8 {d16, d17, d18, d19}, [r1] ; CHECK-NEXT: vst1.8 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.64 {d18, d19}, [r0] ; CHECK-NEXT: bx lr @@ -315,8 +345,8 @@ define %struct.uint8x16x2_t @test_vld1q_u8_x2(ptr %a) nounwind { define %struct.uint8x16x3_t @test_vld1q_u8_x3(ptr %a) nounwind { ; CHECK-LABEL: test_vld1q_u8_x3: -; CHECK: vld1.8 {d16, d17, d18}, [r1:64]! -; CHECK-NEXT: vld1.8 {d19, d20, d21}, [r1:64] +; CHECK: vld1.8 {d16, d17, d18}, [r1]! +; CHECK-NEXT: vld1.8 {d19, d20, d21}, [r1] ; CHECK-NEXT: vst1.8 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.8 {d18, d19}, [r0]! ; CHECK-NEXT: vst1.64 {d20, d21}, [r0] @@ -327,8 +357,8 @@ define %struct.uint8x16x3_t @test_vld1q_u8_x3(ptr %a) nounwind { define %struct.uint8x16x4_t @test_vld1q_u8_x4(ptr %a) nounwind { ; CHECK-LABEL: test_vld1q_u8_x4: -; CHECK: vld1.8 {d16, d17, d18, d19}, [r1:256]! -; CHECK-NEXT: vld1.8 {d20, d21, d22, d23}, [r1:256] +; CHECK: vld1.8 {d16, d17, d18, d19}, [r1]! 
+; CHECK-NEXT: vld1.8 {d20, d21, d22, d23}, [r1] ; CHECK-NEXT: vst1.8 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.8 {d18, d19}, [r0]! ; CHECK-NEXT: vst1.8 {d20, d21}, [r0]! @@ -344,7 +374,7 @@ define %struct.uint16x4x2_t @test_vld1_u16_x2_post_imm(ptr %a, ptr %ptr) nounwin ; CHECK-LABEL: test_vld1_u16_x2_post_imm: ; CHECK: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} -; CHECK-NEXT: vld1.16 {d16, d17}, [r0:64]! +; CHECK-NEXT: vld1.16 {d16, d17}, [r0]! ; CHECK-NEXT: vmov lr, r12, d16 ; CHECK-NEXT: str r0, [r1] ; CHECK-NEXT: vmov r2, r3, d17 @@ -362,7 +392,7 @@ define %struct.uint16x4x2_t @test_vld1_u16_x2_post_reg(ptr %a, ptr %ptr, i32 %in ; CHECK: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} ; CHECK-NEXT: lsl r2, r2, #1 -; CHECK-NEXT: vld1.16 {d16, d17}, [r0:64], r2 +; CHECK-NEXT: vld1.16 {d16, d17}, [r0], r2 ; CHECK-NEXT: vmov lr, r12, d16 ; CHECK-NEXT: str r0, [r1] ; CHECK-NEXT: vmov r2, r3, d17 @@ -377,7 +407,7 @@ define %struct.uint16x4x2_t @test_vld1_u16_x2_post_reg(ptr %a, ptr %ptr, i32 %in define %struct.uint16x4x3_t @test_vld1_u16_x3_post_imm(ptr %a, ptr %ptr) nounwind { ; CHECK-LABEL: test_vld1_u16_x3_post_imm: -; CHECK: vld1.16 {d16, d17, d18}, [r1:64]! +; CHECK: vld1.16 {d16, d17, d18}, [r1]! ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.16 {d16}, [r0:64]! ; CHECK-NEXT: vst1.16 {d17}, [r0:64]! @@ -392,7 +422,7 @@ define %struct.uint16x4x3_t @test_vld1_u16_x3_post_imm(ptr %a, ptr %ptr) nounwin define %struct.uint16x4x3_t @test_vld1_u16_x3_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind { ; CHECK-LABEL: test_vld1_u16_x3_post_reg: ; CHECK: lsl r3, r3, #1 -; CHECK-NEXT: vld1.16 {d16, d17, d18}, [r1:64], r3 +; CHECK-NEXT: vld1.16 {d16, d17, d18}, [r1], r3 ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.16 {d16}, [r0:64]! ; CHECK-NEXT: vst1.16 {d17}, [r0:64]! 
@@ -406,7 +436,7 @@ define %struct.uint16x4x3_t @test_vld1_u16_x3_post_reg(ptr %a, ptr %ptr, i32 %in define %struct.uint16x4x4_t @test_vld1_u16_x4_post_imm(ptr %a, ptr %ptr) nounwind { ; CHECK-LABEL: test_vld1_u16_x4_post_imm: -; CHECK: vld1.16 {d16, d17, d18, d19}, [r1:256]! +; CHECK: vld1.16 {d16, d17, d18, d19}, [r1]! ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.16 {d16}, [r0:64]! ; CHECK-NEXT: vst1.16 {d17}, [r0:64]! @@ -422,7 +452,7 @@ define %struct.uint16x4x4_t @test_vld1_u16_x4_post_imm(ptr %a, ptr %ptr) nounwin define %struct.uint16x4x4_t @test_vld1_u16_x4_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind { ; CHECK-LABEL: test_vld1_u16_x4_post_reg: ; CHECK: lsl r3, r3, #1 -; CHECK-NEXT: vld1.16 {d16, d17, d18, d19}, [r1:256], r3 +; CHECK-NEXT: vld1.16 {d16, d17, d18, d19}, [r1], r3 ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.16 {d16}, [r0:64]! ; CHECK-NEXT: vst1.16 {d17}, [r0:64]! @@ -439,7 +469,7 @@ define %struct.uint32x2x2_t @test_vld1_u32_x2_post_imm(ptr %a, ptr %ptr) nounwin ; CHECK-LABEL: test_vld1_u32_x2_post_imm: ; CHECK: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} -; CHECK-NEXT: vld1.32 {d16, d17}, [r0:64]! +; CHECK-NEXT: vld1.32 {d16, d17}, [r0]! ; CHECK-NEXT: vmov lr, r12, d16 ; CHECK-NEXT: str r0, [r1] ; CHECK-NEXT: vmov r2, r3, d17 @@ -457,7 +487,7 @@ define %struct.uint32x2x2_t @test_vld1_u32_x2_post_reg(ptr %a, ptr %ptr, i32 %in ; CHECK: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} ; CHECK-NEXT: lsl r2, r2, #2 -; CHECK-NEXT: vld1.32 {d16, d17}, [r0:64], r2 +; CHECK-NEXT: vld1.32 {d16, d17}, [r0], r2 ; CHECK-NEXT: vmov lr, r12, d16 ; CHECK-NEXT: str r0, [r1] ; CHECK-NEXT: vmov r2, r3, d17 @@ -472,7 +502,7 @@ define %struct.uint32x2x2_t @test_vld1_u32_x2_post_reg(ptr %a, ptr %ptr, i32 %in define %struct.uint32x2x3_t @test_vld1_u32_x3_post_imm(ptr %a, ptr %ptr) nounwind { ; CHECK-LABEL: test_vld1_u32_x3_post_imm: -; CHECK: vld1.32 {d16, d17, d18}, [r1:64]! +; CHECK: vld1.32 {d16, d17, d18}, [r1]! 
; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.32 {d16}, [r0:64]! ; CHECK-NEXT: vst1.32 {d17}, [r0:64]! @@ -487,7 +517,7 @@ define %struct.uint32x2x3_t @test_vld1_u32_x3_post_imm(ptr %a, ptr %ptr) nounwin define %struct.uint32x2x3_t @test_vld1_u32_x3_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind { ; CHECK-LABEL: test_vld1_u32_x3_post_reg: ; CHECK: lsl r3, r3, #2 -; CHECK-NEXT: vld1.32 {d16, d17, d18}, [r1:64], r3 +; CHECK-NEXT: vld1.32 {d16, d17, d18}, [r1], r3 ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.32 {d16}, [r0:64]! ; CHECK-NEXT: vst1.32 {d17}, [r0:64]! @@ -501,7 +531,7 @@ define %struct.uint32x2x3_t @test_vld1_u32_x3_post_reg(ptr %a, ptr %ptr, i32 %in define %struct.uint32x2x4_t @test_vld1_u32_x4_post_imm(ptr %a, ptr %ptr) nounwind { ; CHECK-LABEL: test_vld1_u32_x4_post_imm: -; CHECK: vld1.32 {d16, d17, d18, d19}, [r1:256]! +; CHECK: vld1.32 {d16, d17, d18, d19}, [r1]! ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.32 {d16}, [r0:64]! ; CHECK-NEXT: vst1.32 {d17}, [r0:64]! @@ -517,7 +547,7 @@ define %struct.uint32x2x4_t @test_vld1_u32_x4_post_imm(ptr %a, ptr %ptr) nounwin define %struct.uint32x2x4_t @test_vld1_u32_x4_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind { ; CHECK-LABEL: test_vld1_u32_x4_post_reg: ; CHECK: lsl r3, r3, #2 -; CHECK-NEXT: vld1.32 {d16, d17, d18, d19}, [r1:256], r3 +; CHECK-NEXT: vld1.32 {d16, d17, d18, d19}, [r1], r3 ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.32 {d16}, [r0:64]! ; CHECK-NEXT: vst1.32 {d17}, [r0:64]! @@ -534,7 +564,7 @@ define %struct.uint64x1x2_t @test_vld1_u64_x2_post_imm(ptr %a, ptr %ptr) nounwin ; CHECK-LABEL: test_vld1_u64_x2_post_imm: ; CHECK: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} -; CHECK-NEXT: vld1.64 {d16, d17}, [r0:64]! +; CHECK-NEXT: vld1.64 {d16, d17}, [r0]! 
; CHECK-NEXT: vmov lr, r12, d16 ; CHECK-NEXT: str r0, [r1] ; CHECK-NEXT: vmov r2, r3, d17 @@ -552,7 +582,7 @@ define %struct.uint64x1x2_t @test_vld1_u64_x2_post_reg(ptr %a, ptr %ptr, i32 %in ; CHECK: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} ; CHECK-NEXT: lsl r2, r2, #3 -; CHECK-NEXT: vld1.64 {d16, d17}, [r0:64], r2 +; CHECK-NEXT: vld1.64 {d16, d17}, [r0], r2 ; CHECK-NEXT: vmov lr, r12, d16 ; CHECK-NEXT: str r0, [r1] ; CHECK-NEXT: vmov r2, r3, d17 @@ -567,7 +597,7 @@ define %struct.uint64x1x2_t @test_vld1_u64_x2_post_reg(ptr %a, ptr %ptr, i32 %in define %struct.uint64x1x3_t @test_vld1_u64_x3_post_imm(ptr %a, ptr %ptr) nounwind { ; CHECK-LABEL: test_vld1_u64_x3_post_imm: -; CHECK: vld1.64 {d16, d17, d18}, [r1:64]! +; CHECK: vld1.64 {d16, d17, d18}, [r1]! ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.64 {d16}, [r0:64]! ; CHECK-NEXT: vst1.64 {d17}, [r0:64]! @@ -582,7 +612,7 @@ define %struct.uint64x1x3_t @test_vld1_u64_x3_post_imm(ptr %a, ptr %ptr) nounwin define %struct.uint64x1x3_t @test_vld1_u64_x3_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind { ; CHECK-LABEL: test_vld1_u64_x3_post_reg: ; CHECK: lsl r3, r3, #3 -; CHECK-NEXT: vld1.64 {d16, d17, d18}, [r1:64], r3 +; CHECK-NEXT: vld1.64 {d16, d17, d18}, [r1], r3 ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.64 {d16}, [r0:64]! ; CHECK-NEXT: vst1.64 {d17}, [r0:64]! @@ -596,7 +626,7 @@ define %struct.uint64x1x3_t @test_vld1_u64_x3_post_reg(ptr %a, ptr %ptr, i32 %in define %struct.uint64x1x4_t @test_vld1_u64_x4_post_imm(ptr %a, ptr %ptr) nounwind { ; CHECK-LABEL: test_vld1_u64_x4_post_imm: -; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]! +; CHECK: vld1.64 {d16, d17, d18, d19}, [r1]! ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.64 {d16}, [r0:64]! ; CHECK-NEXT: vst1.64 {d17}, [r0:64]! 
@@ -612,7 +642,7 @@ define %struct.uint64x1x4_t @test_vld1_u64_x4_post_imm(ptr %a, ptr %ptr) nounwin define %struct.uint64x1x4_t @test_vld1_u64_x4_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind { ; CHECK-LABEL: test_vld1_u64_x4_post_reg: ; CHECK: lsl r3, r3, #3 -; CHECK-NEXT: vld1.64 {d16, d17, d18, d19}, [r1:256], r3 +; CHECK-NEXT: vld1.64 {d16, d17, d18, d19}, [r1], r3 ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.64 {d16}, [r0:64]! ; CHECK-NEXT: vst1.64 {d17}, [r0:64]! @@ -629,7 +659,7 @@ define %struct.uint8x8x2_t @test_vld1_u8_x2_post_imm(ptr %a, ptr %ptr) nounwind ; CHECK-LABEL: test_vld1_u8_x2_post_imm: ; CHECK: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} -; CHECK-NEXT: vld1.8 {d16, d17}, [r0:64]! +; CHECK-NEXT: vld1.8 {d16, d17}, [r0]! ; CHECK-NEXT: vmov lr, r12, d16 ; CHECK-NEXT: str r0, [r1] ; CHECK-NEXT: vmov r2, r3, d17 @@ -646,7 +676,7 @@ define %struct.uint8x8x2_t @test_vld1_u8_x2_post_reg(ptr %a, ptr %ptr, i32 %inc) ; CHECK-LABEL: test_vld1_u8_x2_post_reg: ; CHECK: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} -; CHECK-NEXT: vld1.8 {d16, d17}, [r0:64], r2 +; CHECK-NEXT: vld1.8 {d16, d17}, [r0], r2 ; CHECK-NEXT: vmov lr, r12, d16 ; CHECK-NEXT: str r0, [r1] ; CHECK-NEXT: vmov r2, r3, d17 @@ -661,7 +691,7 @@ define %struct.uint8x8x2_t @test_vld1_u8_x2_post_reg(ptr %a, ptr %ptr, i32 %inc) define %struct.uint8x8x3_t @test_vld1_u8_x3_post_imm(ptr %a, ptr %ptr) nounwind { ; CHECK-LABEL: test_vld1_u8_x3_post_imm: -; CHECK: vld1.8 {d16, d17, d18}, [r1:64]! +; CHECK: vld1.8 {d16, d17, d18}, [r1]! ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.8 {d16}, [r0:64]! ; CHECK-NEXT: vst1.8 {d17}, [r0:64]! 
@@ -675,7 +705,7 @@ define %struct.uint8x8x3_t @test_vld1_u8_x3_post_imm(ptr %a, ptr %ptr) nounwind define %struct.uint8x8x3_t @test_vld1_u8_x3_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind { ; CHECK-LABEL: test_vld1_u8_x3_post_reg: -; CHECK: vld1.8 {d16, d17, d18}, [r1:64], r3 +; CHECK: vld1.8 {d16, d17, d18}, [r1], r3 ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.8 {d16}, [r0:64]! ; CHECK-NEXT: vst1.8 {d17}, [r0:64]! @@ -689,7 +719,7 @@ define %struct.uint8x8x3_t @test_vld1_u8_x3_post_reg(ptr %a, ptr %ptr, i32 %inc) define %struct.uint8x8x4_t @test_vld1_u8_x4_post_imm(ptr %a, ptr %ptr) nounwind { ; CHECK-LABEL: test_vld1_u8_x4_post_imm: -; CHECK: vld1.8 {d16, d17, d18, d19}, [r1:256]! +; CHECK: vld1.8 {d16, d17, d18, d19}, [r1]! ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.8 {d16}, [r0:64]! ; CHECK-NEXT: vst1.8 {d17}, [r0:64]! @@ -704,7 +734,7 @@ define %struct.uint8x8x4_t @test_vld1_u8_x4_post_imm(ptr %a, ptr %ptr) nounwind define %struct.uint8x8x4_t @test_vld1_u8_x4_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind { ; CHECK-LABEL: test_vld1_u8_x4_post_reg: -; CHECK: vld1.8 {d16, d17, d18, d19}, [r1:256], r3 +; CHECK: vld1.8 {d16, d17, d18, d19}, [r1], r3 ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.8 {d16}, [r0:64]! ; CHECK-NEXT: vst1.8 {d17}, [r0:64]! @@ -719,7 +749,7 @@ define %struct.uint8x8x4_t @test_vld1_u8_x4_post_reg(ptr %a, ptr %ptr, i32 %inc) define %struct.uint16x8x2_t @test_vld1q_u16_x2_post_imm(ptr %a, ptr %ptr) nounwind { ; CHECK-LABEL: test_vld1q_u16_x2_post_imm: -; CHECK: vld1.16 {d16, d17, d18, d19}, [r1:256]! +; CHECK: vld1.16 {d16, d17, d18, d19}, [r1]! ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.16 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.64 {d18, d19}, [r0] @@ -732,8 +762,8 @@ define %struct.uint16x8x2_t @test_vld1q_u16_x2_post_imm(ptr %a, ptr %ptr) nounwi define %struct.uint16x8x3_t @test_vld1q_u16_x3_post_imm(ptr %a, ptr %ptr) nounwind { ; CHECK-LABEL: test_vld1q_u16_x3_post_imm: -; CHECK: vld1.16 {d16, d17, d18}, [r1:64]! 
-; CHECK-NEXT: vld1.16 {d19, d20, d21}, [r1:64]! +; CHECK: vld1.16 {d16, d17, d18}, [r1]! +; CHECK-NEXT: vld1.16 {d19, d20, d21}, [r1]! ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.16 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.16 {d18, d19}, [r0]! @@ -747,8 +777,8 @@ define %struct.uint16x8x3_t @test_vld1q_u16_x3_post_imm(ptr %a, ptr %ptr) nounwi define %struct.uint16x8x4_t @test_vld1q_u16_x4_post_imm(ptr %a, ptr %ptr) nounwind { ; CHECK-LABEL: test_vld1q_u16_x4_post_imm: -; CHECK: vld1.16 {d16, d17, d18, d19}, [r1:256]! -; CHECK-NEXT: vld1.16 {d20, d21, d22, d23}, [r1:256]! +; CHECK: vld1.16 {d16, d17, d18, d19}, [r1]! +; CHECK-NEXT: vld1.16 {d20, d21, d22, d23}, [r1]! ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.16 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.16 {d18, d19}, [r0]! @@ -763,7 +793,7 @@ define %struct.uint16x8x4_t @test_vld1q_u16_x4_post_imm(ptr %a, ptr %ptr) nounwi define %struct.uint32x4x2_t @test_vld1q_u32_x2_post_imm(ptr %a, ptr %ptr) nounwind { ; CHECK-LABEL: test_vld1q_u32_x2_post_imm: -; CHECK: vld1.32 {d16, d17, d18, d19}, [r1:256]! +; CHECK: vld1.32 {d16, d17, d18, d19}, [r1]! ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.32 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.64 {d18, d19}, [r0] @@ -776,8 +806,8 @@ define %struct.uint32x4x2_t @test_vld1q_u32_x2_post_imm(ptr %a, ptr %ptr) nounwi define %struct.uint32x4x3_t @test_vld1q_u32_x3_post_imm(ptr %a, ptr %ptr) nounwind { ; CHECK-LABEL: test_vld1q_u32_x3_post_imm: -; CHECK: vld1.32 {d16, d17, d18}, [r1:64]! -; CHECK-NEXT: vld1.32 {d19, d20, d21}, [r1:64]! +; CHECK: vld1.32 {d16, d17, d18}, [r1]! +; CHECK-NEXT: vld1.32 {d19, d20, d21}, [r1]! ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.32 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.32 {d18, d19}, [r0]! 
@@ -791,8 +821,8 @@ define %struct.uint32x4x3_t @test_vld1q_u32_x3_post_imm(ptr %a, ptr %ptr) nounwi define %struct.uint32x4x4_t @test_vld1q_u32_x4_post_imm(ptr %a, ptr %ptr) nounwind { ; CHECK-LABEL: test_vld1q_u32_x4_post_imm: -; CHECK: vld1.32 {d16, d17, d18, d19}, [r1:256]! -; CHECK-NEXT: vld1.32 {d20, d21, d22, d23}, [r1:256]! +; CHECK: vld1.32 {d16, d17, d18, d19}, [r1]! +; CHECK-NEXT: vld1.32 {d20, d21, d22, d23}, [r1]! ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.32 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.32 {d18, d19}, [r0]! @@ -807,7 +837,7 @@ define %struct.uint32x4x4_t @test_vld1q_u32_x4_post_imm(ptr %a, ptr %ptr) nounwi define %struct.uint64x2x2_t @test_vld1q_u64_x2_post_imm(ptr %a, ptr %ptr) nounwind { ; CHECK-LABEL: test_vld1q_u64_x2_post_imm: -; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]! +; CHECK: vld1.64 {d16, d17, d18, d19}, [r1]! ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.64 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.64 {d18, d19}, [r0] @@ -820,8 +850,8 @@ define %struct.uint64x2x2_t @test_vld1q_u64_x2_post_imm(ptr %a, ptr %ptr) nounwi define %struct.uint64x2x3_t @test_vld1q_u64_x3_post_imm(ptr %a, ptr %ptr) nounwind { ; CHECK-LABEL: test_vld1q_u64_x3_post_imm: -; CHECK: vld1.64 {d16, d17, d18}, [r1:64]! -; CHECK-NEXT: vld1.64 {d19, d20, d21}, [r1:64]! +; CHECK: vld1.64 {d16, d17, d18}, [r1]! +; CHECK-NEXT: vld1.64 {d19, d20, d21}, [r1]! ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.64 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.64 {d18, d19}, [r0]! @@ -835,8 +865,8 @@ define %struct.uint64x2x3_t @test_vld1q_u64_x3_post_imm(ptr %a, ptr %ptr) nounwi define %struct.uint64x2x4_t @test_vld1q_u64_x4_post_imm(ptr %a, ptr %ptr) nounwind { ; CHECK-LABEL: test_vld1q_u64_x4_post_imm: -; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]! -; CHECK-NEXT: vld1.64 {d20, d21, d22, d23}, [r1:256]! +; CHECK: vld1.64 {d16, d17, d18, d19}, [r1]! +; CHECK-NEXT: vld1.64 {d20, d21, d22, d23}, [r1]! ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.64 {d16, d17}, [r0]! 
; CHECK-NEXT: vst1.64 {d18, d19}, [r0]! @@ -851,7 +881,7 @@ define %struct.uint64x2x4_t @test_vld1q_u64_x4_post_imm(ptr %a, ptr %ptr) nounwi define %struct.uint8x16x2_t @test_vld1q_u8_x2_post_imm(ptr %a, ptr %ptr) nounwind { ; CHECK-LABEL: test_vld1q_u8_x2_post_imm: -; CHECK: vld1.8 {d16, d17, d18, d19}, [r1:256]! +; CHECK: vld1.8 {d16, d17, d18, d19}, [r1]! ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.8 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.64 {d18, d19}, [r0] @@ -864,8 +894,8 @@ define %struct.uint8x16x2_t @test_vld1q_u8_x2_post_imm(ptr %a, ptr %ptr) nounwin define %struct.uint8x16x3_t @test_vld1q_u8_x3_post_imm(ptr %a, ptr %ptr) nounwind { ; CHECK-LABEL: test_vld1q_u8_x3_post_imm: -; CHECK: vld1.8 {d16, d17, d18}, [r1:64]! -; CHECK-NEXT: vld1.8 {d19, d20, d21}, [r1:64]! +; CHECK: vld1.8 {d16, d17, d18}, [r1]! +; CHECK-NEXT: vld1.8 {d19, d20, d21}, [r1]! ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.8 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.8 {d18, d19}, [r0]! @@ -879,8 +909,8 @@ define %struct.uint8x16x3_t @test_vld1q_u8_x3_post_imm(ptr %a, ptr %ptr) nounwin define %struct.uint8x16x4_t @test_vld1q_u8_x4_post_imm(ptr %a, ptr %ptr) nounwind { ; CHECK-LABEL: test_vld1q_u8_x4_post_imm: -; CHECK: vld1.8 {d16, d17, d18, d19}, [r1:256]! -; CHECK-NEXT: vld1.8 {d20, d21, d22, d23}, [r1:256]! +; CHECK: vld1.8 {d16, d17, d18, d19}, [r1]! +; CHECK-NEXT: vld1.8 {d20, d21, d22, d23}, [r1]! ; CHECK-NEXT: str r1, [r2] ; CHECK-NEXT: vst1.8 {d16, d17}, [r0]! ; CHECK-NEXT: vst1.8 {d18, d19}, [r0]! 
diff --git a/llvm/test/CodeGen/ARM/arm-vst1.ll b/llvm/test/CodeGen/ARM/arm-vst1.ll index 7dacd8b0b99f98..5d0a7e9614ce99 100644 --- a/llvm/test/CodeGen/ARM/arm-vst1.ll +++ b/llvm/test/CodeGen/ARM/arm-vst1.ll @@ -92,7 +92,7 @@ declare void @llvm.arm.neon.vst1x4.p0.v16i8(ptr nocapture, <16 x i8>, <16 x i8>, define arm_aapcs_vfpcc void @test_vst1_u16_x2(ptr %a, %struct.uint16x4x2_t %b) nounwind { ; CHECK-LABEL: test_vst1_u16_x2: -; CHECK: vst1.16 {d0, d1}, [r0:64] +; CHECK: vst1.16 {d0, d1}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint16x4x2_t %b, 0, 0 @@ -101,9 +101,42 @@ entry: ret void } +define arm_aapcs_vfpcc void @test_vst1_u16_x2_align8(ptr %a, %struct.uint16x4x2_t %b) nounwind { +; CHECK-LABEL: test_vst1_u16_x2_align8: +; CHECK: vst1.16 {d0, d1}, [r0:64] +; CHECK-NEXT: bx lr +entry: + %b0 = extractvalue %struct.uint16x4x2_t %b, 0, 0 + %b1 = extractvalue %struct.uint16x4x2_t %b, 0, 1 + tail call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr align 8 %a, <4 x i16> %b0, <4 x i16> %b1) + ret void +} + +define arm_aapcs_vfpcc void @test_vst1_u16_x2_align16(ptr %a, %struct.uint16x4x2_t %b) nounwind { +; CHECK-LABEL: test_vst1_u16_x2_align16: +; CHECK: vst1.16 {d0, d1}, [r0:128] +; CHECK-NEXT: bx lr +entry: + %b0 = extractvalue %struct.uint16x4x2_t %b, 0, 0 + %b1 = extractvalue %struct.uint16x4x2_t %b, 0, 1 + tail call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr align 16 %a, <4 x i16> %b0, <4 x i16> %b1) + ret void +} + +define arm_aapcs_vfpcc void @test_vst1_u16_x2_align32(ptr %a, %struct.uint16x4x2_t %b) nounwind { +; CHECK-LABEL: test_vst1_u16_x2_align32: +; CHECK: vst1.16 {d0, d1}, [r0:128] +; CHECK-NEXT: bx lr +entry: + %b0 = extractvalue %struct.uint16x4x2_t %b, 0, 0 + %b1 = extractvalue %struct.uint16x4x2_t %b, 0, 1 + tail call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr align 32 %a, <4 x i16> %b0, <4 x i16> %b1) + ret void +} + define arm_aapcs_vfpcc void @test_vst1_u16_x3(ptr %a, %struct.uint16x4x3_t %b) nounwind { ; CHECK-LABEL: test_vst1_u16_x3: -; 
CHECK: vst1.16 {d0, d1, d2}, [r0:64] +; CHECK: vst1.16 {d0, d1, d2}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint16x4x3_t %b, 0, 0 @@ -115,7 +148,7 @@ entry: define arm_aapcs_vfpcc void @test_vst1_u16_x4(ptr %a, %struct.uint16x4x4_t %b) nounwind { ; CHECK-LABEL: test_vst1_u16_x4: -; CHECK: vst1.16 {d0, d1, d2, d3}, [r0:256] +; CHECK: vst1.16 {d0, d1, d2, d3}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint16x4x4_t %b, 0, 0 @@ -128,7 +161,7 @@ entry: define arm_aapcs_vfpcc void @test_vst1_u32_x2(ptr %a, %struct.uint32x2x2_t %b) nounwind { ; CHECK-LABEL: test_vst1_u32_x2: -; CHECK: vst1.32 {d0, d1}, [r0:64] +; CHECK: vst1.32 {d0, d1}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint32x2x2_t %b, 0, 0 @@ -139,7 +172,7 @@ entry: define arm_aapcs_vfpcc void @test_vst1_u32_x3(ptr %a, %struct.uint32x2x3_t %b) nounwind { ; CHECK-LABEL: test_vst1_u32_x3: -; CHECK: vst1.32 {d0, d1, d2}, [r0:64] +; CHECK: vst1.32 {d0, d1, d2}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint32x2x3_t %b, 0, 0 @@ -151,7 +184,7 @@ entry: define arm_aapcs_vfpcc void @test_vst1_u32_x4(ptr %a, %struct.uint32x2x4_t %b) nounwind { ; CHECK-LABEL: test_vst1_u32_x4: -; CHECK: vst1.32 {d0, d1, d2, d3}, [r0:256] +; CHECK: vst1.32 {d0, d1, d2, d3}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint32x2x4_t %b, 0, 0 @@ -164,7 +197,7 @@ entry: define arm_aapcs_vfpcc void @test_vst1_u64_x2(ptr %a, %struct.uint64x1x2_t %b) nounwind { ; CHECK-LABEL: test_vst1_u64_x2: -; CHECK: vst1.64 {d0, d1}, [r0:64] +; CHECK: vst1.64 {d0, d1}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint64x1x2_t %b, 0, 0 @@ -175,7 +208,7 @@ entry: define arm_aapcs_vfpcc void @test_vst1_u64_x3(ptr %a, %struct.uint64x1x3_t %b) nounwind { ; CHECK-LABEL: test_vst1_u64_x3: -; CHECK: vst1.64 {d0, d1, d2}, [r0:64] +; CHECK: vst1.64 {d0, d1, d2}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint64x1x3_t %b, 0, 0 @@ -187,7 +220,7 @@ entry: 
define arm_aapcs_vfpcc void @test_vst1_u64_x4(ptr %a, %struct.uint64x1x4_t %b) nounwind { ; CHECK-LABEL: test_vst1_u64_x4: -; CHECK: vst1.64 {d0, d1, d2, d3}, [r0:256] +; CHECK: vst1.64 {d0, d1, d2, d3}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint64x1x4_t %b, 0, 0 @@ -200,7 +233,7 @@ entry: define arm_aapcs_vfpcc void @test_vst1_u8_x2(ptr %a, %struct.uint8x8x2_t %b) nounwind { ; CHECK-LABEL: test_vst1_u8_x2: -; CHECK: vst1.8 {d0, d1}, [r0:64] +; CHECK: vst1.8 {d0, d1}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint8x8x2_t %b, 0, 0 @@ -211,7 +244,7 @@ entry: define arm_aapcs_vfpcc void @test_vst1_u8_x3(ptr %a, %struct.uint8x8x3_t %b) nounwind { ; CHECK-LABEL: test_vst1_u8_x3: -; CHECK: vst1.8 {d0, d1, d2}, [r0:64] +; CHECK: vst1.8 {d0, d1, d2}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint8x8x3_t %b, 0, 0 @@ -223,7 +256,7 @@ entry: define arm_aapcs_vfpcc void @test_vst1_u8_x4(ptr %a, %struct.uint8x8x4_t %b) nounwind { ; CHECK-LABEL: test_vst1_u8_x4: -; CHECK: vst1.8 {d0, d1, d2, d3}, [r0:256] +; CHECK: vst1.8 {d0, d1, d2, d3}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint8x8x4_t %b, 0, 0 @@ -236,7 +269,7 @@ entry: define arm_aapcs_vfpcc void @test_vst1q_u16_x2(ptr %a, %struct.uint16x8x2_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u16_x2: -; CHECK: vst1.16 {d0, d1, d2, d3}, [r0:256] +; CHECK: vst1.16 {d0, d1, d2, d3}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint16x8x2_t %b, 0, 0 @@ -247,8 +280,8 @@ entry: define arm_aapcs_vfpcc void @test_vst1q_u16_x3(ptr %a, %struct.uint16x8x3_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u16_x3: -; CHECK: vst1.16 {d0, d1, d2}, [r0:64]! -; CHECK-NEXT: vst1.16 {d3, d4, d5}, [r0:64] +; CHECK: vst1.16 {d0, d1, d2}, [r0]! 
+; CHECK-NEXT: vst1.16 {d3, d4, d5}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint16x8x3_t %b, 0, 0 @@ -260,8 +293,8 @@ entry: define arm_aapcs_vfpcc void @test_vst1q_u16_x4(ptr %a, %struct.uint16x8x4_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u16_x4: -; CHECK: vst1.16 {d0, d1, d2, d3}, [r0:256]! -; CHECK-NEXT: vst1.16 {d4, d5, d6, d7}, [r0:256] +; CHECK: vst1.16 {d0, d1, d2, d3}, [r0]! +; CHECK-NEXT: vst1.16 {d4, d5, d6, d7}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint16x8x4_t %b, 0, 0 @@ -274,7 +307,7 @@ entry: define arm_aapcs_vfpcc void @test_vst1q_u32_x2(ptr %a, %struct.uint32x4x2_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u32_x2: -; CHECK: vst1.32 {d0, d1, d2, d3}, [r0:256] +; CHECK: vst1.32 {d0, d1, d2, d3}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint32x4x2_t %b, 0, 0 @@ -285,8 +318,8 @@ entry: define arm_aapcs_vfpcc void @test_vst1q_u32_x3(ptr %a, %struct.uint32x4x3_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u32_x3: -; CHECK: vst1.32 {d0, d1, d2}, [r0:64]! -; CHECK-NEXT: vst1.32 {d3, d4, d5}, [r0:64] +; CHECK: vst1.32 {d0, d1, d2}, [r0]! +; CHECK-NEXT: vst1.32 {d3, d4, d5}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint32x4x3_t %b, 0, 0 @@ -298,8 +331,8 @@ entry: define arm_aapcs_vfpcc void @test_vst1q_u32_x4(ptr %a, %struct.uint32x4x4_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u32_x4: -; CHECK: vst1.32 {d0, d1, d2, d3}, [r0:256]! -; CHECK-NEXT: vst1.32 {d4, d5, d6, d7}, [r0:256] +; CHECK: vst1.32 {d0, d1, d2, d3}, [r0]! 
+; CHECK-NEXT: vst1.32 {d4, d5, d6, d7}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint32x4x4_t %b, 0, 0 @@ -312,7 +345,7 @@ entry: define arm_aapcs_vfpcc void @test_vst1q_u64_x2(ptr %a, %struct.uint64x2x2_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u64_x2: -; CHECK: vst1.64 {d0, d1, d2, d3}, [r0:256] +; CHECK: vst1.64 {d0, d1, d2, d3}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint64x2x2_t %b, 0, 0 @@ -323,8 +356,8 @@ entry: define arm_aapcs_vfpcc void @test_vst1q_u64_x3(ptr %a, %struct.uint64x2x3_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u64_x3: -; CHECK: vst1.64 {d0, d1, d2}, [r0:64]! -; CHECK-NEXT: vst1.64 {d3, d4, d5}, [r0:64] +; CHECK: vst1.64 {d0, d1, d2}, [r0]! +; CHECK-NEXT: vst1.64 {d3, d4, d5}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint64x2x3_t %b, 0, 0 @@ -336,8 +369,8 @@ entry: define arm_aapcs_vfpcc void @test_vst1q_u64_x4(ptr %a, %struct.uint64x2x4_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u64_x4: -; CHECK: vst1.64 {d0, d1, d2, d3}, [r0:256]! -; CHECK-NEXT: vst1.64 {d4, d5, d6, d7}, [r0:256] +; CHECK: vst1.64 {d0, d1, d2, d3}, [r0]! +; CHECK-NEXT: vst1.64 {d4, d5, d6, d7}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint64x2x4_t %b, 0, 0 @@ -350,7 +383,7 @@ entry: define arm_aapcs_vfpcc void @test_vst1q_u8_x2(ptr %a, %struct.uint8x16x2_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u8_x2: -; CHECK: vst1.8 {d0, d1, d2, d3}, [r0:256] +; CHECK: vst1.8 {d0, d1, d2, d3}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint8x16x2_t %b, 0, 0 @@ -361,8 +394,8 @@ entry: define arm_aapcs_vfpcc void @test_vst1q_u8_x3(ptr %a, %struct.uint8x16x3_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u8_x3: -; CHECK: vst1.8 {d0, d1, d2}, [r0:64]! -; CHECK-NEXT: vst1.8 {d3, d4, d5}, [r0:64] +; CHECK: vst1.8 {d0, d1, d2}, [r0]! 
+; CHECK-NEXT: vst1.8 {d3, d4, d5}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint8x16x3_t %b, 0, 0 @@ -374,8 +407,8 @@ entry: define arm_aapcs_vfpcc void @test_vst1q_u8_x4(ptr %a, %struct.uint8x16x4_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u8_x4: -; CHECK: vst1.8 {d0, d1, d2, d3}, [r0:256]! -; CHECK-NEXT: vst1.8 {d4, d5, d6, d7}, [r0:256] +; CHECK: vst1.8 {d0, d1, d2, d3}, [r0]! +; CHECK-NEXT: vst1.8 {d4, d5, d6, d7}, [r0] ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint8x16x4_t %b, 0, 0 @@ -390,7 +423,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1_u8_x2_post_imm(ptr %a, %struct.uint8x8x2_t %b) nounwind { ; CHECK-LABEL: test_vst1_u8_x2_post_imm: -; CHECK: vst1.8 {d0, d1}, [r0:64]! +; CHECK: vst1.8 {d0, d1}, [r0]! ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint8x8x2_t %b, 0, 0 @@ -402,7 +435,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1_u8_x2_post_reg(ptr %a, %struct.uint8x8x2_t %b, i32 %inc) nounwind { ; CHECK-LABEL: test_vst1_u8_x2_post_reg: -; CHECK: vst1.8 {d0, d1}, [r0:64], r1 +; CHECK: vst1.8 {d0, d1}, [r0], r1 ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint8x8x2_t %b, 0, 0 @@ -414,7 +447,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1_u16_x2_post_imm(ptr %a, %struct.uint16x4x2_t %b) nounwind { ; CHECK-LABEL: test_vst1_u16_x2_post_imm: -; CHECK: vst1.16 {d0, d1}, [r0:64]! +; CHECK: vst1.16 {d0, d1}, [r0]! 
; CHECK-NEXT: bx lr %b0 = extractvalue %struct.uint16x4x2_t %b, 0, 0 %b1 = extractvalue %struct.uint16x4x2_t %b, 0, 1 @@ -426,7 +459,7 @@ define arm_aapcs_vfpcc ptr @test_vst1_u16_x2_post_imm(ptr %a, %struct.uint16x4x2 define arm_aapcs_vfpcc ptr @test_vst1_u16_x2_post_reg(ptr %a, %struct.uint16x4x2_t %b, i32 %inc) nounwind { ; CHECK-LABEL: test_vst1_u16_x2_post_reg: ; CHECK: lsl r1, r1, #1 -; CHECK-NEXT: vst1.16 {d0, d1}, [r0:64], r1 +; CHECK-NEXT: vst1.16 {d0, d1}, [r0], r1 ; CHECK-NEXT: bx lr %b0 = extractvalue %struct.uint16x4x2_t %b, 0, 0 %b1 = extractvalue %struct.uint16x4x2_t %b, 0, 1 @@ -437,7 +470,7 @@ define arm_aapcs_vfpcc ptr @test_vst1_u16_x2_post_reg(ptr %a, %struct.uint16x4x2 define arm_aapcs_vfpcc ptr @test_vst1_u32_x2_post_imm(ptr %a, %struct.uint32x2x2_t %b) nounwind { ; CHECK-LABEL: test_vst1_u32_x2_post_imm: -; CHECK: vst1.32 {d0, d1}, [r0:64]! +; CHECK: vst1.32 {d0, d1}, [r0]! ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint32x2x2_t %b, 0, 0 @@ -450,7 +483,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1_u32_x2_post_reg(ptr %a, %struct.uint32x2x2_t %b, i32 %inc) nounwind { ; CHECK-LABEL: test_vst1_u32_x2_post_reg: ; CHECK: lsl r1, r1, #2 -; CHECK-NEXT: vst1.32 {d0, d1}, [r0:64], r1 +; CHECK-NEXT: vst1.32 {d0, d1}, [r0], r1 ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint32x2x2_t %b, 0, 0 @@ -462,7 +495,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1_u64_x2_post_imm(ptr %a, %struct.uint64x1x2_t %b) nounwind { ; CHECK-LABEL: test_vst1_u64_x2_post_imm: -; CHECK: vst1.64 {d0, d1}, [r0:64]! +; CHECK: vst1.64 {d0, d1}, [r0]! 
; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint64x1x2_t %b, 0, 0 @@ -475,7 +508,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1_u64_x2_post_reg(ptr %a, %struct.uint64x1x2_t %b, i32 %inc) nounwind { ; CHECK-LABEL: test_vst1_u64_x2_post_reg: ; CHECK: lsl r1, r1, #3 -; CHECK-NEXT: vst1.64 {d0, d1}, [r0:64], r1 +; CHECK-NEXT: vst1.64 {d0, d1}, [r0], r1 ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint64x1x2_t %b, 0, 0 @@ -487,7 +520,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1q_u8_x2_post_imm(ptr %a, %struct.uint8x16x2_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u8_x2_post_imm: -; CHECK: vst1.8 {d0, d1, d2, d3}, [r0:256]! +; CHECK: vst1.8 {d0, d1, d2, d3}, [r0]! ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint8x16x2_t %b, 0, 0 @@ -499,7 +532,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1q_u8_x2_post_reg(ptr %a, %struct.uint8x16x2_t %b, i32 %inc) nounwind { ; CHECK-LABEL: test_vst1q_u8_x2_post_reg: -; CHECK: vst1.8 {d0, d1, d2, d3}, [r0:256], r1 +; CHECK: vst1.8 {d0, d1, d2, d3}, [r0], r1 ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint8x16x2_t %b, 0, 0 @@ -511,7 +544,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1q_u16_x2_post_imm(ptr %a, %struct.uint16x8x2_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u16_x2_post_imm: -; CHECK: vst1.16 {d0, d1, d2, d3}, [r0:256]! +; CHECK: vst1.16 {d0, d1, d2, d3}, [r0]! 
; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint16x8x2_t %b, 0, 0 @@ -524,7 +557,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1q_u16_x2_post_reg(ptr %a, %struct.uint16x8x2_t %b, i32 %inc) nounwind { ; CHECK-LABEL: test_vst1q_u16_x2_post_reg: ; CHECK: lsl r1, r1, #1 -; CHECK-NEXT: vst1.16 {d0, d1, d2, d3}, [r0:256], r1 +; CHECK-NEXT: vst1.16 {d0, d1, d2, d3}, [r0], r1 ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint16x8x2_t %b, 0, 0 @@ -536,7 +569,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1q_u32_x2_post_imm(ptr %a, %struct.uint32x4x2_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u32_x2_post_imm: -; CHECK: vst1.32 {d0, d1, d2, d3}, [r0:256]! +; CHECK: vst1.32 {d0, d1, d2, d3}, [r0]! ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint32x4x2_t %b, 0, 0 @@ -549,7 +582,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1q_u32_x2_post_reg(ptr %a, %struct.uint32x4x2_t %b, i32 %inc) nounwind { ; CHECK-LABEL: test_vst1q_u32_x2_post_reg: ; CHECK: lsl r1, r1, #2 -; CHECK-NEXT: vst1.32 {d0, d1, d2, d3}, [r0:256], r1 +; CHECK-NEXT: vst1.32 {d0, d1, d2, d3}, [r0], r1 ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint32x4x2_t %b, 0, 0 @@ -561,7 +594,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1q_u64_x2_post_imm(ptr %a, %struct.uint64x2x2_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u64_x2_post_imm: -; CHECK: vst1.64 {d0, d1, d2, d3}, [r0:256]! +; CHECK: vst1.64 {d0, d1, d2, d3}, [r0]! 
; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint64x2x2_t %b, 0, 0 @@ -574,7 +607,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1q_u64_x2_post_reg(ptr %a, %struct.uint64x2x2_t %b, i32 %inc) nounwind { ; CHECK-LABEL: test_vst1q_u64_x2_post_reg: ; CHECK: lsl r1, r1, #3 -; CHECK-NEXT: vst1.64 {d0, d1, d2, d3}, [r0:256], r1 +; CHECK-NEXT: vst1.64 {d0, d1, d2, d3}, [r0], r1 ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint64x2x2_t %b, 0, 0 @@ -587,7 +620,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1_u8_x3_post_imm(ptr %a, %struct.uint8x8x3_t %b) nounwind { ; CHECK-LABEL: test_vst1_u8_x3_post_imm: -; CHECK: vst1.8 {d0, d1, d2}, [r0:64]! +; CHECK: vst1.8 {d0, d1, d2}, [r0]! ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint8x8x3_t %b, 0, 0 @@ -600,7 +633,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1_u8_x3_post_reg(ptr %a, %struct.uint8x8x3_t %b, i32 %inc) nounwind { ; CHECK-LABEL: test_vst1_u8_x3_post_reg: -; CHECK: vst1.8 {d0, d1, d2}, [r0:64], r1 +; CHECK: vst1.8 {d0, d1, d2}, [r0], r1 ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint8x8x3_t %b, 0, 0 @@ -613,7 +646,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1_u16_x3_post_imm(ptr %a, %struct.uint16x4x3_t %b) nounwind { ; CHECK-LABEL: test_vst1_u16_x3_post_imm: -; CHECK: vst1.16 {d0, d1, d2}, [r0:64]! +; CHECK: vst1.16 {d0, d1, d2}, [r0]! 
; CHECK-NEXT: bx lr %b0 = extractvalue %struct.uint16x4x3_t %b, 0, 0 %b1 = extractvalue %struct.uint16x4x3_t %b, 0, 1 @@ -626,7 +659,7 @@ define arm_aapcs_vfpcc ptr @test_vst1_u16_x3_post_imm(ptr %a, %struct.uint16x4x3 define arm_aapcs_vfpcc ptr @test_vst1_u16_x3_post_reg(ptr %a, %struct.uint16x4x3_t %b, i32 %inc) nounwind { ; CHECK-LABEL: test_vst1_u16_x3_post_reg: ; CHECK: lsl r1, r1, #1 -; CHECK-NEXT: vst1.16 {d0, d1, d2}, [r0:64], r1 +; CHECK-NEXT: vst1.16 {d0, d1, d2}, [r0], r1 ; CHECK-NEXT: bx lr %b0 = extractvalue %struct.uint16x4x3_t %b, 0, 0 %b1 = extractvalue %struct.uint16x4x3_t %b, 0, 1 @@ -638,7 +671,7 @@ define arm_aapcs_vfpcc ptr @test_vst1_u16_x3_post_reg(ptr %a, %struct.uint16x4x3 define arm_aapcs_vfpcc ptr @test_vst1_u32_x3_post_imm(ptr %a, %struct.uint32x2x3_t %b) nounwind { ; CHECK-LABEL: test_vst1_u32_x3_post_imm: -; CHECK: vst1.32 {d0, d1, d2}, [r0:64]! +; CHECK: vst1.32 {d0, d1, d2}, [r0]! ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint32x2x3_t %b, 0, 0 @@ -652,7 +685,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1_u32_x3_post_reg(ptr %a, %struct.uint32x2x3_t %b, i32 %inc) nounwind { ; CHECK-LABEL: test_vst1_u32_x3_post_reg: ; CHECK: lsl r1, r1, #2 -; CHECK-NEXT: vst1.32 {d0, d1, d2}, [r0:64], r1 +; CHECK-NEXT: vst1.32 {d0, d1, d2}, [r0], r1 ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint32x2x3_t %b, 0, 0 @@ -665,7 +698,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1_u64_x3_post_imm(ptr %a, %struct.uint64x1x3_t %b) nounwind { ; CHECK-LABEL: test_vst1_u64_x3_post_imm: -; CHECK: vst1.64 {d0, d1, d2}, [r0:64]! +; CHECK: vst1.64 {d0, d1, d2}, [r0]! 
; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint64x1x3_t %b, 0, 0 @@ -679,7 +712,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1_u64_x3_post_reg(ptr %a, %struct.uint64x1x3_t %b, i32 %inc) nounwind { ; CHECK-LABEL: test_vst1_u64_x3_post_reg: ; CHECK: lsl r1, r1, #3 -; CHECK-NEXT: vst1.64 {d0, d1, d2}, [r0:64], r1 +; CHECK-NEXT: vst1.64 {d0, d1, d2}, [r0], r1 ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint64x1x3_t %b, 0, 0 @@ -692,8 +725,8 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1q_u8_x3_post_imm(ptr %a, %struct.uint8x16x3_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u8_x3_post_imm: -; CHECK: vst1.8 {d0, d1, d2}, [r0:64]! -; CHECK-NEXT: vst1.8 {d3, d4, d5}, [r0:64]! +; CHECK: vst1.8 {d0, d1, d2}, [r0]! +; CHECK-NEXT: vst1.8 {d3, d4, d5}, [r0]! ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint8x16x3_t %b, 0, 0 @@ -706,8 +739,8 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1q_u16_x3_post_imm(ptr %a, %struct.uint16x8x3_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u16_x3_post_imm: -; CHECK: vst1.16 {d0, d1, d2}, [r0:64]! -; CHECK-NEXT: vst1.16 {d3, d4, d5}, [r0:64]! +; CHECK: vst1.16 {d0, d1, d2}, [r0]! +; CHECK-NEXT: vst1.16 {d3, d4, d5}, [r0]! ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint16x8x3_t %b, 0, 0 @@ -720,8 +753,8 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1q_u32_x3_post_imm(ptr %a, %struct.uint32x4x3_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u32_x3_post_imm: -; CHECK: vst1.32 {d0, d1, d2}, [r0:64]! -; CHECK-NEXT: vst1.32 {d3, d4, d5}, [r0:64]! +; CHECK: vst1.32 {d0, d1, d2}, [r0]! +; CHECK-NEXT: vst1.32 {d3, d4, d5}, [r0]! ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint32x4x3_t %b, 0, 0 @@ -734,8 +767,8 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1q_u64_x3_post_imm(ptr %a, %struct.uint64x2x3_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u64_x3_post_imm: -; CHECK: vst1.64 {d0, d1, d2}, [r0:64]! -; CHECK-NEXT: vst1.64 {d3, d4, d5}, [r0:64]! +; CHECK: vst1.64 {d0, d1, d2}, [r0]! 
+; CHECK-NEXT: vst1.64 {d3, d4, d5}, [r0]! ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint64x2x3_t %b, 0, 0 @@ -748,7 +781,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1_u8_x4_post_imm(ptr %a, %struct.uint8x8x4_t %b) nounwind { ; CHECK-LABEL: test_vst1_u8_x4_post_imm: -; CHECK: vst1.8 {d0, d1, d2, d3}, [r0:256]! +; CHECK: vst1.8 {d0, d1, d2, d3}, [r0]! ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint8x8x4_t %b, 0, 0 @@ -762,7 +795,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1_u8_x4_post_reg(ptr %a, %struct.uint8x8x4_t %b, i32 %inc) nounwind { ; CHECK-LABEL: test_vst1_u8_x4_post_reg: -; CHECK: vst1.8 {d0, d1, d2, d3}, [r0:256], r1 +; CHECK: vst1.8 {d0, d1, d2, d3}, [r0], r1 ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint8x8x4_t %b, 0, 0 @@ -776,7 +809,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1_u16_x4_post_imm(ptr %a, %struct.uint16x4x4_t %b) nounwind { ; CHECK-LABEL: test_vst1_u16_x4_post_imm: -; CHECK: vst1.16 {d0, d1, d2, d3}, [r0:256]! +; CHECK: vst1.16 {d0, d1, d2, d3}, [r0]! ; CHECK-NEXT: bx lr %b0 = extractvalue %struct.uint16x4x4_t %b, 0, 0 %b1 = extractvalue %struct.uint16x4x4_t %b, 0, 1 @@ -790,7 +823,7 @@ define arm_aapcs_vfpcc ptr @test_vst1_u16_x4_post_imm(ptr %a, %struct.uint16x4x4 define arm_aapcs_vfpcc ptr @test_vst1_u16_x4_post_reg(ptr %a, %struct.uint16x4x4_t %b, i32 %inc) nounwind { ; CHECK-LABEL: test_vst1_u16_x4_post_reg: ; CHECK: lsl r1, r1, #1 -; CHECK-NEXT: vst1.16 {d0, d1, d2, d3}, [r0:256], r1 +; CHECK-NEXT: vst1.16 {d0, d1, d2, d3}, [r0], r1 ; CHECK-NEXT: bx lr %b0 = extractvalue %struct.uint16x4x4_t %b, 0, 0 %b1 = extractvalue %struct.uint16x4x4_t %b, 0, 1 @@ -803,7 +836,7 @@ define arm_aapcs_vfpcc ptr @test_vst1_u16_x4_post_reg(ptr %a, %struct.uint16x4x4 define arm_aapcs_vfpcc ptr @test_vst1_u32_x4_post_imm(ptr %a, %struct.uint32x2x4_t %b) nounwind { ; CHECK-LABEL: test_vst1_u32_x4_post_imm: -; CHECK: vst1.32 {d0, d1, d2, d3}, [r0:256]! +; CHECK: vst1.32 {d0, d1, d2, d3}, [r0]! 
; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint32x2x4_t %b, 0, 0 @@ -818,7 +851,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1_u32_x4_post_reg(ptr %a, %struct.uint32x2x4_t %b, i32 %inc) nounwind { ; CHECK-LABEL: test_vst1_u32_x4_post_reg: ; CHECK: lsl r1, r1, #2 -; CHECK-NEXT: vst1.32 {d0, d1, d2, d3}, [r0:256], r1 +; CHECK-NEXT: vst1.32 {d0, d1, d2, d3}, [r0], r1 ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint32x2x4_t %b, 0, 0 @@ -832,7 +865,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1_u64_x4_post_imm(ptr %a, %struct.uint64x1x4_t %b) nounwind { ; CHECK-LABEL: test_vst1_u64_x4_post_imm: -; CHECK: vst1.64 {d0, d1, d2, d3}, [r0:256]! +; CHECK: vst1.64 {d0, d1, d2, d3}, [r0]! ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint64x1x4_t %b, 0, 0 @@ -847,7 +880,7 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1_u64_x4_post_reg(ptr %a, %struct.uint64x1x4_t %b, i32 %inc) nounwind { ; CHECK-LABEL: test_vst1_u64_x4_post_reg: ; CHECK: lsl r1, r1, #3 -; CHECK-NEXT: vst1.64 {d0, d1, d2, d3}, [r0:256], r1 +; CHECK-NEXT: vst1.64 {d0, d1, d2, d3}, [r0], r1 ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint64x1x4_t %b, 0, 0 @@ -861,8 +894,8 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1q_u8_x4_post_imm(ptr %a, %struct.uint8x16x4_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u8_x4_post_imm: -; CHECK: vst1.8 {d0, d1, d2, d3}, [r0:256]! -; CHECK-NEXT: vst1.8 {d4, d5, d6, d7}, [r0:256]! +; CHECK: vst1.8 {d0, d1, d2, d3}, [r0]! +; CHECK-NEXT: vst1.8 {d4, d5, d6, d7}, [r0]! ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint8x16x4_t %b, 0, 0 @@ -876,8 +909,8 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1q_u16_x4_post_imm(ptr %a, %struct.uint16x8x4_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u16_x4_post_imm: -; CHECK: vst1.16 {d0, d1, d2, d3}, [r0:256]! -; CHECK-NEXT: vst1.16 {d4, d5, d6, d7}, [r0:256]! +; CHECK: vst1.16 {d0, d1, d2, d3}, [r0]! +; CHECK-NEXT: vst1.16 {d4, d5, d6, d7}, [r0]! 
; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint16x8x4_t %b, 0, 0 @@ -891,8 +924,8 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1q_u32_x4_post_imm(ptr %a, %struct.uint32x4x4_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u32_x4_post_imm: -; CHECK: vst1.32 {d0, d1, d2, d3}, [r0:256]! -; CHECK-NEXT: vst1.32 {d4, d5, d6, d7}, [r0:256]! +; CHECK: vst1.32 {d0, d1, d2, d3}, [r0]! +; CHECK-NEXT: vst1.32 {d4, d5, d6, d7}, [r0]! ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint32x4x4_t %b, 0, 0 @@ -906,8 +939,8 @@ entry: define arm_aapcs_vfpcc ptr @test_vst1q_u64_x4_post_imm(ptr %a, %struct.uint64x2x4_t %b) nounwind { ; CHECK-LABEL: test_vst1q_u64_x4_post_imm: -; CHECK: vst1.64 {d0, d1, d2, d3}, [r0:256]! -; CHECK-NEXT: vst1.64 {d4, d5, d6, d7}, [r0:256]! +; CHECK: vst1.64 {d0, d1, d2, d3}, [r0]! +; CHECK-NEXT: vst1.64 {d4, d5, d6, d7}, [r0]! ; CHECK-NEXT: bx lr entry: %b0 = extractvalue %struct.uint64x2x4_t %b, 0, 0 diff --git a/llvm/test/CodeGen/ARM/bf16-intrinsics-ld-st.ll b/llvm/test/CodeGen/ARM/bf16-intrinsics-ld-st.ll index e49128f53b1157..846cf239e8987e 100644 --- a/llvm/test/CodeGen/ARM/bf16-intrinsics-ld-st.ll +++ b/llvm/test/CodeGen/ARM/bf16-intrinsics-ld-st.ll @@ -60,7 +60,7 @@ entry: define arm_aapcs_vfpcc [2 x <2 x i32>] @test_vld1_bf16_x2(ptr %ptr) { ; CHECK-LABEL: test_vld1_bf16_x2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vld1.16 {d0, d1}, [r0:64] +; CHECK-NEXT: vld1.16 {d0, d1}, [r0] ; CHECK-NEXT: bx lr entry: %vld1xN = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld1x2.v4bf16.p0(ptr %ptr) @@ -76,7 +76,7 @@ entry: define arm_aapcs_vfpcc [2 x <4 x i32>] @test_vld1q_bf16_x2(ptr %ptr) { ; CHECK-LABEL: test_vld1q_bf16_x2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vld1.16 {d0, d1, d2, d3}, [r0:256] +; CHECK-NEXT: vld1.16 {d0, d1, d2, d3}, [r0] ; CHECK-NEXT: bx lr entry: %vld1xN = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld1x2.v8bf16.p0(ptr %ptr) @@ -92,7 +92,7 @@ entry: define arm_aapcs_vfpcc [3 x <2 x i32>] 
@test_vld1_bf16_x3(ptr %ptr) { ; CHECK-LABEL: test_vld1_bf16_x3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vld1.16 {d0, d1, d2}, [r0:64] +; CHECK-NEXT: vld1.16 {d0, d1, d2}, [r0] ; CHECK-NEXT: bx lr entry: %vld1xN = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld1x3.v4bf16.p0(ptr %ptr) @@ -111,8 +111,8 @@ entry: define arm_aapcs_vfpcc [3 x <4 x i32>] @test_vld1q_bf16_x3(ptr %ptr) { ; CHECK-LABEL: test_vld1q_bf16_x3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vld1.16 {d0, d1, d2}, [r0:64]! -; CHECK-NEXT: vld1.16 {d3, d4, d5}, [r0:64] +; CHECK-NEXT: vld1.16 {d0, d1, d2}, [r0]! +; CHECK-NEXT: vld1.16 {d3, d4, d5}, [r0] ; CHECK-NEXT: bx lr entry: %vld1xN = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld1x3.v8bf16.p0(ptr %ptr) @@ -131,7 +131,7 @@ entry: define arm_aapcs_vfpcc [4 x <2 x i32>] @test_vld1_bf16_x4(ptr %ptr) { ; CHECK-LABEL: test_vld1_bf16_x4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vld1.16 {d0, d1, d2, d3}, [r0:256] +; CHECK-NEXT: vld1.16 {d0, d1, d2, d3}, [r0] ; CHECK-NEXT: bx lr entry: %vld1xN = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld1x4.v4bf16.p0(ptr %ptr) @@ -153,8 +153,8 @@ entry: define arm_aapcs_vfpcc [4 x <4 x i32>] @test_vld1q_bf16_x4(ptr %ptr) { ; CHECK-LABEL: test_vld1q_bf16_x4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vld1.16 {d0, d1, d2, d3}, [r0:256]! -; CHECK-NEXT: vld1.16 {d4, d5, d6, d7}, [r0:256] +; CHECK-NEXT: vld1.16 {d0, d1, d2, d3}, [r0]! 
+; CHECK-NEXT: vld1.16 {d4, d5, d6, d7}, [r0] ; CHECK-NEXT: bx lr entry: %vld1xN = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld1x4.v8bf16.p0(ptr %ptr) @@ -635,7 +635,7 @@ define arm_aapcs_vfpcc void @test_vst1_bf16_x2(ptr nocapture %ptr, [2 x <2 x i32 ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0 def $q0 ; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0 def $q0 -; CHECK-NEXT: vst1.16 {d0, d1}, [r0:64] +; CHECK-NEXT: vst1.16 {d0, d1}, [r0] ; CHECK-NEXT: bx lr entry: %val.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %val.coerce, 0 @@ -651,7 +651,7 @@ define arm_aapcs_vfpcc void @test_vst1q_bf16_x2(ptr nocapture %ptr, [2 x <4 x i3 ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: @ kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: @ kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: vst1.16 {d0, d1, d2, d3}, [r0:256] +; CHECK-NEXT: vst1.16 {d0, d1, d2, d3}, [r0] ; CHECK-NEXT: bx lr entry: %val.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %val.coerce, 0 @@ -668,7 +668,7 @@ define arm_aapcs_vfpcc void @test_vst1_bf16_x3(ptr nocapture %ptr, [3 x <2 x i32 ; CHECK-NEXT: @ kill: def $d2 killed $d2 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: vst1.16 {d0, d1, d2}, [r0:64] +; CHECK-NEXT: vst1.16 {d0, d1, d2}, [r0] ; CHECK-NEXT: bx lr entry: %val.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %val.coerce, 0 @@ -687,8 +687,8 @@ define arm_aapcs_vfpcc void @test_vst1q_bf16_x3(ptr nocapture %ptr, [3 x <4 x i3 ; CHECK-NEXT: @ kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: @ kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: @ kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: vst1.16 {d0, d1, d2}, [r0:64]! 
-; CHECK-NEXT: vst1.16 {d3, d4, d5}, [r0:64] +; CHECK-NEXT: vst1.16 {d0, d1, d2}, [r0]! +; CHECK-NEXT: vst1.16 {d3, d4, d5}, [r0] ; CHECK-NEXT: bx lr entry: %val.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %val.coerce, 0 @@ -708,7 +708,7 @@ define arm_aapcs_vfpcc void @test_vst1_bf16_x4(ptr nocapture %ptr, [4 x <2 x i32 ; CHECK-NEXT: @ kill: def $d2 killed $d2 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: vst1.16 {d0, d1, d2, d3}, [r0:256] +; CHECK-NEXT: vst1.16 {d0, d1, d2, d3}, [r0] ; CHECK-NEXT: bx lr entry: %val.coerce.fca.0.extract = extractvalue [4 x <2 x i32>] %val.coerce, 0 @@ -730,8 +730,8 @@ define arm_aapcs_vfpcc void @test_vst1q_bf16_x4(ptr nocapture %ptr, [4 x <4 x i3 ; CHECK-NEXT: @ kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: @ kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: @ kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: vst1.16 {d0, d1, d2, d3}, [r0:256]! -; CHECK-NEXT: vst1.16 {d4, d5, d6, d7}, [r0:256] +; CHECK-NEXT: vst1.16 {d0, d1, d2, d3}, [r0]! 
+; CHECK-NEXT: vst1.16 {d4, d5, d6, d7}, [r0] ; CHECK-NEXT: bx lr entry: %val.coerce.fca.0.extract = extractvalue [4 x <4 x i32>] %val.coerce, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll index 776a1e9bab6b26..81fb86cd81cd35 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -47,7 +47,6 @@ define <32 x i32> @insertelt_v32i32_0(<32 x i32> %a, i32 %y) { ret <32 x i32> %b } -; FIXME: Should only require an m2 slideup define <32 x i32> @insertelt_v32i32_4(<32 x i32> %a, i32 %y) { ; CHECK-LABEL: insertelt_v32i32_4: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll index 805a3c640957bf..6246ef7db0cb33 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll @@ -1392,8 +1392,6 @@ define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ret <32 x i64> %v } -; FIXME: We don't match vadd.vi on RV32. - define <32 x i64> @vadd_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; CHECK-LABEL: vadd_vx_v32i64_evl12: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll index c5dd6ac344a375..5030fda9dea331 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll @@ -1405,8 +1405,6 @@ define <32 x i64> @vsadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ret <32 x i64> %v } -; FIXME: We don't match vsadd.vi on RV32. 
- define <32 x i64> @vsadd_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; CHECK-LABEL: vsadd_vx_v32i64_evl12: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll index 17d9c437590a77..562399ea33e7a8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll @@ -1401,8 +1401,6 @@ define <32 x i64> @vsaddu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ret <32 x i64> %v } -; FIXME: We don't match vsaddu.vi on RV32. - define <32 x i64> @vsaddu_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; CHECK-LABEL: vsaddu_vx_v32i64_evl12: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll index 90e1b5ce557523..549c6ca11e320e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll @@ -1447,8 +1447,6 @@ define <32 x i64> @vssub_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ret <32 x i64> %v } -; FIXME: We don't match vssub.vi on RV32. - define <32 x i64> @vssub_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; CHECK-LABEL: vssub_vx_v32i64_evl12: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll index 59899ab8b99945..683f1150310b39 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll @@ -1442,8 +1442,6 @@ define <32 x i64> @vssubu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ret <32 x i64> %v } -; FIXME: We don't match vssubu.vi on RV32. 
- define <32 x i64> @vssubu_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; CHECK-LABEL: vssubu_vx_v32i64_evl12: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/handle-noreg-with-implicit-def.mir b/llvm/test/CodeGen/RISCV/rvv/handle-noreg-with-implicit-def.mir index e090b313d4f7b8..7b4d200ef8a3b0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/handle-noreg-with-implicit-def.mir +++ b/llvm/test/CodeGen/RISCV/rvv/handle-noreg-with-implicit-def.mir @@ -9,8 +9,8 @@ body: | ; MIR-LABEL: name: vrgather_all_undef ; MIR: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF ; MIR-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF - ; MIR-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 - ; MIR-NEXT: early-clobber %1:vr = PseudoVRGATHER_VI_M1 [[DEF1]], killed [[PseudoRVVInitUndefM1_]], 0, 0, 5 /* e32 */, 0 /* tu, mu */ + ; MIR-NEXT: [[INIT_UNDEF:%[0-9]+]]:vr = INIT_UNDEF + ; MIR-NEXT: early-clobber %1:vr = PseudoVRGATHER_VI_M1 [[DEF1]], killed [[INIT_UNDEF]], 0, 0, 5 /* e32 */, 0 /* tu, mu */ ; MIR-NEXT: $v8 = COPY %1 ; MIR-NEXT: PseudoRET implicit $v8 %2:vr = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir b/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir index 539d319f3426dd..be6ed4d2a6aa14 100644 --- a/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir +++ b/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir @@ -14,10 +14,10 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm4 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_0 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: %pt2:vrm4 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_1 - ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 - ; CHECK-NEXT: 
[[INSERT_SUBREG2:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_1 + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm2 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm2_1 + ; CHECK-NEXT: [[INIT_UNDEF1:%[0-9]+]]:vr = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG1]], [[INIT_UNDEF1]], %subreg.sub_vrm1_1 ; CHECK-NEXT: early-clobber %6:vrm4 = PseudoVRGATHER_VI_M4 %pt2, killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M4 killed %6, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -52,10 +52,10 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm4 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_1 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: %pt2:vrm4 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_1 - ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 - ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_0 + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm2 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm2_1 + ; CHECK-NEXT: [[INIT_UNDEF1:%[0-9]+]]:vr = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG1]], [[INIT_UNDEF1]], %subreg.sub_vrm1_0 ; CHECK-NEXT: early-clobber %6:vrm4 = PseudoVRGATHER_VI_M4 %pt2, killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: 
[[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M4 killed %6, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -90,10 +90,10 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm4 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_2 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: %pt2:vrm4 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_0 - ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 - ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_3 + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm2 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm2_0 + ; CHECK-NEXT: [[INIT_UNDEF1:%[0-9]+]]:vr = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG1]], [[INIT_UNDEF1]], %subreg.sub_vrm1_3 ; CHECK-NEXT: early-clobber %6:vrm4 = PseudoVRGATHER_VI_M4 %pt2, killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M4 killed %6, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -128,10 +128,10 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm4 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_3 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: %pt2:vrm4 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM2_]], 
%subreg.sub_vrm2_0 - ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 - ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_2 + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm2 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm2_0 + ; CHECK-NEXT: [[INIT_UNDEF1:%[0-9]+]]:vr = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG1]], [[INIT_UNDEF1]], %subreg.sub_vrm1_2 ; CHECK-NEXT: early-clobber %6:vrm4 = PseudoVRGATHER_VI_M4 %pt2, killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M4 killed %6, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -166,8 +166,8 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm4 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M2_]], %subreg.sub_vrm2_0 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: %pt2:vrm4 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_1 + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm2 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm2_1 ; CHECK-NEXT: early-clobber %6:vrm4 = PseudoVRGATHER_VI_M4 %pt2, killed [[INSERT_SUBREG1]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M4 killed %6, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -202,8 +202,8 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm4 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M2_]], 
%subreg.sub_vrm2_1 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: %pt2:vrm4 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_0 + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm2 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm2_0 ; CHECK-NEXT: early-clobber %6:vrm4 = PseudoVRGATHER_VI_M4 %pt2, killed [[INSERT_SUBREG1]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M4 killed %6, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -239,12 +239,12 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_0 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: %pt2:vrm8 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_1 - ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 - ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_1 - ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 - ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_1 + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm4 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm4_1 + ; CHECK-NEXT: [[INIT_UNDEF1:%[0-9]+]]:vrm2 = 
INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[INIT_UNDEF1]], %subreg.sub_vrm2_1 + ; CHECK-NEXT: [[INIT_UNDEF2:%[0-9]+]]:vr = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[INIT_UNDEF2]], %subreg.sub_vrm1_1 ; CHECK-NEXT: early-clobber %6:vrm8 = PseudoVRGATHER_VI_M8 %pt2, killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %6, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -279,12 +279,12 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_1 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: %pt2:vrm8 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_1 - ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 - ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_1 - ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 - ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_0 + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm4 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm4_1 + ; CHECK-NEXT: [[INIT_UNDEF1:%[0-9]+]]:vrm2 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[INIT_UNDEF1]], %subreg.sub_vrm2_1 + ; CHECK-NEXT: [[INIT_UNDEF2:%[0-9]+]]:vr = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = 
INSERT_SUBREG [[INSERT_SUBREG2]], [[INIT_UNDEF2]], %subreg.sub_vrm1_0 ; CHECK-NEXT: early-clobber %6:vrm8 = PseudoVRGATHER_VI_M8 %pt2, killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %6, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -319,12 +319,12 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_2 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: %pt2:vrm8 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_1 - ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 - ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_0 - ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 - ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_3 + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm4 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm4_1 + ; CHECK-NEXT: [[INIT_UNDEF1:%[0-9]+]]:vrm2 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[INIT_UNDEF1]], %subreg.sub_vrm2_0 + ; CHECK-NEXT: [[INIT_UNDEF2:%[0-9]+]]:vr = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[INIT_UNDEF2]], %subreg.sub_vrm1_3 ; CHECK-NEXT: early-clobber %6:vrm8 = PseudoVRGATHER_VI_M8 %pt2, killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: 
[[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %6, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -359,12 +359,12 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_3 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: %pt2:vrm8 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_1 - ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 - ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_0 - ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 - ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_2 + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm4 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm4_1 + ; CHECK-NEXT: [[INIT_UNDEF1:%[0-9]+]]:vrm2 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[INIT_UNDEF1]], %subreg.sub_vrm2_0 + ; CHECK-NEXT: [[INIT_UNDEF2:%[0-9]+]]:vr = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[INIT_UNDEF2]], %subreg.sub_vrm1_2 ; CHECK-NEXT: early-clobber %6:vrm8 = PseudoVRGATHER_VI_M8 %pt2, killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %6, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -399,12 +399,12 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], 
[[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_4 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: %pt2:vrm8 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_0 - ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 - ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_3 - ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 - ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_5 + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm4 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm4_0 + ; CHECK-NEXT: [[INIT_UNDEF1:%[0-9]+]]:vrm2 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[INIT_UNDEF1]], %subreg.sub_vrm2_3 + ; CHECK-NEXT: [[INIT_UNDEF2:%[0-9]+]]:vr = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[INIT_UNDEF2]], %subreg.sub_vrm1_5 ; CHECK-NEXT: early-clobber %6:vrm8 = PseudoVRGATHER_VI_M8 %pt2, killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %6, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -439,12 +439,12 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_5 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: %pt2:vrm8 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = 
PseudoRVVInitUndefM4 - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_0 - ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 - ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_3 - ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 - ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_4 + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm4 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm4_0 + ; CHECK-NEXT: [[INIT_UNDEF1:%[0-9]+]]:vrm2 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[INIT_UNDEF1]], %subreg.sub_vrm2_3 + ; CHECK-NEXT: [[INIT_UNDEF2:%[0-9]+]]:vr = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[INIT_UNDEF2]], %subreg.sub_vrm1_4 ; CHECK-NEXT: early-clobber %6:vrm8 = PseudoVRGATHER_VI_M8 %pt2, killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %6, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -479,12 +479,12 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_6 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: %pt2:vrm8 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_0 - ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 - ; CHECK-NEXT: 
[[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_2 - ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 - ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_7 + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm4 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm4_0 + ; CHECK-NEXT: [[INIT_UNDEF1:%[0-9]+]]:vrm2 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[INIT_UNDEF1]], %subreg.sub_vrm2_2 + ; CHECK-NEXT: [[INIT_UNDEF2:%[0-9]+]]:vr = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[INIT_UNDEF2]], %subreg.sub_vrm1_7 ; CHECK-NEXT: early-clobber %6:vrm8 = PseudoVRGATHER_VI_M8 %pt2, killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %6, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -519,12 +519,12 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_7 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: %pt2:vrm8 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_0 - ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 - ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_2 - ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 - ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = 
INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_6 + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm4 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm4_0 + ; CHECK-NEXT: [[INIT_UNDEF1:%[0-9]+]]:vrm2 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[INIT_UNDEF1]], %subreg.sub_vrm2_2 + ; CHECK-NEXT: [[INIT_UNDEF2:%[0-9]+]]:vr = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[INIT_UNDEF2]], %subreg.sub_vrm1_6 ; CHECK-NEXT: early-clobber %6:vrm8 = PseudoVRGATHER_VI_M8 %pt2, killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %6, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -559,10 +559,10 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M2_]], %subreg.sub_vrm2_0 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: %pt2:vrm8 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_1 - ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 - ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_1 + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm4 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm4_1 + ; CHECK-NEXT: [[INIT_UNDEF1:%[0-9]+]]:vrm2 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[INIT_UNDEF1]], %subreg.sub_vrm2_1 ; CHECK-NEXT: 
early-clobber %6:vrm8 = PseudoVRGATHER_VI_M8 %pt2, killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %6, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -597,10 +597,10 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M2_]], %subreg.sub_vrm2_1 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: %pt2:vrm8 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_1 - ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 - ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_0 + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm4 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm4_1 + ; CHECK-NEXT: [[INIT_UNDEF1:%[0-9]+]]:vrm2 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[INIT_UNDEF1]], %subreg.sub_vrm2_0 ; CHECK-NEXT: early-clobber %6:vrm8 = PseudoVRGATHER_VI_M8 %pt2, killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %6, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -635,10 +635,10 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M2_]], %subreg.sub_vrm2_2 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: %pt2:vrm8 = IMPLICIT_DEF - ; CHECK-NEXT: 
[[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_0 - ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 - ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_3 + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm4 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm4_0 + ; CHECK-NEXT: [[INIT_UNDEF1:%[0-9]+]]:vrm2 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[INIT_UNDEF1]], %subreg.sub_vrm2_3 ; CHECK-NEXT: early-clobber %6:vrm8 = PseudoVRGATHER_VI_M8 %pt2, killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %6, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -673,10 +673,10 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M2_]], %subreg.sub_vrm2_3 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: %pt2:vrm8 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_0 - ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 - ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_2 + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm4 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm4_0 + ; CHECK-NEXT: [[INIT_UNDEF1:%[0-9]+]]:vrm2 = 
INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[INIT_UNDEF1]], %subreg.sub_vrm2_2 ; CHECK-NEXT: early-clobber %6:vrm8 = PseudoVRGATHER_VI_M8 %pt2, killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %6, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -711,8 +711,8 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M4_]], %subreg.sub_vrm4_0 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: %pt2:vrm8 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_1 + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm4 = INIT_UNDEF + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm4_1 ; CHECK-NEXT: early-clobber %6:vrm8 = PseudoVRGATHER_VI_M8 %pt2, killed [[INSERT_SUBREG1]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %6, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -747,8 +747,8 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M4_]], %subreg.sub_vrm4_1 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: %pt2:vrm8 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_0 + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vrm4 = INIT_UNDEF + ; CHECK-NEXT: 
[[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[INIT_UNDEF]], %subreg.sub_vrm4_0 ; CHECK-NEXT: early-clobber %6:vrm8 = PseudoVRGATHER_VI_M8 %pt2, killed [[INSERT_SUBREG1]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %6, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype diff --git a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir index 8df2c60c926c38..69078710e9ccfd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir +++ b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir @@ -78,8 +78,8 @@ body: | ; CHECK-LABEL: name: undef_early_clobber_chain ; CHECK: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 208 /* e32, m1, ta, ma */, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 - ; CHECK-NEXT: early-clobber %1:vr = PseudoVRGATHER_VI_M1 undef [[DEF]], [[PseudoRVVInitUndefM1_]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:vr = INIT_UNDEF + ; CHECK-NEXT: early-clobber %1:vr = PseudoVRGATHER_VI_M1 undef [[DEF]], [[INIT_UNDEF]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: $v8 = COPY %1 ; CHECK-NEXT: PseudoRET implicit $v8 %2:vr = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll index 2a4fbb248cd9c1..feeef73e538ae0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll @@ -1425,7 +1425,6 @@ define @vadd_vi_nxv32i32_evl_nx8( %va, %v } -; FIXME: The first vadd.vi should be able to infer that its AVL is equivalent to VLMAX. 
; FIXME: The upper half of the operation is doing nothing but we don't catch ; that on RV64; we issue a usubsat(and (vscale x 16), 0xffffffff, vscale x 16) ; (the "original" %evl is the "and", due to known-bits issues with legalizing diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll index 5fdfb332da7cf8..f65e708f5303cc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll @@ -1062,7 +1062,6 @@ define @vmax_vx_nxv32i32_evl_nx8( %va, i3 ret %v } -; FIXME: The first vmax.vx should be able to infer that its AVL is equivalent to VLMAX. ; FIXME: The upper half of the operation is doing nothing but we don't catch ; that on RV64; we issue a usubsat(and (vscale x 16), 0xffffffff, vscale x 16) ; (the "original" %evl is the "and", due to known-bits issues with legalizing diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll index 7d678950b7a3c1..df1ad58e5ecbde 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll @@ -1061,7 +1061,6 @@ define @vmaxu_vx_nxv32i32_evl_nx8( %va, i ret %v } -; FIXME: The first vmaxu.vx should be able to infer that its AVL is equivalent to VLMAX. ; FIXME: The upper half of the operation is doing nothing but we don't catch ; that on RV64; we issue a usubsat(and (vscale x 16), 0xffffffff, vscale x 16) ; (the "original" %evl is the "and", due to known-bits issues with legalizing diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll index 98a288ed68b9ac..0bf0638633aa45 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll @@ -1062,7 +1062,6 @@ define @vmin_vx_nxv32i32_evl_nx8( %va, i3 ret %v } -; FIXME: The first vmin.vx should be able to infer that its AVL is equivalent to VLMAX. 
; FIXME: The upper half of the operation is doing nothing but we don't catch ; that on RV64; we issue a usubsat(and (vscale x 16), 0xffffffff, vscale x 16) ; (the "original" %evl is the "and", due to known-bits issues with legalizing diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll index 34b554b7ff5145..2acebdf2e646d4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll @@ -1061,7 +1061,6 @@ define @vminu_vx_nxv32i32_evl_nx8( %va, i ret %v } -; FIXME: The first vminu.vx should be able to infer that its AVL is equivalent to VLMAX. ; FIXME: The upper half of the operation is doing nothing but we don't catch ; that on RV64; we issue a usubsat(and (vscale x 16), 0xffffffff, vscale x 16) ; (the "original" %evl is the "and", due to known-bits issues with legalizing diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir index 6858231bf0e6cb..2f02be025485cc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir @@ -73,3 +73,19 @@ body: | %passthru:vr = COPY $v8 %x:vr = PseudoVADD_VV_M1 %passthru, $noreg, $noreg, 4, 5 /* e32 */, 0 /* tu, mu */ %y:vr = PseudoVMV_V_V_M1 $noreg, %x, 4, 5 /* e32 */, 1 /* ta, mu */ +... 
+--- +# Shouldn't be folded because the EEWs don't match +name: different_eew +body: | + bb.0: + liveins: $v8 + ; CHECK-LABEL: name: different_eew + ; CHECK: liveins: $v8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %passthru:vr = COPY $v8 + ; CHECK-NEXT: %x:vr = PseudoVADD_VV_MF4 %passthru, $noreg, $noreg, 4, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: %y:vr = PseudoVMV_V_V_MF8 %passthru, %x, 4, 3 /* e8 */, 0 /* tu, mu */ + %passthru:vr = COPY $v8 + %x:vr = PseudoVADD_VV_MF4 %passthru, $noreg, $noreg, 4, 4 /* e16 */, 0 /* tu, mu */ + %y:vr = PseudoVMV_V_V_MF8 %passthru, %x, 4, 3 /* e8 */, 0 /* tu, mu */ diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll index a4d58985b75de5..b7ce0e3f196f05 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll @@ -1394,8 +1394,6 @@ define @i1_zext( %va, %vb } ; %x.i32 and %y.i32 are disjoint, so DAGCombiner will combine it into an or. -; FIXME: We should be able to recover the or into vwaddu.vv if the disjoint -; flag is set. 
define @vwaddu_vv_disjoint_or_add( %x.i8, %y.i8) { ; CHECK-LABEL: vwaddu_vv_disjoint_or_add: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll index c2511a4992cf55..e86c368e0fe8aa 100644 --- a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll +++ b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll @@ -365,13 +365,13 @@ define arm_aapcs_vfpcc void @mul_i32(ptr %A, ptr %B, i64 %C, ptr %D) { ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: ldr.w lr, [sp, #20] -; CHECK-NEXT: vmov.f32 s14, s5 ; CHECK-NEXT: vmov.f32 s10, s1 +; CHECK-NEXT: vmov.f32 s14, s5 ; CHECK-NEXT: vmov r5, s4 ; CHECK-NEXT: vmov.f32 s4, s6 ; CHECK-NEXT: vmov.f32 s6, s7 -; CHECK-NEXT: vmov r1, s14 ; CHECK-NEXT: vmov r0, s10 +; CHECK-NEXT: vmov r1, s14 ; CHECK-NEXT: smull r12, r3, r1, r0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vmov.f32 s0, s2 diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll index 6d581afe9fb31e..8eb941371f9937 100644 --- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -229,9 +229,9 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: vmov.f32 s16, s10 ; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: vmov.f32 s20, s14 +; CHECK-NEXT: mov.w r8, #0 ; CHECK-NEXT: vmov.f32 s18, s11 ; CHECK-NEXT: vmov.f32 s22, s15 -; CHECK-NEXT: mov.w r8, #0 ; CHECK-NEXT: vmullb.s32 q6, q5, q4 ; CHECK-NEXT: vmov.f32 s14, s13 ; CHECK-NEXT: vmov r4, r7, d12 @@ -780,8 +780,8 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vldrw.u32 q2, [r1], #16 ; CHECK-NEXT: vmov.f32 s12, s6 -; CHECK-NEXT: vmov.f32 s16, s10 ; CHECK-NEXT: vmov.f32 s14, s7 +; CHECK-NEXT: vmov.f32 s16, s10 ; CHECK-NEXT: vmov.f32 s18, s11 ; CHECK-NEXT: vmullb.u32 q5, q4, 
q3 ; CHECK-NEXT: vmov.f32 s6, s5 @@ -792,6 +792,7 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: sbcs r5, r5, #0 ; CHECK-NEXT: mov.w r6, #0 ; CHECK-NEXT: csetm r5, lo +; CHECK-NEXT: vmullb.u32 q4, q2, q1 ; CHECK-NEXT: bfi r6, r5, #0, #8 ; CHECK-NEXT: vmov r4, r5, d11 ; CHECK-NEXT: lsrl r4, r5, #31 @@ -800,12 +801,11 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: sbcs r5, r5, #0 ; CHECK-NEXT: csetm r5, lo ; CHECK-NEXT: bfi r6, r5, #8, #8 -; CHECK-NEXT: vmsr p0, r6 -; CHECK-NEXT: vpsel q3, q3, q0 -; CHECK-NEXT: vmullb.u32 q4, q2, q1 ; CHECK-NEXT: vmov r10, r5, d8 ; CHECK-NEXT: lsrl r10, r5, #31 +; CHECK-NEXT: vmsr p0, r6 ; CHECK-NEXT: subs.w r6, r10, #-1 +; CHECK-NEXT: vpsel q3, q3, q0 ; CHECK-NEXT: sbcs r5, r5, #0 ; CHECK-NEXT: mov.w r6, #0 ; CHECK-NEXT: csetm r5, lo diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vimage_alias.s b/llvm/test/MC/AMDGPU/gfx12_asm_vimage_alias.s index a88a3ef100fb49..fceab4b7830f99 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vimage_alias.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vimage_alias.s @@ -29,3 +29,9 @@ image_atomic_min_num_flt v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D image_atomic_max_num_flt v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D // GFX12: image_atomic_max_flt v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x40,0x61,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] + +image_atomic_fmin v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +// GFX12: image_atomic_min_flt v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x00,0x61,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] + +image_atomic_fmax v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +// GFX12: image_atomic_max_flt v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x40,0x61,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] diff --git a/llvm/test/Transforms/InstCombine/ARM/neon-intrinsics.ll b/llvm/test/Transforms/InstCombine/ARM/neon-intrinsics.ll index 
68dda24b537471..b0435f2a276f1f 100644 --- a/llvm/test/Transforms/InstCombine/ARM/neon-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/ARM/neon-intrinsics.ll @@ -29,7 +29,7 @@ define void @test() { define { <4 x i16>, <4 x i16> } @test_vld1x2_no_align(ptr align 16 %a) { ; CHECK-LABEL: define { <4 x i16>, <4 x i16> } @test_vld1x2_no_align( ; CHECK-SAME: ptr align 16 [[A:%.*]]) { -; CHECK-NEXT: [[TMP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr [[A]]) +; CHECK-NEXT: [[TMP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 16 [[A]]) ; CHECK-NEXT: ret { <4 x i16>, <4 x i16> } [[TMP]] ; %tmp = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr %a) @@ -39,7 +39,7 @@ define { <4 x i16>, <4 x i16> } @test_vld1x2_no_align(ptr align 16 %a) { define { <4 x i16>, <4 x i16> } @test_vld1x2_lower_align(ptr align 16 %a) { ; CHECK-LABEL: define { <4 x i16>, <4 x i16> } @test_vld1x2_lower_align( ; CHECK-SAME: ptr align 16 [[A:%.*]]) { -; CHECK-NEXT: [[TMP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 8 [[A]]) +; CHECK-NEXT: [[TMP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 16 [[A]]) ; CHECK-NEXT: ret { <4 x i16>, <4 x i16> } [[TMP]] ; %tmp = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 8 %a) @@ -59,7 +59,7 @@ define { <4 x i16>, <4 x i16> } @test_vld1x2_higher_align(ptr align 8 %a) { define void @test_vst1x2_no_align(ptr align 16 %a, <4 x i16> %b0, <4 x i16> %b1) { ; CHECK-LABEL: define void @test_vst1x2_no_align( ; CHECK-SAME: ptr align 16 [[A:%.*]], <4 x i16> [[B0:%.*]], <4 x i16> [[B1:%.*]]) { -; CHECK-NEXT: call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr [[A]], <4 x i16> [[B0]], <4 x i16> [[B1]]) +; CHECK-NEXT: call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr align 16 [[A]], <4 x i16> [[B0]], <4 x i16> [[B1]]) ; CHECK-NEXT: ret void ; call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr %a, <4 x i16> %b0, <4 x i16> %b1) diff 
--git a/llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp b/llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp index fe711619c63203..cc0aca10fc6da9 100644 --- a/llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp +++ b/llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp @@ -316,6 +316,27 @@ TEST_P(RISCVInstrInfoTest, DescribeLoadedValue) { MF->deleteMachineBasicBlock(MBB); } +TEST_P(RISCVInstrInfoTest, GetDestEEW) { + const RISCVInstrInfo *TII = ST->getInstrInfo(); + EXPECT_EQ(RISCV::getDestLog2EEW(TII->get(RISCV::VADD_VV), 3), 3u); + EXPECT_EQ(RISCV::getDestLog2EEW(TII->get(RISCV::VWADD_VV), 3), 4u); + EXPECT_EQ(RISCV::getDestLog2EEW(TII->get(RISCV::VLE32_V), 5), 5u); + EXPECT_EQ(RISCV::getDestLog2EEW(TII->get(RISCV::VLSE32_V), 5), 5u); + EXPECT_EQ(RISCV::getDestLog2EEW(TII->get(RISCV::VREDSUM_VS), 4), 4u); + EXPECT_EQ(RISCV::getDestLog2EEW(TII->get(RISCV::VWREDSUM_VS), 4), 5u); + EXPECT_EQ(RISCV::getDestLog2EEW(TII->get(RISCV::VFWREDOSUM_VS), 5), 6u); + EXPECT_EQ(RISCV::getDestLog2EEW(TII->get(RISCV::VFCVT_RTZ_XU_F_V), 4), 4u); + EXPECT_EQ(RISCV::getDestLog2EEW(TII->get(RISCV::VFWCVT_RTZ_XU_F_V), 4), 5u); + EXPECT_EQ(RISCV::getDestLog2EEW(TII->get(RISCV::VSLL_VI), 4), 4u); + EXPECT_EQ(RISCV::getDestLog2EEW(TII->get(RISCV::VWSLL_VI), 4), 5u); + EXPECT_EQ(RISCV::getDestLog2EEW(TII->get(RISCV::VMSEQ_VV), 4), 0u); + EXPECT_EQ(RISCV::getDestLog2EEW(TII->get(RISCV::VMAND_MM), 0), 0u); + EXPECT_EQ(RISCV::getDestLog2EEW(TII->get(RISCV::VIOTA_M), 3), 3u); + EXPECT_EQ(RISCV::getDestLog2EEW(TII->get(RISCV::VQMACCU_2x8x2), 3), 5u); + EXPECT_EQ(RISCV::getDestLog2EEW(TII->get(RISCV::VFWMACC_4x4x4), 4), 5u); + EXPECT_EQ(RISCV::getDestLog2EEW(TII->get(RISCV::THVdotVMAQA_VV), 5), 5u); +} + } // namespace INSTANTIATE_TEST_SUITE_P(RV32And64, RISCVInstrInfoTest,