From 964c92d04f039a205327cb47c75919096f9fca94 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 17 Jun 2024 08:20:37 -0700 Subject: [PATCH] [RISCV][InsertVSETVLI] Eliminate the AVLIsIgnored state (#94658) As noted in one of the existing comments, the job AVLIsIgnored was filling was really more of a demanded fields role. Since we recently realized we can use the values of VL on the MI even in the backwards pass, let's exploit that to improve demanded fields, and delete AVLIsIgnored. Note that the test change is a real regression, but only incidental to this patch. The backwards pass doesn't have the information that the VL following a VL-preserving vtype is non-zero. This is an existing problem; this patch just adds a few more cases where we prove VL-preserving is legal. --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 57 +--- .../RISCV/rvv/fixed-vectors-expandload-fp.ll | 90 ++++--- .../RISCV/rvv/fixed-vectors-expandload-int.ll | 60 +++-- .../CodeGen/RISCV/rvv/fixed-vectors-llrint.ll | 2 +- .../CodeGen/RISCV/rvv/fixed-vectors-lrint.ll | 4 +- .../RISCV/rvv/fixed-vectors-masked-gather.ll | 243 +++++++++++------- .../RISCV/rvv/fixed-vectors-masked-scatter.ll | 12 +- .../RISCV/rvv/fixed-vectors-unaligned.ll | 6 +- .../RISCV/rvv/vsetvli-insert-crossbb.mir | 2 +- 9 files changed, 255 insertions(+), 221 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index e7c13f180f64e6..151a7821f835da 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -404,7 +404,9 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) { if (RISCVII::hasSEWOp(TSFlags)) { Res.demandVTYPE(); if (RISCVII::hasVLOp(TSFlags)) - Res.demandVL(); + if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); + !VLOp.isReg() || !VLOp.isUndef()) + Res.demandVL(); // Behavior is independent of mask policy.
if (!RISCVII::usesMaskPolicy(TSFlags)) @@ -524,7 +526,6 @@ class VSETVLIInfo { AVLIsReg, AVLIsImm, AVLIsVLMAX, - AVLIsIgnored, Unknown, } State = Uninitialized; @@ -564,12 +565,9 @@ class VSETVLIInfo { void setAVLVLMAX() { State = AVLIsVLMAX; } - void setAVLIgnored() { State = AVLIsIgnored; } - bool hasAVLImm() const { return State == AVLIsImm; } bool hasAVLReg() const { return State == AVLIsReg; } bool hasAVLVLMAX() const { return State == AVLIsVLMAX; } - bool hasAVLIgnored() const { return State == AVLIsIgnored; } Register getAVLReg() const { assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual()); return AVLRegDef.DefReg; @@ -600,8 +598,6 @@ class VSETVLIInfo { setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg()); else if (Info.hasAVLVLMAX()) setAVLVLMAX(); - else if (Info.hasAVLIgnored()) - setAVLIgnored(); else { assert(Info.hasAVLImm()); setAVLImm(Info.getAVLImm()); @@ -622,8 +618,6 @@ class VSETVLIInfo { } if (hasAVLVLMAX()) return true; - if (hasAVLIgnored()) - return false; return false; } @@ -645,9 +639,6 @@ class VSETVLIInfo { if (hasAVLVLMAX()) return Other.hasAVLVLMAX() && hasSameVLMAX(Other); - if (hasAVLIgnored()) - return Other.hasAVLIgnored(); - return false; } @@ -821,8 +812,6 @@ class VSETVLIInfo { OS << "AVLImm=" << (unsigned)AVLImm; if (hasAVLVLMAX()) OS << "AVLVLMAX"; - if (hasAVLIgnored()) - OS << "AVLIgnored"; OS << ", " << "VLMul=" << (unsigned)VLMul << ", " << "SEW=" << (unsigned)SEW << ", " @@ -938,7 +927,8 @@ RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const { if (AVLReg == RISCV::X0) NewInfo.setAVLVLMAX(); else if (MI.getOperand(1).isUndef()) - NewInfo.setAVLIgnored(); + // Otherwise use an AVL of 1 to avoid depending on previous vl. + NewInfo.setAVLImm(1); else { VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS); NewInfo.setAVLRegDef(VNI, AVLReg); @@ -1014,17 +1004,17 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const { else InstrInfo.setAVLImm(Imm); } else if (VLOp.isUndef()) { - InstrInfo.setAVLIgnored(); + // Otherwise use an AVL of 1 to avoid depending on previous vl. + InstrInfo.setAVLImm(1); } else { VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS); InstrInfo.setAVLRegDef(VNI, VLOp.getReg()); } } else { assert(isScalarExtractInstr(MI)); - // TODO: If we are more clever about x0,x0 insertion then we should be able - // to deduce that the VL is ignored based off of DemandedFields, and remove - // the AVLIsIgnored state. Then we can just use an arbitrary immediate AVL. - InstrInfo.setAVLIgnored(); + // Pick a random value for state tracking purposes, will be ignored via + // the demanded fields mechanism + InstrInfo.setAVLImm(1); } #ifndef NDEBUG if (std::optional EEW = getEEWForLoadStore(MI)) { @@ -1104,28 +1094,6 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, return; } - if (Info.hasAVLIgnored()) { - // We can only use x0, x0 if there's no chance of the vtype change causing - // the previous vl to become invalid. - if (PrevInfo.isValid() && !PrevInfo.isUnknown() && - Info.hasSameVLMAX(PrevInfo)) { - auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) - .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addReg(RISCV::X0, RegState::Kill) - .addImm(Info.encodeVTYPE()) - .addReg(RISCV::VL, RegState::Implicit); - LIS->InsertMachineInstrInMaps(*MI); - return; - } - // Otherwise use an AVL of 1 to avoid depending on previous vl. 
- auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI)) - .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addImm(1) - .addImm(Info.encodeVTYPE()); - LIS->InsertMachineInstrInMaps(*MI); - return; - } - if (Info.hasAVLVLMAX()) { Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass); auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) @@ -1534,11 +1502,6 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { return; } - // If the AVL isn't used in its predecessors then bail, since we have no AVL - // to insert a vsetvli with. - if (AvailableInfo.hasAVLIgnored()) - return; - // Model the effect of changing the input state of the block MBB to // AvailableInfo. We're looking for two issues here; one legality, // one profitability. diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll index 48e820243c9578..8b31166e313deb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll @@ -44,15 +44,16 @@ define <2 x half> @expandload_v2f16(ptr %base, <2 x half> %src0, <2 x i1> %mask) ; RV32-NEXT: ret ; RV32-NEXT: .LBB1_3: # %cond.load ; RV32-NEXT: flh fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e16, m2, tu, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV32-NEXT: vfmv.s.f v8, fa5 ; RV32-NEXT: addi a0, a0, 2 ; RV32-NEXT: andi a1, a1, 2 ; RV32-NEXT: beqz a1, .LBB1_2 ; RV32-NEXT: .LBB1_4: # %cond.load1 ; RV32-NEXT: flh fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vfmv.s.f v9, fa5 +; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: ret ; @@ -69,15 +70,16 @@ define <2 x half> @expandload_v2f16(ptr %base, <2 x half> %src0, <2 x i1> %mask) ; RV64-NEXT: ret ; RV64-NEXT: .LBB1_3: # %cond.load ; RV64-NEXT: flh fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e16, m2, tu, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64-NEXT: vfmv.s.f v8, fa5 ; RV64-NEXT: addi a0, a0, 2 ; RV64-NEXT: andi a1, a1, 2 ; RV64-NEXT: beqz a1, .LBB1_2 ; RV64-NEXT: .LBB1_4: # %cond.load1 ; RV64-NEXT: flh fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vfmv.s.f v9, fa5 +; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: ret %res = call <2 x half> @llvm.masked.expandload.v2f16(ptr align 2 %base, <2 x i1> %mask, <2 x half> %src0) @@ -105,15 +107,16 @@ define <4 x half> @expandload_v4f16(ptr %base, <4 x half> %src0, <4 x i1> %mask) ; RV32-NEXT: ret ; RV32-NEXT: .LBB2_5: # %cond.load ; RV32-NEXT: flh fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 4, e16, m2, tu, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV32-NEXT: vfmv.s.f v8, fa5 ; RV32-NEXT: addi a0, a0, 2 ; RV32-NEXT: andi a2, a1, 2 ; RV32-NEXT: beqz a2, .LBB2_2 ; RV32-NEXT: .LBB2_6: # %cond.load1 ; RV32-NEXT: flh fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vfmv.s.f v9, fa5 +; RV32-NEXT: vsetivli zero, 2, e16, mf2, tu, ma ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: addi a0, a0, 2 ; RV32-NEXT: andi a2, a1, 4 @@ -152,15 +155,16 @@ define <4 x half> @expandload_v4f16(ptr %base, <4 x half> %src0, <4 x i1> %mask) ; RV64-NEXT: ret ; RV64-NEXT: .LBB2_5: # %cond.load ; RV64-NEXT: flh fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 4, e16, m2, tu, ma +; 
RV64-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64-NEXT: vfmv.s.f v8, fa5 ; RV64-NEXT: addi a0, a0, 2 ; RV64-NEXT: andi a2, a1, 2 ; RV64-NEXT: beqz a2, .LBB2_2 ; RV64-NEXT: .LBB2_6: # %cond.load1 ; RV64-NEXT: flh fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vfmv.s.f v9, fa5 +; RV64-NEXT: vsetivli zero, 2, e16, mf2, tu, ma ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: addi a0, a0, 2 ; RV64-NEXT: andi a2, a1, 4 @@ -216,15 +220,16 @@ define <8 x half> @expandload_v8f16(ptr %base, <8 x half> %src0, <8 x i1> %mask) ; RV32-NEXT: ret ; RV32-NEXT: .LBB3_9: # %cond.load ; RV32-NEXT: flh fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV32-NEXT: vfmv.s.f v8, fa5 ; RV32-NEXT: addi a0, a0, 2 ; RV32-NEXT: andi a2, a1, 2 ; RV32-NEXT: beqz a2, .LBB3_2 ; RV32-NEXT: .LBB3_10: # %cond.load1 ; RV32-NEXT: flh fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vfmv.s.f v9, fa5 +; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: addi a0, a0, 2 ; RV32-NEXT: andi a2, a1, 4 @@ -307,15 +312,16 @@ define <8 x half> @expandload_v8f16(ptr %base, <8 x half> %src0, <8 x i1> %mask) ; RV64-NEXT: ret ; RV64-NEXT: .LBB3_9: # %cond.load ; RV64-NEXT: flh fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64-NEXT: vfmv.s.f v8, fa5 ; RV64-NEXT: addi a0, a0, 2 ; RV64-NEXT: andi a2, a1, 2 ; RV64-NEXT: beqz a2, .LBB3_2 ; RV64-NEXT: .LBB3_10: # %cond.load1 ; RV64-NEXT: flh fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vfmv.s.f v9, fa5 +; RV64-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: addi a0, a0, 2 ; RV64-NEXT: andi a2, a1, 4 @@ -412,15 +418,16 @@ define <2 x float> @expandload_v2f32(ptr %base, <2 x float> %src0, <2 x i1> %mas ; RV32-NEXT: ret ; RV32-NEXT: .LBB5_3: # %cond.load ; RV32-NEXT: flw fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e32, m4, tu, ma +; RV32-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV32-NEXT: vfmv.s.f v8, fa5 ; RV32-NEXT: addi a0, a0, 4 ; RV32-NEXT: andi a1, a1, 2 ; RV32-NEXT: beqz a1, .LBB5_2 ; RV32-NEXT: .LBB5_4: # %cond.load1 ; RV32-NEXT: flw fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32-NEXT: vfmv.s.f v9, fa5 +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: ret ; @@ -437,15 +444,16 @@ define <2 x float> @expandload_v2f32(ptr %base, <2 x float> %src0, <2 x i1> %mas ; RV64-NEXT: ret ; RV64-NEXT: .LBB5_3: # %cond.load ; RV64-NEXT: flw fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e32, m4, tu, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64-NEXT: vfmv.s.f v8, fa5 ; RV64-NEXT: addi a0, a0, 4 ; RV64-NEXT: andi a1, a1, 2 ; RV64-NEXT: beqz a1, .LBB5_2 ; RV64-NEXT: .LBB5_4: # %cond.load1 ; RV64-NEXT: flw fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vfmv.s.f v9, fa5 +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: ret %res = call <2 x float> @llvm.masked.expandload.v2f32(ptr align 4 %base, <2 x i1> %mask, <2 x float> %src0) @@ -473,15 +481,16 @@ define <4 x float> @expandload_v4f32(ptr %base, <4 x float> %src0, <4 x i1> %mas ; 
RV32-NEXT: ret ; RV32-NEXT: .LBB6_5: # %cond.load ; RV32-NEXT: flw fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 4, e32, m4, tu, ma +; RV32-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV32-NEXT: vfmv.s.f v8, fa5 ; RV32-NEXT: addi a0, a0, 4 ; RV32-NEXT: andi a2, a1, 2 ; RV32-NEXT: beqz a2, .LBB6_2 ; RV32-NEXT: .LBB6_6: # %cond.load1 ; RV32-NEXT: flw fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32-NEXT: vfmv.s.f v9, fa5 +; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: addi a0, a0, 4 ; RV32-NEXT: andi a2, a1, 4 @@ -520,15 +529,16 @@ define <4 x float> @expandload_v4f32(ptr %base, <4 x float> %src0, <4 x i1> %mas ; RV64-NEXT: ret ; RV64-NEXT: .LBB6_5: # %cond.load ; RV64-NEXT: flw fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 4, e32, m4, tu, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64-NEXT: vfmv.s.f v8, fa5 ; RV64-NEXT: addi a0, a0, 4 ; RV64-NEXT: andi a2, a1, 2 ; RV64-NEXT: beqz a2, .LBB6_2 ; RV64-NEXT: .LBB6_6: # %cond.load1 ; RV64-NEXT: flw fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vfmv.s.f v9, fa5 +; RV64-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: addi a0, a0, 4 ; RV64-NEXT: andi a2, a1, 4 @@ -584,15 +594,16 @@ define <8 x float> @expandload_v8f32(ptr %base, <8 x float> %src0, <8 x i1> %mas ; RV32-NEXT: ret ; RV32-NEXT: .LBB7_9: # %cond.load ; RV32-NEXT: flw fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; RV32-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV32-NEXT: vfmv.s.f v8, fa5 ; RV32-NEXT: addi a0, a0, 4 ; RV32-NEXT: andi a2, a1, 2 ; RV32-NEXT: beqz a2, .LBB7_2 ; RV32-NEXT: .LBB7_10: # %cond.load1 ; RV32-NEXT: flw fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32-NEXT: vfmv.s.f v10, fa5 +; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV32-NEXT: vslideup.vi v8, v10, 1 ; RV32-NEXT: addi a0, a0, 4 ; RV32-NEXT: andi a2, a1, 4 @@ -675,15 +686,16 @@ define <8 x float> @expandload_v8f32(ptr %base, <8 x float> %src0, <8 x i1> %mas ; RV64-NEXT: ret ; RV64-NEXT: .LBB7_9: # %cond.load ; RV64-NEXT: flw fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64-NEXT: vfmv.s.f v8, fa5 ; RV64-NEXT: addi a0, a0, 4 ; RV64-NEXT: andi a2, a1, 2 ; RV64-NEXT: beqz a2, .LBB7_2 ; RV64-NEXT: .LBB7_10: # %cond.load1 ; RV64-NEXT: flw fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vfmv.s.f v10, fa5 +; RV64-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64-NEXT: vslideup.vi v8, v10, 1 ; RV64-NEXT: addi a0, a0, 4 ; RV64-NEXT: andi a2, a1, 4 @@ -780,15 +792,16 @@ define <2 x double> @expandload_v2f64(ptr %base, <2 x double> %src0, <2 x i1> %m ; RV32-NEXT: ret ; RV32-NEXT: .LBB9_3: # %cond.load ; RV32-NEXT: fld fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e64, m8, tu, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV32-NEXT: vfmv.s.f v8, fa5 ; RV32-NEXT: addi a0, a0, 8 ; RV32-NEXT: andi a1, a1, 2 ; RV32-NEXT: beqz a1, .LBB9_2 ; RV32-NEXT: .LBB9_4: # %cond.load1 ; RV32-NEXT: fld fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vfmv.s.f v9, fa5 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: ret ; @@ -805,15 +818,16 @@ define <2 x double> 
@expandload_v2f64(ptr %base, <2 x double> %src0, <2 x i1> %m ; RV64-NEXT: ret ; RV64-NEXT: .LBB9_3: # %cond.load ; RV64-NEXT: fld fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e64, m8, tu, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV64-NEXT: vfmv.s.f v8, fa5 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a1, a1, 2 ; RV64-NEXT: beqz a1, .LBB9_2 ; RV64-NEXT: .LBB9_4: # %cond.load1 ; RV64-NEXT: fld fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vfmv.s.f v9, fa5 +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: ret %res = call <2 x double> @llvm.masked.expandload.v2f64(ptr align 8 %base, <2 x i1> %mask, <2 x double> %src0) @@ -841,15 +855,16 @@ define <4 x double> @expandload_v4f64(ptr %base, <4 x double> %src0, <4 x i1> %m ; RV32-NEXT: ret ; RV32-NEXT: .LBB10_5: # %cond.load ; RV32-NEXT: fld fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 4, e64, m8, tu, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV32-NEXT: vfmv.s.f v8, fa5 ; RV32-NEXT: addi a0, a0, 8 ; RV32-NEXT: andi a2, a1, 2 ; RV32-NEXT: beqz a2, .LBB10_2 ; RV32-NEXT: .LBB10_6: # %cond.load1 ; RV32-NEXT: fld fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vfmv.s.f v10, fa5 +; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma ; RV32-NEXT: vslideup.vi v8, v10, 1 ; RV32-NEXT: addi a0, a0, 8 ; RV32-NEXT: andi a2, a1, 4 @@ -888,15 +903,16 @@ define <4 x double> @expandload_v4f64(ptr %base, <4 x double> %src0, <4 x i1> %m ; RV64-NEXT: ret ; RV64-NEXT: .LBB10_5: # %cond.load ; RV64-NEXT: fld fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 4, e64, m8, tu, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV64-NEXT: vfmv.s.f v8, fa5 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 2 ; RV64-NEXT: beqz a2, .LBB10_2 ; RV64-NEXT: .LBB10_6: # %cond.load1 ; RV64-NEXT: fld fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vfmv.s.f v10, fa5 +; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma ; RV64-NEXT: vslideup.vi v8, v10, 1 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 4 @@ -952,15 +968,16 @@ define <8 x double> @expandload_v8f64(ptr %base, <8 x double> %src0, <8 x i1> %m ; RV32-NEXT: ret ; RV32-NEXT: .LBB11_9: # %cond.load ; RV32-NEXT: fld fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 8, e64, m8, tu, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV32-NEXT: vfmv.s.f v8, fa5 ; RV32-NEXT: addi a0, a0, 8 ; RV32-NEXT: andi a2, a1, 2 ; RV32-NEXT: beqz a2, .LBB11_2 ; RV32-NEXT: .LBB11_10: # %cond.load1 ; RV32-NEXT: fld fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vfmv.s.f v12, fa5 +; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma ; RV32-NEXT: vslideup.vi v8, v12, 1 ; RV32-NEXT: addi a0, a0, 8 ; RV32-NEXT: andi a2, a1, 4 @@ -1043,15 +1060,16 @@ define <8 x double> @expandload_v8f64(ptr %base, <8 x double> %src0, <8 x i1> %m ; RV64-NEXT: ret ; RV64-NEXT: .LBB11_9: # %cond.load ; RV64-NEXT: fld fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 8, e64, m8, tu, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV64-NEXT: vfmv.s.f v8, fa5 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 2 ; RV64-NEXT: beqz a2, .LBB11_2 ; RV64-NEXT: .LBB11_10: # %cond.load1 ; RV64-NEXT: fld fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vfmv.s.f v12, fa5 
+; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma ; RV64-NEXT: vslideup.vi v8, v12, 1 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll index d6aca55fbde59d..5bf8b07efc1daf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll @@ -33,15 +33,16 @@ define <2 x i8> @expandload_v2i8(ptr %base, <2 x i8> %src0, <2 x i1> %mask) { ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_3: # %cond.load ; CHECK-NEXT: lbu a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e8, m1, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: andi a1, a1, 2 ; CHECK-NEXT: beqz a1, .LBB1_2 ; CHECK-NEXT: .LBB1_4: # %cond.load1 ; CHECK-NEXT: lbu a0, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: ret %res = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %base, <2 x i1> %mask, <2 x i8> %src0) @@ -69,15 +70,16 @@ define <4 x i8> @expandload_v4i8(ptr %base, <4 x i8> %src0, <4 x i1> %mask) { ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB2_5: # %cond.load ; CHECK-NEXT: lbu a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: andi a2, a1, 2 ; CHECK-NEXT: beqz a2, .LBB2_2 ; CHECK-NEXT: .LBB2_6: # %cond.load1 ; CHECK-NEXT: lbu a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, tu, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: andi a2, a1, 4 @@ -133,15 +135,16 @@ define <8 x i8> @expandload_v8i8(ptr %base, <8 x i8> %src0, <8 x i1> %mask) { ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB3_9: # %cond.load ; CHECK-NEXT: lbu a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 8, e8, m1, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: andi a2, a1, 2 ; CHECK-NEXT: beqz a2, .LBB3_2 ; CHECK-NEXT: .LBB3_10: # %cond.load1 ; CHECK-NEXT: lbu a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: andi a2, a1, 4 @@ -227,15 +230,16 @@ define <2 x i16> @expandload_v2i16(ptr %base, <2 x i16> %src0, <2 x i1> %mask) { ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB5_3: # %cond.load ; CHECK-NEXT: lh a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e16, m2, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: addi a0, a0, 2 ; CHECK-NEXT: andi a1, a1, 2 ; CHECK-NEXT: beqz a1, .LBB5_2 ; CHECK-NEXT: .LBB5_4: # %cond.load1 ; CHECK-NEXT: lh a0, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: ret %res = call <2 x i16> @llvm.masked.expandload.v2i16(ptr align 2 %base, <2 x i1> %mask, <2 x i16> %src0) @@ -263,15 +267,16 @@ define <4 x i16> 
@expandload_v4i16(ptr %base, <4 x i16> %src0, <4 x i1> %mask) { ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB6_5: # %cond.load ; CHECK-NEXT: lh a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 4, e16, m2, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: addi a0, a0, 2 ; CHECK-NEXT: andi a2, a1, 2 ; CHECK-NEXT: beqz a2, .LBB6_2 ; CHECK-NEXT: .LBB6_6: # %cond.load1 ; CHECK-NEXT: lh a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a2 +; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, a0, 2 ; CHECK-NEXT: andi a2, a1, 4 @@ -327,15 +332,16 @@ define <8 x i16> @expandload_v8i16(ptr %base, <8 x i16> %src0, <8 x i1> %mask) { ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB7_9: # %cond.load ; CHECK-NEXT: lh a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: addi a0, a0, 2 ; CHECK-NEXT: andi a2, a1, 2 ; CHECK-NEXT: beqz a2, .LBB7_2 ; CHECK-NEXT: .LBB7_10: # %cond.load1 ; CHECK-NEXT: lh a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a2 +; CHECK-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, a0, 2 ; CHECK-NEXT: andi a2, a1, 4 @@ -421,15 +427,16 @@ define <2 x i32> @expandload_v2i32(ptr %base, <2 x i32> %src0, <2 x i1> %mask) { ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB9_3: # %cond.load ; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e32, m4, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: addi a0, a0, 4 ; CHECK-NEXT: andi a1, a1, 2 ; CHECK-NEXT: beqz a1, .LBB9_2 ; CHECK-NEXT: .LBB9_4: # %cond.load1 ; CHECK-NEXT: lw a0, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: ret %res = call <2 x i32> @llvm.masked.expandload.v2i32(ptr align 4 %base, <2 x i1> %mask, <2 x i32> %src0) @@ -457,15 +464,16 @@ define <4 x i32> @expandload_v4i32(ptr %base, <4 x i32> %src0, <4 x i1> %mask) { ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB10_5: # %cond.load ; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: addi a0, a0, 4 ; CHECK-NEXT: andi a2, a1, 2 ; CHECK-NEXT: beqz a2, .LBB10_2 ; CHECK-NEXT: .LBB10_6: # %cond.load1 ; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv.s.x v9, a2 +; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, a0, 4 ; CHECK-NEXT: andi a2, a1, 4 @@ -521,15 +529,16 @@ define <8 x i32> @expandload_v8i32(ptr %base, <8 x i32> %src0, <8 x i1> %mask) { ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB11_9: # %cond.load ; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: addi a0, a0, 4 ; CHECK-NEXT: andi a2, a1, 2 ; CHECK-NEXT: beqz a2, .LBB11_2 ; CHECK-NEXT: .LBB11_10: # %cond.load1 ; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma 
; CHECK-NEXT: vmv.s.x v10, a2 +; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 1 ; CHECK-NEXT: addi a0, a0, 4 ; CHECK-NEXT: andi a2, a1, 4 @@ -664,15 +673,16 @@ define <2 x i64> @expandload_v2i64(ptr %base, <2 x i64> %src0, <2 x i1> %mask) { ; RV64-NEXT: ret ; RV64-NEXT: .LBB13_3: # %cond.load ; RV64-NEXT: ld a2, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e64, m8, tu, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV64-NEXT: vmv.s.x v8, a2 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a1, a1, 2 ; RV64-NEXT: beqz a1, .LBB13_2 ; RV64-NEXT: .LBB13_4: # %cond.load1 ; RV64-NEXT: ld a0, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vmv.s.x v9, a0 +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: ret %res = call <2 x i64> @llvm.masked.expandload.v2i64(ptr align 8 %base, <2 x i1> %mask, <2 x i64> %src0) @@ -758,15 +768,16 @@ define <4 x i64> @expandload_v4i64(ptr %base, <4 x i64> %src0, <4 x i1> %mask) { ; RV64-NEXT: ret ; RV64-NEXT: .LBB14_5: # %cond.load ; RV64-NEXT: ld a2, 0(a0) -; RV64-NEXT: vsetivli zero, 4, e64, m8, tu, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV64-NEXT: vmv.s.x v8, a2 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 2 ; RV64-NEXT: beqz a2, .LBB14_2 ; RV64-NEXT: .LBB14_6: # %cond.load1 ; RV64-NEXT: ld a2, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vmv.s.x v10, a2 +; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma ; RV64-NEXT: vslideup.vi v8, v10, 1 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 4 @@ -936,15 +947,16 @@ define <8 x i64> @expandload_v8i64(ptr %base, <8 x i64> %src0, <8 x i1> %mask) { ; RV64-NEXT: ret ; RV64-NEXT: .LBB15_9: # %cond.load ; RV64-NEXT: ld a2, 0(a0) -; RV64-NEXT: vsetivli zero, 8, e64, m8, tu, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV64-NEXT: vmv.s.x v8, a2 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 2 ; RV64-NEXT: beqz a2, .LBB15_2 ; RV64-NEXT: .LBB15_10: # %cond.load1 ; RV64-NEXT: ld a2, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vmv.s.x v12, a2 +; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma ; RV64-NEXT: vslideup.vi v8, v12, 1 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll index 9463267d0b0e62..2d3865ba4533d1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll @@ -26,7 +26,7 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-NEXT: vfmv.f.s fa5, v8 ; RV64-NEXT: fcvt.l.s a0, fa5 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v8, a0 ; RV64-NEXT: ret %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll index 9b0944e7e2f721..de47d8572017b7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll @@ -28,7 +28,7 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { ; RV64-i64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-i64-NEXT: vfmv.f.s fa5, v8 ; RV64-i64-NEXT: fcvt.l.s a0, fa5 -; 
RV64-i64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-i64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-i64-NEXT: vmv.s.x v8, a0 ; RV64-i64-NEXT: ret %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x) @@ -609,7 +609,6 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vfmv.f.s fa5, v8 ; RV32-NEXT: fcvt.w.d a0, fa5 -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV32-NEXT: vmv.s.x v8, a0 ; RV32-NEXT: ret ; @@ -618,7 +617,6 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { ; RV64-i32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-i32-NEXT: vfmv.f.s fa5, v8 ; RV64-i32-NEXT: fcvt.l.d a0, fa5 -; RV64-i32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-i32-NEXT: vmv.s.x v8, a0 ; RV64-i32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 69341981288b91..a4f9eeb59cd5bf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -83,14 +83,15 @@ define <2 x i8> @mgather_v2i8(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB1_3: # %cond.load ; RV64ZVE32F-NEXT: lbu a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB1_2 ; RV64ZVE32F-NEXT: .LBB1_4: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: ret %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru) @@ -130,15 +131,16 @@ define <2 x i16> @mgather_v2i8_sextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a3, .LBB2_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lbu a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB2_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB2_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB2_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma @@ -183,15 +185,16 @@ define <2 x i16> @mgather_v2i8_zextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a3, .LBB3_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lbu a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB3_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB3_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; 
RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB3_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma @@ -236,15 +239,16 @@ define <2 x i32> @mgather_v2i8_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a3, .LBB4_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lbu a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB4_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB4_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB4_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma @@ -289,15 +293,16 @@ define <2 x i32> @mgather_v2i8_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a3, .LBB5_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lbu a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB5_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB5_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB5_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma @@ -349,15 +354,16 @@ define <2 x i64> @mgather_v2i8_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a3, .LBB6_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lbu a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB6_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB6_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB6_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma @@ -410,15 +416,16 @@ define <2 x i64> @mgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a3, .LBB7_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lbu a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB7_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB7_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB7_4: # %else2 ; RV64ZVE32F-NEXT: 
vsetivli zero, 1, e8, mf4, ta, ma @@ -470,15 +477,16 @@ define <4 x i8> @mgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i8> %passthru) ; RV64ZVE32F-NEXT: .LBB8_5: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB8_2 ; RV64ZVE32F-NEXT: .LBB8_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB8_3 @@ -602,15 +610,16 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru) ; RV64ZVE32F-NEXT: .LBB11_9: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_2 ; RV64ZVE32F-NEXT: .LBB11_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_3 @@ -694,7 +703,7 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: .LBB12_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -874,14 +883,15 @@ define <2 x i16> @mgather_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthr ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB14_3: # %cond.load ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB14_2 ; RV64ZVE32F-NEXT: .LBB14_4: # %cond.load1 ; RV64ZVE32F-NEXT: lh a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: ret %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru) @@ -921,15 +931,16 @@ define <2 x i32> @mgather_v2i16_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a3, .LBB15_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB15_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB15_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lh a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli 
zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB15_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma @@ -974,15 +985,16 @@ define <2 x i32> @mgather_v2i16_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a3, .LBB16_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB16_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB16_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lh a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB16_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma @@ -1034,15 +1046,16 @@ define <2 x i64> @mgather_v2i16_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a3, .LBB17_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB17_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB17_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lh a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB17_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma @@ -1097,15 +1110,16 @@ define <2 x i64> @mgather_v2i16_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a3, .LBB18_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB18_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lh a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB18_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma @@ -1159,15 +1173,16 @@ define <4 x i16> @mgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i16> %passthr ; RV64ZVE32F-NEXT: .LBB19_5: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_2 ; RV64ZVE32F-NEXT: .LBB19_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, 
m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_3 @@ -1291,15 +1306,16 @@ define <8 x i16> @mgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i16> %passthr ; RV64ZVE32F-NEXT: .LBB22_9: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB22_2 ; RV64ZVE32F-NEXT: .LBB22_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB22_3 @@ -1386,7 +1402,7 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: .LBB23_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -1470,8 +1486,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB23_8 @@ -1537,7 +1554,7 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: .LBB24_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -1621,8 +1638,9 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB24_8 @@ -1688,7 +1706,7 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: .LBB25_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -1777,8 +1795,9 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; 
RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB25_8 @@ -1841,7 +1860,7 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: andi a2, a1, 1 ; RV64ZVE32F-NEXT: beqz a2, .LBB26_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -2032,14 +2051,15 @@ define <2 x i32> @mgather_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthr ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB28_3: # %cond.load ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB28_2 ; RV64ZVE32F-NEXT: .LBB28_4: # %cond.load1 ; RV64ZVE32F-NEXT: lw a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: ret %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru) @@ -2088,15 +2108,16 @@ define <2 x i64> @mgather_v2i32_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a3, .LBB29_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB29_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lw a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB29_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -2147,15 +2168,16 @@ define <2 x i64> @mgather_v2i32_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a3, .LBB30_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB30_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lw a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB30_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -2209,15 +2231,16 @@ define <4 x i32> @mgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i32> %passthr ; RV64ZVE32F-NEXT: .LBB31_5: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; 
RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB31_2
; RV64ZVE32F-NEXT: .LBB31_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB31_3
@@ -2340,15 +2363,16 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthr
; RV64ZVE32F-NEXT: .LBB34_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB34_2
; RV64ZVE32F-NEXT: .LBB34_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB34_3
@@ -2434,7 +2458,7 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB35_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -2518,8 +2542,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB35_8
@@ -2584,7 +2609,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB36_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -2668,8 +2693,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB36_8
@@ -2737,7 +2763,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB37_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -2826,8 +2852,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB37_8
@@ -2896,7 +2923,7 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB38_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -2980,8 +3007,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB38_8
@@ -3047,7 +3075,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB39_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -3131,8 +3159,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB39_8
@@ -3201,7 +3230,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a3
; RV64ZVE32F-NEXT: .LBB40_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
@@ -3290,8 +3319,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a3
+; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB40_8
@@ -3354,7 +3384,7 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB41_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -6915,14 +6945,15 @@ define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passt
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB59_3: # %cond.load
; RV64ZVE32F-NEXT: flh fa5, 0(a0)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB59_2
; RV64ZVE32F-NEXT: .LBB59_4: # %cond.load1
; RV64ZVE32F-NEXT: flh fa5, 0(a1)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: ret
%v = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x half> %passthru)
@@ -6966,15 +6997,16 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt
; RV64ZVE32F-NEXT: .LBB60_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB60_2
; RV64ZVE32F-NEXT: .LBB60_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB60_3
@@ -7098,15 +7130,16 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt
; RV64ZVE32F-NEXT: .LBB63_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB63_2
; RV64ZVE32F-NEXT: .LBB63_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB63_3
@@ -7193,7 +7226,7 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: .LBB64_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -7277,8 +7310,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB64_8
@@ -7344,7 +7378,7 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: .LBB65_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -7428,8 +7462,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB65_8
@@ -7495,7 +7530,7 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: .LBB66_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -7584,8 +7619,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB66_8
@@ -7648,7 +7684,7 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB67_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -7839,14 +7875,15 @@ define <2 x float> @mgather_v2f32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x float> %pas
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB69_3: # %cond.load
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB69_2
; RV64ZVE32F-NEXT: .LBB69_4: # %cond.load1
; RV64ZVE32F-NEXT: flw fa5, 0(a1)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: ret
%v = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x float> %passthru)
@@ -7890,15 +7927,16 @@ define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x float> %pas
; RV64ZVE32F-NEXT: .LBB70_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB70_2
; RV64ZVE32F-NEXT: .LBB70_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB70_3
@@ -8021,15 +8059,16 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas
; RV64ZVE32F-NEXT: .LBB73_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB73_2
; RV64ZVE32F-NEXT: .LBB73_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB73_3
@@ -8115,7 +8154,7 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB74_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -8199,8 +8238,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB74_8
@@ -8265,7 +8305,7 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB75_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -8349,8 +8389,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB75_8
@@ -8418,7 +8459,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB76_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -8507,8 +8548,9 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB76_8
@@ -8577,7 +8619,7 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB77_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -8661,8 +8703,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB77_8
@@ -8728,7 +8771,7 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB78_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -8812,8 +8855,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB78_8
@@ -8882,7 +8926,7 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB79_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
@@ -8971,8 +9015,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB79_8
@@ -9035,7 +9080,7 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB80_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -11858,7 +11903,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB97_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
-; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, mf2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
@@ -12113,18 +12158,16 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB98_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: li a3, 32
-; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB98_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index e6852c1b57510d..1dd74a7c9dd1b1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -2387,7 +2387,7 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
@@ -2525,7 +2525,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
@@ -2671,7 +2671,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a3)
@@ -7507,7 +7507,7 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
@@ -7645,7 +7645,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
@@ -7791,7 +7791,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a3)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
index 805b548b0cd185..eba3bd9d86dbb3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -94,7 +94,7 @@ define <2 x i16> @mgather_v2i16_align1(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %
; RV32-SLOW-NEXT: lbu a1, 0(a1)
; RV32-SLOW-NEXT: slli a2, a2, 8
; RV32-SLOW-NEXT: or a1, a2, a1
-; RV32-SLOW-NEXT: vsetivli zero, 2, e16, m2, tu, ma
+; RV32-SLOW-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV32-SLOW-NEXT: vmv.s.x v9, a1
; RV32-SLOW-NEXT: andi a0, a0, 2
; RV32-SLOW-NEXT: beqz a0, .LBB4_2
@@ -131,7 +131,7 @@ define <2 x i16> @mgather_v2i16_align1(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %
; RV64-SLOW-NEXT: lbu a1, 0(a1)
; RV64-SLOW-NEXT: slli a2, a2, 8
; RV64-SLOW-NEXT: or a1, a2, a1
-; RV64-SLOW-NEXT: vsetivli zero, 2, e16, m2, tu, ma
+; RV64-SLOW-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64-SLOW-NEXT: vmv.s.x v9, a1
; RV64-SLOW-NEXT: andi a0, a0, 2
; RV64-SLOW-NEXT: beqz a0, .LBB4_2
@@ -217,7 +217,7 @@ define <2 x i64> @mgather_v2i64_align4(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> %
; RV64-SLOW-NEXT: vmv1r.v v8, v9
; RV64-SLOW-NEXT: ret
; RV64-SLOW-NEXT: .LBB5_3: # %cond.load
-; RV64-SLOW-NEXT: vsetivli zero, 2, e64, m8, tu, ma
+; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m8, tu, ma
; RV64-SLOW-NEXT: vmv.x.s a1, v8
; RV64-SLOW-NEXT: lwu a2, 4(a1)
; RV64-SLOW-NEXT: lwu a1, 0(a1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
index 295d4c57a1be5c..4091d1711b5841 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
@@ -976,12 +976,12 @@ body: |
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 1, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: PseudoBR %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 1, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: dead %x:gpr = PseudoVMV_X_S undef $noreg, 6 /* e64 */, implicit $vtype
; CHECK-NEXT: PseudoBR %bb.1
bb.0: